All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
@ 2020-10-30 11:32 Tianci Yin
  2020-10-30 11:48 ` Zhang, Hawking
  0 siblings, 1 reply; 12+ messages in thread
From: Tianci Yin @ 2020-10-30 11:32 UTC (permalink / raw)
  To: amd-gfx
  Cc: Long Gang, Guchun Chen, Feifei Xu, Tianci Yin, Tuikov Luben,
	Deucher Alexander, Flora Cui, Hawking Zhang

From: "Tianci.Yin" <tianci.yin@amd.com>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting
state to avoid calling to drm_kms_helper_hotplug_event()
in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
 				   u32 pcie_index, u32 pcie_data,
 				   u32 reg_addr, u64 reg_data);
 
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type, struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 
 int emu_soc_asic_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type, struct pci_dev *pdev)
 {
 	switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 	case CHIP_RAVEN:
+		return amdgpu_dc != 0;
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
+		if (nv_is_headless_sku(pdev))
+			return false;
+		else
+			return amdgpu_dc != 0;
 	case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
 		return false;
 
-	return amdgpu_device_asic_has_dc_support(adev->asic_type);
+	return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 	 */
 	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
 	    amdgpu_bo_support_uswc(bo_flags) &&
-	    amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+	    amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
 		switch (adev->asic_type) {
 		case CHIP_CARRIZO:
 		case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	bool supports_atomic = false;
 
 	if (!amdgpu_virtual_display &&
-	    amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+	    amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
 		supports_atomic = true;
 
 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
 	adev->virt.ops = &xgpu_nv_virt_ops;
 }
 
-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
 	if ((pdev->device == 0x731E &&
 	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h
index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);
 void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);
 int navi10_reg_base_init(struct amdgpu_device *adev);
 int navi14_reg_base_init(struct amdgpu_device *adev);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-10-30 11:32 [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU Tianci Yin
@ 2020-10-30 11:48 ` Zhang, Hawking
  2020-10-30 12:05   ` Yin, Tianci (Rico)
  0 siblings, 1 reply; 12+ messages in thread
From: Zhang, Hawking @ 2020-10-30 11:48 UTC (permalink / raw)
  To: Yin, Tianci (Rico), amd-gfx
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, Yin, Tianci (Rico),
	Tuikov, Luben, Deucher, Alexander, Cui, Flora

[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward. 

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com> 
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
 				   u32 pcie_index, u32 pcie_data,
 				   u32 reg_addr, u64 reg_data);
 
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type, 
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 
 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type, 
+struct pci_dev *pdev)
 {
 	switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 	case CHIP_RAVEN:
+		return amdgpu_dc != 0;
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
+		if (nv_is_headless_sku(pdev))
+			return false;
+		else
+			return amdgpu_dc != 0;
 	case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
 		return false;
 
-	return amdgpu_device_asic_has_dc_support(adev->asic_type);
+	return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 	 */
 	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
 	    amdgpu_bo_support_uswc(bo_flags) &&
-	    amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+	    amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
 		switch (adev->asic_type) {
 		case CHIP_CARRIZO:
 		case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	bool supports_atomic = false;
 
 	if (!amdgpu_virtual_display &&
-	    amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+	    amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
 		supports_atomic = true;
 
 	if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
 	adev->virt.ops = &xgpu_nv_virt_ops;
 }
 
-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
 	if ((pdev->device == 0x731E &&
 	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
 		    u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-10-30 11:48 ` Zhang, Hawking
@ 2020-10-30 12:05   ` Yin, Tianci (Rico)
  2020-10-30 12:48     ` Zhang, Hawking
  2020-10-30 13:00     ` Zhang, Hawking
  0 siblings, 2 replies; 12+ messages in thread
From: Yin, Tianci (Rico) @ 2020-10-30 12:05 UTC (permalink / raw)
  To: Zhang, Hawking, amd-gfx
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, Tuikov, Luben, Deucher,
	Alexander, Cui, Flora


[-- Attachment #1.1: Type: text/plain, Size: 7729 bytes --]

[AMD Public Use]

Hi Hawking,

amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
at that point, adev has not been allocated yet.

My change can make it to right code path.
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
...
if (!amdgpu_device_has_dc_support(adev))
drm_helper_hpd_irq_event(dev);               //right path for headless SKU
else
drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
...
}

Thanks!
Rico

________________________________
From: Zhang, Hawking <Hawking.Zhang@amd.com>
Sent: Friday, October 30, 2020 19:48
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com>
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                    u32 pcie_index, u32 pcie_data,
                                    u32 reg_addr, u64 reg_data);

-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev)
 {
         switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
         case CHIP_RAVEN:
+               return amdgpu_dc != 0;
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+               if (nv_is_headless_sku(pdev))
+                       return false;
+               else
+                       return amdgpu_dc != 0;
         case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
                 return false;

-       return amdgpu_device_asic_has_dc_support(adev->asic_type);
+       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
          */
         if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
             amdgpu_bo_support_uswc(bo_flags) &&
-           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
                 switch (adev->asic_type) {
                 case CHIP_CARRIZO:
                 case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         bool supports_atomic = false;

         if (!amdgpu_virtual_display &&
-           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
                 supports_atomic = true;

         if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
 }

-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
         if ((pdev->device == 0x731E &&
             (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
                     u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1

[-- Attachment #1.2: Type: text/html, Size: 13724 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-10-30 12:05   ` Yin, Tianci (Rico)
@ 2020-10-30 12:48     ` Zhang, Hawking
  2020-10-30 13:00     ` Zhang, Hawking
  1 sibling, 0 replies; 12+ messages in thread
From: Zhang, Hawking @ 2020-10-30 12:48 UTC (permalink / raw)
  To: Yin, Tianci (Rico), amd-gfx
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, Tuikov, Luben, Deucher,
	Alexander, Cui, Flora


[-- Attachment #1.1: Type: text/plain, Size: 9142 bytes --]

[AMD Public Use]


[AMD Public Use]

I see. Thanks for the clarifying, Tianci.

In such case, how about we add a new flag AMD_IS_HEADLESS to amd_chip_flags, so we can identify headless asic at the beginning when we add a new item to pciidlist.

Regards,
Hawking
From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Friday, October 30, 2020 20:05
To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]

Hi Hawking,

amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
at that point, adev has not been allocated yet.

My change can make it to right code path.
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
...
if (!amdgpu_device_has_dc_support(adev))
drm_helper_hpd_irq_event(dev);               //right path for headless SKU
else
drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
...
}

Thanks!
Rico

________________________________
From: Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>
Sent: Friday, October 30, 2020 19:48
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                    u32 pcie_index, u32 pcie_data,
                                    u32 reg_addr, u64 reg_data);

-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev)
 {
         switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
         case CHIP_RAVEN:
+               return amdgpu_dc != 0;
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+               if (nv_is_headless_sku(pdev))
+                       return false;
+               else
+                       return amdgpu_dc != 0;
         case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
                 return false;

-       return amdgpu_device_asic_has_dc_support(adev->asic_type);
+       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
          */
         if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
             amdgpu_bo_support_uswc(bo_flags) &&
-           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
                 switch (adev->asic_type) {
                 case CHIP_CARRIZO:
                 case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         bool supports_atomic = false;

         if (!amdgpu_virtual_display &&
-           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
                 supports_atomic = true;

         if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
 }

-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
         if ((pdev->device == 0x731E &&
             (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
                     u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1

[-- Attachment #1.2: Type: text/html, Size: 18272 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-10-30 12:05   ` Yin, Tianci (Rico)
  2020-10-30 12:48     ` Zhang, Hawking
@ 2020-10-30 13:00     ` Zhang, Hawking
  2020-10-30 14:18       ` Deucher, Alexander
  1 sibling, 1 reply; 12+ messages in thread
From: Zhang, Hawking @ 2020-10-30 13:00 UTC (permalink / raw)
  To: Yin, Tianci (Rico), amd-gfx
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, Tuikov, Luben, Deucher,
	Alexander, Cui, Flora


[-- Attachment #1.1: Type: text/plain, Size: 9211 bytes --]

[AMD Public Use]


[AMD Public Use]

I see, thanks for the clarifying. The patch is

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>

I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that's just complicated the case, I agree with your current approach.

Regards,
Hawking
From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Friday, October 30, 2020 20:05
To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]

Hi Hawking,

amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
at that point, adev has not been allocated yet.

My change can make it to right code path.
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
...
if (!amdgpu_device_has_dc_support(adev))
drm_helper_hpd_irq_event(dev);               //right path for headless SKU
else
drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
...
}

Thanks!
Rico

________________________________
From: Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>
Sent: Friday, October 30, 2020 19:48
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                    u32 pcie_index, u32 pcie_data,
                                    u32 reg_addr, u64 reg_data);

-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev)
 {
         switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
         case CHIP_RAVEN:
+               return amdgpu_dc != 0;
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+               if (nv_is_headless_sku(pdev))
+                       return false;
+               else
+                       return amdgpu_dc != 0;
         case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
                 return false;

-       return amdgpu_device_asic_has_dc_support(adev->asic_type);
+       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
          */
         if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
             amdgpu_bo_support_uswc(bo_flags) &&
-           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
                 switch (adev->asic_type) {
                 case CHIP_CARRIZO:
                 case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         bool supports_atomic = false;

         if (!amdgpu_virtual_display &&
-           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
                 supports_atomic = true;

         if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
 }

-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
         if ((pdev->device == 0x731E &&
             (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
                     u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1

[-- Attachment #1.2: Type: text/html, Size: 18432 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-10-30 13:00     ` Zhang, Hawking
@ 2020-10-30 14:18       ` Deucher, Alexander
  2020-11-02  3:46         ` Yin, Tianci (Rico)
  0 siblings, 1 reply; 12+ messages in thread
From: Deucher, Alexander @ 2020-10-30 14:18 UTC (permalink / raw)
  To: Zhang, Hawking, Yin, Tianci (Rico), amd-gfx
  Cc: Cui, Flora, Xu, Feifei, Tuikov, Luben, Chen, Guchun, Long, Gang


[-- Attachment #1.1: Type: text/plain, Size: 10263 bytes --]

[AMD Public Use]

Maybe it would be easier to check if the DCE IP exists? E.g.,
if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
(!amdgpu_device_has_dc_support(adev))
...

Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.

Alex

________________________________
From: Zhang, Hawking <Hawking.Zhang@amd.com>
Sent: Friday, October 30, 2020 9:00 AM
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]


[AMD Public Use]



I see, thanks for the clarifying. The patch is



Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>



I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.



Regards,
Hawking

From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Friday, October 30, 2020 20:05
To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU



[AMD Public Use]



Hi Hawking,



amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),

at that point, adev has not been allocated yet.



My change can make it to right code path.

int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{

...

if (!amdgpu_device_has_dc_support(adev))

drm_helper_hpd_irq_event(dev);               //right path for headless SKU

else

drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU

...

}



Thanks!

Rico



________________________________

From: Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>
Sent: Friday, October 30, 2020 19:48
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU



[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                    u32 pcie_index, u32 pcie_data,
                                    u32 reg_addr, u64 reg_data);

-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev)
 {
         switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
         case CHIP_RAVEN:
+               return amdgpu_dc != 0;
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+               if (nv_is_headless_sku(pdev))
+                       return false;
+               else
+                       return amdgpu_dc != 0;
         case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
                 return false;

-       return amdgpu_device_asic_has_dc_support(adev->asic_type);
+       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
          */
         if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
             amdgpu_bo_support_uswc(bo_flags) &&
-           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
                 switch (adev->asic_type) {
                 case CHIP_CARRIZO:
                 case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         bool supports_atomic = false;

         if (!amdgpu_virtual_display &&
-           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
                 supports_atomic = true;

         if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
 }

-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
         if ((pdev->device == 0x731E &&
             (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
                     u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1

[-- Attachment #1.2: Type: text/html, Size: 19628 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-10-30 14:18       ` Deucher, Alexander
@ 2020-11-02  3:46         ` Yin, Tianci (Rico)
  2020-11-02 15:38           ` Deucher, Alexander
  0 siblings, 1 reply; 12+ messages in thread
From: Yin, Tianci (Rico) @ 2020-11-02  3:46 UTC (permalink / raw)
  To: Deucher, Alexander, Zhang, Hawking, amd-gfx
  Cc: Cui, Flora, Xu, Feifei, Tuikov, Luben, Chen, Guchun, Long, Gang


[-- Attachment #1.1: Type: text/plain, Size: 12210 bytes --]

[AMD Public Use]

Thanks very much Hawking!


Hi Alex,

The amdgpu_device_ip_get_ip_block() depends on the adev->ip_blocks that initialized by nv_set_ip_blocks().
In nv_set_ip_blocks(), whether add dm_ip_block depends on amdgpu_device_has_dc_support().
And amdgpu_device_has_dc_support() depends on amdgpu_device_asic_has_dc_support(),
So amdgpu_device_asic_has_dc_support() can't conditional on amdgpu_device_ip_get_ip_block(), it makes a dependency loop.

I have just checked the atombios object table in the headless VBIOS, and find DCN and VCN are still there.
[  174.815714] [drm:amdgpu_discovery_reg_base_init [amdgpu]] DMU(271) #0 v2.0.2:
[  174.815771] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00000012
[  174.815829] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000000c0
[  174.815887] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000034c0
[  174.815945] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00009000
[  174.816002] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403c00
[  174.821854] [drm:amdgpu_discovery_reg_base_init [amdgpu]] UVD(12) #0 v2.0.0:
[  174.821908] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007800
[  174.821962] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007e00
[  174.822017] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403000
So I think DAL driver can't tell it's a normal ASIC or headless ASIC.

Thanks!
Rico

________________________________
From: Deucher, Alexander <Alexander.Deucher@amd.com>
Sent: Friday, October 30, 2020 22:18
To: Zhang, Hawking <Hawking.Zhang@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]

Maybe it would be easier to check if the DCE IP exists? E.g.,
if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
(!amdgpu_device_has_dc_support(adev))
...

Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.

Alex

________________________________
From: Zhang, Hawking <Hawking.Zhang@amd.com>
Sent: Friday, October 30, 2020 9:00 AM
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]


[AMD Public Use]



I see, thanks for the clarifying. The patch is



Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>



I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.



Regards,
Hawking

From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Friday, October 30, 2020 20:05
To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU



[AMD Public Use]



Hi Hawking,



amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),

at that point, adev has not been allocated yet.



My change can make it to right code path.

int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{

...

if (!amdgpu_device_has_dc_support(adev))

drm_helper_hpd_irq_event(dev);               //right path for headless SKU

else

drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU

...

}



Thanks!

Rico



________________________________

From: Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>
Sent: Friday, October 30, 2020 19:48
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU



[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                    u32 pcie_index, u32 pcie_data,
                                    u32 reg_addr, u64 reg_data);

-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev)
 {
         switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
         case CHIP_RAVEN:
+               return amdgpu_dc != 0;
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+               if (nv_is_headless_sku(pdev))
+                       return false;
+               else
+                       return amdgpu_dc != 0;
         case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
                 return false;

-       return amdgpu_device_asic_has_dc_support(adev->asic_type);
+       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
          */
         if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
             amdgpu_bo_support_uswc(bo_flags) &&
-           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
                 switch (adev->asic_type) {
                 case CHIP_CARRIZO:
                 case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         bool supports_atomic = false;

         if (!amdgpu_virtual_display &&
-           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
                 supports_atomic = true;

         if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
 }

-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
         if ((pdev->device == 0x731E &&
             (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
                     u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1

[-- Attachment #1.2: Type: text/html, Size: 24549 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-11-02  3:46         ` Yin, Tianci (Rico)
@ 2020-11-02 15:38           ` Deucher, Alexander
  2020-11-02 17:49             ` Alex Deucher
  0 siblings, 1 reply; 12+ messages in thread
From: Deucher, Alexander @ 2020-11-02 15:38 UTC (permalink / raw)
  To: Yin, Tianci (Rico), Zhang, Hawking, amd-gfx
  Cc: Cui, Flora, Xu, Feifei, Tuikov, Luben, Chen, Guchun, Long, Gang


[-- Attachment #1.1: Type: text/plain, Size: 12895 bytes --]

[AMD Public Use]

That's the IP discovery table.  The Object header is part of atombios.  Do you have an vbios image from that system?

Alex


________________________________
From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Sunday, November 1, 2020 10:46 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]

Thanks very much Hawking!


Hi Alex,

The amdgpu_device_ip_get_ip_block() depends on the adev->ip_blocks that initialized by nv_set_ip_blocks().
In nv_set_ip_blocks(), whether add dm_ip_block depends on amdgpu_device_has_dc_support().
And amdgpu_device_has_dc_support() depends on amdgpu_device_asic_has_dc_support(),
So amdgpu_device_asic_has_dc_support() can't conditional on amdgpu_device_ip_get_ip_block(), it makes a dependency loop.

I have just checked the atombios object table in the headless VBIOS, and find DCN and VCN are still there.
[  174.815714] [drm:amdgpu_discovery_reg_base_init [amdgpu]] DMU(271) #0 v2.0.2:
[  174.815771] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00000012
[  174.815829] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000000c0
[  174.815887] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000034c0
[  174.815945] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00009000
[  174.816002] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403c00
[  174.821854] [drm:amdgpu_discovery_reg_base_init [amdgpu]] UVD(12) #0 v2.0.0:
[  174.821908] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007800
[  174.821962] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007e00
[  174.822017] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403000
So I think DAL driver can't tell it's a normal ASIC or headless ASIC.

Thanks!
Rico

________________________________
From: Deucher, Alexander <Alexander.Deucher@amd.com>
Sent: Friday, October 30, 2020 22:18
To: Zhang, Hawking <Hawking.Zhang@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]

Maybe it would be easier to check if the DCE IP exists? E.g.,
if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
(!amdgpu_device_has_dc_support(adev))
...

Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.

Alex

________________________________
From: Zhang, Hawking <Hawking.Zhang@amd.com>
Sent: Friday, October 30, 2020 9:00 AM
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Public Use]


[AMD Public Use]



I see, thanks for the clarifying. The patch is



Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>



I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.



Regards,
Hawking

From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Friday, October 30, 2020 20:05
To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU



[AMD Public Use]



Hi Hawking,



amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),

at that point, adev has not been allocated yet.



My change can make it to right code path.

int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{

...

if (!amdgpu_device_has_dc_support(adev))

drm_helper_hpd_irq_event(dev);               //right path for headless SKU

else

drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU

...

}



Thanks!

Rico



________________________________

From: Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>
Sent: Friday, October 30, 2020 19:48
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org> <amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU



[AMD Public Use]

I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.

Regards,
Hawking
-----Original Message-----
From: Tianci Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
Sent: Friday, October 30, 2020 19:32
To: amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
Cc: Tuikov, Luben <Luben.Tuikov@amd.com<mailto:Luben.Tuikov@amd.com>>; Deucher, Alexander <Alexander.Deucher@amd.com<mailto:Alexander.Deucher@amd.com>>; Zhang, Hawking <Hawking.Zhang@amd.com<mailto:Hawking.Zhang@amd.com>>; Chen, Guchun <Guchun.Chen@amd.com<mailto:Guchun.Chen@amd.com>>; Cui, Flora <Flora.Cui@amd.com<mailto:Flora.Cui@amd.com>>; Xu, Feifei <Feifei.Xu@amd.com<mailto:Feifei.Xu@amd.com>>; Long, Gang <Gang.Long@amd.com<mailto:Gang.Long@amd.com>>; Yin, Tianci (Rico) <Tianci.Yin@amd.com<mailto:Tianci.Yin@amd.com>>
Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

From: "Tianci.Yin" <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>

The crash caused by the NULL pointer of
adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
but this function should not be called on headless SKU.

Fix the mismatch between the return value of
amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().

Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
Signed-off-by: Tianci.Yin <tianci.yin@amd.com<mailto:tianci.yin@amd.com>>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
 6 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ba65d4f2ab67..f0183271456f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
                                    u32 pcie_index, u32 pcie_data,
                                    u32 reg_addr, u64 reg_data);

-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

 int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1fe850e0a94d..323ed69032a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
  *
  * @asic_type: AMD asic type
+ * @pdev: pointer to pci_dev instance
  *
  * Check if there is DC (new modesetting infrastructre) support for an asic.
  * returns true if DC has support, false if not.
  */
-bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
+bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
+struct pci_dev *pdev)
 {
         switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
@@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
         case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN)
         case CHIP_RAVEN:
+               return amdgpu_dc != 0;
         case CHIP_NAVI10:
         case CHIP_NAVI14:
         case CHIP_NAVI12:
+               if (nv_is_headless_sku(pdev))
+                       return false;
+               else
+                       return amdgpu_dc != 0;
         case CHIP_RENOIR:
 #endif
 #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
@@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
                 return false;

-       return amdgpu_device_asic_has_dc_support(adev->asic_type);
+       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
 }


diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 9e92d2a070ac..97014458d7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
          */
         if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
             amdgpu_bo_support_uswc(bo_flags) &&
-           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
+           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
                 switch (adev->asic_type) {
                 case CHIP_CARRIZO:
                 case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 4b78ecfd35f7..b23110241267 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
         bool supports_atomic = false;

         if (!amdgpu_virtual_display &&
-           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
                 supports_atomic = true;

         if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
 }

-static bool nv_is_headless_sku(struct pci_dev *pdev)
+bool nv_is_headless_sku(struct pci_dev *pdev)
 {
         if ((pdev->device == 0x731E &&
             (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/nv.h
@@ -29,6 +29,7 @@
 void nv_grbm_select(struct amdgpu_device *adev,
                     u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
+bool nv_is_headless_sku(struct pci_dev *pdev);
 int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
--
2.17.1

[-- Attachment #1.2: Type: text/html, Size: 30940 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-11-02 15:38           ` Deucher, Alexander
@ 2020-11-02 17:49             ` Alex Deucher
  2020-11-03  2:51               ` Yin, Tianci (Rico)
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Deucher @ 2020-11-02 17:49 UTC (permalink / raw)
  To: Deucher, Alexander
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, Yin, Tianci (Rico),
	amd-gfx, Tuikov, Luben, Cui, Flora, Zhang, Hawking

Does everything work as expected if we don't skip DCN in nv_set_ip_blocks?

Alex

On Mon, Nov 2, 2020 at 10:38 AM Deucher, Alexander
<Alexander.Deucher@amd.com> wrote:
>
> [AMD Public Use]
>
>
> That's the IP discovery table.  The Object header is part of atombios.  Do you have an vbios image from that system?
>
> Alex
>
>
> ________________________________
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Sunday, November 1, 2020 10:46 PM
> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Thanks very much Hawking!
>
>
> Hi Alex,
>
> The amdgpu_device_ip_get_ip_block() depends on the adev->ip_blocks that initialized by nv_set_ip_blocks().
> In nv_set_ip_blocks(), whether add dm_ip_block depends on amdgpu_device_has_dc_support().
> And amdgpu_device_has_dc_support() depends on amdgpu_device_asic_has_dc_support(),
> So amdgpu_device_asic_has_dc_support() can't conditional on amdgpu_device_ip_get_ip_block(), it makes a dependency loop.
>
> I have just checked the atombios object table in the headless VBIOS, and find DCN and VCN are still there.
> [  174.815714] [drm:amdgpu_discovery_reg_base_init [amdgpu]] DMU(271) #0 v2.0.2:
> [  174.815771] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00000012
> [  174.815829] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000000c0
> [  174.815887] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000034c0
> [  174.815945] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00009000
> [  174.816002] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403c00
> [  174.821854] [drm:amdgpu_discovery_reg_base_init [amdgpu]] UVD(12) #0 v2.0.0:
> [  174.821908] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007800
> [  174.821962] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007e00
> [  174.822017] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403000
> So I think DAL driver can't tell it's a normal ASIC or headless ASIC.
>
> Thanks!
> Rico
>
> ________________________________
> From: Deucher, Alexander <Alexander.Deucher@amd.com>
> Sent: Friday, October 30, 2020 22:18
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Maybe it would be easier to check if the DCE IP exists? E.g.,
> if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
> (!amdgpu_device_has_dc_support(adev))
> ...
>
> Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.
>
> Alex
>
> ________________________________
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 9:00 AM
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> [AMD Public Use]
>
>
>
> I see, thanks for the clarifying. The patch is
>
>
>
> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
>
>
>
> I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.
>
>
>
> Regards,
> Hawking
>
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Friday, October 30, 2020 20:05
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
>
>
> Hi Hawking,
>
>
>
> amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
>
> at that point, adev has not been allocated yet.
>
>
>
> My change can make it to right code path.
>
> int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
> {
>
> ...
>
> if (!amdgpu_device_has_dc_support(adev))
>
> drm_helper_hpd_irq_event(dev);               //right path for headless SKU
>
> else
>
> drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
>
> ...
>
> }
>
>
>
> Thanks!
>
> Rico
>
>
>
> ________________________________
>
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 19:48
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
> I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.
>
> Regards,
> Hawking
> -----Original Message-----
> From: Tianci Yin <tianci.yin@amd.com>
> Sent: Friday, October 30, 2020 19:32
> To: amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
> From: "Tianci.Yin" <tianci.yin@amd.com>
>
> The crash caused by the NULL pointer of
> adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
> but this function should not be called on headless SKU.
>
> Fix the mismatch between the return value of
> amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().
>
> Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
> Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
>  6 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ba65d4f2ab67..f0183271456f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
>                                     u32 pcie_index, u32 pcie_data,
>                                     u32 reg_addr, u64 reg_data);
>
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev);
>  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
>
>  int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1fe850e0a94d..323ed69032a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
>   * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
>   *
>   * @asic_type: AMD asic type
> + * @pdev: pointer to pci_dev instance
>   *
>   * Check if there is DC (new modesetting infrastructre) support for an asic.
>   * returns true if DC has support, false if not.
>   */
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev)
>  {
>          switch (asic_type) {
>  #if defined(CONFIG_DRM_AMD_DC)
> @@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
>          case CHIP_VEGA20:
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
>          case CHIP_RAVEN:
> +               return amdgpu_dc != 0;
>          case CHIP_NAVI10:
>          case CHIP_NAVI14:
>          case CHIP_NAVI12:
> +               if (nv_is_headless_sku(pdev))
> +                       return false;
> +               else
> +                       return amdgpu_dc != 0;
>          case CHIP_RENOIR:
>  #endif
>  #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
> @@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
>          if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
>                  return false;
>
> -       return amdgpu_device_asic_has_dc_support(adev->asic_type);
> +       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
>  }
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index 9e92d2a070ac..97014458d7de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
>           */
>          if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
>              amdgpu_bo_support_uswc(bo_flags) &&
> -           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
> +           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
>                  switch (adev->asic_type) {
>                  case CHIP_CARRIZO:
>                  case CHIP_STONEY:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4b78ecfd35f7..b23110241267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>          bool supports_atomic = false;
>
>          if (!amdgpu_virtual_display &&
> -           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
> +           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
>                  supports_atomic = true;
>
>          if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
>          adev->virt.ops = &xgpu_nv_virt_ops;
>  }
>
> -static bool nv_is_headless_sku(struct pci_dev *pdev)
> +bool nv_is_headless_sku(struct pci_dev *pdev)
>  {
>          if ((pdev->device == 0x731E &&
>              (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.h
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.h
> @@ -29,6 +29,7 @@
>  void nv_grbm_select(struct amdgpu_device *adev,
>                      u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
> +bool nv_is_headless_sku(struct pci_dev *pdev);
>  int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-11-02 17:49             ` Alex Deucher
@ 2020-11-03  2:51               ` Yin, Tianci (Rico)
  2020-11-03 14:19                 ` Deucher, Alexander
  0 siblings, 1 reply; 12+ messages in thread
From: Yin, Tianci (Rico) @ 2020-11-03  2:51 UTC (permalink / raw)
  To: Alex Deucher, Deucher, Alexander
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, amd-gfx, Tuikov, Luben,
	Cui, Flora, Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 14258 bytes --]

[AMD Official Use Only - Internal Distribution Only]

Hi Alex,

If we don't skip DCN in nv_set_ip_blocks, I find below error,
[  103.949926] [drm:amdgpu_dm_init.cold [amdgpu]] *ERROR* amdgpu: failed to initialize hdcp_workqueue.

Thanks!
Rico
________________________________
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Tuesday, November 3, 2020 1:49
To: Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

Does everything work as expected if we don't skip DCN in nv_set_ip_blocks?

Alex

On Mon, Nov 2, 2020 at 10:38 AM Deucher, Alexander
<Alexander.Deucher@amd.com> wrote:
>
> [AMD Public Use]
>
>
> That's the IP discovery table.  The Object header is part of atombios.  Do you have an vbios image from that system?
>
> Alex
>
>
> ________________________________
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Sunday, November 1, 2020 10:46 PM
> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Thanks very much Hawking!
>
>
> Hi Alex,
>
> The amdgpu_device_ip_get_ip_block() depends on the adev->ip_blocks that initialized by nv_set_ip_blocks().
> In nv_set_ip_blocks(), whether add dm_ip_block depends on amdgpu_device_has_dc_support().
> And amdgpu_device_has_dc_support() depends on amdgpu_device_asic_has_dc_support(),
> So amdgpu_device_asic_has_dc_support() can't conditional on amdgpu_device_ip_get_ip_block(), it makes a dependency loop.
>
> I have just checked the atombios object table in the headless VBIOS, and find DCN and VCN are still there.
> [  174.815714] [drm:amdgpu_discovery_reg_base_init [amdgpu]] DMU(271) #0 v2.0.2:
> [  174.815771] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00000012
> [  174.815829] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000000c0
> [  174.815887] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000034c0
> [  174.815945] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00009000
> [  174.816002] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403c00
> [  174.821854] [drm:amdgpu_discovery_reg_base_init [amdgpu]] UVD(12) #0 v2.0.0:
> [  174.821908] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007800
> [  174.821962] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007e00
> [  174.822017] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403000
> So I think DAL driver can't tell it's a normal ASIC or headless ASIC.
>
> Thanks!
> Rico
>
> ________________________________
> From: Deucher, Alexander <Alexander.Deucher@amd.com>
> Sent: Friday, October 30, 2020 22:18
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Maybe it would be easier to check if the DCE IP exists? E.g.,
> if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
> (!amdgpu_device_has_dc_support(adev))
> ...
>
> Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.
>
> Alex
>
> ________________________________
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 9:00 AM
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> [AMD Public Use]
>
>
>
> I see, thanks for the clarifying. The patch is
>
>
>
> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
>
>
>
> I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.
>
>
>
> Regards,
> Hawking
>
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Friday, October 30, 2020 20:05
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
>
>
> Hi Hawking,
>
>
>
> amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
>
> at that point, adev has not been allocated yet.
>
>
>
> My change can make it to right code path.
>
> int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
> {
>
> ...
>
> if (!amdgpu_device_has_dc_support(adev))
>
> drm_helper_hpd_irq_event(dev);               //right path for headless SKU
>
> else
>
> drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
>
> ...
>
> }
>
>
>
> Thanks!
>
> Rico
>
>
>
> ________________________________
>
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 19:48
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
> I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.
>
> Regards,
> Hawking
> -----Original Message-----
> From: Tianci Yin <tianci.yin@amd.com>
> Sent: Friday, October 30, 2020 19:32
> To: amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
> From: "Tianci.Yin" <tianci.yin@amd.com>
>
> The crash caused by the NULL pointer of
> adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
> but this function should not be called on headless SKU.
>
> Fix the mismatch between the return value of
> amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().
>
> Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
> Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
>  6 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ba65d4f2ab67..f0183271456f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
>                                     u32 pcie_index, u32 pcie_data,
>                                     u32 reg_addr, u64 reg_data);
>
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev);
>  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
>
>  int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1fe850e0a94d..323ed69032a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
>   * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
>   *
>   * @asic_type: AMD asic type
> + * @pdev: pointer to pci_dev instance
>   *
>   * Check if there is DC (new modesetting infrastructre) support for an asic.
>   * returns true if DC has support, false if not.
>   */
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev)
>  {
>          switch (asic_type) {
>  #if defined(CONFIG_DRM_AMD_DC)
> @@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
>          case CHIP_VEGA20:
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
>          case CHIP_RAVEN:
> +               return amdgpu_dc != 0;
>          case CHIP_NAVI10:
>          case CHIP_NAVI14:
>          case CHIP_NAVI12:
> +               if (nv_is_headless_sku(pdev))
> +                       return false;
> +               else
> +                       return amdgpu_dc != 0;
>          case CHIP_RENOIR:
>  #endif
>  #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
> @@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
>          if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
>                  return false;
>
> -       return amdgpu_device_asic_has_dc_support(adev->asic_type);
> +       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
>  }
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index 9e92d2a070ac..97014458d7de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
>           */
>          if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
>              amdgpu_bo_support_uswc(bo_flags) &&
> -           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
> +           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
>                  switch (adev->asic_type) {
>                  case CHIP_CARRIZO:
>                  case CHIP_STONEY:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4b78ecfd35f7..b23110241267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>          bool supports_atomic = false;
>
>          if (!amdgpu_virtual_display &&
> -           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
> +           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
>                  supports_atomic = true;
>
>          if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
>          adev->virt.ops = &xgpu_nv_virt_ops;
>  }
>
> -static bool nv_is_headless_sku(struct pci_dev *pdev)
> +bool nv_is_headless_sku(struct pci_dev *pdev)
>  {
>          if ((pdev->device == 0x731E &&
>              (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.h
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.h
> @@ -29,6 +29,7 @@
>  void nv_grbm_select(struct amdgpu_device *adev,
>                      u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
> +bool nv_is_headless_sku(struct pci_dev *pdev);
>  int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7CTianci.Yin%40amd.com%7C7bb51412bcd34843e77508d87f57a76b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637399361731262326%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=6HI1w2buiO4YlD%2BWFrV9suA7gmlVRX2IlJmYiAkukcM%3D&amp;reserved=0

[-- Attachment #1.2: Type: text/html, Size: 21942 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-11-03  2:51               ` Yin, Tianci (Rico)
@ 2020-11-03 14:19                 ` Deucher, Alexander
  2020-11-04  9:54                   ` Yin, Tianci (Rico)
  0 siblings, 1 reply; 12+ messages in thread
From: Deucher, Alexander @ 2020-11-03 14:19 UTC (permalink / raw)
  To: Yin, Tianci (Rico), Alex Deucher
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, amd-gfx, Tuikov, Luben,
	Cui, Flora, Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 14953 bytes --]

That should be harmless and we can fix that up.  Does everything work as expected other than the error message?

Alex

________________________________
From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Monday, November 2, 2020 9:51 PM
To: Alex Deucher <alexdeucher@gmail.com>; Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Official Use Only - Internal Distribution Only]

Hi Alex,

If we don't skip DCN in nv_set_ip_blocks, I find below error,
[  103.949926] [drm:amdgpu_dm_init.cold [amdgpu]] *ERROR* amdgpu: failed to initialize hdcp_workqueue.

Thanks!
Rico
________________________________
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Tuesday, November 3, 2020 1:49
To: Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

Does everything work as expected if we don't skip DCN in nv_set_ip_blocks?

Alex

On Mon, Nov 2, 2020 at 10:38 AM Deucher, Alexander
<Alexander.Deucher@amd.com> wrote:
>
> [AMD Public Use]
>
>
> That's the IP discovery table.  The Object header is part of atombios.  Do you have an vbios image from that system?
>
> Alex
>
>
> ________________________________
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Sunday, November 1, 2020 10:46 PM
> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Thanks very much Hawking!
>
>
> Hi Alex,
>
> The amdgpu_device_ip_get_ip_block() depends on the adev->ip_blocks that initialized by nv_set_ip_blocks().
> In nv_set_ip_blocks(), whether add dm_ip_block depends on amdgpu_device_has_dc_support().
> And amdgpu_device_has_dc_support() depends on amdgpu_device_asic_has_dc_support(),
> So amdgpu_device_asic_has_dc_support() can't conditional on amdgpu_device_ip_get_ip_block(), it makes a dependency loop.
>
> I have just checked the atombios object table in the headless VBIOS, and find DCN and VCN are still there.
> [  174.815714] [drm:amdgpu_discovery_reg_base_init [amdgpu]] DMU(271) #0 v2.0.2:
> [  174.815771] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00000012
> [  174.815829] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000000c0
> [  174.815887] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000034c0
> [  174.815945] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00009000
> [  174.816002] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403c00
> [  174.821854] [drm:amdgpu_discovery_reg_base_init [amdgpu]] UVD(12) #0 v2.0.0:
> [  174.821908] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007800
> [  174.821962] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007e00
> [  174.822017] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403000
> So I think DAL driver can't tell it's a normal ASIC or headless ASIC.
>
> Thanks!
> Rico
>
> ________________________________
> From: Deucher, Alexander <Alexander.Deucher@amd.com>
> Sent: Friday, October 30, 2020 22:18
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Maybe it would be easier to check if the DCE IP exists? E.g.,
> if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
> (!amdgpu_device_has_dc_support(adev))
> ...
>
> Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.
>
> Alex
>
> ________________________________
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 9:00 AM
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> [AMD Public Use]
>
>
>
> I see, thanks for the clarifying. The patch is
>
>
>
> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
>
>
>
> I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.
>
>
>
> Regards,
> Hawking
>
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Friday, October 30, 2020 20:05
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
>
>
> Hi Hawking,
>
>
>
> amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
>
> at that point, adev has not been allocated yet.
>
>
>
> My change can make it to right code path.
>
> int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
> {
>
> ...
>
> if (!amdgpu_device_has_dc_support(adev))
>
> drm_helper_hpd_irq_event(dev);               //right path for headless SKU
>
> else
>
> drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
>
> ...
>
> }
>
>
>
> Thanks!
>
> Rico
>
>
>
> ________________________________
>
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 19:48
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
> I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.
>
> Regards,
> Hawking
> -----Original Message-----
> From: Tianci Yin <tianci.yin@amd.com>
> Sent: Friday, October 30, 2020 19:32
> To: amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
> From: "Tianci.Yin" <tianci.yin@amd.com>
>
> The crash caused by the NULL pointer of
> adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
> but this function should not be called on headless SKU.
>
> Fix the mismatch between the return value of
> amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().
>
> Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
> Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
>  6 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ba65d4f2ab67..f0183271456f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
>                                     u32 pcie_index, u32 pcie_data,
>                                     u32 reg_addr, u64 reg_data);
>
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev);
>  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
>
>  int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1fe850e0a94d..323ed69032a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
>   * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
>   *
>   * @asic_type: AMD asic type
> + * @pdev: pointer to pci_dev instance
>   *
>   * Check if there is DC (new modesetting infrastructre) support for an asic.
>   * returns true if DC has support, false if not.
>   */
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev)
>  {
>          switch (asic_type) {
>  #if defined(CONFIG_DRM_AMD_DC)
> @@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
>          case CHIP_VEGA20:
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
>          case CHIP_RAVEN:
> +               return amdgpu_dc != 0;
>          case CHIP_NAVI10:
>          case CHIP_NAVI14:
>          case CHIP_NAVI12:
> +               if (nv_is_headless_sku(pdev))
> +                       return false;
> +               else
> +                       return amdgpu_dc != 0;
>          case CHIP_RENOIR:
>  #endif
>  #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
> @@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
>          if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
>                  return false;
>
> -       return amdgpu_device_asic_has_dc_support(adev->asic_type);
> +       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
>  }
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index 9e92d2a070ac..97014458d7de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
>           */
>          if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
>              amdgpu_bo_support_uswc(bo_flags) &&
> -           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
> +           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
>                  switch (adev->asic_type) {
>                  case CHIP_CARRIZO:
>                  case CHIP_STONEY:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4b78ecfd35f7..b23110241267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>          bool supports_atomic = false;
>
>          if (!amdgpu_virtual_display &&
> -           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
> +           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
>                  supports_atomic = true;
>
>          if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
>          adev->virt.ops = &xgpu_nv_virt_ops;
>  }
>
> -static bool nv_is_headless_sku(struct pci_dev *pdev)
> +bool nv_is_headless_sku(struct pci_dev *pdev)
>  {
>          if ((pdev->device == 0x731E &&
>              (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.h
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.h
> @@ -29,6 +29,7 @@
>  void nv_grbm_select(struct amdgpu_device *adev,
>                      u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
> +bool nv_is_headless_sku(struct pci_dev *pdev);
>  int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7CTianci.Yin%40amd.com%7C7bb51412bcd34843e77508d87f57a76b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637399361731262326%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=6HI1w2buiO4YlD%2BWFrV9suA7gmlVRX2IlJmYiAkukcM%3D&amp;reserved=0

[-- Attachment #1.2: Type: text/html, Size: 23473 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
  2020-11-03 14:19                 ` Deucher, Alexander
@ 2020-11-04  9:54                   ` Yin, Tianci (Rico)
  0 siblings, 0 replies; 12+ messages in thread
From: Yin, Tianci (Rico) @ 2020-11-04  9:54 UTC (permalink / raw)
  To: Deucher, Alexander, Alex Deucher
  Cc: Long, Gang, Chen, Guchun, Xu, Feifei, amd-gfx, Tuikov, Luben,
	Cui, Flora, Zhang, Hawking


[-- Attachment #1.1: Type: text/plain, Size: 15810 bytes --]

[AMD Official Use Only - Internal Distribution Only]

Hi Alex,

With DCN enabled,  I take amdgpu_test/kfdtest/PhinoexMiner/S3 tests, they are all PASS.
I ask @Long, Gang<mailto:Gang.Long@amd.com> to take a large coverage test,  if it's ok, I will enable DCN block.

Thanks!
Rico
________________________________
From: Deucher, Alexander <Alexander.Deucher@amd.com>
Sent: Tuesday, November 3, 2020 22:19
To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; Alex Deucher <alexdeucher@gmail.com>
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

That should be harmless and we can fix that up.  Does everything work as expected other than the error message?

Alex

________________________________
From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
Sent: Monday, November 2, 2020 9:51 PM
To: Alex Deucher <alexdeucher@gmail.com>; Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU


[AMD Official Use Only - Internal Distribution Only]

Hi Alex,

If we don't skip DCN in nv_set_ip_blocks, I find below error,
[  103.949926] [drm:amdgpu_dm_init.cold [amdgpu]] *ERROR* amdgpu: failed to initialize hdcp_workqueue.

Thanks!
Rico
________________________________
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Tuesday, November 3, 2020 1:49
To: Deucher, Alexander <Alexander.Deucher@amd.com>
Cc: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Long, Gang <Gang.Long@amd.com>
Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU

Does everything work as expected if we don't skip DCN in nv_set_ip_blocks?

Alex

On Mon, Nov 2, 2020 at 10:38 AM Deucher, Alexander
<Alexander.Deucher@amd.com> wrote:
>
> [AMD Public Use]
>
>
> That's the IP discovery table.  The Object header is part of atombios.  Do you have an vbios image from that system?
>
> Alex
>
>
> ________________________________
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Sunday, November 1, 2020 10:46 PM
> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Thanks very much Hawking!
>
>
> Hi Alex,
>
> The amdgpu_device_ip_get_ip_block() depends on the adev->ip_blocks that initialized by nv_set_ip_blocks().
> In nv_set_ip_blocks(), whether add dm_ip_block depends on amdgpu_device_has_dc_support().
> And amdgpu_device_has_dc_support() depends on amdgpu_device_asic_has_dc_support(),
> So amdgpu_device_asic_has_dc_support() can't conditional on amdgpu_device_ip_get_ip_block(), it makes a dependency loop.
>
> I have just checked the atombios object table in the headless VBIOS, and find DCN and VCN are still there.
> [  174.815714] [drm:amdgpu_discovery_reg_base_init [amdgpu]] DMU(271) #0 v2.0.2:
> [  174.815771] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00000012
> [  174.815829] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000000c0
> [  174.815887] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x000034c0
> [  174.815945] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00009000
> [  174.816002] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403c00
> [  174.821854] [drm:amdgpu_discovery_reg_base_init [amdgpu]] UVD(12) #0 v2.0.0:
> [  174.821908] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007800
> [  174.821962] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x00007e00
> [  174.822017] [drm:amdgpu_discovery_reg_base_init [amdgpu]] 0x02403000
> So I think DAL driver can't tell it's a normal ASIC or headless ASIC.
>
> Thanks!
> Rico
>
> ________________________________
> From: Deucher, Alexander <Alexander.Deucher@amd.com>
> Sent: Friday, October 30, 2020 22:18
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> Maybe it would be easier to check if the DCE IP exists? E.g.,
> if (!amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE) ||
> (!amdgpu_device_has_dc_support(adev))
> ...
>
> Also do we even need to skip DCN init for these cards, or will the display code handle it automatically since there will be no displays in the atombios object table.  We didn't do anything special for display with the polaris blockchain cards.
>
> Alex
>
> ________________________________
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 9:00 AM
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
> [AMD Public Use]
>
>
> [AMD Public Use]
>
>
>
> I see, thanks for the clarifying. The patch is
>
>
>
> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
>
>
>
> I was thinking to have a new flag like AMD_IS_HEADLESS in amd_chip_flag, but after think it a bit more, that’s just complicated the case, I agree with your current approach.
>
>
>
> Regards,
> Hawking
>
> From: Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Sent: Friday, October 30, 2020 20:05
> To: Zhang, Hawking <Hawking.Zhang@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
>
>
> Hi Hawking,
>
>
>
> amdgpu_device_asic_has_dc_support() is referrenced by amdgpu_pci_probe(),
>
> at that point, adev has not been allocated yet.
>
>
>
> My change can make it to right code path.
>
> int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
> {
>
> ...
>
> if (!amdgpu_device_has_dc_support(adev))
>
> drm_helper_hpd_irq_event(dev);               //right path for headless SKU
>
> else
>
> drm_kms_helper_hotplug_event(dev);      //wrong path for headless SKU
>
> ...
>
> }
>
>
>
> Thanks!
>
> Rico
>
>
>
> ________________________________
>
> From: Zhang, Hawking <Hawking.Zhang@amd.com>
> Sent: Friday, October 30, 2020 19:48
> To: Yin, Tianci (Rico) <Tianci.Yin@amd.com>; amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org>
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: RE: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
>
>
> [AMD Public Use]
>
> I'm not sure I get your point on changing amdgpu_device_has_dc_support() interface by adding new parameter. but it seems to me change input parameter from pdev to adev for nv_is_headless_sku is more straightforward.
>
> Regards,
> Hawking
> -----Original Message-----
> From: Tianci Yin <tianci.yin@amd.com>
> Sent: Friday, October 30, 2020 19:32
> To: amd-gfx@lists.freedesktop.org
> Cc: Tuikov, Luben <Luben.Tuikov@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>; Chen, Guchun <Guchun.Chen@amd.com>; Cui, Flora <Flora.Cui@amd.com>; Xu, Feifei <Feifei.Xu@amd.com>; Long, Gang <Gang.Long@amd.com>; Yin, Tianci (Rico) <Tianci.Yin@amd.com>
> Subject: [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU
>
> From: "Tianci.Yin" <tianci.yin@amd.com>
>
> The crash caused by the NULL pointer of
> adev->ddev.mode_config.funcs in drm_kms_helper_hotplug_event(),
> but this function should not be called on headless SKU.
>
> Fix the mismatch between the return value of
> amdgpu_device_has_dc_support() and the real DCN supporting state to avoid calling to drm_kms_helper_hotplug_event() in amdgpu_device_resume().
>
> Change-Id: I3a3d387e6ab5b774abb3911ea1bf6de60797759d
> Signed-off-by: Tianci.Yin <tianci.yin@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 10 ++++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.c             |  2 +-
>  drivers/gpu/drm/amd/amdgpu/nv.h             |  1 +
>  6 files changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ba65d4f2ab67..f0183271456f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1090,7 +1090,7 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
>                                     u32 pcie_index, u32 pcie_data,
>                                     u32 reg_addr, u64 reg_data);
>
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev);
>  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
>
>  int emu_soc_asic_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 1fe850e0a94d..323ed69032a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2960,11 +2960,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
>   * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
>   *
>   * @asic_type: AMD asic type
> + * @pdev: pointer to pci_dev instance
>   *
>   * Check if there is DC (new modesetting infrastructre) support for an asic.
>   * returns true if DC has support, false if not.
>   */
> -bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
> +bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type,
> +struct pci_dev *pdev)
>  {
>          switch (asic_type) {
>  #if defined(CONFIG_DRM_AMD_DC)
> @@ -3000,9 +3001,14 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
>          case CHIP_VEGA20:
>  #if defined(CONFIG_DRM_AMD_DC_DCN)
>          case CHIP_RAVEN:
> +               return amdgpu_dc != 0;
>          case CHIP_NAVI10:
>          case CHIP_NAVI14:
>          case CHIP_NAVI12:
> +               if (nv_is_headless_sku(pdev))
> +                       return false;
> +               else
> +                       return amdgpu_dc != 0;
>          case CHIP_RENOIR:
>  #endif
>  #if defined(CONFIG_DRM_AMD_DC_DCN3_0)
> @@ -3033,7 +3039,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
>          if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
>                  return false;
>
> -       return amdgpu_device_asic_has_dc_support(adev->asic_type);
> +       return amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev);
>  }
>
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index 9e92d2a070ac..97014458d7de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -516,7 +516,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
>           */
>          if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
>              amdgpu_bo_support_uswc(bo_flags) &&
> -           amdgpu_device_asic_has_dc_support(adev->asic_type)) {
> +           amdgpu_device_asic_has_dc_support(adev->asic_type, adev->pdev)) {
>                  switch (adev->asic_type) {
>                  case CHIP_CARRIZO:
>                  case CHIP_STONEY:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4b78ecfd35f7..b23110241267 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1117,7 +1117,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>          bool supports_atomic = false;
>
>          if (!amdgpu_virtual_display &&
> -           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
> +           amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK, pdev))
>                  supports_atomic = true;
>
>          if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 026e0a8fd526..97446ae75b0b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -493,7 +493,7 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
>          adev->virt.ops = &xgpu_nv_virt_ops;
>  }
>
> -static bool nv_is_headless_sku(struct pci_dev *pdev)
> +bool nv_is_headless_sku(struct pci_dev *pdev)
>  {
>          if ((pdev->device == 0x731E &&
>              (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index 515d67bf249f..7880ad0073c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.h
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.h
> @@ -29,6 +29,7 @@
>  void nv_grbm_select(struct amdgpu_device *adev,
>                      u32 me, u32 pipe, u32 queue, u32 vmid);  void nv_set_virt_ops(struct amdgpu_device *adev);
> +bool nv_is_headless_sku(struct pci_dev *pdev);
>  int nv_set_ip_blocks(struct amdgpu_device *adev);  int navi10_reg_base_init(struct amdgpu_device *adev);  int navi14_reg_base_init(struct amdgpu_device *adev);
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7CTianci.Yin%40amd.com%7C7bb51412bcd34843e77508d87f57a76b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637399361731262326%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=6HI1w2buiO4YlD%2BWFrV9suA7gmlVRX2IlJmYiAkukcM%3D&amp;reserved=0

[-- Attachment #1.2: Type: text/html, Size: 25648 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-11-04  9:54 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-30 11:32 [PATCH] drm/amdgpu: fix NULL pointer crash on navi10 headless SKU Tianci Yin
2020-10-30 11:48 ` Zhang, Hawking
2020-10-30 12:05   ` Yin, Tianci (Rico)
2020-10-30 12:48     ` Zhang, Hawking
2020-10-30 13:00     ` Zhang, Hawking
2020-10-30 14:18       ` Deucher, Alexander
2020-11-02  3:46         ` Yin, Tianci (Rico)
2020-11-02 15:38           ` Deucher, Alexander
2020-11-02 17:49             ` Alex Deucher
2020-11-03  2:51               ` Yin, Tianci (Rico)
2020-11-03 14:19                 ` Deucher, Alexander
2020-11-04  9:54                   ` Yin, Tianci (Rico)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.