All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
@ 2019-05-27  7:43 Emily Deng
       [not found] ` <1558943005-16557-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Emily Deng @ 2019-05-27  7:43 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng

For passthrough, after rebooted the VM, driver will do
a baco reset before doing other driver initialization during loading
 driver. For doing the baco reset, it will first
check the baco reset capability. So first need to set the
cap from the vbios information or baco reset won't be
enabled.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24 ++++++++++------------
 drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
 .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24 ++++++++++++++++++++++
 .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
 5 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b6ded84..7a8c220 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
+	/* Read BIOS */
+	if (!amdgpu_get_bios(adev))
+		return -EINVAL;
+
+	r = amdgpu_atombios_init(adev);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+		return r;
+	}
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		goto fence_driver_init;
 	}
 
-	/* Read BIOS */
-	if (!amdgpu_get_bios(adev)) {
-		r = -EINVAL;
-		goto failed;
-	}
-
-	r = amdgpu_atombios_init(adev);
-	if (r) {
-		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
-		goto failed;
-	}
-
 	/* detect if we are with an SRIOV vbios */
 	amdgpu_device_detect_sriov_bios(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 78bd4fc..d9fdd95 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
 	/* Just return false for soc15 GPUs.  Reset does not seem to
 	 * be necessary.
 	 */
-	return false;
+	if (!amdgpu_passthrough(adev))
+		return false;
 
 	if (adev->flags & AMD_IS_APU)
 		return false;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ce6aeb5..1d9bb29 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 
 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
+
 	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
 	hwmgr->pptable_func = &vega10_pptable_funcs;
+	if (amdgpu_passthrough(adev))
+		return vega10_baco_set_cap(hwmgr);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b6767d7..83d22cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
 
 	return result;
 }
+
+int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+
+	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
+
+	powerplay_table = get_powerplay_table(hwmgr);
+
+	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
+		"Missing PowerPlay Table!", return -1);
+
+	result = check_powerplay_tables(hwmgr, powerplay_table);
+
+	PP_ASSERT_WITH_CODE((result == 0),
+			    "check_powerplay_tables failed", return result);
+
+	set_hw_cap(
+			hwmgr,
+			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
+			PHM_PlatformCaps_BACO);
+	return result;
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index d83ed2a..da5fbec 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
 extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
 		struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
 				struct pp_power_state *, void *, uint32_t));
+extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
 #endif
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found] ` <1558943005-16557-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-28  1:17   ` Quan, Evan
       [not found]     ` <MN2PR12MB3344E228ADA29C20F5BA9DDBE41E0-rweVpJHSKToDMgCC8P//OwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Quan, Evan @ 2019-05-28  1:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Deng, Emily

Reviewed-by: Evan Quan <evan.quan@amd.com>

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Emily Deng
> Sent: Monday, May 27, 2019 3:43 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily <Emily.Deng@amd.com>
> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
> 
> For passthrough, after rebooted the VM, driver will do a baco reset before
> doing other driver initialization during loading  driver. For doing the baco
> reset, it will first check the baco reset capability. So first need to set the cap
> from the vbios information or baco reset won't be enabled.
> 
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24 ++++++++++-----
> -------
>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
> ++++++++++++++++++++++
>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>  5 files changed, 42 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b6ded84..7a8c220 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct
> amdgpu_device *adev)
>  	if (amdgpu_sriov_vf(adev))
>  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
> 
> +	/* Read BIOS */
> +	if (!amdgpu_get_bios(adev))
> +		return -EINVAL;
> +
> +	r = amdgpu_atombios_init(adev);
> +	if (r) {
> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> +		amdgpu_vf_error_put(adev,
> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
> +		return r;
> +	}
> +
>  	for (i = 0; i < adev->num_ip_blocks; i++) {
>  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>  			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  		goto fence_driver_init;
>  	}
> 
> -	/* Read BIOS */
> -	if (!amdgpu_get_bios(adev)) {
> -		r = -EINVAL;
> -		goto failed;
> -	}
> -
> -	r = amdgpu_atombios_init(adev);
> -	if (r) {
> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> -		amdgpu_vf_error_put(adev,
> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
> -		goto failed;
> -	}
> -
>  	/* detect if we are with an SRIOV vbios */
>  	amdgpu_device_detect_sriov_bios(adev);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 78bd4fc..d9fdd95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
> amdgpu_device *adev)
>  	/* Just return false for soc15 GPUs.  Reset does not seem to
>  	 * be necessary.
>  	 */
> -	return false;
> +	if (!amdgpu_passthrough(adev))
> +		return false;
> 
>  	if (adev->flags & AMD_IS_APU)
>  		return false;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index ce6aeb5..1d9bb29 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func
> vega10_hwmgr_funcs = {
> 
>  int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
> +	struct amdgpu_device *adev = hwmgr->adev;
> +
>  	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>  	hwmgr->pptable_func = &vega10_pptable_funcs;
> +	if (amdgpu_passthrough(adev))
> +		return vega10_baco_set_cap(hwmgr);
> 
>  	return 0;
>  }
> diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> index b6767d7..83d22cd 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
> pp_hwmgr *hwmgr,
> 
>  	return result;
>  }
> +
> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
> +	int result = 0;
> +
> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
> +
> +	powerplay_table = get_powerplay_table(hwmgr);
> +
> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
> +		"Missing PowerPlay Table!", return -1);
> +
> +	result = check_powerplay_tables(hwmgr, powerplay_table);
> +
> +	PP_ASSERT_WITH_CODE((result == 0),
> +			    "check_powerplay_tables failed", return result);
> +
> +	set_hw_cap(
> +			hwmgr,
> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
> +			PHM_PlatformCaps_BACO);
> +	return result;
> +}
> +
> diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> index d83ed2a..da5fbec 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> @@ -59,4 +59,5 @@ extern int
> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
> *hwmgr);  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr
> *hwmgr, uint32_t entry_index,
>  		struct pp_power_state *power_state, int
> (*call_back_func)(struct pp_hwmgr *, void *,
>  				struct pp_power_state *, void *, uint32_t));
> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>  #endif
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]     ` <MN2PR12MB3344E228ADA29C20F5BA9DDBE41E0-rweVpJHSKToDMgCC8P//OwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-06-13 11:11       ` Russell, Kent
       [not found]         ` <BN6PR12MB1618E4322B84944963895A8E85EF0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Russell, Kent @ 2019-06-13 11:11 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Deng, Emily

Hi Emily,

This patch caused a regression on MI25 (Chip 6860, VBIOS 113-D0513700-001) machines where the driver would not boot. Note that this was not seen on Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64 (Chip 697f). Reverting this patch resolved the issue with no other work required and was confirmed on all 3 machines.

Here is the dmesg:

[    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000 flags 0x20000000] (bogus alignment)
[    3.908692] BUG: kernel NULL pointer dereference, address: 0000000000000008
[    3.908716] #PF: supervisor read access in kernel mode
[    3.908734] #PF: error_code(0x0000) - not-present page
[    3.908753] PGD 0 P4D 0 
[    3.908767] Oops: 0000 [#1] SMP NOPTI
[    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-kfd-compute-roc-master-10734 #1
[    3.909753] Hardware name: Inventec     P47                              WC2071019001            /P47             , BIOS 0.64 04/09/2018
[    3.910534] Workqueue: events work_for_cpu_fn
[    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
[    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00 00 00 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38 <48> 8b 40 08 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
[    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
[    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX: 0000000000000000
[    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffffa27dce28fce8
[    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000079a
[    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12: ffff88d657af0000
[    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15: ffff88d657af0000
[    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000) knlGS:0000000000000000
[    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4: 00000000003406e0
[    3.916075] Call Trace:
[    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
[    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
[    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
[    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
[    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
[    3.919003]  ? __pm_runtime_resume+0x54/0x70
[    3.919270] usb 1-2: New USB device found, idVendor=14dd, idProduct=1005, bcdDevice= 0.00
[    3.919498]  local_pci_probe+0x3d/0x90
[    3.919503]  ? __schedule+0x3de/0x690
[    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2, SerialNumber=3
[    3.921137]  work_for_cpu_fn+0x10/0x20
[    3.922028] usb 1-2: Product: D2CIM-VUSB
[    3.922815]  process_one_work+0x159/0x3e0
[    3.923633] usb 1-2: Manufacturer: Raritan
[    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
[    3.924416]  worker_thread+0x22b/0x440
[    3.924419]  ? rescuer_thread+0x350/0x350
[    3.927812]  kthread+0xf6/0x130
[    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
[    3.928365]  ? kthread_destroy_worker+0x40/0x40
[    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max UDMA/133
[    3.930101]  ret_from_fork+0x1f/0x30
[    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched i2c_algo_bit aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+) drm dca mdio
[    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
[    3.931085] ata1.00: configured for UDMA/133
[    3.931809] CR2: 0000000000000008
[    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100 PQ: 0 ANSI: 5


Thanks!

 Kent

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Quan, Evan
Sent: Monday, May 27, 2019 9:17 PM
To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deng, Emily <Emily.Deng@amd.com>
Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset

Reviewed-by: Evan Quan <evan.quan@amd.com>

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
> Emily Deng
> Sent: Monday, May 27, 2019 3:43 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily <Emily.Deng@amd.com>
> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
> reset
> 
> For passthrough, after rebooted the VM, driver will do a baco reset 
> before doing other driver initialization during loading  driver. For 
> doing the baco reset, it will first check the baco reset capability. 
> So first need to set the cap from the vbios information or baco reset won't be enabled.
> 
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24 ++++++++++-----
> -------
>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
> ++++++++++++++++++++++
>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>  5 files changed, 42 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b6ded84..7a8c220 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct
> amdgpu_device *adev)
>  	if (amdgpu_sriov_vf(adev))
>  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
> 
> +	/* Read BIOS */
> +	if (!amdgpu_get_bios(adev))
> +		return -EINVAL;
> +
> +	r = amdgpu_atombios_init(adev);
> +	if (r) {
> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> +		amdgpu_vf_error_put(adev,
> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
> +		return r;
> +	}
> +
>  	for (i = 0; i < adev->num_ip_blocks; i++) {
>  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>  			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  		goto fence_driver_init;
>  	}
> 
> -	/* Read BIOS */
> -	if (!amdgpu_get_bios(adev)) {
> -		r = -EINVAL;
> -		goto failed;
> -	}
> -
> -	r = amdgpu_atombios_init(adev);
> -	if (r) {
> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> -		amdgpu_vf_error_put(adev,
> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
> -		goto failed;
> -	}
> -
>  	/* detect if we are with an SRIOV vbios */
>  	amdgpu_device_detect_sriov_bios(adev);
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 78bd4fc..d9fdd95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct 
> amdgpu_device *adev)
>  	/* Just return false for soc15 GPUs.  Reset does not seem to
>  	 * be necessary.
>  	 */
> -	return false;
> +	if (!amdgpu_passthrough(adev))
> +		return false;
> 
>  	if (adev->flags & AMD_IS_APU)
>  		return false;
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index ce6aeb5..1d9bb29 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func 
> vega10_hwmgr_funcs = {
> 
>  int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
> +	struct amdgpu_device *adev = hwmgr->adev;
> +
>  	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>  	hwmgr->pptable_func = &vega10_pptable_funcs;
> +	if (amdgpu_passthrough(adev))
> +		return vega10_baco_set_cap(hwmgr);
> 
>  	return 0;
>  }
> diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> index b6767d7..83d22cd 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
> pp_hwmgr *hwmgr,
> 
>  	return result;
>  }
> +
> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
> +	int result = 0;
> +
> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
> +
> +	powerplay_table = get_powerplay_table(hwmgr);
> +
> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
> +		"Missing PowerPlay Table!", return -1);
> +
> +	result = check_powerplay_tables(hwmgr, powerplay_table);
> +
> +	PP_ASSERT_WITH_CODE((result == 0),
> +			    "check_powerplay_tables failed", return result);
> +
> +	set_hw_cap(
> +			hwmgr,
> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
> +			PHM_PlatformCaps_BACO);
> +	return result;
> +}
> +
> diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> index d83ed2a..da5fbec 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> @@ -59,4 +59,5 @@ extern int
> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);  
> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, 
> uint32_t entry_index,
>  		struct pp_power_state *power_state, int (*call_back_func)(struct 
> pp_hwmgr *, void *,
>  				struct pp_power_state *, void *, uint32_t));
> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>  #endif
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]         ` <BN6PR12MB1618E4322B84944963895A8E85EF0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-06-14  3:19           ` Deng, Emily
       [not found]             ` <MN2PR12MB29750834B9F6C548E831DE798FEE0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Deng, Emily @ 2019-06-14  3:19 UTC (permalink / raw)
  To: Russell, Kent, Quan, Evan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Russell,
     This patch will read vbios, and parse vbios to get the baco reset feature bit.  From the call trace, it shows error in " amdgpu_get_bios ", but this patch don't modify amdgpu_get_bios, and code before amdgpu_get_bios. Please first check why it will has error when read vbios.

Best wishes
Emily Deng



>-----Original Message-----
>From: Russell, Kent <Kent.Russell@amd.com>
>Sent: Thursday, June 13, 2019 7:11 PM
>To: Quan, Evan <Evan.Quan@amd.com>; Deng, Emily
><Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily <Emily.Deng@amd.com>
>Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>
>Hi Emily,
>
>This patch caused a regression on MI25 (Chip 6860, VBIOS 113-D0513700-001)
>machines where the driver would not boot. Note that this was not seen on
>Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64 (Chip
>697f). Reverting this patch resolved the issue with no other work required and
>was confirmed on all 3 machines.
>
>Here is the dmesg:
>
>[    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000 flags
>0x20000000] (bogus alignment)
>[    3.908692] BUG: kernel NULL pointer dereference, address:
>0000000000000008
>[    3.908716] #PF: supervisor read access in kernel mode
>[    3.908734] #PF: error_code(0x0000) - not-present page
>[    3.908753] PGD 0 P4D 0
>[    3.908767] Oops: 0000 [#1] SMP NOPTI
>[    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-kfd-
>compute-roc-master-10734 #1
>[    3.909753] Hardware name: Inventec     P47                              WC2071019001
>/P47             , BIOS 0.64 04/09/2018
>[    3.910534] Workqueue: events work_for_cpu_fn
>[    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
>[    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00 00 00
>00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38 <48> 8b 40 08
>48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
>[    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
>[    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
>0000000000000000
>[    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
>ffffa27dce28fce8
>[    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
>000000000000079a
>[    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
>ffff88d657af0000
>[    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
>ffff88d657af0000
>[    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
>knlGS:0000000000000000
>[    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>[    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
>00000000003406e0
>[    3.916075] Call Trace:
>[    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
>[    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
>[    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
>[    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
>[    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
>[    3.919003]  ? __pm_runtime_resume+0x54/0x70
>[    3.919270] usb 1-2: New USB device found, idVendor=14dd, idProduct=1005,
>bcdDevice= 0.00
>[    3.919498]  local_pci_probe+0x3d/0x90
>[    3.919503]  ? __schedule+0x3de/0x690
>[    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
>SerialNumber=3
>[    3.921137]  work_for_cpu_fn+0x10/0x20
>[    3.922028] usb 1-2: Product: D2CIM-VUSB
>[    3.922815]  process_one_work+0x159/0x3e0
>[    3.923633] usb 1-2: Manufacturer: Raritan
>[    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
>[    3.924416]  worker_thread+0x22b/0x440
>[    3.924419]  ? rescuer_thread+0x350/0x350
>[    3.927812]  kthread+0xf6/0x130
>[    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>[    3.928365]  ? kthread_destroy_worker+0x40/0x40
>[    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
>UDMA/133
>[    3.930101]  ret_from_fork+0x1f/0x30
>[    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
>ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched i2c_algo_bit
>aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper
>syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+) drm dca
>mdio
>[    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
>[    3.931085] ata1.00: configured for UDMA/133
>[    3.931809] CR2: 0000000000000008
>[    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100 PQ:
>0 ANSI: 5
>
>
>Thanks!
>
> Kent
>
>-----Original Message-----
>From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Quan,
>Evan
>Sent: Monday, May 27, 2019 9:17 PM
>To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily <Emily.Deng@amd.com>
>Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>
>Reviewed-by: Evan Quan <evan.quan@amd.com>
>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>> Emily Deng
>> Sent: Monday, May 27, 2019 3:43 PM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Deng, Emily <Emily.Deng@amd.com>
>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>> reset
>>
>> For passthrough, after rebooted the VM, driver will do a baco reset
>> before doing other driver initialization during loading  driver. For
>> doing the baco reset, it will first check the baco reset capability.
>> So first need to set the cap from the vbios information or baco reset won't
>be enabled.
>>
>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24 ++++++++++-----
>> -------
>>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>> ++++++++++++++++++++++
>>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>  5 files changed, 42 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index b6ded84..7a8c220 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct
>> amdgpu_device *adev)
>>  	if (amdgpu_sriov_vf(adev))
>>  		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>
>> +	/* Read BIOS */
>> +	if (!amdgpu_get_bios(adev))
>> +		return -EINVAL;
>> +
>> +	r = amdgpu_atombios_init(adev);
>> +	if (r) {
>> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>> +		amdgpu_vf_error_put(adev,
>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>> +		return r;
>> +	}
>> +
>>  	for (i = 0; i < adev->num_ip_blocks; i++) {
>>  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>>  			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>  		goto fence_driver_init;
>>  	}
>>
>> -	/* Read BIOS */
>> -	if (!amdgpu_get_bios(adev)) {
>> -		r = -EINVAL;
>> -		goto failed;
>> -	}
>> -
>> -	r = amdgpu_atombios_init(adev);
>> -	if (r) {
>> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>> -		amdgpu_vf_error_put(adev,
>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>> -		goto failed;
>> -	}
>> -
>>  	/* detect if we are with an SRIOV vbios */
>>  	amdgpu_device_detect_sriov_bios(adev);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> index 78bd4fc..d9fdd95 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>> amdgpu_device *adev)
>>  	/* Just return false for soc15 GPUs.  Reset does not seem to
>>  	 * be necessary.
>>  	 */
>> -	return false;
>> +	if (!amdgpu_passthrough(adev))
>> +		return false;
>>
>>  	if (adev->flags & AMD_IS_APU)
>>  		return false;
>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> index ce6aeb5..1d9bb29 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func
>> vega10_hwmgr_funcs = {
>>
>>  int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
>> +	struct amdgpu_device *adev = hwmgr->adev;
>> +
>>  	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>>  	hwmgr->pptable_func = &vega10_pptable_funcs;
>> +	if (amdgpu_passthrough(adev))
>> +		return vega10_baco_set_cap(hwmgr);
>>
>>  	return 0;
>>  }
>> diff --git
>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> index b6767d7..83d22cd 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>> pp_hwmgr *hwmgr,
>>
>>  	return result;
>>  }
>> +
>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>> +	int result = 0;
>> +
>> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>> +
>> +	powerplay_table = get_powerplay_table(hwmgr);
>> +
>> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>> +		"Missing PowerPlay Table!", return -1);
>> +
>> +	result = check_powerplay_tables(hwmgr, powerplay_table);
>> +
>> +	PP_ASSERT_WITH_CODE((result == 0),
>> +			    "check_powerplay_tables failed", return result);
>> +
>> +	set_hw_cap(
>> +			hwmgr,
>> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>> +			PHM_PlatformCaps_BACO);
>> +	return result;
>> +}
>> +
>> diff --git
>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> index d83ed2a..da5fbec 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> @@ -59,4 +59,5 @@ extern int
>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>*hwmgr);
>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
>> uint32_t entry_index,
>>  		struct pp_power_state *power_state, int
>(*call_back_func)(struct
>> pp_hwmgr *, void *,
>>  				struct pp_power_state *, void *, uint32_t));
>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>  #endif
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>_______________________________________________
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]             ` <MN2PR12MB29750834B9F6C548E831DE798FEE0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-06-14 14:15               ` Yang, Philip
       [not found]                 ` <9bfdbf20-b914-6063-5517-8352e652e203-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Yang, Philip @ 2019-06-14 14:15 UTC (permalink / raw)
  To: Deng, Emily, Russell, Kent, Quan, Evan,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Emily,

I am not familiar with vbios and driver init part, just based on my 
experience, the patch don't modify amdgpu_get_bios but move 
amdgpu_get_bios to amdgpu_device_ip_early_init from amdgpu_device_init, 
so amdgpu_get_bios is executed earlier. The kernel error message "BUG: 
kernel NULL pointer dereference" means something is not initialized. 
Please review the change. This issue blocks rocm release now.

Regards,
Philip

On 2019-06-13 11:19 p.m., Deng, Emily wrote:
> Hi Russell,
>       This patch will read vbios, and parse vbios to get the baco reset feature bit.  From the call trace, it shows error in " amdgpu_get_bios ", but this patch don't modify amdgpu_get_bios, and code before amdgpu_get_bios. Please first check why it will has error when read vbios.
> 
> Best wishes
> Emily Deng
> 
> 
> 
>> -----Original Message-----
>> From: Russell, Kent <Kent.Russell@amd.com>
>> Sent: Thursday, June 13, 2019 7:11 PM
>> To: Quan, Evan <Evan.Quan@amd.com>; Deng, Emily
>> <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deng, Emily <Emily.Deng@amd.com>
>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>>
>> Hi Emily,
>>
>> This patch caused a regression on MI25 (Chip 6860, VBIOS 113-D0513700-001)
>> machines where the driver would not boot. Note that this was not seen on
>> Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64 (Chip
>> 697f). Reverting this patch resolved the issue with no other work required and
>> was confirmed on all 3 machines.
>>
>> Here is the dmesg:
>>
>> [    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000 flags
>> 0x20000000] (bogus alignment)
>> [    3.908692] BUG: kernel NULL pointer dereference, address:
>> 0000000000000008
>> [    3.908716] #PF: supervisor read access in kernel mode
>> [    3.908734] #PF: error_code(0x0000) - not-present page
>> [    3.908753] PGD 0 P4D 0
>> [    3.908767] Oops: 0000 [#1] SMP NOPTI
>> [    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-kfd-
>> compute-roc-master-10734 #1
>> [    3.909753] Hardware name: Inventec     P47                              WC2071019001
>> /P47             , BIOS 0.64 04/09/2018
>> [    3.910534] Workqueue: events work_for_cpu_fn
>> [    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
>> [    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00 00 00
>> 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38 <48> 8b 40 08
>> 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
>> [    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
>> [    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
>> 0000000000000000
>> [    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
>> ffffa27dce28fce8
>> [    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
>> 000000000000079a
>> [    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
>> ffff88d657af0000
>> [    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
>> ffff88d657af0000
>> [    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
>> knlGS:0000000000000000
>> [    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
>> 00000000003406e0
>> [    3.916075] Call Trace:
>> [    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
>> [    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
>> [    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
>> [    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
>> [    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
>> [    3.919003]  ? __pm_runtime_resume+0x54/0x70
>> [    3.919270] usb 1-2: New USB device found, idVendor=14dd, idProduct=1005,
>> bcdDevice= 0.00
>> [    3.919498]  local_pci_probe+0x3d/0x90
>> [    3.919503]  ? __schedule+0x3de/0x690
>> [    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
>> SerialNumber=3
>> [    3.921137]  work_for_cpu_fn+0x10/0x20
>> [    3.922028] usb 1-2: Product: D2CIM-VUSB
>> [    3.922815]  process_one_work+0x159/0x3e0
>> [    3.923633] usb 1-2: Manufacturer: Raritan
>> [    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
>> [    3.924416]  worker_thread+0x22b/0x440
>> [    3.924419]  ? rescuer_thread+0x350/0x350
>> [    3.927812]  kthread+0xf6/0x130
>> [    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>> [    3.928365]  ? kthread_destroy_worker+0x40/0x40
>> [    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
>> UDMA/133
>> [    3.930101]  ret_from_fork+0x1f/0x30
>> [    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
>> ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched i2c_algo_bit
>> aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper
>> syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+) drm dca
>> mdio
>> [    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
>> [    3.931085] ata1.00: configured for UDMA/133
>> [    3.931809] CR2: 0000000000000008
>> [    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100 PQ:
>> 0 ANSI: 5
>>
>>
>> Thanks!
>>
>> Kent
>>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Quan,
>> Evan
>> Sent: Monday, May 27, 2019 9:17 PM
>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deng, Emily <Emily.Deng@amd.com>
>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>>
>> Reviewed-by: Evan Quan <evan.quan@amd.com>
>>
>>> -----Original Message-----
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>> Emily Deng
>>> Sent: Monday, May 27, 2019 3:43 PM
>>> To: amd-gfx@lists.freedesktop.org
>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>> reset
>>>
>>> For passthrough, after rebooted the VM, driver will do a baco reset
>>> before doing other driver initialization during loading  driver. For
>>> doing the baco reset, it will first check the baco reset capability.
>>> So first need to set the cap from the vbios information or baco reset won't
>> be enabled.
>>>
>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24 ++++++++++-----
>>> -------
>>>   drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>>   drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>>>   .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>>> ++++++++++++++++++++++
>>>   .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>>   5 files changed, 42 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index b6ded84..7a8c220 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct
>>> amdgpu_device *adev)
>>>   	if (amdgpu_sriov_vf(adev))
>>>   		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>>
>>> +	/* Read BIOS */
>>> +	if (!amdgpu_get_bios(adev))
>>> +		return -EINVAL;
>>> +
>>> +	r = amdgpu_atombios_init(adev);
>>> +	if (r) {
>>> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>> +		amdgpu_vf_error_put(adev,
>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>> +		return r;
>>> +	}
>>> +
>>>   	for (i = 0; i < adev->num_ip_blocks; i++) {
>>>   		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>>>   			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
>>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>   		goto fence_driver_init;
>>>   	}
>>>
>>> -	/* Read BIOS */
>>> -	if (!amdgpu_get_bios(adev)) {
>>> -		r = -EINVAL;
>>> -		goto failed;
>>> -	}
>>> -
>>> -	r = amdgpu_atombios_init(adev);
>>> -	if (r) {
>>> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>> -		amdgpu_vf_error_put(adev,
>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>> -		goto failed;
>>> -	}
>>> -
>>>   	/* detect if we are with an SRIOV vbios */
>>>   	amdgpu_device_detect_sriov_bios(adev);
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> index 78bd4fc..d9fdd95 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>>> amdgpu_device *adev)
>>>   	/* Just return false for soc15 GPUs.  Reset does not seem to
>>>   	 * be necessary.
>>>   	 */
>>> -	return false;
>>> +	if (!amdgpu_passthrough(adev))
>>> +		return false;
>>>
>>>   	if (adev->flags & AMD_IS_APU)
>>>   		return false;
>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> index ce6aeb5..1d9bb29 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func
>>> vega10_hwmgr_funcs = {
>>>
>>>   int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
>>> +	struct amdgpu_device *adev = hwmgr->adev;
>>> +
>>>   	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>>>   	hwmgr->pptable_func = &vega10_pptable_funcs;
>>> +	if (amdgpu_passthrough(adev))
>>> +		return vega10_baco_set_cap(hwmgr);
>>>
>>>   	return 0;
>>>   }
>>> diff --git
>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> index b6767d7..83d22cd 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>>> pp_hwmgr *hwmgr,
>>>
>>>   	return result;
>>>   }
>>> +
>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>>> +	int result = 0;
>>> +
>>> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>>> +
>>> +	powerplay_table = get_powerplay_table(hwmgr);
>>> +
>>> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>>> +		"Missing PowerPlay Table!", return -1);
>>> +
>>> +	result = check_powerplay_tables(hwmgr, powerplay_table);
>>> +
>>> +	PP_ASSERT_WITH_CODE((result == 0),
>>> +			    "check_powerplay_tables failed", return result);
>>> +
>>> +	set_hw_cap(
>>> +			hwmgr,
>>> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
>>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>>> +			PHM_PlatformCaps_BACO);
>>> +	return result;
>>> +}
>>> +
>>> diff --git
>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> index d83ed2a..da5fbec 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> @@ -59,4 +59,5 @@ extern int
>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>> *hwmgr);
>>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
>>> uint32_t entry_index,
>>>   		struct pp_power_state *power_state, int
>> (*call_back_func)(struct
>>> pp_hwmgr *, void *,
>>>   				struct pp_power_state *, void *, uint32_t));
>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>>   #endif
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]                 ` <9bfdbf20-b914-6063-5517-8352e652e203-5C7GfCeVMHo@public.gmane.org>
@ 2019-06-17  2:50                   ` Deng, Emily
       [not found]                     ` <MN2PR12MB2975EAAD825DA9842876B4A38FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Deng, Emily @ 2019-06-17  2:50 UTC (permalink / raw)
  To: Yang, Philip, Russell, Kent, Quan, Evan,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Philip,
     Sorry for introduce this issue for you. From the code, I couldn't see any issue. And I have tested the code in my Vega10, it is OK.  So I think this is the kfd specific issue, but I couldn't reproduce issue on my platform. Could you create an ticket, and assign to me, and share me your platform, so I could debug it and fix it today.

Best wishes
Emily Deng

>-----Original Message-----
>From: Yang, Philip <Philip.Yang@amd.com>
>Sent: Friday, June 14, 2019 10:16 PM
>To: Deng, Emily <Emily.Deng@amd.com>; Russell, Kent
><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-
>gfx@lists.freedesktop.org
>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>
>Hi Emily,
>
>I am not familiar with vbios and driver init part, just based on my experience,
>the patch don't modify amdgpu_get_bios but move amdgpu_get_bios to
>amdgpu_device_ip_early_init from amdgpu_device_init, so amdgpu_get_bios
>is executed earlier. The kernel error message "BUG:
>kernel NULL pointer dereference" means something is not initialized.
>Please review the change. This issue blocks rocm release now.
>
>Regards,
>Philip
>
>On 2019-06-13 11:19 p.m., Deng, Emily wrote:
>> Hi Russell,
>>       This patch will read vbios, and parse vbios to get the baco reset feature
>bit.  From the call trace, it shows error in " amdgpu_get_bios ", but this patch
>don't modify amdgpu_get_bios, and code before amdgpu_get_bios. Please
>first check why it will has error when read vbios.
>>
>> Best wishes
>> Emily Deng
>>
>>
>>
>>> -----Original Message-----
>>> From: Russell, Kent <Kent.Russell@amd.com>
>>> Sent: Thursday, June 13, 2019 7:11 PM
>>> To: Quan, Evan <Evan.Quan@amd.com>; Deng, Emily
><Emily.Deng@amd.com>;
>>> amd-gfx@lists.freedesktop.org
>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>> reset
>>>
>>> Hi Emily,
>>>
>>> This patch caused a regression on MI25 (Chip 6860, VBIOS
>>> 113-D0513700-001) machines where the driver would not boot. Note that
>>> this was not seen on
>>> Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64 (Chip
>>> 697f). Reverting this patch resolved the issue with no other work
>>> required and was confirmed on all 3 machines.
>>>
>>> Here is the dmesg:
>>>
>>> [    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000
>flags
>>> 0x20000000] (bogus alignment)
>>> [    3.908692] BUG: kernel NULL pointer dereference, address:
>>> 0000000000000008
>>> [    3.908716] #PF: supervisor read access in kernel mode
>>> [    3.908734] #PF: error_code(0x0000) - not-present page
>>> [    3.908753] PGD 0 P4D 0
>>> [    3.908767] Oops: 0000 [#1] SMP NOPTI
>>> [    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-kfd-
>>> compute-roc-master-10734 #1
>>> [    3.909753] Hardware name: Inventec     P47
>WC2071019001
>>> /P47             , BIOS 0.64 04/09/2018
>>> [    3.910534] Workqueue: events work_for_cpu_fn
>>> [    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
>>> [    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00 00
>00
>>> 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38
>>> <48> 8b 40 08
>>> 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
>>> [    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
>>> [    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
>>> 0000000000000000
>>> [    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
>>> ffffa27dce28fce8
>>> [    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
>>> 000000000000079a
>>> [    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
>>> ffff88d657af0000
>>> [    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
>>> ffff88d657af0000
>>> [    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
>>> knlGS:0000000000000000
>>> [    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
>>> 00000000003406e0
>>> [    3.916075] Call Trace:
>>> [    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
>>> [    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
>>> [    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
>>> [    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
>>> [    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
>>> [    3.919003]  ? __pm_runtime_resume+0x54/0x70
>>> [    3.919270] usb 1-2: New USB device found, idVendor=14dd,
>idProduct=1005,
>>> bcdDevice= 0.00
>>> [    3.919498]  local_pci_probe+0x3d/0x90
>>> [    3.919503]  ? __schedule+0x3de/0x690
>>> [    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
>>> SerialNumber=3
>>> [    3.921137]  work_for_cpu_fn+0x10/0x20
>>> [    3.922028] usb 1-2: Product: D2CIM-VUSB
>>> [    3.922815]  process_one_work+0x159/0x3e0
>>> [    3.923633] usb 1-2: Manufacturer: Raritan
>>> [    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
>>> [    3.924416]  worker_thread+0x22b/0x440
>>> [    3.924419]  ? rescuer_thread+0x350/0x350
>>> [    3.927812]  kthread+0xf6/0x130
>>> [    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>>> [    3.928365]  ? kthread_destroy_worker+0x40/0x40
>>> [    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
>>> UDMA/133
>>> [    3.930101]  ret_from_fork+0x1f/0x30
>>> [    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
>>> ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched
>>> i2c_algo_bit
>>> aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper
>>> syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+)
>>> drm dca mdio
>>> [    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
>>> [    3.931085] ata1.00: configured for UDMA/133
>>> [    3.931809] CR2: 0000000000000008
>>> [    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100
>PQ:
>>> 0 ANSI: 5
>>>
>>>
>>> Thanks!
>>>
>>> Kent
>>>
>>> -----Original Message-----
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>> Quan, Evan
>>> Sent: Monday, May 27, 2019 9:17 PM
>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>> reset
>>>
>>> Reviewed-by: Evan Quan <evan.quan@amd.com>
>>>
>>>> -----Original Message-----
>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>> Emily Deng
>>>> Sent: Monday, May 27, 2019 3:43 PM
>>>> To: amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>>> reset
>>>>
>>>> For passthrough, after rebooted the VM, driver will do a baco reset
>>>> before doing other driver initialization during loading  driver. For
>>>> doing the baco reset, it will first check the baco reset capability.
>>>> So first need to set the cap from the vbios information or baco
>>>> reset won't
>>> be enabled.
>>>>
>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24 ++++++++++--
>---
>>>> -------
>>>>   drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>>>   drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>>>> ++++++++++++++++++++++
>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>>>   5 files changed, 42 insertions(+), 14 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index b6ded84..7a8c220 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -1541,6 +1541,17 @@ static int amdgpu_device_ip_early_init(struct
>>>> amdgpu_device *adev)
>>>>   	if (amdgpu_sriov_vf(adev))
>>>>   		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>>>
>>>> +	/* Read BIOS */
>>>> +	if (!amdgpu_get_bios(adev))
>>>> +		return -EINVAL;
>>>> +
>>>> +	r = amdgpu_atombios_init(adev);
>>>> +	if (r) {
>>>> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>> +		amdgpu_vf_error_put(adev,
>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>> +		return r;
>>>> +	}
>>>> +
>>>>   	for (i = 0; i < adev->num_ip_blocks; i++) {
>>>>   		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>>>>   			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
>>>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device
>*adev,
>>>>   		goto fence_driver_init;
>>>>   	}
>>>>
>>>> -	/* Read BIOS */
>>>> -	if (!amdgpu_get_bios(adev)) {
>>>> -		r = -EINVAL;
>>>> -		goto failed;
>>>> -	}
>>>> -
>>>> -	r = amdgpu_atombios_init(adev);
>>>> -	if (r) {
>>>> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>> -		amdgpu_vf_error_put(adev,
>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>> -		goto failed;
>>>> -	}
>>>> -
>>>>   	/* detect if we are with an SRIOV vbios */
>>>>   	amdgpu_device_detect_sriov_bios(adev);
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>> index 78bd4fc..d9fdd95 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>>>> amdgpu_device *adev)
>>>>   	/* Just return false for soc15 GPUs.  Reset does not seem to
>>>>   	 * be necessary.
>>>>   	 */
>>>> -	return false;
>>>> +	if (!amdgpu_passthrough(adev))
>>>> +		return false;
>>>>
>>>>   	if (adev->flags & AMD_IS_APU)
>>>>   		return false;
>>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>> index ce6aeb5..1d9bb29 100644
>>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func
>>>> vega10_hwmgr_funcs = {
>>>>
>>>>   int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
>>>> +	struct amdgpu_device *adev = hwmgr->adev;
>>>> +
>>>>   	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>>>>   	hwmgr->pptable_func = &vega10_pptable_funcs;
>>>> +	if (amdgpu_passthrough(adev))
>>>> +		return vega10_baco_set_cap(hwmgr);
>>>>
>>>>   	return 0;
>>>>   }
>>>> diff --git
>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>> index b6767d7..83d22cd 100644
>>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>> +++
>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>>>> pp_hwmgr *hwmgr,
>>>>
>>>>   	return result;
>>>>   }
>>>> +
>>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>>>> +	int result = 0;
>>>> +
>>>> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>>>> +
>>>> +	powerplay_table = get_powerplay_table(hwmgr);
>>>> +
>>>> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>>>> +		"Missing PowerPlay Table!", return -1);
>>>> +
>>>> +	result = check_powerplay_tables(hwmgr, powerplay_table);
>>>> +
>>>> +	PP_ASSERT_WITH_CODE((result == 0),
>>>> +			    "check_powerplay_tables failed", return result);
>>>> +
>>>> +	set_hw_cap(
>>>> +			hwmgr,
>>>> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
>>>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>>>> +			PHM_PlatformCaps_BACO);
>>>> +	return result;
>>>> +}
>>>> +
>>>> diff --git
>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>> index d83ed2a..da5fbec 100644
>>>> ---
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>> +++
>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>> @@ -59,4 +59,5 @@ extern int
>>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>>> *hwmgr);
>>>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
>>>> uint32_t entry_index,
>>>>   		struct pp_power_state *power_state, int
>>> (*call_back_func)(struct
>>>> pp_hwmgr *, void *,
>>>>   				struct pp_power_state *, void *, uint32_t));
>>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>>>   #endif
>>>> --
>>>> 2.7.4
>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]                     ` <MN2PR12MB2975EAAD825DA9842876B4A38FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-06-17  3:52                       ` Deng, Emily
       [not found]                         ` <MN2PR12MB29750900DC4B43AD00254D888FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Deng, Emily @ 2019-06-17  3:52 UTC (permalink / raw)
  To: Yang, Philip, Russell, Kent, Quan, Evan,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 14035 bytes --]

Hi Philip,
     Could you help to try whether the attachment patch could help with the issue encounter? If it is OK, then I will send this patch out to review. 

Best wishes
Emily Deng



>-----Original Message-----
>From: Deng, Emily
>Sent: Monday, June 17, 2019 10:50 AM
>To: Yang, Philip <Philip.Yang@amd.com>; Russell, Kent
><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-
>gfx@lists.freedesktop.org
>Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>
>Hi Philip,
>     Sorry for introduce this issue for you. From the code, I couldn't see any
>issue. And I have tested the code in my Vega10, it is OK.  So I think this is the
>kfd specific issue, but I couldn't reproduce issue on my platform. Could you
>create an ticket, and assign to me, and share me your platform, so I could
>debug it and fix it today.
>
>Best wishes
>Emily Deng
>
>>-----Original Message-----
>>From: Yang, Philip <Philip.Yang@amd.com>
>>Sent: Friday, June 14, 2019 10:16 PM
>>To: Deng, Emily <Emily.Deng@amd.com>; Russell, Kent
>><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-
>>gfx@lists.freedesktop.org
>>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>reset
>>
>>Hi Emily,
>>
>>I am not familiar with vbios and driver init part, just based on my
>>experience, the patch don't modify amdgpu_get_bios but move
>>amdgpu_get_bios to amdgpu_device_ip_early_init from amdgpu_device_init,
>>so amdgpu_get_bios is executed earlier. The kernel error message "BUG:
>>kernel NULL pointer dereference" means something is not initialized.
>>Please review the change. This issue blocks rocm release now.
>>
>>Regards,
>>Philip
>>
>>On 2019-06-13 11:19 p.m., Deng, Emily wrote:
>>> Hi Russell,
>>>       This patch will read vbios, and parse vbios to get the baco
>>> reset feature
>>bit.  From the call trace, it shows error in " amdgpu_get_bios ", but
>>this patch don't modify amdgpu_get_bios, and code before
>>amdgpu_get_bios. Please first check why it will has error when read vbios.
>>>
>>> Best wishes
>>> Emily Deng
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: Russell, Kent <Kent.Russell@amd.com>
>>>> Sent: Thursday, June 13, 2019 7:11 PM
>>>> To: Quan, Evan <Evan.Quan@amd.com>; Deng, Emily
>><Emily.Deng@amd.com>;
>>>> amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before
>>>> baco reset
>>>>
>>>> Hi Emily,
>>>>
>>>> This patch caused a regression on MI25 (Chip 6860, VBIOS
>>>> 113-D0513700-001) machines where the driver would not boot. Note
>>>> that this was not seen on
>>>> Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64
>>>> (Chip 697f). Reverting this patch resolved the issue with no other
>>>> work required and was confirmed on all 3 machines.
>>>>
>>>> Here is the dmesg:
>>>>
>>>> [    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000
>>flags
>>>> 0x20000000] (bogus alignment)
>>>> [    3.908692] BUG: kernel NULL pointer dereference, address:
>>>> 0000000000000008
>>>> [    3.908716] #PF: supervisor read access in kernel mode
>>>> [    3.908734] #PF: error_code(0x0000) - not-present page
>>>> [    3.908753] PGD 0 P4D 0
>>>> [    3.908767] Oops: 0000 [#1] SMP NOPTI
>>>> [    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-
>kfd-
>>>> compute-roc-master-10734 #1
>>>> [    3.909753] Hardware name: Inventec     P47
>>WC2071019001
>>>> /P47             , BIOS 0.64 04/09/2018
>>>> [    3.910534] Workqueue: events work_for_cpu_fn
>>>> [    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
>>>> [    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00
>00
>>00
>>>> 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38
>>>> <48> 8b 40 08
>>>> 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
>>>> [    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
>>>> [    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
>>>> 0000000000000000
>>>> [    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
>>>> ffffa27dce28fce8
>>>> [    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
>>>> 000000000000079a
>>>> [    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
>>>> ffff88d657af0000
>>>> [    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
>>>> ffff88d657af0000
>>>> [    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
>>>> knlGS:0000000000000000
>>>> [    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
>>>> 00000000003406e0
>>>> [    3.916075] Call Trace:
>>>> [    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
>>>> [    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
>>>> [    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
>>>> [    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
>>>> [    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
>>>> [    3.919003]  ? __pm_runtime_resume+0x54/0x70
>>>> [    3.919270] usb 1-2: New USB device found, idVendor=14dd,
>>idProduct=1005,
>>>> bcdDevice= 0.00
>>>> [    3.919498]  local_pci_probe+0x3d/0x90
>>>> [    3.919503]  ? __schedule+0x3de/0x690
>>>> [    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
>>>> SerialNumber=3
>>>> [    3.921137]  work_for_cpu_fn+0x10/0x20
>>>> [    3.922028] usb 1-2: Product: D2CIM-VUSB
>>>> [    3.922815]  process_one_work+0x159/0x3e0
>>>> [    3.923633] usb 1-2: Manufacturer: Raritan
>>>> [    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
>>>> [    3.924416]  worker_thread+0x22b/0x440
>>>> [    3.924419]  ? rescuer_thread+0x350/0x350
>>>> [    3.927812]  kthread+0xf6/0x130
>>>> [    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>>>> [    3.928365]  ? kthread_destroy_worker+0x40/0x40
>>>> [    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
>>>> UDMA/133
>>>> [    3.930101]  ret_from_fork+0x1f/0x30
>>>> [    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
>>>> ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched
>>>> i2c_algo_bit
>>>> aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper
>>>> syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+)
>>>> drm dca mdio
>>>> [    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
>>>> [    3.931085] ata1.00: configured for UDMA/133
>>>> [    3.931809] CR2: 0000000000000008
>>>> [    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100
>>PQ:
>>>> 0 ANSI: 5
>>>>
>>>>
>>>> Thanks!
>>>>
>>>> Kent
>>>>
>>>> -----Original Message-----
>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>> Quan, Evan
>>>> Sent: Monday, May 27, 2019 9:17 PM
>>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before
>>>> baco reset
>>>>
>>>> Reviewed-by: Evan Quan <evan.quan@amd.com>
>>>>
>>>>> -----Original Message-----
>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>>>> Emily Deng
>>>>> Sent: Monday, May 27, 2019 3:43 PM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>>>> reset
>>>>>
>>>>> For passthrough, after rebooted the VM, driver will do a baco reset
>>>>> before doing other driver initialization during loading  driver.
>>>>> For doing the baco reset, it will first check the baco reset capability.
>>>>> So first need to set the cap from the vbios information or baco
>>>>> reset won't
>>>> be enabled.
>>>>>
>>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24
>++++++++++--
>>---
>>>>> -------
>>>>>   drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>>>>   drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>>>>> ++++++++++++++++++++++
>>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>>>>   5 files changed, 42 insertions(+), 14 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> index b6ded84..7a8c220 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> @@ -1541,6 +1541,17 @@ static int
>>>>> amdgpu_device_ip_early_init(struct
>>>>> amdgpu_device *adev)
>>>>>   	if (amdgpu_sriov_vf(adev))
>>>>>   		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>>>>
>>>>> +	/* Read BIOS */
>>>>> +	if (!amdgpu_get_bios(adev))
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	r = amdgpu_atombios_init(adev);
>>>>> +	if (r) {
>>>>> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>>> +		amdgpu_vf_error_put(adev,
>>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>>> +		return r;
>>>>> +	}
>>>>> +
>>>>>   	for (i = 0; i < adev->num_ip_blocks; i++) {
>>>>>   		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>>>>>   			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
>>>>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device
>>*adev,
>>>>>   		goto fence_driver_init;
>>>>>   	}
>>>>>
>>>>> -	/* Read BIOS */
>>>>> -	if (!amdgpu_get_bios(adev)) {
>>>>> -		r = -EINVAL;
>>>>> -		goto failed;
>>>>> -	}
>>>>> -
>>>>> -	r = amdgpu_atombios_init(adev);
>>>>> -	if (r) {
>>>>> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>>> -		amdgpu_vf_error_put(adev,
>>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>>> -		goto failed;
>>>>> -	}
>>>>> -
>>>>>   	/* detect if we are with an SRIOV vbios */
>>>>>   	amdgpu_device_detect_sriov_bios(adev);
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> index 78bd4fc..d9fdd95 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>>>>> amdgpu_device *adev)
>>>>>   	/* Just return false for soc15 GPUs.  Reset does not seem to
>>>>>   	 * be necessary.
>>>>>   	 */
>>>>> -	return false;
>>>>> +	if (!amdgpu_passthrough(adev))
>>>>> +		return false;
>>>>>
>>>>>   	if (adev->flags & AMD_IS_APU)
>>>>>   		return false;
>>>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> index ce6aeb5..1d9bb29 100644
>>>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func
>>>>> vega10_hwmgr_funcs = {
>>>>>
>>>>>   int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
>>>>> +	struct amdgpu_device *adev = hwmgr->adev;
>>>>> +
>>>>>   	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>>>>>   	hwmgr->pptable_func = &vega10_pptable_funcs;
>>>>> +	if (amdgpu_passthrough(adev))
>>>>> +		return vega10_baco_set_cap(hwmgr);
>>>>>
>>>>>   	return 0;
>>>>>   }
>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> index b6767d7..83d22cd 100644
>>>>> ---
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> +++
>>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>>>>> pp_hwmgr *hwmgr,
>>>>>
>>>>>   	return result;
>>>>>   }
>>>>> +
>>>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>>>>> +	int result = 0;
>>>>> +
>>>>> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>>>>> +
>>>>> +	powerplay_table = get_powerplay_table(hwmgr);
>>>>> +
>>>>> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>>>>> +		"Missing PowerPlay Table!", return -1);
>>>>> +
>>>>> +	result = check_powerplay_tables(hwmgr, powerplay_table);
>>>>> +
>>>>> +	PP_ASSERT_WITH_CODE((result == 0),
>>>>> +			    "check_powerplay_tables failed", return result);
>>>>> +
>>>>> +	set_hw_cap(
>>>>> +			hwmgr,
>>>>> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
>>>>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>>>>> +			PHM_PlatformCaps_BACO);
>>>>> +	return result;
>>>>> +}
>>>>> +
>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> index d83ed2a..da5fbec 100644
>>>>> ---
>>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> +++
>>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> @@ -59,4 +59,5 @@ extern int
>>>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>>>> *hwmgr);
>>>>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
>>>>> uint32_t entry_index,
>>>>>   		struct pp_power_state *power_state, int
>>>> (*call_back_func)(struct
>>>>> pp_hwmgr *, void *,
>>>>>   				struct pp_power_state *, void *, uint32_t));
>>>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>>>>   #endif
>>>>> --
>>>>> 2.7.4
>>>>>
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>

[-- Attachment #2: 0001-drm-amdgpu-Fix-the-null-pointer-about-get-vbios.patch --]
[-- Type: application/octet-stream, Size: 1746 bytes --]

From 75eef1a6cf1e53ae44cba40f8f60211ab27cd62e Mon Sep 17 00:00:00 2001
From: Emily Deng <Emily.Deng@amd.com>
Date: Mon, 17 Jun 2019 11:28:09 +0800
Subject: [PATCH] drm/amdgpu: Fix the null pointer about get vbios

Move the get vbios only before SDMA block early init to fix null pointer
about get vbios.

Change-Id: I7907224153d357e12d3add507db39b190705f2e8
Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4a836db..830c4b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1534,18 +1534,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
-	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
-
-	r = amdgpu_atombios_init(adev);
-	if (r) {
-		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
-		return r;
-	}
-
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if ( adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA ) {
+			/* Read BIOS */
+			if (!amdgpu_get_bios(adev))
+				return -EINVAL;
+
+			r = amdgpu_atombios_init(adev);
+			if (r) {
+				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+				return r;
+			}
+		}
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d <%s>\n",
 				  i, adev->ip_blocks[i].version->funcs->name);
-- 
2.7.4


[-- Attachment #3: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]                         ` <MN2PR12MB29750900DC4B43AD00254D888FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-06-17 11:08                           ` Russell, Kent
       [not found]                             ` <BN6PR12MB16188EBC282A9D8BEA715AB185EB0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2019-06-17 11:39                           ` Russell, Kent
  1 sibling, 1 reply; 18+ messages in thread
From: Russell, Kent @ 2019-06-17 11:08 UTC (permalink / raw)
  To: Deng, Emily, Yang, Philip, Quan, Evan,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

The issue was limited to one specific model of MI25, I'll see if I can get access to that later today and try your patch out. Thank you Emily!
 
 Kent

-----Original Message-----
From: Deng, Emily <Emily.Deng@amd.com> 
Sent: Sunday, June 16, 2019 11:53 PM
To: Yang, Philip <Philip.Yang@amd.com>; Russell, Kent <Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset

Hi Philip,
     Could you help to try whether the attachment patch could help with the issue encounter? If it is OK, then I will send this patch out to review. 

Best wishes
Emily Deng



>-----Original Message-----
>From: Deng, Emily
>Sent: Monday, June 17, 2019 10:50 AM
>To: Yang, Philip <Philip.Yang@amd.com>; Russell, Kent 
><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd- 
>gfx@lists.freedesktop.org
>Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
>reset
>
>Hi Philip,
>     Sorry for introduce this issue for you. From the code, I couldn't 
>see any issue. And I have tested the code in my Vega10, it is OK.  So I 
>think this is the kfd specific issue, but I couldn't reproduce issue on 
>my platform. Could you create an ticket, and assign to me, and share me 
>your platform, so I could debug it and fix it today.
>
>Best wishes
>Emily Deng
>
>>-----Original Message-----
>>From: Yang, Philip <Philip.Yang@amd.com>
>>Sent: Friday, June 14, 2019 10:16 PM
>>To: Deng, Emily <Emily.Deng@amd.com>; Russell, Kent 
>><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd- 
>>gfx@lists.freedesktop.org
>>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
>>reset
>>
>>Hi Emily,
>>
>>I am not familiar with vbios and driver init part, just based on my 
>>experience, the patch don't modify amdgpu_get_bios but move 
>>amdgpu_get_bios to amdgpu_device_ip_early_init from 
>>amdgpu_device_init, so amdgpu_get_bios is executed earlier. The kernel error message "BUG:
>>kernel NULL pointer dereference" means something is not initialized.
>>Please review the change. This issue blocks rocm release now.
>>
>>Regards,
>>Philip
>>
>>On 2019-06-13 11:19 p.m., Deng, Emily wrote:
>>> Hi Russell,
>>>       This patch will read vbios, and parse vbios to get the baco 
>>> reset feature
>>bit.  From the call trace, it shows error in " amdgpu_get_bios ", but 
>>this patch don't modify amdgpu_get_bios, and code before 
>>amdgpu_get_bios. Please first check why it will has error when read vbios.
>>>
>>> Best wishes
>>> Emily Deng
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: Russell, Kent <Kent.Russell@amd.com>
>>>> Sent: Thursday, June 13, 2019 7:11 PM
>>>> To: Quan, Evan <Evan.Quan@amd.com>; Deng, Emily
>><Emily.Deng@amd.com>;
>>>> amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before 
>>>> baco reset
>>>>
>>>> Hi Emily,
>>>>
>>>> This patch caused a regression on MI25 (Chip 6860, VBIOS
>>>> 113-D0513700-001) machines where the driver would not boot. Note 
>>>> that this was not seen on
>>>> Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64 
>>>> (Chip 697f). Reverting this patch resolved the issue with no other 
>>>> work required and was confirmed on all 3 machines.
>>>>
>>>> Here is the dmesg:
>>>>
>>>> [    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000
>>flags
>>>> 0x20000000] (bogus alignment)
>>>> [    3.908692] BUG: kernel NULL pointer dereference, address:
>>>> 0000000000000008
>>>> [    3.908716] #PF: supervisor read access in kernel mode
>>>> [    3.908734] #PF: error_code(0x0000) - not-present page
>>>> [    3.908753] PGD 0 P4D 0
>>>> [    3.908767] Oops: 0000 [#1] SMP NOPTI
>>>> [    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-
>kfd-
>>>> compute-roc-master-10734 #1
>>>> [    3.909753] Hardware name: Inventec     P47
>>WC2071019001
>>>> /P47             , BIOS 0.64 04/09/2018
>>>> [    3.910534] Workqueue: events work_for_cpu_fn
>>>> [    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
>>>> [    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00
>00
>>00
>>>> 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38 
>>>> <48> 8b 40 08
>>>> 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
>>>> [    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
>>>> [    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
>>>> 0000000000000000
>>>> [    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
>>>> ffffa27dce28fce8
>>>> [    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
>>>> 000000000000079a
>>>> [    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
>>>> ffff88d657af0000
>>>> [    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
>>>> ffff88d657af0000
>>>> [    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
>>>> knlGS:0000000000000000
>>>> [    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
>>>> 00000000003406e0
>>>> [    3.916075] Call Trace:
>>>> [    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
>>>> [    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
>>>> [    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
>>>> [    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
>>>> [    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
>>>> [    3.919003]  ? __pm_runtime_resume+0x54/0x70
>>>> [    3.919270] usb 1-2: New USB device found, idVendor=14dd,
>>idProduct=1005,
>>>> bcdDevice= 0.00
>>>> [    3.919498]  local_pci_probe+0x3d/0x90
>>>> [    3.919503]  ? __schedule+0x3de/0x690
>>>> [    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
>>>> SerialNumber=3
>>>> [    3.921137]  work_for_cpu_fn+0x10/0x20
>>>> [    3.922028] usb 1-2: Product: D2CIM-VUSB
>>>> [    3.922815]  process_one_work+0x159/0x3e0
>>>> [    3.923633] usb 1-2: Manufacturer: Raritan
>>>> [    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
>>>> [    3.924416]  worker_thread+0x22b/0x440
>>>> [    3.924419]  ? rescuer_thread+0x350/0x350
>>>> [    3.927812]  kthread+0xf6/0x130
>>>> [    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>>>> [    3.928365]  ? kthread_destroy_worker+0x40/0x40
>>>> [    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
>>>> UDMA/133
>>>> [    3.930101]  ret_from_fork+0x1f/0x30
>>>> [    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
>>>> ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched 
>>>> i2c_algo_bit
>>>> aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper 
>>>> syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+) 
>>>> drm dca mdio
>>>> [    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
>>>> [    3.931085] ata1.00: configured for UDMA/133
>>>> [    3.931809] CR2: 0000000000000008
>>>> [    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100
>>PQ:
>>>> 0 ANSI: 5
>>>>
>>>>
>>>> Thanks!
>>>>
>>>> Kent
>>>>
>>>> -----Original Message-----
>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
>>>> Quan, Evan
>>>> Sent: Monday, May 27, 2019 9:17 PM
>>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before 
>>>> baco reset
>>>>
>>>> Reviewed-by: Evan Quan <evan.quan@amd.com>
>>>>
>>>>> -----Original Message-----
>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
>>>>> Emily Deng
>>>>> Sent: Monday, May 27, 2019 3:43 PM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
>>>>> reset
>>>>>
>>>>> For passthrough, after rebooted the VM, driver will do a baco 
>>>>> reset before doing other driver initialization during loading  driver.
>>>>> For doing the baco reset, it will first check the baco reset capability.
>>>>> So first need to set the cap from the vbios information or baco 
>>>>> reset won't
>>>> be enabled.
>>>>>
>>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24
>++++++++++--
>>---
>>>>> -------
>>>>>   drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>>>>   drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>>>>> ++++++++++++++++++++++
>>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>>>>   5 files changed, 42 insertions(+), 14 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> index b6ded84..7a8c220 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> @@ -1541,6 +1541,17 @@ static int
>>>>> amdgpu_device_ip_early_init(struct
>>>>> amdgpu_device *adev)
>>>>>   	if (amdgpu_sriov_vf(adev))
>>>>>   		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>>>>
>>>>> +	/* Read BIOS */
>>>>> +	if (!amdgpu_get_bios(adev))
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	r = amdgpu_atombios_init(adev);
>>>>> +	if (r) {
>>>>> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>>> +		amdgpu_vf_error_put(adev,
>>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>>> +		return r;
>>>>> +	}
>>>>> +
>>>>>   	for (i = 0; i < adev->num_ip_blocks; i++) {
>>>>>   		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>>>>>   			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
>>>>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device
>>*adev,
>>>>>   		goto fence_driver_init;
>>>>>   	}
>>>>>
>>>>> -	/* Read BIOS */
>>>>> -	if (!amdgpu_get_bios(adev)) {
>>>>> -		r = -EINVAL;
>>>>> -		goto failed;
>>>>> -	}
>>>>> -
>>>>> -	r = amdgpu_atombios_init(adev);
>>>>> -	if (r) {
>>>>> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>>> -		amdgpu_vf_error_put(adev,
>>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>>> -		goto failed;
>>>>> -	}
>>>>> -
>>>>>   	/* detect if we are with an SRIOV vbios */
>>>>>   	amdgpu_device_detect_sriov_bios(adev);
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> index 78bd4fc..d9fdd95 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct 
>>>>> amdgpu_device *adev)
>>>>>   	/* Just return false for soc15 GPUs.  Reset does not seem to
>>>>>   	 * be necessary.
>>>>>   	 */
>>>>> -	return false;
>>>>> +	if (!amdgpu_passthrough(adev))
>>>>> +		return false;
>>>>>
>>>>>   	if (adev->flags & AMD_IS_APU)
>>>>>   		return false;
>>>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> index ce6aeb5..1d9bb29 100644
>>>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func 
>>>>> vega10_hwmgr_funcs = {
>>>>>
>>>>>   int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
>>>>> +	struct amdgpu_device *adev = hwmgr->adev;
>>>>> +
>>>>>   	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>>>>>   	hwmgr->pptable_func = &vega10_pptable_funcs;
>>>>> +	if (amdgpu_passthrough(adev))
>>>>> +		return vega10_baco_set_cap(hwmgr);
>>>>>
>>>>>   	return 0;
>>>>>   }
>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> index b6767d7..83d22cd 100644
>>>>> ---
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> +++
>>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>>>>> pp_hwmgr *hwmgr,
>>>>>
>>>>>   	return result;
>>>>>   }
>>>>> +
>>>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>>>>> +	int result = 0;
>>>>> +
>>>>> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>>>>> +
>>>>> +	powerplay_table = get_powerplay_table(hwmgr);
>>>>> +
>>>>> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>>>>> +		"Missing PowerPlay Table!", return -1);
>>>>> +
>>>>> +	result = check_powerplay_tables(hwmgr, powerplay_table);
>>>>> +
>>>>> +	PP_ASSERT_WITH_CODE((result == 0),
>>>>> +			    "check_powerplay_tables failed", return result);
>>>>> +
>>>>> +	set_hw_cap(
>>>>> +			hwmgr,
>>>>> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
>>>>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>>>>> +			PHM_PlatformCaps_BACO);
>>>>> +	return result;
>>>>> +}
>>>>> +
>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> index d83ed2a..da5fbec 100644
>>>>> ---
>>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> +++
>>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> @@ -59,4 +59,5 @@ extern int
>>>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>>>> *hwmgr);
>>>>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr 
>>>>> *hwmgr, uint32_t entry_index,
>>>>>   		struct pp_power_state *power_state, int
>>>> (*call_back_func)(struct
>>>>> pp_hwmgr *, void *,
>>>>>   				struct pp_power_state *, void *, uint32_t));
>>>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>>>>   #endif
>>>>> --
>>>>> 2.7.4
>>>>>
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]                         ` <MN2PR12MB29750900DC4B43AD00254D888FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2019-06-17 11:08                           ` Russell, Kent
@ 2019-06-17 11:39                           ` Russell, Kent
  1 sibling, 0 replies; 18+ messages in thread
From: Russell, Kent @ 2019-06-17 11:39 UTC (permalink / raw)
  To: Deng, Emily, Yang, Philip, Quan, Evan,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Thanks Emily, this fixed the issue.

Verified-By: Kent Russell <kent.russell@amd.com>

 Kent
-----Original Message-----
From: Deng, Emily <Emily.Deng@amd.com> 
Sent: Sunday, June 16, 2019 11:53 PM
To: Yang, Philip <Philip.Yang@amd.com>; Russell, Kent <Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset

Hi Philip,
     Could you help to try whether the attachment patch could help with the issue encounter? If it is OK, then I will send this patch out to review. 

Best wishes
Emily Deng



>-----Original Message-----
>From: Deng, Emily
>Sent: Monday, June 17, 2019 10:50 AM
>To: Yang, Philip <Philip.Yang@amd.com>; Russell, Kent 
><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd- 
>gfx@lists.freedesktop.org
>Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
>reset
>
>Hi Philip,
>     Sorry for introduce this issue for you. From the code, I couldn't 
>see any issue. And I have tested the code in my Vega10, it is OK.  So I 
>think this is the kfd specific issue, but I couldn't reproduce issue on 
>my platform. Could you create an ticket, and assign to me, and share me 
>your platform, so I could debug it and fix it today.
>
>Best wishes
>Emily Deng
>
>>-----Original Message-----
>>From: Yang, Philip <Philip.Yang@amd.com>
>>Sent: Friday, June 14, 2019 10:16 PM
>>To: Deng, Emily <Emily.Deng@amd.com>; Russell, Kent 
>><Kent.Russell@amd.com>; Quan, Evan <Evan.Quan@amd.com>; amd- 
>>gfx@lists.freedesktop.org
>>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
>>reset
>>
>>Hi Emily,
>>
>>I am not familiar with vbios and driver init part, just based on my 
>>experience, the patch don't modify amdgpu_get_bios but move 
>>amdgpu_get_bios to amdgpu_device_ip_early_init from 
>>amdgpu_device_init, so amdgpu_get_bios is executed earlier. The kernel error message "BUG:
>>kernel NULL pointer dereference" means something is not initialized.
>>Please review the change. This issue blocks rocm release now.
>>
>>Regards,
>>Philip
>>
>>On 2019-06-13 11:19 p.m., Deng, Emily wrote:
>>> Hi Russell,
>>>       This patch will read vbios, and parse vbios to get the baco 
>>> reset feature
>>bit.  From the call trace, it shows error in " amdgpu_get_bios ", but 
>>this patch don't modify amdgpu_get_bios, and code before 
>>amdgpu_get_bios. Please first check why it will has error when read vbios.
>>>
>>> Best wishes
>>> Emily Deng
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: Russell, Kent <Kent.Russell@amd.com>
>>>> Sent: Thursday, June 13, 2019 7:11 PM
>>>> To: Quan, Evan <Evan.Quan@amd.com>; Deng, Emily
>><Emily.Deng@amd.com>;
>>>> amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before 
>>>> baco reset
>>>>
>>>> Hi Emily,
>>>>
>>>> This patch caused a regression on MI25 (Chip 6860, VBIOS
>>>> 113-D0513700-001) machines where the driver would not boot. Note 
>>>> that this was not seen on
>>>> Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64 
>>>> (Chip 697f). Reverting this patch resolved the issue with no other 
>>>> work required and was confirmed on all 3 machines.
>>>>
>>>> Here is the dmesg:
>>>>
>>>> [    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000
>>flags
>>>> 0x20000000] (bogus alignment)
>>>> [    3.908692] BUG: kernel NULL pointer dereference, address:
>>>> 0000000000000008
>>>> [    3.908716] #PF: supervisor read access in kernel mode
>>>> [    3.908734] #PF: error_code(0x0000) - not-present page
>>>> [    3.908753] PGD 0 P4D 0
>>>> [    3.908767] Oops: 0000 [#1] SMP NOPTI
>>>> [    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-
>kfd-
>>>> compute-roc-master-10734 #1
>>>> [    3.909753] Hardware name: Inventec     P47
>>WC2071019001
>>>> /P47             , BIOS 0.64 04/09/2018
>>>> [    3.910534] Workqueue: events work_for_cpu_fn
>>>> [    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
>>>> [    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00
>00
>>00
>>>> 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38 
>>>> <48> 8b 40 08
>>>> 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
>>>> [    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
>>>> [    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
>>>> 0000000000000000
>>>> [    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
>>>> ffffa27dce28fce8
>>>> [    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
>>>> 000000000000079a
>>>> [    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
>>>> ffff88d657af0000
>>>> [    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
>>>> ffff88d657af0000
>>>> [    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
>>>> knlGS:0000000000000000
>>>> [    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
>>>> 00000000003406e0
>>>> [    3.916075] Call Trace:
>>>> [    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
>>>> [    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
>>>> [    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
>>>> [    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
>>>> [    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
>>>> [    3.919003]  ? __pm_runtime_resume+0x54/0x70
>>>> [    3.919270] usb 1-2: New USB device found, idVendor=14dd,
>>idProduct=1005,
>>>> bcdDevice= 0.00
>>>> [    3.919498]  local_pci_probe+0x3d/0x90
>>>> [    3.919503]  ? __schedule+0x3de/0x690
>>>> [    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
>>>> SerialNumber=3
>>>> [    3.921137]  work_for_cpu_fn+0x10/0x20
>>>> [    3.922028] usb 1-2: Product: D2CIM-VUSB
>>>> [    3.922815]  process_one_work+0x159/0x3e0
>>>> [    3.923633] usb 1-2: Manufacturer: Raritan
>>>> [    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
>>>> [    3.924416]  worker_thread+0x22b/0x440
>>>> [    3.924419]  ? rescuer_thread+0x350/0x350
>>>> [    3.927812]  kthread+0xf6/0x130
>>>> [    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
>>>> [    3.928365]  ? kthread_destroy_worker+0x40/0x40
>>>> [    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
>>>> UDMA/133
>>>> [    3.930101]  ret_from_fork+0x1f/0x30
>>>> [    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
>>>> ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched 
>>>> i2c_algo_bit
>>>> aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper 
>>>> syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+) 
>>>> drm dca mdio
>>>> [    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
>>>> [    3.931085] ata1.00: configured for UDMA/133
>>>> [    3.931809] CR2: 0000000000000008
>>>> [    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100
>>PQ:
>>>> 0 ANSI: 5
>>>>
>>>>
>>>> Thanks!
>>>>
>>>> Kent
>>>>
>>>> -----Original Message-----
>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
>>>> Quan, Evan
>>>> Sent: Monday, May 27, 2019 9:17 PM
>>>> To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before 
>>>> baco reset
>>>>
>>>> Reviewed-by: Evan Quan <evan.quan@amd.com>
>>>>
>>>>> -----Original Message-----
>>>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of 
>>>>> Emily Deng
>>>>> Sent: Monday, May 27, 2019 3:43 PM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Cc: Deng, Emily <Emily.Deng@amd.com>
>>>>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco 
>>>>> reset
>>>>>
>>>>> For passthrough, after rebooted the VM, driver will do a baco 
>>>>> reset before doing other driver initialization during loading  driver.
>>>>> For doing the baco reset, it will first check the baco reset capability.
>>>>> So first need to set the cap from the vbios information or baco 
>>>>> reset won't
>>>> be enabled.
>>>>>
>>>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24
>++++++++++--
>>---
>>>>> -------
>>>>>   drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>>>>   drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
>>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>>>>> ++++++++++++++++++++++
>>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>>>>   5 files changed, 42 insertions(+), 14 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> index b6ded84..7a8c220 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> @@ -1541,6 +1541,17 @@ static int
>>>>> amdgpu_device_ip_early_init(struct
>>>>> amdgpu_device *adev)
>>>>>   	if (amdgpu_sriov_vf(adev))
>>>>>   		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
>>>>>
>>>>> +	/* Read BIOS */
>>>>> +	if (!amdgpu_get_bios(adev))
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	r = amdgpu_atombios_init(adev);
>>>>> +	if (r) {
>>>>> +		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>>> +		amdgpu_vf_error_put(adev,
>>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>>> +		return r;
>>>>> +	}
>>>>> +
>>>>>   	for (i = 0; i < adev->num_ip_blocks; i++) {
>>>>>   		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
>>>>>   			DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
>>>>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device
>>*adev,
>>>>>   		goto fence_driver_init;
>>>>>   	}
>>>>>
>>>>> -	/* Read BIOS */
>>>>> -	if (!amdgpu_get_bios(adev)) {
>>>>> -		r = -EINVAL;
>>>>> -		goto failed;
>>>>> -	}
>>>>> -
>>>>> -	r = amdgpu_atombios_init(adev);
>>>>> -	if (r) {
>>>>> -		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>>>>> -		amdgpu_vf_error_put(adev,
>>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
>>>>> -		goto failed;
>>>>> -	}
>>>>> -
>>>>>   	/* detect if we are with an SRIOV vbios */
>>>>>   	amdgpu_device_detect_sriov_bios(adev);
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> index 78bd4fc..d9fdd95 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct 
>>>>> amdgpu_device *adev)
>>>>>   	/* Just return false for soc15 GPUs.  Reset does not seem to
>>>>>   	 * be necessary.
>>>>>   	 */
>>>>> -	return false;
>>>>> +	if (!amdgpu_passthrough(adev))
>>>>> +		return false;
>>>>>
>>>>>   	if (adev->flags & AMD_IS_APU)
>>>>>   		return false;
>>>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> index ce6aeb5..1d9bb29 100644
>>>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>>>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func 
>>>>> vega10_hwmgr_funcs = {
>>>>>
>>>>>   int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
>>>>> +	struct amdgpu_device *adev = hwmgr->adev;
>>>>> +
>>>>>   	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
>>>>>   	hwmgr->pptable_func = &vega10_pptable_funcs;
>>>>> +	if (amdgpu_passthrough(adev))
>>>>> +		return vega10_baco_set_cap(hwmgr);
>>>>>
>>>>>   	return 0;
>>>>>   }
>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> index b6767d7..83d22cd 100644
>>>>> ---
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> +++
>>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>>>>> pp_hwmgr *hwmgr,
>>>>>
>>>>>   	return result;
>>>>>   }
>>>>> +
>>>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>>>>> +	int result = 0;
>>>>> +
>>>>> +	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>>>>> +
>>>>> +	powerplay_table = get_powerplay_table(hwmgr);
>>>>> +
>>>>> +	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>>>>> +		"Missing PowerPlay Table!", return -1);
>>>>> +
>>>>> +	result = check_powerplay_tables(hwmgr, powerplay_table);
>>>>> +
>>>>> +	PP_ASSERT_WITH_CODE((result == 0),
>>>>> +			    "check_powerplay_tables failed", return result);
>>>>> +
>>>>> +	set_hw_cap(
>>>>> +			hwmgr,
>>>>> +			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
>>>>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>>>>> +			PHM_PlatformCaps_BACO);
>>>>> +	return result;
>>>>> +}
>>>>> +
>>>>> diff --git
>>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> index d83ed2a..da5fbec 100644
>>>>> ---
>>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> +++
>>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>>>> @@ -59,4 +59,5 @@ extern int
>>>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>>>> *hwmgr);
>>>>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr 
>>>>> *hwmgr, uint32_t entry_index,
>>>>>   		struct pp_power_state *power_state, int
>>>> (*call_back_func)(struct
>>>>> pp_hwmgr *, void *,
>>>>>   				struct pp_power_state *, void *, uint32_t));
>>>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>>>>   #endif
>>>>> --
>>>>> 2.7.4
>>>>>
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]                             ` <BN6PR12MB16188EBC282A9D8BEA715AB185EB0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-06-17 14:39                               ` Alex Deucher
  0 siblings, 0 replies; 18+ messages in thread
From: Alex Deucher @ 2019-06-17 14:39 UTC (permalink / raw)
  To: Russell, Kent
  Cc: Deng, Emily, Quan, Evan,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Yang, Philip

[-- Attachment #1: Type: text/plain, Size: 16480 bytes --]

On Mon, Jun 17, 2019 at 7:08 AM Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org> wrote:
>
> The issue was limited to one specific model of MI25, I'll see if I can get access to that later today and try your patch out. Thank you Emily!

Where is the crash happening in amdgpu_bios.c?  What pointer is NULL?
Presumably it's one of the asic_funcs?  Can we move the call to after
the common early init?  I think that is cleaner.  Something like the
attached patch?

Alex

>
>  Kent
>
> -----Original Message-----
> From: Deng, Emily <Emily.Deng-5C7GfCeVMHo@public.gmane.org>
> Sent: Sunday, June 16, 2019 11:53 PM
> To: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
>
> Hi Philip,
>      Could you help to try whether the attachment patch could help with the issue encounter? If it is OK, then I will send this patch out to review.
>
> Best wishes
> Emily Deng
>
>
>
> >-----Original Message-----
> >From: Deng, Emily
> >Sent: Monday, June 17, 2019 10:50 AM
> >To: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; Russell, Kent
> ><Kent.Russell-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-
> >gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco
> >reset
> >
> >Hi Philip,
> >     Sorry for introduce this issue for you. From the code, I couldn't
> >see any issue. And I have tested the code in my Vega10, it is OK.  So I
> >think this is the kfd specific issue, but I couldn't reproduce issue on
> >my platform. Could you create an ticket, and assign to me, and share me
> >your platform, so I could debug it and fix it today.
> >
> >Best wishes
> >Emily Deng
> >
> >>-----Original Message-----
> >>From: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
> >>Sent: Friday, June 14, 2019 10:16 PM
> >>To: Deng, Emily <Emily.Deng-5C7GfCeVMHo@public.gmane.org>; Russell, Kent
> >><Kent.Russell-5C7GfCeVMHo@public.gmane.org>; Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; amd-
> >>gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
> >>reset
> >>
> >>Hi Emily,
> >>
> >>I am not familiar with vbios and driver init part, just based on my
> >>experience, the patch don't modify amdgpu_get_bios but move
> >>amdgpu_get_bios to amdgpu_device_ip_early_init from
> >>amdgpu_device_init, so amdgpu_get_bios is executed earlier. The kernel error message "BUG:
> >>kernel NULL pointer dereference" means something is not initialized.
> >>Please review the change. This issue blocks rocm release now.
> >>
> >>Regards,
> >>Philip
> >>
> >>On 2019-06-13 11:19 p.m., Deng, Emily wrote:
> >>> Hi Russell,
> >>>       This patch will read vbios, and parse vbios to get the baco
> >>> reset feature
> >>bit.  From the call trace, it shows error in " amdgpu_get_bios ", but
> >>this patch don't modify amdgpu_get_bios, and code before
> >>amdgpu_get_bios. Please first check why it will has error when read vbios.
> >>>
> >>> Best wishes
> >>> Emily Deng
> >>>
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Russell, Kent <Kent.Russell-5C7GfCeVMHo@public.gmane.org>
> >>>> Sent: Thursday, June 13, 2019 7:11 PM
> >>>> To: Quan, Evan <Evan.Quan-5C7GfCeVMHo@public.gmane.org>; Deng, Emily
> >><Emily.Deng-5C7GfCeVMHo@public.gmane.org>;
> >>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>>> Cc: Deng, Emily <Emily.Deng-5C7GfCeVMHo@public.gmane.org>
> >>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before
> >>>> baco reset
> >>>>
> >>>> Hi Emily,
> >>>>
> >>>> This patch caused a regression on MI25 (Chip 6860, VBIOS
> >>>> 113-D0513700-001) machines where the driver would not boot. Note
> >>>> that this was not seen on
> >>>> Vega10 Frontier (Chip 6863, VBIOS 113-D0501100-X09) or Radeon64
> >>>> (Chip 697f). Reverting this patch resolved the issue with no other
> >>>> work required and was confirmed on all 3 machines.
> >>>>
> >>>> Here is the dmesg:
> >>>>
> >>>> [    3.908653] amdgpu 0000:23:00.0: BAR 6: can't assign [??? 0x00000000
> >>flags
> >>>> 0x20000000] (bogus alignment)
> >>>> [    3.908692] BUG: kernel NULL pointer dereference, address:
> >>>> 0000000000000008
> >>>> [    3.908716] #PF: supervisor read access in kernel mode
> >>>> [    3.908734] #PF: error_code(0x0000) - not-present page
> >>>> [    3.908753] PGD 0 P4D 0
> >>>> [    3.908767] Oops: 0000 [#1] SMP NOPTI
> >>>> [    3.909293] CPU: 8 PID: 409 Comm: kworker/8:1 Not tainted 5.2.0-rc1-
> >kfd-
> >>>> compute-roc-master-10734 #1
> >>>> [    3.909753] Hardware name: Inventec     P47
> >>WC2071019001
> >>>> /P47             , BIOS 0.64 04/09/2018
> >>>> [    3.910534] Workqueue: events work_for_cpu_fn
> >>>> [    3.910953] RIP: 0010:amdgpu_get_bios+0x3aa/0x580 [amdgpu]
> >>>> [    3.911314] Code: c0 48 c7 44 24 5c 00 00 00 00 48 c7 84 24 90 00 00 00
> >00
> >>00
> >>>> 00 00 48 89 d9 48 29 f9 83 c1 3c c1 e9 03 f3 48 ab 49 8b 44 24 38
> >>>> <48> 8b 40 08
> >>>> 48 85 c0 74 24 ba 3c 00 00 00 48 89 de 4c 89 e7 ff d0
> >>>> [    3.912069] RSP: 0018:ffffa27dce28fc50 EFLAGS: 00010212
> >>>> [    3.912502] RAX: 0000000000000000 RBX: ffffa27dce28fcac RCX:
> >>>> 0000000000000000
> >>>> [    3.912980] RDX: 0000000000000000 RSI: 0000000000000082 RDI:
> >>>> ffffa27dce28fce8
> >>>> [    3.913467] RBP: 0000000000000000 R08: 0000000000000001 R09:
> >>>> 000000000000079a
> >>>> [    3.913940] R10: 0000000000000000 R11: 0000000000000001 R12:
> >>>> ffff88d657af0000
> >>>> [    3.914349] R13: ffffffffc0c38120 R14: ffffa27dce28fc68 R15:
> >>>> ffff88d657af0000
> >>>> [    3.914767] FS:  0000000000000000(0000) GS:ffff88d65f400000(0000)
> >>>> knlGS:0000000000000000
> >>>> [    3.915203] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >>>> [    3.915637] CR2: 0000000000000008 CR3: 0000003e7540a000 CR4:
> >>>> 00000000003406e0
> >>>> [    3.916075] Call Trace:
> >>>> [    3.916522]  ? pcie_capability_clear_and_set_word+0x53/0x80
> >>>> [    3.917014]  amdgpu_device_init+0x923/0x1820 [amdgpu]
> >>>> [    3.917515]  amdgpu_driver_load_kms+0x71/0x310 [amdgpu]
> >>>> [    3.917997]  drm_dev_register+0x113/0x1a0 [drm]
> >>>> [    3.918514]  amdgpu_pci_probe+0xb8/0x150 [amdgpu]
> >>>> [    3.919003]  ? __pm_runtime_resume+0x54/0x70
> >>>> [    3.919270] usb 1-2: New USB device found, idVendor=14dd,
> >>idProduct=1005,
> >>>> bcdDevice= 0.00
> >>>> [    3.919498]  local_pci_probe+0x3d/0x90
> >>>> [    3.919503]  ? __schedule+0x3de/0x690
> >>>> [    3.920374] usb 1-2: New USB device strings: Mfr=1, Product=2,
> >>>> SerialNumber=3
> >>>> [    3.921137]  work_for_cpu_fn+0x10/0x20
> >>>> [    3.922028] usb 1-2: Product: D2CIM-VUSB
> >>>> [    3.922815]  process_one_work+0x159/0x3e0
> >>>> [    3.923633] usb 1-2: Manufacturer: Raritan
> >>>> [    3.923635] usb 1-2: SerialNumber: EFFB212D0A6E32F
> >>>> [    3.924416]  worker_thread+0x22b/0x440
> >>>> [    3.924419]  ? rescuer_thread+0x350/0x350
> >>>> [    3.927812]  kthread+0xf6/0x130
> >>>> [    3.928157] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
> >>>> [    3.928365]  ? kthread_destroy_worker+0x40/0x40
> >>>> [    3.929401] ata1.00: ATA-10: INTEL SSDSC2KG960G7, SCV10100, max
> >>>> UDMA/133
> >>>> [    3.930101]  ret_from_fork+0x1f/0x30
> >>>> [    3.930103] Modules linked in: amdgpu(+) crct10dif_pclmul crc32_pclmul
> >>>> ghash_clmulni_intel ast amd_iommu_v2 aesni_intel gpu_sched
> >>>> i2c_algo_bit
> >>>> aes_x86_64 ttm crypto_simd drm_kms_helper cryptd glue_helper
> >>>> syscopyarea sysfillrect ahci sysimgblt libahci fb_sys_fops ixgbe(+)
> >>>> drm dca mdio
> >>>> [    3.930965] ata1.00: 1875385008 sectors, multi 1: LBA48 NCQ (depth 32)
> >>>> [    3.931085] ata1.00: configured for UDMA/133
> >>>> [    3.931809] CR2: 0000000000000008
> >>>> [    3.934723] scsi 0:0:0:0: Direct-Access     ATA      INTEL SSDSC2KG96 0100
> >>PQ:
> >>>> 0 ANSI: 5
> >>>>
> >>>>
> >>>> Thanks!
> >>>>
> >>>> Kent
> >>>>
> >>>> -----Original Message-----
> >>>> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of
> >>>> Quan, Evan
> >>>> Sent: Monday, May 27, 2019 9:17 PM
> >>>> To: Deng, Emily <Emily.Deng-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>>> Cc: Deng, Emily <Emily.Deng-5C7GfCeVMHo@public.gmane.org>
> >>>> Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before
> >>>> baco reset
> >>>>
> >>>> Reviewed-by: Evan Quan <evan.quan-5C7GfCeVMHo@public.gmane.org>
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of
> >>>>> Emily Deng
> >>>>> Sent: Monday, May 27, 2019 3:43 PM
> >>>>> To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>>>> Cc: Deng, Emily <Emily.Deng-5C7GfCeVMHo@public.gmane.org>
> >>>>> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco
> >>>>> reset
> >>>>>
> >>>>> For passthrough, after rebooted the VM, driver will do a baco
> >>>>> reset before doing other driver initialization during loading  driver.
> >>>>> For doing the baco reset, it will first check the baco reset capability.
> >>>>> So first need to set the cap from the vbios information or baco
> >>>>> reset won't
> >>>> be enabled.
> >>>>>
> >>>>> Signed-off-by: Emily Deng <Emily.Deng-5C7GfCeVMHo@public.gmane.org>
> >>>>> ---
> >>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         | 24
> >++++++++++--
> >>---
> >>>>> -------
> >>>>>   drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
> >>>>>   drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  4 ++++
> >>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
> >>>>> ++++++++++++++++++++++
> >>>>>   .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
> >>>>>   5 files changed, 42 insertions(+), 14 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>>> index b6ded84..7a8c220 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>>>> @@ -1541,6 +1541,17 @@ static int
> >>>>> amdgpu_device_ip_early_init(struct
> >>>>> amdgpu_device *adev)
> >>>>>           if (amdgpu_sriov_vf(adev))
> >>>>>                   adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
> >>>>>
> >>>>> + /* Read BIOS */
> >>>>> + if (!amdgpu_get_bios(adev))
> >>>>> +         return -EINVAL;
> >>>>> +
> >>>>> + r = amdgpu_atombios_init(adev);
> >>>>> + if (r) {
> >>>>> +         dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> >>>>> +         amdgpu_vf_error_put(adev,
> >>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
> >>>>> +         return r;
> >>>>> + }
> >>>>> +
> >>>>>           for (i = 0; i < adev->num_ip_blocks; i++) {
> >>>>>                   if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
> >>>>>                           DRM_ERROR("disabled ip block: %d <%s>\n", @@ -
> >>>>> 2591,19 +2602,6 @@ int amdgpu_device_init(struct amdgpu_device
> >>*adev,
> >>>>>                   goto fence_driver_init;
> >>>>>           }
> >>>>>
> >>>>> - /* Read BIOS */
> >>>>> - if (!amdgpu_get_bios(adev)) {
> >>>>> -         r = -EINVAL;
> >>>>> -         goto failed;
> >>>>> - }
> >>>>> -
> >>>>> - r = amdgpu_atombios_init(adev);
> >>>>> - if (r) {
> >>>>> -         dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> >>>>> -         amdgpu_vf_error_put(adev,
> >>>>> AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
> >>>>> -         goto failed;
> >>>>> - }
> >>>>> -
> >>>>>           /* detect if we are with an SRIOV vbios */
> >>>>>           amdgpu_device_detect_sriov_bios(adev);
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>>>> index 78bd4fc..d9fdd95 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >>>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
> >>>>> amdgpu_device *adev)
> >>>>>           /* Just return false for soc15 GPUs.  Reset does not seem to
> >>>>>            * be necessary.
> >>>>>            */
> >>>>> - return false;
> >>>>> + if (!amdgpu_passthrough(adev))
> >>>>> +         return false;
> >>>>>
> >>>>>           if (adev->flags & AMD_IS_APU)
> >>>>>                   return false;
> >>>>> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >>>>> index ce6aeb5..1d9bb29 100644
> >>>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >>>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >>>>> @@ -5311,8 +5311,12 @@ static const struct pp_hwmgr_func
> >>>>> vega10_hwmgr_funcs = {
> >>>>>
> >>>>>   int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)  {
> >>>>> + struct amdgpu_device *adev = hwmgr->adev;
> >>>>> +
> >>>>>           hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
> >>>>>           hwmgr->pptable_func = &vega10_pptable_funcs;
> >>>>> + if (amdgpu_passthrough(adev))
> >>>>> +         return vega10_baco_set_cap(hwmgr);
> >>>>>
> >>>>>           return 0;
> >>>>>   }
> >>>>> diff --git
> >>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >>>>> index b6767d7..83d22cd 100644
> >>>>> ---
> >a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >>>>> +++
> >>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >>>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
> >>>>> pp_hwmgr *hwmgr,
> >>>>>
> >>>>>           return result;
> >>>>>   }
> >>>>> +
> >>>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
> >>>>> + int result = 0;
> >>>>> +
> >>>>> + const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
> >>>>> +
> >>>>> + powerplay_table = get_powerplay_table(hwmgr);
> >>>>> +
> >>>>> + PP_ASSERT_WITH_CODE((powerplay_table != NULL),
> >>>>> +         "Missing PowerPlay Table!", return -1);
> >>>>> +
> >>>>> + result = check_powerplay_tables(hwmgr, powerplay_table);
> >>>>> +
> >>>>> + PP_ASSERT_WITH_CODE((result == 0),
> >>>>> +                     "check_powerplay_tables failed", return result);
> >>>>> +
> >>>>> + set_hw_cap(
> >>>>> +                 hwmgr,
> >>>>> +                 0 != (le32_to_cpu(powerplay_table->ulPlatformCaps)
> >>>>> & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
> >>>>> +                 PHM_PlatformCaps_BACO);
> >>>>> + return result;
> >>>>> +}
> >>>>> +
> >>>>> diff --git
> >>>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >>>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >>>>> index d83ed2a..da5fbec 100644
> >>>>> ---
> >>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >>>>> +++
> >>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >>>>> @@ -59,4 +59,5 @@ extern int
> >>>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
> >>>> *hwmgr);
> >>>>> extern int vega10_get_powerplay_table_entry(struct pp_hwmgr
> >>>>> *hwmgr, uint32_t entry_index,
> >>>>>                   struct pp_power_state *power_state, int
> >>>> (*call_back_func)(struct
> >>>>> pp_hwmgr *, void *,
> >>>>>                                   struct pp_power_state *, void *, uint32_t));
> >>>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
> >>>>>   #endif
> >>>>> --
> >>>>> 2.7.4
> >>>>>
> >>>>> _______________________________________________
> >>>>> amd-gfx mailing list
> >>>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>> _______________________________________________
> >>>> amd-gfx mailing list
> >>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>> _______________________________________________
> >>> amd-gfx mailing list
> >>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #2: 0001-drm-amdgpu-wait-to-fetch-the-vbios-until-after-commo.patch --]
[-- Type: text/x-patch, Size: 1900 bytes --]

From 1f48e705c30e7dc1994bcbdbe05c2e5d396dc6b0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 17 Jun 2019 09:37:45 -0500
Subject: [PATCH] drm/amdgpu: wait to fetch the vbios until after common init

We need the asic_funcs set for the get rom callbacks in some
cases.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 24 ++++++++++++----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4a836db1000f..e26303bc567e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1534,17 +1534,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
-	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
-
-	r = amdgpu_atombios_init(adev);
-	if (r) {
-		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
-		return r;
-	}
-
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1566,6 +1555,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 				adev->ip_blocks[i].status.valid = true;
 			}
 		}
+		/* get the vbios after the asic_funcs are set up */
+		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
+			/* Read BIOS */
+			if (!amdgpu_get_bios(adev))
+				return -EINVAL;
+
+			r = amdgpu_atombios_init(adev);
+			if (r) {
+				dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+				return r;
+			}
+		}
 	}
 
 	adev->cg_flags &= amdgpu_cg_mask;
-- 
2.20.1


[-- Attachment #3: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]             ` <CADnq5_Of2+kLtG3tRWkJXG3Ghbs9VFu8g4mDbKvdD7cXyuysRA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-05-27  2:57               ` Deng, Emily
  0 siblings, 0 replies; 18+ messages in thread
From: Deng, Emily @ 2019-05-27  2:57 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx list



>-----Original Message-----
>From: Alex Deucher <alexdeucher@gmail.com>
>Sent: Saturday, May 25, 2019 12:59 AM
>To: Deng, Emily <Emily.Deng@amd.com>
>Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>reset
>
>[CAUTION: External Email]
>
>On Thu, May 23, 2019 at 10:29 PM Deng, Emily <Emily.Deng@amd.com> wrote:
>>
>>
>>
>> >-----Original Message-----
>> >From: Alex Deucher <alexdeucher@gmail.com>
>> >Sent: Friday, May 24, 2019 12:09 AM
>> >To: Deng, Emily <Emily.Deng@amd.com>
>> >Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
>> >Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>> >reset
>> >
>> >[CAUTION: External Email]
>> >
>> >On Thu, May 23, 2019 at 6:22 AM Emily Deng <Emily.Deng@amd.com>
>wrote:
>> >>
>> >> For passthrough, after rebooted the VM, driver will do a baco reset
>> >> before doing other driver initialization during loading  driver.
>> >> For doing the baco reset, it will first check the baco reset capability.
>> >> So first need to set the cap from the vbios information or baco
>> >> reset won't be enabled.
>> >>
>> >> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>> >> ---
>> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
>> >>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>> >>  drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
>> >>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16
>> >+++++++++++++++
>> >>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
>> >>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>> >++++++++++++++++++++++
>> >>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>> >>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
>> >>  8 files changed, 54 insertions(+), 1 deletion(-)
>> >>
>> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >> index bdd1fe73..2dde672 100644
>> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> >> @@ -2611,6 +2611,14 @@ int amdgpu_device_init(struct
>amdgpu_device
>> >*adev,
>> >>          *  E.g., driver was not cleanly unloaded previously, etc.
>> >>          */
>> >>         if (!amdgpu_sriov_vf(adev) &&
>> >> amdgpu_asic_need_reset_on_init(adev)) {
>> >> +               if (amdgpu_passthrough(adev) &&
>> >> + adev->powerplay.pp_funcs &&
>> >adev->powerplay.pp_funcs->set_asic_baco_cap) {
>> >> +                       r =
>> >> + adev->powerplay.pp_funcs->set_asic_baco_cap(adev-
>> >>powerplay.pp_handle);
>> >> +                       if (r) {
>> >> +                               dev_err(adev->dev, "set baco capability failed\n");
>> >> +                               goto failed;
>> >> +                       }
>> >> +               }
>> >> +
>> >
>> >I think it would be cleaner to add this to hwmgr_early_init() or
>> >something called from early init for powerplay.
>> I  also preferred to put it in the hwmgr_early_init, but as the function
>set_asic_baco_cap  need to get the vbios info,  so need to put the
>amdgpu_get_bios before early init. If so the code changes too big.
>
>I think this change is all you need:
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index bdd1fe73f14b..952f61e28d42 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -2564,6 +2564,12 @@ int amdgpu_device_init(struct amdgpu_device
>*adev,
>
>        amdgpu_device_get_pcie_info(adev);
>
>+       /* Read BIOS */
>+       if (!amdgpu_get_bios(adev)) {
>+               r = -EINVAL;
>+               goto failed;
>+       }
>+
>        /* early init functions */
>        r = amdgpu_device_ip_early_init(adev);
>        if (r)
>@@ -2591,12 +2597,6 @@ int amdgpu_device_init(struct amdgpu_device
>*adev,
>                goto fence_driver_init;
>        }
>
>-       /* Read BIOS */
>-       if (!amdgpu_get_bios(adev)) {
>-               r = -EINVAL;
>-               goto failed;
>-       }
>-
>        r = amdgpu_atombios_init(adev);
>        if (r) {
>                dev_err(adev->dev, "amdgpu_atombios_init failed\n");
>
>I guess that could be a follow up change if you want.
>
>Alex
Thanks, will modify the change as your suggestion.

Best wishes
Emily Deng


>
>> >
>> >Alex
>> >
>> >>                 r = amdgpu_asic_reset(adev);
>> >>                 if (r) {
>> >>                         dev_err(adev->dev, "asic reset on init
>> >> failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> >> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> >> index 78bd4fc..d9fdd95 100644
>> >> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> >> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> >> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>> >amdgpu_device *adev)
>> >>         /* Just return false for soc15 GPUs.  Reset does not seem to
>> >>          * be necessary.
>> >>          */
>> >> -       return false;
>> >> +       if (!amdgpu_passthrough(adev))
>> >> +               return false;
>> >>
>> >>         if (adev->flags & AMD_IS_APU)
>> >>                 return false;
>> >> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> >> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> >> index 9f661bf..c6e2a51 100644
>> >> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> >> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> >> @@ -296,6 +296,7 @@ struct amd_pm_funcs {
>> >>         int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
>> >>         int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
>> >>         int (*get_asic_baco_capability)(void *handle, bool *cap);
>> >> +       int (*set_asic_baco_cap)(void *handle);
>> >>         int (*get_asic_baco_state)(void *handle, int *state);
>> >>         int (*set_asic_baco_state)(void *handle, int state);
>> >>         int (*get_ppfeature_status)(void *handle, char *buf); diff
>> >> --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> >> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> >> index bea1587..9856760 100644
>> >> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> >> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> >> @@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void
>> >*handle, uint32_t count)
>> >>         return ret;
>> >>  }
>> >>
>> >> +static int pp_set_asic_baco_cap(void *handle) {
>> >> +       struct pp_hwmgr *hwmgr = handle;
>> >> +
>> >> +       if (!hwmgr)
>> >> +               return -EINVAL;
>> >> +
>> >> +       if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
>> >> +               return 0;
>> >> +
>> >> +       hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
>> >> +
>> >> +       return 0;
>> >> +}
>> >> +
>> >>  static int pp_get_asic_baco_capability(void *handle, bool *cap)  {
>> >>         struct pp_hwmgr *hwmgr = handle; @@ -1546,6 +1561,7 @@
>> >> static const struct amd_pm_funcs pp_dpm_funcs = {
>> >>         .set_hard_min_dcefclk_by_freq =
>pp_set_hard_min_dcefclk_by_freq,
>> >>         .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
>> >>         .get_asic_baco_capability = pp_get_asic_baco_capability,
>> >> +       .set_asic_baco_cap = pp_set_asic_baco_cap,
>> >>         .get_asic_baco_state = pp_get_asic_baco_state,
>> >>         .set_asic_baco_state = pp_set_asic_baco_state,
>> >>         .get_ppfeature_status = pp_get_ppfeature_status, diff --git
>> >> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> >> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> >> index ce6aeb5..e5bcbc8 100644
>> >> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> >> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> >> @@ -5302,6 +5302,7 @@ static const struct pp_hwmgr_func
>> >vega10_hwmgr_funcs = {
>> >>         .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
>> >>         .get_performance_level = vega10_get_performance_level,
>> >>         .get_asic_baco_capability = smu9_baco_get_capability,
>> >> +       .set_asic_baco_cap = vega10_baco_set_cap,
>> >>         .get_asic_baco_state = smu9_baco_get_state,
>> >>         .set_asic_baco_state = vega10_baco_set_state,
>> >>         .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, diff
>> >> --git
>> >> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> >> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> >> index b6767d7..83d22cd 100644
>> >> ---
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> >> +++
>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> >> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>> >> pp_hwmgr *hwmgr,
>> >>
>> >>         return result;
>> >>  }
>> >> +
>> >> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>> >> +       int result = 0;
>> >> +
>> >> +       const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>> >> +
>> >> +       powerplay_table = get_powerplay_table(hwmgr);
>> >> +
>> >> +       PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>> >> +               "Missing PowerPlay Table!", return -1);
>> >> +
>> >> +       result = check_powerplay_tables(hwmgr, powerplay_table);
>> >> +
>> >> +       PP_ASSERT_WITH_CODE((result == 0),
>> >> +                           "check_powerplay_tables failed", return
>> >> + result);
>> >> +
>> >> +       set_hw_cap(
>> >> +                       hwmgr,
>> >> +                       0 !=
>> >> + (le32_to_cpu(powerplay_table->ulPlatformCaps) &
>> >ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>> >> +                       PHM_PlatformCaps_BACO);
>> >> +       return result;
>> >> +}
>> >> +
>> >> diff --git
>> >> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> >> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> >> index d83ed2a..da5fbec 100644
>> >> ---
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> >> +++
>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> >> @@ -59,4 +59,5 @@ extern int
>> >> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>> >*hwmgr);  extern int vega10_get_powerplay_table_entry(struct
>pp_hwmgr
>> >*hwmgr, uint32_t entry_index,
>> >>                 struct pp_power_state *power_state, int
>> >> (*call_back_func)(struct
>> >pp_hwmgr *, void *,
>> >>                                 struct pp_power_state *, void *,
>> >> uint32_t));
>> >> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>> >>  #endif
>> >> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> >> b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> >> index bac3d85..14480ae 100644
>> >> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> >> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> >> @@ -339,6 +339,7 @@ struct pp_hwmgr_func {
>> >>         int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr,
>> >uint32_t clock);
>> >>         int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr,
>> >> uint32_t
>> >clock);
>> >>         int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr,
>> >> bool *cap);
>> >> +       int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
>> >>         int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
>> >BACO_STATE *state);
>> >>         int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
>> >BACO_STATE state);
>> >>         int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char
>> >> *buf);
>> >> --
>> >> 2.7.4
>> >>
>> >> _______________________________________________
>> >> amd-gfx mailing list
>> >> amd-gfx@lists.freedesktop.org
>> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]         ` <MWHPR12MB132625FCE9DF15D24746CFD28F020-Gy0DoCVfaSVaj5rVPFIlogdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2019-05-24  8:28           ` Deng, Emily
@ 2019-05-24 16:59           ` Alex Deucher
       [not found]             ` <CADnq5_Of2+kLtG3tRWkJXG3Ghbs9VFu8g4mDbKvdD7cXyuysRA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 18+ messages in thread
From: Alex Deucher @ 2019-05-24 16:59 UTC (permalink / raw)
  To: Deng, Emily; +Cc: amd-gfx list

On Thu, May 23, 2019 at 10:29 PM Deng, Emily <Emily.Deng@amd.com> wrote:
>
>
>
> >-----Original Message-----
> >From: Alex Deucher <alexdeucher@gmail.com>
> >Sent: Friday, May 24, 2019 12:09 AM
> >To: Deng, Emily <Emily.Deng@amd.com>
> >Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
> >Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
> >reset
> >
> >[CAUTION: External Email]
> >
> >On Thu, May 23, 2019 at 6:22 AM Emily Deng <Emily.Deng@amd.com> wrote:
> >>
> >> For passthrough, after rebooted the VM, driver will do a baco reset
> >> before doing other driver initialization during loading  driver. For
> >> doing the baco reset, it will first check the baco reset capability.
> >> So first need to set the cap from the vbios information or baco reset
> >> won't be enabled.
> >>
> >> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
> >>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
> >>  drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
> >>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16
> >+++++++++++++++
> >>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
> >>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
> >++++++++++++++++++++++
> >>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
> >>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
> >>  8 files changed, 54 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index bdd1fe73..2dde672 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -2611,6 +2611,14 @@ int amdgpu_device_init(struct amdgpu_device
> >*adev,
> >>          *  E.g., driver was not cleanly unloaded previously, etc.
> >>          */
> >>         if (!amdgpu_sriov_vf(adev) &&
> >> amdgpu_asic_need_reset_on_init(adev)) {
> >> +               if (amdgpu_passthrough(adev) && adev->powerplay.pp_funcs &&
> >adev->powerplay.pp_funcs->set_asic_baco_cap) {
> >> +                       r = adev->powerplay.pp_funcs->set_asic_baco_cap(adev-
> >>powerplay.pp_handle);
> >> +                       if (r) {
> >> +                               dev_err(adev->dev, "set baco capability failed\n");
> >> +                               goto failed;
> >> +                       }
> >> +               }
> >> +
> >
> >I think it would be cleaner to add this to hwmgr_early_init() or something
> >called from early init for powerplay.
> I  also preferred to put it in the hwmgr_early_init, but as the function set_asic_baco_cap  need to get the vbios info,  so need to put the amdgpu_get_bios before early init. If so the code changes too big.

I think this change is all you need:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bdd1fe73f14b..952f61e28d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2564,6 +2564,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,

        amdgpu_device_get_pcie_info(adev);

+       /* Read BIOS */
+       if (!amdgpu_get_bios(adev)) {
+               r = -EINVAL;
+               goto failed;
+       }
+
        /* early init functions */
        r = amdgpu_device_ip_early_init(adev);
        if (r)
@@ -2591,12 +2597,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                goto fence_driver_init;
        }

-       /* Read BIOS */
-       if (!amdgpu_get_bios(adev)) {
-               r = -EINVAL;
-               goto failed;
-       }
-
        r = amdgpu_atombios_init(adev);
        if (r) {
                dev_err(adev->dev, "amdgpu_atombios_init failed\n");

I guess that could be a follow up change if you want.

Alex

> >
> >Alex
> >
> >>                 r = amdgpu_asic_reset(adev);
> >>                 if (r) {
> >>                         dev_err(adev->dev, "asic reset on init
> >> failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> >> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >> index 78bd4fc..d9fdd95 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> >> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
> >amdgpu_device *adev)
> >>         /* Just return false for soc15 GPUs.  Reset does not seem to
> >>          * be necessary.
> >>          */
> >> -       return false;
> >> +       if (!amdgpu_passthrough(adev))
> >> +               return false;
> >>
> >>         if (adev->flags & AMD_IS_APU)
> >>                 return false;
> >> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >> index 9f661bf..c6e2a51 100644
> >> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> >> @@ -296,6 +296,7 @@ struct amd_pm_funcs {
> >>         int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
> >>         int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
> >>         int (*get_asic_baco_capability)(void *handle, bool *cap);
> >> +       int (*set_asic_baco_cap)(void *handle);
> >>         int (*get_asic_baco_state)(void *handle, int *state);
> >>         int (*set_asic_baco_state)(void *handle, int state);
> >>         int (*get_ppfeature_status)(void *handle, char *buf); diff
> >> --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> >> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> >> index bea1587..9856760 100644
> >> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> >> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> >> @@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void
> >*handle, uint32_t count)
> >>         return ret;
> >>  }
> >>
> >> +static int pp_set_asic_baco_cap(void *handle) {
> >> +       struct pp_hwmgr *hwmgr = handle;
> >> +
> >> +       if (!hwmgr)
> >> +               return -EINVAL;
> >> +
> >> +       if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
> >> +               return 0;
> >> +
> >> +       hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
> >> +
> >> +       return 0;
> >> +}
> >> +
> >>  static int pp_get_asic_baco_capability(void *handle, bool *cap)  {
> >>         struct pp_hwmgr *hwmgr = handle; @@ -1546,6 +1561,7 @@ static
> >> const struct amd_pm_funcs pp_dpm_funcs = {
> >>         .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
> >>         .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
> >>         .get_asic_baco_capability = pp_get_asic_baco_capability,
> >> +       .set_asic_baco_cap = pp_set_asic_baco_cap,
> >>         .get_asic_baco_state = pp_get_asic_baco_state,
> >>         .set_asic_baco_state = pp_set_asic_baco_state,
> >>         .get_ppfeature_status = pp_get_ppfeature_status, diff --git
> >> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >> index ce6aeb5..e5bcbc8 100644
> >> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> >> @@ -5302,6 +5302,7 @@ static const struct pp_hwmgr_func
> >vega10_hwmgr_funcs = {
> >>         .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
> >>         .get_performance_level = vega10_get_performance_level,
> >>         .get_asic_baco_capability = smu9_baco_get_capability,
> >> +       .set_asic_baco_cap = vega10_baco_set_cap,
> >>         .get_asic_baco_state = smu9_baco_get_state,
> >>         .set_asic_baco_state = vega10_baco_set_state,
> >>         .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, diff
> >> --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >> index b6767d7..83d22cd 100644
> >> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> >> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
> >> pp_hwmgr *hwmgr,
> >>
> >>         return result;
> >>  }
> >> +
> >> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
> >> +       int result = 0;
> >> +
> >> +       const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
> >> +
> >> +       powerplay_table = get_powerplay_table(hwmgr);
> >> +
> >> +       PP_ASSERT_WITH_CODE((powerplay_table != NULL),
> >> +               "Missing PowerPlay Table!", return -1);
> >> +
> >> +       result = check_powerplay_tables(hwmgr, powerplay_table);
> >> +
> >> +       PP_ASSERT_WITH_CODE((result == 0),
> >> +                           "check_powerplay_tables failed", return
> >> + result);
> >> +
> >> +       set_hw_cap(
> >> +                       hwmgr,
> >> +                       0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) &
> >ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
> >> +                       PHM_PlatformCaps_BACO);
> >> +       return result;
> >> +}
> >> +
> >> diff --git
> >> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >> index d83ed2a..da5fbec 100644
> >> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> >> @@ -59,4 +59,5 @@ extern int
> >> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
> >*hwmgr);  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr
> >*hwmgr, uint32_t entry_index,
> >>                 struct pp_power_state *power_state, int (*call_back_func)(struct
> >pp_hwmgr *, void *,
> >>                                 struct pp_power_state *, void *,
> >> uint32_t));
> >> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
> >>  #endif
> >> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> >> b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> >> index bac3d85..14480ae 100644
> >> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> >> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> >> @@ -339,6 +339,7 @@ struct pp_hwmgr_func {
> >>         int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr,
> >uint32_t clock);
> >>         int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t
> >clock);
> >>         int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool
> >> *cap);
> >> +       int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
> >>         int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
> >BACO_STATE *state);
> >>         int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
> >BACO_STATE state);
> >>         int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char
> >> *buf);
> >> --
> >> 2.7.4
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]         ` <MWHPR12MB132625FCE9DF15D24746CFD28F020-Gy0DoCVfaSVaj5rVPFIlogdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2019-05-24  8:28           ` Deng, Emily
  2019-05-24 16:59           ` Alex Deucher
  1 sibling, 0 replies; 18+ messages in thread
From: Deng, Emily @ 2019-05-24  8:28 UTC (permalink / raw)
  To: Deng, Emily, Alex Deucher; +Cc: amd-gfx list

Ping ......

Best wishes
Emily Deng
>-----Original Message-----
>From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Deng,
>Emily
>Sent: Friday, May 24, 2019 10:29 AM
>To: Alex Deucher <alexdeucher@gmail.com>
>Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
>Subject: RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>reset
>
>[CAUTION: External Email]
>
>>-----Original Message-----
>>From: Alex Deucher <alexdeucher@gmail.com>
>>Sent: Friday, May 24, 2019 12:09 AM
>>To: Deng, Emily <Emily.Deng@amd.com>
>>Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
>>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>>reset
>>
>>[CAUTION: External Email]
>>
>>On Thu, May 23, 2019 at 6:22 AM Emily Deng <Emily.Deng@amd.com> wrote:
>>>
>>> For passthrough, after rebooted the VM, driver will do a baco reset
>>> before doing other driver initialization during loading  driver. For
>>> doing the baco reset, it will first check the baco reset capability.
>>> So first need to set the cap from the vbios information or baco reset
>>> won't be enabled.
>>>
>>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
>>>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>>  drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
>>>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16
>>+++++++++++++++
>>>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
>>>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>>++++++++++++++++++++++
>>>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
>>>  8 files changed, 54 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index bdd1fe73..2dde672 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2611,6 +2611,14 @@ int amdgpu_device_init(struct amdgpu_device
>>*adev,
>>>          *  E.g., driver was not cleanly unloaded previously, etc.
>>>          */
>>>         if (!amdgpu_sriov_vf(adev) &&
>>> amdgpu_asic_need_reset_on_init(adev)) {
>>> +               if (amdgpu_passthrough(adev) &&
>>> + adev->powerplay.pp_funcs &&
>>adev->powerplay.pp_funcs->set_asic_baco_cap) {
>>> +                       r =
>>> + adev->powerplay.pp_funcs->set_asic_baco_cap(adev-
>>>powerplay.pp_handle);
>>> +                       if (r) {
>>> +                               dev_err(adev->dev, "set baco capability failed\n");
>>> +                               goto failed;
>>> +                       }
>>> +               }
>>> +
>>
>>I think it would be cleaner to add this to hwmgr_early_init() or
>>something called from early init for powerplay.
>I  also preferred to put it in the hwmgr_early_init, but as the function
>set_asic_baco_cap  need to get the vbios info,  so need to put the
>amdgpu_get_bios before early init. If so the code changes too big.
>>
>>Alex
>>
>>>                 r = amdgpu_asic_reset(adev);
>>>                 if (r) {
>>>                         dev_err(adev->dev, "asic reset on init
>>> failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> index 78bd4fc..d9fdd95 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>>amdgpu_device *adev)
>>>         /* Just return false for soc15 GPUs.  Reset does not seem to
>>>          * be necessary.
>>>          */
>>> -       return false;
>>> +       if (!amdgpu_passthrough(adev))
>>> +               return false;
>>>
>>>         if (adev->flags & AMD_IS_APU)
>>>                 return false;
>>> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> index 9f661bf..c6e2a51 100644
>>> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>>> @@ -296,6 +296,7 @@ struct amd_pm_funcs {
>>>         int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
>>>         int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
>>>         int (*get_asic_baco_capability)(void *handle, bool *cap);
>>> +       int (*set_asic_baco_cap)(void *handle);
>>>         int (*get_asic_baco_state)(void *handle, int *state);
>>>         int (*set_asic_baco_state)(void *handle, int state);
>>>         int (*get_ppfeature_status)(void *handle, char *buf); diff
>>> --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>>> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>>> index bea1587..9856760 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>>> @@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void
>>*handle, uint32_t count)
>>>         return ret;
>>>  }
>>>
>>> +static int pp_set_asic_baco_cap(void *handle) {
>>> +       struct pp_hwmgr *hwmgr = handle;
>>> +
>>> +       if (!hwmgr)
>>> +               return -EINVAL;
>>> +
>>> +       if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
>>> +               return 0;
>>> +
>>> +       hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
>>> +
>>> +       return 0;
>>> +}
>>> +
>>>  static int pp_get_asic_baco_capability(void *handle, bool *cap)  {
>>>         struct pp_hwmgr *hwmgr = handle; @@ -1546,6 +1561,7 @@ static
>>> const struct amd_pm_funcs pp_dpm_funcs = {
>>>         .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
>>>         .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
>>>         .get_asic_baco_capability = pp_get_asic_baco_capability,
>>> +       .set_asic_baco_cap = pp_set_asic_baco_cap,
>>>         .get_asic_baco_state = pp_get_asic_baco_state,
>>>         .set_asic_baco_state = pp_set_asic_baco_state,
>>>         .get_ppfeature_status = pp_get_ppfeature_status, diff --git
>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> index ce6aeb5..e5bcbc8 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>>> @@ -5302,6 +5302,7 @@ static const struct pp_hwmgr_func
>>vega10_hwmgr_funcs = {
>>>         .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
>>>         .get_performance_level = vega10_get_performance_level,
>>>         .get_asic_baco_capability = smu9_baco_get_capability,
>>> +       .set_asic_baco_cap = vega10_baco_set_cap,
>>>         .get_asic_baco_state = smu9_baco_get_state,
>>>         .set_asic_baco_state = vega10_baco_set_state,
>>>         .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, diff
>>> --git
>a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> index b6767d7..83d22cd 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> +++
>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>>> pp_hwmgr *hwmgr,
>>>
>>>         return result;
>>>  }
>>> +
>>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>>> +       int result = 0;
>>> +
>>> +       const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>>> +
>>> +       powerplay_table = get_powerplay_table(hwmgr);
>>> +
>>> +       PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>>> +               "Missing PowerPlay Table!", return -1);
>>> +
>>> +       result = check_powerplay_tables(hwmgr, powerplay_table);
>>> +
>>> +       PP_ASSERT_WITH_CODE((result == 0),
>>> +                           "check_powerplay_tables failed", return
>>> + result);
>>> +
>>> +       set_hw_cap(
>>> +                       hwmgr,
>>> +                       0 !=
>>> + (le32_to_cpu(powerplay_table->ulPlatformCaps) &
>>ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>>> +                       PHM_PlatformCaps_BACO);
>>> +       return result;
>>> +}
>>> +
>>> diff --git
>>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> index d83ed2a..da5fbec 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> +++
>b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>>> @@ -59,4 +59,5 @@ extern int
>>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>>*hwmgr);  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr
>>*hwmgr, uint32_t entry_index,
>>>                 struct pp_power_state *power_state, int
>>> (*call_back_func)(struct
>>pp_hwmgr *, void *,
>>>                                 struct pp_power_state *, void *,
>>> uint32_t));
>>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>>  #endif
>>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>>> b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>>> index bac3d85..14480ae 100644
>>> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>>> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>>> @@ -339,6 +339,7 @@ struct pp_hwmgr_func {
>>>         int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr,
>>uint32_t clock);
>>>         int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr,
>>> uint32_t
>>clock);
>>>         int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool
>>> *cap);
>>> +       int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
>>>         int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
>>BACO_STATE *state);
>>>         int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
>>BACO_STATE state);
>>>         int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char
>>> *buf);
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>_______________________________________________
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found]     ` <CADnq5_NhONFSC2C=0npCx4AKGKAphAsWq1Kq7jZhmeY+ODQ6-Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-05-24  2:29       ` Deng, Emily
       [not found]         ` <MWHPR12MB132625FCE9DF15D24746CFD28F020-Gy0DoCVfaSVaj5rVPFIlogdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Deng, Emily @ 2019-05-24  2:29 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx list



>-----Original Message-----
>From: Alex Deucher <alexdeucher@gmail.com>
>Sent: Friday, May 24, 2019 12:09 AM
>To: Deng, Emily <Emily.Deng@amd.com>
>Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>
>Subject: Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco
>reset
>
>[CAUTION: External Email]
>
>On Thu, May 23, 2019 at 6:22 AM Emily Deng <Emily.Deng@amd.com> wrote:
>>
>> For passthrough, after rebooted the VM, driver will do a baco reset
>> before doing other driver initialization during loading  driver. For
>> doing the baco reset, it will first check the baco reset capability.
>> So first need to set the cap from the vbios information or baco reset
>> won't be enabled.
>>
>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
>>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>>  drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
>>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16
>+++++++++++++++
>>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
>>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24
>++++++++++++++++++++++
>>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
>>  8 files changed, 54 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index bdd1fe73..2dde672 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2611,6 +2611,14 @@ int amdgpu_device_init(struct amdgpu_device
>*adev,
>>          *  E.g., driver was not cleanly unloaded previously, etc.
>>          */
>>         if (!amdgpu_sriov_vf(adev) &&
>> amdgpu_asic_need_reset_on_init(adev)) {
>> +               if (amdgpu_passthrough(adev) && adev->powerplay.pp_funcs &&
>adev->powerplay.pp_funcs->set_asic_baco_cap) {
>> +                       r = adev->powerplay.pp_funcs->set_asic_baco_cap(adev-
>>powerplay.pp_handle);
>> +                       if (r) {
>> +                               dev_err(adev->dev, "set baco capability failed\n");
>> +                               goto failed;
>> +                       }
>> +               }
>> +
>
>I think it would be cleaner to add this to hwmgr_early_init() or something
>called from early init for powerplay.
I  also preferred to put it in the hwmgr_early_init, but as the function set_asic_baco_cap  need to get the vbios info,  so need to put the amdgpu_get_bios before early init. If so the code changes too big.
>
>Alex
>
>>                 r = amdgpu_asic_reset(adev);
>>                 if (r) {
>>                         dev_err(adev->dev, "asic reset on init
>> failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> index 78bd4fc..d9fdd95 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
>> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct
>amdgpu_device *adev)
>>         /* Just return false for soc15 GPUs.  Reset does not seem to
>>          * be necessary.
>>          */
>> -       return false;
>> +       if (!amdgpu_passthrough(adev))
>> +               return false;
>>
>>         if (adev->flags & AMD_IS_APU)
>>                 return false;
>> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> index 9f661bf..c6e2a51 100644
>> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
>> @@ -296,6 +296,7 @@ struct amd_pm_funcs {
>>         int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
>>         int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
>>         int (*get_asic_baco_capability)(void *handle, bool *cap);
>> +       int (*set_asic_baco_cap)(void *handle);
>>         int (*get_asic_baco_state)(void *handle, int *state);
>>         int (*set_asic_baco_state)(void *handle, int state);
>>         int (*get_ppfeature_status)(void *handle, char *buf); diff
>> --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> index bea1587..9856760 100644
>> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
>> @@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void
>*handle, uint32_t count)
>>         return ret;
>>  }
>>
>> +static int pp_set_asic_baco_cap(void *handle) {
>> +       struct pp_hwmgr *hwmgr = handle;
>> +
>> +       if (!hwmgr)
>> +               return -EINVAL;
>> +
>> +       if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
>> +               return 0;
>> +
>> +       hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
>> +
>> +       return 0;
>> +}
>> +
>>  static int pp_get_asic_baco_capability(void *handle, bool *cap)  {
>>         struct pp_hwmgr *hwmgr = handle; @@ -1546,6 +1561,7 @@ static
>> const struct amd_pm_funcs pp_dpm_funcs = {
>>         .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
>>         .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
>>         .get_asic_baco_capability = pp_get_asic_baco_capability,
>> +       .set_asic_baco_cap = pp_set_asic_baco_cap,
>>         .get_asic_baco_state = pp_get_asic_baco_state,
>>         .set_asic_baco_state = pp_set_asic_baco_state,
>>         .get_ppfeature_status = pp_get_ppfeature_status, diff --git
>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> index ce6aeb5..e5bcbc8 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
>> @@ -5302,6 +5302,7 @@ static const struct pp_hwmgr_func
>vega10_hwmgr_funcs = {
>>         .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
>>         .get_performance_level = vega10_get_performance_level,
>>         .get_asic_baco_capability = smu9_baco_get_capability,
>> +       .set_asic_baco_cap = vega10_baco_set_cap,
>>         .get_asic_baco_state = smu9_baco_get_state,
>>         .set_asic_baco_state = vega10_baco_set_state,
>>         .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, diff
>> --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> index b6767d7..83d22cd 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
>> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct
>> pp_hwmgr *hwmgr,
>>
>>         return result;
>>  }
>> +
>> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
>> +       int result = 0;
>> +
>> +       const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
>> +
>> +       powerplay_table = get_powerplay_table(hwmgr);
>> +
>> +       PP_ASSERT_WITH_CODE((powerplay_table != NULL),
>> +               "Missing PowerPlay Table!", return -1);
>> +
>> +       result = check_powerplay_tables(hwmgr, powerplay_table);
>> +
>> +       PP_ASSERT_WITH_CODE((result == 0),
>> +                           "check_powerplay_tables failed", return
>> + result);
>> +
>> +       set_hw_cap(
>> +                       hwmgr,
>> +                       0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) &
>ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
>> +                       PHM_PlatformCaps_BACO);
>> +       return result;
>> +}
>> +
>> diff --git
>> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> index d83ed2a..da5fbec 100644
>> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
>> @@ -59,4 +59,5 @@ extern int
>> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
>*hwmgr);  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr
>*hwmgr, uint32_t entry_index,
>>                 struct pp_power_state *power_state, int (*call_back_func)(struct
>pp_hwmgr *, void *,
>>                                 struct pp_power_state *, void *,
>> uint32_t));
>> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>>  #endif
>> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> index bac3d85..14480ae 100644
>> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
>> @@ -339,6 +339,7 @@ struct pp_hwmgr_func {
>>         int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr,
>uint32_t clock);
>>         int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t
>clock);
>>         int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool
>> *cap);
>> +       int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
>>         int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
>BACO_STATE *state);
>>         int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
>BACO_STATE state);
>>         int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char
>> *buf);
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found] ` <1558606921-1537-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-23 16:08   ` Alex Deucher
       [not found]     ` <CADnq5_NhONFSC2C=0npCx4AKGKAphAsWq1Kq7jZhmeY+ODQ6-Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Alex Deucher @ 2019-05-23 16:08 UTC (permalink / raw)
  To: Emily Deng; +Cc: amd-gfx list

On Thu, May 23, 2019 at 6:22 AM Emily Deng <Emily.Deng@amd.com> wrote:
>
> For passthrough, after rebooted the VM, driver will do
> a baco reset before doing other driver initialization during loading
>  driver. For doing the baco reset, it will first
> check the baco reset capability. So first need to set the
> cap from the vbios information or baco reset won't be
> enabled.
>
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
>  drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
>  drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16 +++++++++++++++
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24 ++++++++++++++++++++++
>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
>  8 files changed, 54 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index bdd1fe73..2dde672 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2611,6 +2611,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>          *  E.g., driver was not cleanly unloaded previously, etc.
>          */
>         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
> +               if (amdgpu_passthrough(adev) && adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_asic_baco_cap) {
> +                       r = adev->powerplay.pp_funcs->set_asic_baco_cap(adev->powerplay.pp_handle);
> +                       if (r) {
> +                               dev_err(adev->dev, "set baco capability failed\n");
> +                               goto failed;
> +                       }
> +               }
> +

I think it would be cleaner to add this to hwmgr_early_init() or
something called from early init for powerplay.

Alex

>                 r = amdgpu_asic_reset(adev);
>                 if (r) {
>                         dev_err(adev->dev, "asic reset on init failed\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 78bd4fc..d9fdd95 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
>         /* Just return false for soc15 GPUs.  Reset does not seem to
>          * be necessary.
>          */
> -       return false;
> +       if (!amdgpu_passthrough(adev))
> +               return false;
>
>         if (adev->flags & AMD_IS_APU)
>                 return false;
> diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 9f661bf..c6e2a51 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -296,6 +296,7 @@ struct amd_pm_funcs {
>         int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
>         int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
>         int (*get_asic_baco_capability)(void *handle, bool *cap);
> +       int (*set_asic_baco_cap)(void *handle);
>         int (*get_asic_baco_state)(void *handle, int *state);
>         int (*set_asic_baco_state)(void *handle, int state);
>         int (*get_ppfeature_status)(void *handle, char *buf);
> diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> index bea1587..9856760 100644
> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> @@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void *handle, uint32_t count)
>         return ret;
>  }
>
> +static int pp_set_asic_baco_cap(void *handle)
> +{
> +       struct pp_hwmgr *hwmgr = handle;
> +
> +       if (!hwmgr)
> +               return -EINVAL;
> +
> +       if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
> +               return 0;
> +
> +       hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
> +
> +       return 0;
> +}
> +
>  static int pp_get_asic_baco_capability(void *handle, bool *cap)
>  {
>         struct pp_hwmgr *hwmgr = handle;
> @@ -1546,6 +1561,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
>         .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
>         .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
>         .get_asic_baco_capability = pp_get_asic_baco_capability,
> +       .set_asic_baco_cap = pp_set_asic_baco_cap,
>         .get_asic_baco_state = pp_get_asic_baco_state,
>         .set_asic_baco_state = pp_set_asic_baco_state,
>         .get_ppfeature_status = pp_get_ppfeature_status,
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index ce6aeb5..e5bcbc8 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -5302,6 +5302,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
>         .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
>         .get_performance_level = vega10_get_performance_level,
>         .get_asic_baco_capability = smu9_baco_get_capability,
> +       .set_asic_baco_cap = vega10_baco_set_cap,
>         .get_asic_baco_state = smu9_baco_get_state,
>         .set_asic_baco_state = vega10_baco_set_state,
>         .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost,
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> index b6767d7..83d22cd 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> @@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
>
>         return result;
>  }
> +
> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
> +{
> +       int result = 0;
> +
> +       const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
> +
> +       powerplay_table = get_powerplay_table(hwmgr);
> +
> +       PP_ASSERT_WITH_CODE((powerplay_table != NULL),
> +               "Missing PowerPlay Table!", return -1);
> +
> +       result = check_powerplay_tables(hwmgr, powerplay_table);
> +
> +       PP_ASSERT_WITH_CODE((result == 0),
> +                           "check_powerplay_tables failed", return result);
> +
> +       set_hw_cap(
> +                       hwmgr,
> +                       0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
> +                       PHM_PlatformCaps_BACO);
> +       return result;
> +}
> +
> diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> index d83ed2a..da5fbec 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> @@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
>  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
>                 struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
>                                 struct pp_power_state *, void *, uint32_t));
> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>  #endif
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> index bac3d85..14480ae 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> @@ -339,6 +339,7 @@ struct pp_hwmgr_func {
>         int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
>         int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
>         int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
> +       int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
>         int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
>         int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
>         int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
@ 2019-05-23 10:22 Emily Deng
       [not found] ` <1558606921-1537-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Emily Deng @ 2019-05-23 10:22 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng

For passthrough, after rebooted the VM, driver will do
a baco reset before doing other driver initialization during loading
 driver. For doing the baco reset, it will first
check the baco reset capability. So first need to set the
cap from the vbios information or baco reset won't be
enabled.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
 drivers/gpu/drm/amd/amdgpu/soc15.c                 |  3 ++-
 drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16 +++++++++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
 .../amd/powerplay/hwmgr/vega10_processpptables.c   | 24 ++++++++++++++++++++++
 .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
 8 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bdd1fe73..2dde672 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2611,6 +2611,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	 *  E.g., driver was not cleanly unloaded previously, etc.
 	 */
 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
+		if (amdgpu_passthrough(adev) && adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_asic_baco_cap) {
+			r = adev->powerplay.pp_funcs->set_asic_baco_cap(adev->powerplay.pp_handle);
+			if (r) {
+				dev_err(adev->dev, "set baco capability failed\n");
+				goto failed;
+			}
+		}
+
 		r = amdgpu_asic_reset(adev);
 		if (r) {
 			dev_err(adev->dev, "asic reset on init failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 78bd4fc..d9fdd95 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -764,7 +764,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
 	/* Just return false for soc15 GPUs.  Reset does not seem to
 	 * be necessary.
 	 */
-	return false;
+	if (!amdgpu_passthrough(adev))
+		return false;
 
 	if (adev->flags & AMD_IS_APU)
 		return false;
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 9f661bf..c6e2a51 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -296,6 +296,7 @@ struct amd_pm_funcs {
 	int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
 	int (*get_asic_baco_capability)(void *handle, bool *cap);
+	int (*set_asic_baco_cap)(void *handle);
 	int (*get_asic_baco_state)(void *handle, int *state);
 	int (*set_asic_baco_state)(void *handle, int state);
 	int (*get_ppfeature_status)(void *handle, char *buf);
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index bea1587..9856760 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void *handle, uint32_t count)
 	return ret;
 }
 
+static int pp_set_asic_baco_cap(void *handle)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr)
+		return -EINVAL;
+
+	if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
+		return 0;
+
+	hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
+
+	return 0;
+}
+
 static int pp_get_asic_baco_capability(void *handle, bool *cap)
 {
 	struct pp_hwmgr *hwmgr = handle;
@@ -1546,6 +1561,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
 	.set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
 	.set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
 	.get_asic_baco_capability = pp_get_asic_baco_capability,
+	.set_asic_baco_cap = pp_set_asic_baco_cap,
 	.get_asic_baco_state = pp_get_asic_baco_state,
 	.set_asic_baco_state = pp_set_asic_baco_state,
 	.get_ppfeature_status = pp_get_ppfeature_status,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ce6aeb5..e5bcbc8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -5302,6 +5302,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.odn_edit_dpm_table = vega10_odn_edit_dpm_table,
 	.get_performance_level = vega10_get_performance_level,
 	.get_asic_baco_capability = smu9_baco_get_capability,
+	.set_asic_baco_cap = vega10_baco_set_cap,
 	.get_asic_baco_state = smu9_baco_get_state,
 	.set_asic_baco_state = vega10_baco_set_state,
 	.enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b6767d7..83d22cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -1371,3 +1371,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
 
 	return result;
 }
+
+int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+
+	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
+
+	powerplay_table = get_powerplay_table(hwmgr);
+
+	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
+		"Missing PowerPlay Table!", return -1);
+
+	result = check_powerplay_tables(hwmgr, powerplay_table);
+
+	PP_ASSERT_WITH_CODE((result == 0),
+			    "check_powerplay_tables failed", return result);
+
+	set_hw_cap(
+			hwmgr,
+			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
+			PHM_PlatformCaps_BACO);
+	return result;
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index d83ed2a..da5fbec 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
 extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
 		struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
 				struct pp_power_state *, void *, uint32_t));
+extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index bac3d85..14480ae 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -339,6 +339,7 @@ struct pp_hwmgr_func {
 	int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
+	int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
 	int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 	int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 	int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* RE: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
       [not found] ` <1558519591-30349-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-23  3:11   ` Quan, Evan
  0 siblings, 0 replies; 18+ messages in thread
From: Quan, Evan @ 2019-05-23  3:11 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Deng, Emily

With this limited to passthough case only, this patch is reviewed-by: Evan Quan <evan.quan@amd.com>

Regards,
Evan
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> Emily Deng
> Sent: Wednesday, May 22, 2019 6:07 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deng, Emily <Emily.Deng@amd.com>
> Subject: [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
> 
> [CAUTION: External Email]
> 
> For passthrough, after rebooted the VM, driver will do a baco reset before
> doing other driver initialization during loading  driver. For doing the baco
> reset, it will first check the baco reset capability. So first need to set the cap
> from the vbios information or baco reset won't be enabled.
> 
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
>  drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
>  drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16
> ++++++++++++++++
>  drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
>  .../amd/powerplay/hwmgr/vega10_processpptables.c   | 22
> ++++++++++++++++++++++
>  .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
>  drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
>  7 files changed, 50 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index d6286ed..14415b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2606,6 +2606,14 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>          *  E.g., driver was not cleanly unloaded previously, etc.
>          */
>         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev))
> {
> +               if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs-
> >set_asic_baco_cap) {
> +                       r = adev->powerplay.pp_funcs->set_asic_baco_cap(adev-
> >powerplay.pp_handle);
> +                       if (r) {
> +                               dev_err(adev->dev, "set baco capability failed\n");
> +                               goto failed;
> +                       }
> +               }
> +
>                 r = amdgpu_asic_reset(adev);
>                 if (r) {
>                         dev_err(adev->dev, "asic reset on init failed\n"); diff --git
> a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> index 2b579ba..0dcc18d 100644
> --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
> @@ -285,6 +285,7 @@ struct amd_pm_funcs {
>         int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
>         int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
>         int (*get_asic_baco_capability)(void *handle, bool *cap);
> +       int (*set_asic_baco_cap)(void *handle);
>         int (*get_asic_baco_state)(void *handle, int *state);
>         int (*set_asic_baco_state)(void *handle, int state);
>         int (*get_ppfeature_status)(void *handle, char *buf); diff --git
> a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> index bea1587..9856760 100644
> --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
> @@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void
> *handle, uint32_t count)
>         return ret;
>  }
> 
> +static int pp_set_asic_baco_cap(void *handle) {
> +       struct pp_hwmgr *hwmgr = handle;
> +
> +       if (!hwmgr)
> +               return -EINVAL;
> +
> +       if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
> +               return 0;
> +
> +       hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
> +
> +       return 0;
> +}
> +
>  static int pp_get_asic_baco_capability(void *handle, bool *cap)  {
>         struct pp_hwmgr *hwmgr = handle; @@ -1546,6 +1561,7 @@ static const
> struct amd_pm_funcs pp_dpm_funcs = {
>         .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
>         .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
>         .get_asic_baco_capability = pp_get_asic_baco_capability,
> +       .set_asic_baco_cap = pp_set_asic_baco_cap,
>         .get_asic_baco_state = pp_get_asic_baco_state,
>         .set_asic_baco_state = pp_set_asic_baco_state,
>         .get_ppfeature_status = pp_get_ppfeature_status, diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> index ed6c638..8dc23eb 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
> @@ -5171,6 +5171,7 @@ static const struct pp_hwmgr_func
> vega10_hwmgr_funcs = {
>         .odn_edit_dpm_table = vega10_odn_edit_dpm_table,
>         .get_performance_level = vega10_get_performance_level,
>         .get_asic_baco_capability = smu9_baco_get_capability,
> +       .set_asic_baco_cap = vega10_baco_set_cap,
>         .get_asic_baco_state = smu9_baco_get_state,
>         .set_asic_baco_state = vega10_baco_set_state,
>         .enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost, diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> index b6767d7..8fdeb23 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
> @@ -1371,3 +1371,25 @@ int vega10_get_powerplay_table_entry(struct
> pp_hwmgr *hwmgr,
> 
>         return result;
>  }
> +
> +int vega10_baco_set_cap(struct pp_hwmgr *hwmgr) {
> +       int result = 0;
> +       const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
> +
> +       powerplay_table = get_powerplay_table(hwmgr);
> +
> +       PP_ASSERT_WITH_CODE((powerplay_table != NULL),
> +               "Missing PowerPlay Table!", return -1);
> +
> +       result = check_powerplay_tables(hwmgr, powerplay_table);
> +
> +       PP_ASSERT_WITH_CODE((result == 0),
> +                           "check_powerplay_tables failed", return
> + result);
> +
> +       set_hw_cap(
> +                       hwmgr,
> +                       0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) &
> ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
> +                       PHM_PlatformCaps_BACO);
> +       return result;
> +}
> \ No newline at end of file
> diff --git
> a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> index d83ed2a..da5fbec 100644
> --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
> @@ -59,4 +59,5 @@ extern int
> vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr
> *hwmgr);  extern int vega10_get_powerplay_table_entry(struct pp_hwmgr
> *hwmgr, uint32_t entry_index,
>                 struct pp_power_state *power_state, int (*call_back_func)(struct
> pp_hwmgr *, void *,
>                                 struct pp_power_state *, void *, uint32_t));
> +extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
>  #endif
> diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> index bac3d85..14480ae 100644
> --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
> @@ -339,6 +339,7 @@ struct pp_hwmgr_func {
>         int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr,
> uint32_t clock);
>         int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t
> clock);
>         int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
> +       int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
>         int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
> BACO_STATE *state);
>         int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum
> BACO_STATE state);
>         int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] drm/amdgpu: Need to set the baco cap before baco reset
@ 2019-05-22 10:06 Emily Deng
       [not found] ` <1558519591-30349-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Emily Deng @ 2019-05-22 10:06 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng

For passthrough, after rebooted the VM, driver will do
a baco reset before doing other driver initialization during loading
 driver. For doing the baco reset, it will first
check the baco reset capability. So first need to set the
cap from the vbios information or baco reset won't be
enabled.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c         |  8 ++++++++
 drivers/gpu/drm/amd/include/kgd_pp_interface.h     |  1 +
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c      | 16 ++++++++++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c |  1 +
 .../amd/powerplay/hwmgr/vega10_processpptables.c   | 22 ++++++++++++++++++++++
 .../amd/powerplay/hwmgr/vega10_processpptables.h   |  1 +
 drivers/gpu/drm/amd/powerplay/inc/hwmgr.h          |  1 +
 7 files changed, 50 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d6286ed..14415b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2606,6 +2606,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	 *  E.g., driver was not cleanly unloaded previously, etc.
 	 */
 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
+		if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_asic_baco_cap) {
+			r = adev->powerplay.pp_funcs->set_asic_baco_cap(adev->powerplay.pp_handle);
+			if (r) {
+				dev_err(adev->dev, "set baco capability failed\n");
+				goto failed;
+			}
+		}
+
 		r = amdgpu_asic_reset(adev);
 		if (r) {
 			dev_err(adev->dev, "asic reset on init failed\n");
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2b579ba..0dcc18d 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -285,6 +285,7 @@ struct amd_pm_funcs {
 	int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
 	int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
 	int (*get_asic_baco_capability)(void *handle, bool *cap);
+	int (*set_asic_baco_cap)(void *handle);
 	int (*get_asic_baco_state)(void *handle, int *state);
 	int (*set_asic_baco_state)(void *handle, int state);
 	int (*get_ppfeature_status)(void *handle, char *buf);
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index bea1587..9856760 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -1404,6 +1404,21 @@ static int pp_set_active_display_count(void *handle, uint32_t count)
 	return ret;
 }
 
+static int pp_set_asic_baco_cap(void *handle)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr)
+		return -EINVAL;
+
+	if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_cap)
+		return 0;
+
+	hwmgr->hwmgr_func->set_asic_baco_cap(hwmgr);
+
+	return 0;
+}
+
 static int pp_get_asic_baco_capability(void *handle, bool *cap)
 {
 	struct pp_hwmgr *hwmgr = handle;
@@ -1546,6 +1561,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
 	.set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
 	.set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
 	.get_asic_baco_capability = pp_get_asic_baco_capability,
+	.set_asic_baco_cap = pp_set_asic_baco_cap,
 	.get_asic_baco_state = pp_get_asic_baco_state,
 	.set_asic_baco_state = pp_set_asic_baco_state,
 	.get_ppfeature_status = pp_get_ppfeature_status,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index ed6c638..8dc23eb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -5171,6 +5171,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.odn_edit_dpm_table = vega10_odn_edit_dpm_table,
 	.get_performance_level = vega10_get_performance_level,
 	.get_asic_baco_capability = smu9_baco_get_capability,
+	.set_asic_baco_cap = vega10_baco_set_cap,
 	.get_asic_baco_state = smu9_baco_get_state,
 	.set_asic_baco_state = vega10_baco_set_state,
 	.enable_mgpu_fan_boost = vega10_enable_mgpu_fan_boost,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b6767d7..8fdeb23 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -1371,3 +1371,25 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
 
 	return result;
 }
+
+int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
+
+	powerplay_table = get_powerplay_table(hwmgr);
+
+	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
+		"Missing PowerPlay Table!", return -1);
+
+	result = check_powerplay_tables(hwmgr, powerplay_table);
+
+	PP_ASSERT_WITH_CODE((result == 0),
+			    "check_powerplay_tables failed", return result);
+
+	set_hw_cap(
+			hwmgr,
+			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
+			PHM_PlatformCaps_BACO);
+	return result;
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index d83ed2a..da5fbec 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
 extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
 		struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
 				struct pp_power_state *, void *, uint32_t));
+extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index bac3d85..14480ae 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -339,6 +339,7 @@ struct pp_hwmgr_func {
 	int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*get_asic_baco_capability)(struct pp_hwmgr *hwmgr, bool *cap);
+	int (*set_asic_baco_cap)(struct pp_hwmgr *hwmgr);
 	int (*get_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE *state);
 	int (*set_asic_baco_state)(struct pp_hwmgr *hwmgr, enum BACO_STATE state);
 	int (*get_ppfeature_status)(struct pp_hwmgr *hwmgr, char *buf);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2019-06-17 14:39 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-27  7:43 [PATCH] drm/amdgpu: Need to set the baco cap before baco reset Emily Deng
     [not found] ` <1558943005-16557-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
2019-05-28  1:17   ` Quan, Evan
     [not found]     ` <MN2PR12MB3344E228ADA29C20F5BA9DDBE41E0-rweVpJHSKToDMgCC8P//OwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-06-13 11:11       ` Russell, Kent
     [not found]         ` <BN6PR12MB1618E4322B84944963895A8E85EF0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-06-14  3:19           ` Deng, Emily
     [not found]             ` <MN2PR12MB29750834B9F6C548E831DE798FEE0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-06-14 14:15               ` Yang, Philip
     [not found]                 ` <9bfdbf20-b914-6063-5517-8352e652e203-5C7GfCeVMHo@public.gmane.org>
2019-06-17  2:50                   ` Deng, Emily
     [not found]                     ` <MN2PR12MB2975EAAD825DA9842876B4A38FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-06-17  3:52                       ` Deng, Emily
     [not found]                         ` <MN2PR12MB29750900DC4B43AD00254D888FEB0-rweVpJHSKToFlvJWC7EAqwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-06-17 11:08                           ` Russell, Kent
     [not found]                             ` <BN6PR12MB16188EBC282A9D8BEA715AB185EB0-/b2+HYfkarRqaFUXYJa4HgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-06-17 14:39                               ` Alex Deucher
2019-06-17 11:39                           ` Russell, Kent
  -- strict thread matches above, loose matches on Subject: below --
2019-05-23 10:22 Emily Deng
     [not found] ` <1558606921-1537-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
2019-05-23 16:08   ` Alex Deucher
     [not found]     ` <CADnq5_NhONFSC2C=0npCx4AKGKAphAsWq1Kq7jZhmeY+ODQ6-Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-05-24  2:29       ` Deng, Emily
     [not found]         ` <MWHPR12MB132625FCE9DF15D24746CFD28F020-Gy0DoCVfaSVaj5rVPFIlogdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-05-24  8:28           ` Deng, Emily
2019-05-24 16:59           ` Alex Deucher
     [not found]             ` <CADnq5_Of2+kLtG3tRWkJXG3Ghbs9VFu8g4mDbKvdD7cXyuysRA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-05-27  2:57               ` Deng, Emily
2019-05-22 10:06 Emily Deng
     [not found] ` <1558519591-30349-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
2019-05-23  3:11   ` Quan, Evan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.