All of lore.kernel.org
 help / color / mirror / Atom feed
* FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw()
       [not found] <20211021071750.2912140-1-lang.yu@amd.com>
@ 2021-10-21  7:19 ` Yu, Lang
  2021-10-21 15:18   ` Andrey Grodzovsky
       [not found] ` <20211021071750.2912140-3-lang.yu@amd.com>
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Yu, Lang @ 2021-10-21  7:19 UTC (permalink / raw)
  To: amd-gfx

[AMD Official Use Only]



>-----Original Message-----
>From: Yu, Lang <Lang.Yu@amd.com>
>Sent: Thursday, October 21, 2021 3:18 PM
>To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
><Lang.Yu@amd.com>
>Subject: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>amdgpu_device_fini_sw()
>
>amdgpu_fence_driver_sw_fini() should be executed before
>amdgpu_device_ip_fini(), otherwise fence driver resource won't be properly freed
>as adev->rings have been tore down.
>
>Fixes: 72c8c97b1522 ("drm/amdgpu: Split amdgpu_device_fini into early and late")
>
>Signed-off-by: Lang Yu <lang.yu@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 41ce86244144..5654c4790773 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -3843,8 +3843,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device
>*adev)
>
> void amdgpu_device_fini_sw(struct amdgpu_device *adev)  {
>-	amdgpu_device_ip_fini(adev);
> 	amdgpu_fence_driver_sw_fini(adev);
>+	amdgpu_device_ip_fini(adev);
> 	release_firmware(adev->firmware.gpu_info_fw);
> 	adev->firmware.gpu_info_fw = NULL;
> 	adev->accel_working = false;
>--
>2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* FW: [PATCH 3/3] drm/amdgpu: remove unnecessary NULL check in amdgpu_device.c
       [not found] ` <20211021071750.2912140-3-lang.yu@amd.com>
@ 2021-10-21  7:20   ` Yu, Lang
  0 siblings, 0 replies; 7+ messages in thread
From: Yu, Lang @ 2021-10-21  7:20 UTC (permalink / raw)
  To: amd-gfx

[AMD Official Use Only]



>-----Original Message-----
>From: Yu, Lang <Lang.Yu@amd.com>
>Sent: Thursday, October 21, 2021 3:18 PM
>To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
><Lang.Yu@amd.com>
>Subject: [PATCH 3/3] drm/amdgpu: remove unnecessary NULL check in
>amdgpu_device.c
>
>NULL is safe for these functions.
>
>Signed-off-by: Lang Yu <lang.yu@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++++++----------
> 1 file changed, 7 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index be64861ed19a..dd979db93399 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -1091,12 +1091,9 @@ static void amdgpu_device_doorbell_fini(struct
>amdgpu_device *adev)
>  */
> static void amdgpu_device_wb_fini(struct amdgpu_device *adev)  {
>-	if (adev->wb.wb_obj) {
>-		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
>-				      &adev->wb.gpu_addr,
>-				      (void **)&adev->wb.wb);
>-		adev->wb.wb_obj = NULL;
>-	}
>+	amdgpu_bo_free_kernel(&adev->wb.wb_obj,
>+			      &adev->wb.gpu_addr,
>+			      (void **)&adev->wb.wb);
> }
>
> /**
>@@ -3794,8 +3791,8 @@ static void amdgpu_device_unmap_mmio(struct
>amdgpu_device *adev)
>
> 	iounmap(adev->rmmio);
> 	adev->rmmio = NULL;
>-	if (adev->mman.aper_base_kaddr)
>-		iounmap(adev->mman.aper_base_kaddr);
>+
>+	iounmap(adev->mman.aper_base_kaddr);
> 	adev->mman.aper_base_kaddr = NULL;
>
> 	/* Memory manager related */
>@@ -3886,8 +3883,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device
>*adev)
>
> 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
> 		amdgpu_pmu_fini(adev);
>-	if (adev->mman.discovery_bin)
>-		amdgpu_discovery_fini(adev);
>+
>+	amdgpu_discovery_fini(adev);
>
> 	amdgpu_device_free_pci_state(adev);
> }
>--
>2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* FW: [PATCH 2/3] drm/amdgpu: use some wrapper functions in amdgpu_device_fini_sw()
       [not found] ` <20211021071750.2912140-2-lang.yu@amd.com>
@ 2021-10-21  7:20   ` Yu, Lang
  0 siblings, 0 replies; 7+ messages in thread
From: Yu, Lang @ 2021-10-21  7:20 UTC (permalink / raw)
  To: amd-gfx

[AMD Official Use Only]



>-----Original Message-----
>From: Yu, Lang <Lang.Yu@amd.com>
>Sent: Thursday, October 21, 2021 3:18 PM
>To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
><Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
><Lang.Yu@amd.com>
>Subject: [PATCH 2/3] drm/amdgpu: use some wrapper functions in
>amdgpu_device_fini_sw()
>
>Add some wrapper functions to make amdgpu_device_fini_sw() more clear.
>
>Fix an error handling in amdgpu_device_parse_gpu_info_fw().
>
>Signed-off-by: Lang Yu <lang.yu@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 10 +++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 32 ++++++++++++++++------
> 2 files changed, 34 insertions(+), 8 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>index d58e37fd01f4..5df194259e15 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>@@ -372,6 +372,11 @@ int amdgpu_device_ip_block_add(struct amdgpu_device
>*adev,
>  */
> bool amdgpu_get_bios(struct amdgpu_device *adev);  bool
>amdgpu_read_bios(struct amdgpu_device *adev);
>+static inline void amdgpu_free_bios(struct amdgpu_device *adev) {
>+	kfree(adev->bios);
>+	adev->bios = NULL;
>+}
>
> /*
>  * Clocks
>@@ -1440,6 +1445,11 @@ void amdgpu_pci_resume(struct pci_dev *pdev);
>
> bool amdgpu_device_cache_pci_state(struct pci_dev *pdev);  bool
>amdgpu_device_load_pci_state(struct pci_dev *pdev);
>+static inline void amdgpu_device_free_pci_state(struct amdgpu_device
>+*adev) {
>+	kfree(adev->pci_state);
>+	adev->pci_state = NULL;
>+}
>
> bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev);
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index 5654c4790773..be64861ed19a 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -1871,6 +1871,19 @@ static void
>amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
> 	}
> }
>
>+/**
>+ * amdgpu_device_release_gpu_info_fw - release gpu info firmware
>+ *
>+ * @adev: amdgpu_device pointer
>+ *
>+ *  Wrapper to release gpu info firmware  */ static inline void
>+amdgpu_device_release_gpu_info_fw(struct amdgpu_device *adev) {
>+	release_firmware(adev->firmware.gpu_info_fw);
>+	adev->firmware.gpu_info_fw = NULL;
>+}
>+
> /**
>  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
>  *
>@@ -1987,7 +2000,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct
>amdgpu_device *adev)
> 		dev_err(adev->dev,
> 			"Failed to validate gpu_info firmware \"%s\"\n",
> 			fw_name);
>-		goto out;
>+		goto release_fw;
> 	}
>
> 	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev-
>>firmware.gpu_info_fw->data;
>@@ -2051,8 +2064,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct
>amdgpu_device *adev)
> 		dev_err(adev->dev,
> 			"Unsupported gpu_info table %d\n", hdr-
>>header.ucode_version);
> 		err = -EINVAL;
>-		goto out;
>+		goto release_fw;
> 	}
>+
>+	return 0;
>+release_fw:
>+	amdgpu_device_release_gpu_info_fw(adev);
> out:
> 	return err;
> }
>@@ -3845,8 +3862,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device
>*adev)  {
> 	amdgpu_fence_driver_sw_fini(adev);
> 	amdgpu_device_ip_fini(adev);
>-	release_firmware(adev->firmware.gpu_info_fw);
>-	adev->firmware.gpu_info_fw = NULL;
>+	amdgpu_device_release_gpu_info_fw(adev);
>+
> 	adev->accel_working = false;
>
> 	amdgpu_reset_fini(adev);
>@@ -3858,8 +3875,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device
>*adev)
> 	if (amdgpu_emu_mode != 1)
> 		amdgpu_atombios_fini(adev);
>
>-	kfree(adev->bios);
>-	adev->bios = NULL;
>+	amdgpu_free_bios(adev);
>+
> 	if (amdgpu_device_supports_px(adev_to_drm(adev))) {
> 		vga_switcheroo_unregister_client(adev->pdev);
> 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
>@@ -3872,8 +3889,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device
>*adev)
> 	if (adev->mman.discovery_bin)
> 		amdgpu_discovery_fini(adev);
>
>-	kfree(adev->pci_state);
>-
>+	amdgpu_device_free_pci_state(adev);
> }
>
> /**
>--
>2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw()
       [not found] ` <840b06aa-f0ec-0093-9243-5636a3c5a98f@amd.com>
@ 2021-10-21  7:29   ` Yu, Lang
  0 siblings, 0 replies; 7+ messages in thread
From: Yu, Lang @ 2021-10-21  7:29 UTC (permalink / raw)
  To: Koenig, Christian, Grodzovsky, Andrey, amd-gfx
  Cc: Deucher, Alexander, Huang, Ray

[AMD Official Use Only]



>-----Original Message-----
>From: Koenig, Christian <Christian.Koenig@amd.com>
>Sent: Thursday, October 21, 2021 3:27 PM
>To: Yu, Lang <Lang.Yu@amd.com>; Grodzovsky, Andrey
><Andrey.Grodzovsky@amd.com>
>Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Huang, Ray
><Ray.Huang@amd.com>
>Subject: Re: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>amdgpu_device_fini_sw()
>
>Is there any reason you are sending that around only internally and not to the
>public mailing list?

Sorry, I missed that. It’s a mistake.

Regards,
Lang

>Christian.
>
>Am 21.10.21 um 09:17 schrieb Lang Yu:
>> amdgpu_fence_driver_sw_fini() should be executed before
>> amdgpu_device_ip_fini(), otherwise fence driver resource won't be
>> properly freed as adev->rings have been tore down.
>>
>> Fixes: 72c8c97b1522 ("drm/amdgpu: Split amdgpu_device_fini into early
>> and late")
>>
>> Signed-off-by: Lang Yu <lang.yu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 41ce86244144..5654c4790773 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3843,8 +3843,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device
>> *adev)
>>
>>   void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>>   {
>> -	amdgpu_device_ip_fini(adev);
>>   	amdgpu_fence_driver_sw_fini(adev);
>> +	amdgpu_device_ip_fini(adev);
>>   	release_firmware(adev->firmware.gpu_info_fw);
>>   	adev->firmware.gpu_info_fw = NULL;
>>   	adev->accel_working = false;

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw()
  2021-10-21  7:19 ` FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw() Yu, Lang
@ 2021-10-21 15:18   ` Andrey Grodzovsky
  2021-10-21 23:33     ` Yu, Lang
  0 siblings, 1 reply; 7+ messages in thread
From: Andrey Grodzovsky @ 2021-10-21 15:18 UTC (permalink / raw)
  To: Yu, Lang, amd-gfx

On 2021-10-21 3:19 a.m., Yu, Lang wrote:

> [AMD Official Use Only]
>
>
>
>> -----Original Message-----
>> From: Yu, Lang <Lang.Yu@amd.com>
>> Sent: Thursday, October 21, 2021 3:18 PM
>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>> <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
>> <Lang.Yu@amd.com>
>> Subject: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>> amdgpu_device_fini_sw()
>>
>> amdgpu_fence_driver_sw_fini() should be executed before
>> amdgpu_device_ip_fini(), otherwise fence driver resource won't be properly freed
>> as adev->rings have been tore down.


Cam you clarify more where exactly the memleak happens ?

Andrey


>>
>> Fixes: 72c8c97b1522 ("drm/amdgpu: Split amdgpu_device_fini into early and late")
>>
>> Signed-off-by: Lang Yu <lang.yu@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 41ce86244144..5654c4790773 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3843,8 +3843,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device
>> *adev)
>>
>> void amdgpu_device_fini_sw(struct amdgpu_device *adev)  {
>> -	amdgpu_device_ip_fini(adev);
>> 	amdgpu_fence_driver_sw_fini(adev);
>> +	amdgpu_device_ip_fini(adev);
>> 	release_firmware(adev->firmware.gpu_info_fw);
>> 	adev->firmware.gpu_info_fw = NULL;
>> 	adev->accel_working = false;
>> --
>> 2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw()
  2021-10-21 15:18   ` Andrey Grodzovsky
@ 2021-10-21 23:33     ` Yu, Lang
  2021-10-22 15:09       ` Andrey Grodzovsky
  0 siblings, 1 reply; 7+ messages in thread
From: Yu, Lang @ 2021-10-21 23:33 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx

[AMD Official Use Only]



>-----Original Message-----
>From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>Sent: Thursday, October 21, 2021 11:18 PM
>To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>Subject: Re: FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>amdgpu_device_fini_sw()
>
>On 2021-10-21 3:19 a.m., Yu, Lang wrote:
>
>> [AMD Official Use Only]
>>
>>
>>
>>> -----Original Message-----
>>> From: Yu, Lang <Lang.Yu@amd.com>
>>> Sent: Thursday, October 21, 2021 3:18 PM
>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>>> <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
>>> <Lang.Yu@amd.com>
>>> Subject: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>>> amdgpu_device_fini_sw()
>>>
>>> amdgpu_fence_driver_sw_fini() should be executed before
>>> amdgpu_device_ip_fini(), otherwise fence driver resource won't be
>>> properly freed as adev->rings have been tore down.
>
>
>Cam you clarify more where exactly the memleak happens ?
>
>Andrey

See amdgpu_fence_driver_sw_fini(), ring->fence_drv.fences will only be freed
when adev->rings[i] is not NULL.

void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
	unsigned int i, j;

	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->fence_drv.initialized)
			continue;

		if (!ring->no_scheduler)
			drm_sched_fini(&ring->sched);

		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
			dma_fence_put(ring->fence_drv.fences[j]);
		kfree(ring->fence_drv.fences);
		ring->fence_drv.fences = NULL;
		ring->fence_drv.initialized = false;
	}
}

If amdgpu_device_ip_fini() is executed before amdgpu_fence_driver_sw_fini(), 
amdgpu_device_ip_fini() will call gfx_vX_0_sw_fini() 
then call amdgpu_ring_fini() and set adev->rings[i] to NULL.
Nothing will be freed in amdgpu_fence_driver_sw_fini().
ring->fence_drv.fences  memory leak happened!

void amdgpu_ring_fini(struct amdgpu_ring *ring)
{
	......
	ring->adev->rings[ring->idx] = NULL;
}

Regards,
Lang

>
>
>>>
>>> Fixes: 72c8c97b1522 ("drm/amdgpu: Split amdgpu_device_fini into early
>>> and late")
>>>
>>> Signed-off-by: Lang Yu <lang.yu@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index 41ce86244144..5654c4790773 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3843,8 +3843,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device
>>> *adev)
>>>
>>> void amdgpu_device_fini_sw(struct amdgpu_device *adev)  {
>>> -	amdgpu_device_ip_fini(adev);
>>> 	amdgpu_fence_driver_sw_fini(adev);
>>> +	amdgpu_device_ip_fini(adev);
>>> 	release_firmware(adev->firmware.gpu_info_fw);
>>> 	adev->firmware.gpu_info_fw = NULL;
>>> 	adev->accel_working = false;
>>> --
>>> 2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw()
  2021-10-21 23:33     ` Yu, Lang
@ 2021-10-22 15:09       ` Andrey Grodzovsky
  0 siblings, 0 replies; 7+ messages in thread
From: Andrey Grodzovsky @ 2021-10-22 15:09 UTC (permalink / raw)
  To: Yu, Lang, amd-gfx


On 2021-10-21 7:33 p.m., Yu, Lang wrote:
> [AMD Official Use Only]
>
>
>
>> -----Original Message-----
>> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Sent: Thursday, October 21, 2021 11:18 PM
>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>> amdgpu_device_fini_sw()
>>
>> On 2021-10-21 3:19 a.m., Yu, Lang wrote:
>>
>>> [AMD Official Use Only]
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: Yu, Lang <Lang.Yu@amd.com>
>>>> Sent: Thursday, October 21, 2021 3:18 PM
>>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>>>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>>>> <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Yu, Lang
>>>> <Lang.Yu@amd.com>
>>>> Subject: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in
>>>> amdgpu_device_fini_sw()
>>>>
>>>> amdgpu_fence_driver_sw_fini() should be executed before
>>>> amdgpu_device_ip_fini(), otherwise fence driver resource won't be
>>>> properly freed as adev->rings have been tore down.
>>
>> Cam you clarify more where exactly the memleak happens ?
>>
>> Andrey
> See amdgpu_fence_driver_sw_fini(), ring->fence_drv.fences will only be freed
> when adev->rings[i] is not NULL.
>
> void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
> {
> 	unsigned int i, j;
>
> 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
> 		struct amdgpu_ring *ring = adev->rings[i];
>
> 		if (!ring || !ring->fence_drv.initialized)
> 			continue;
>
> 		if (!ring->no_scheduler)
> 			drm_sched_fini(&ring->sched);
>
> 		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
> 			dma_fence_put(ring->fence_drv.fences[j]);
> 		kfree(ring->fence_drv.fences);
> 		ring->fence_drv.fences = NULL;
> 		ring->fence_drv.initialized = false;
> 	}
> }
>
> If amdgpu_device_ip_fini() is executed before amdgpu_fence_driver_sw_fini(),
> amdgpu_device_ip_fini() will call gfx_vX_0_sw_fini()
> then call amdgpu_ring_fini() and set adev->rings[i] to NULL.
> Nothing will be freed in amdgpu_fence_driver_sw_fini().
> ring->fence_drv.fences  memory leak happened!
>
> void amdgpu_ring_fini(struct amdgpu_ring *ring)
> {
> 	......
> 	ring->adev->rings[ring->idx] = NULL;
> }
>
> Regards,
> Lang


Got it, Looks good to me.

Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey

>
>>
>>>> Fixes: 72c8c97b1522 ("drm/amdgpu: Split amdgpu_device_fini into early
>>>> and late")
>>>>
>>>> Signed-off-by: Lang Yu <lang.yu@amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 41ce86244144..5654c4790773 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -3843,8 +3843,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device
>>>> *adev)
>>>>
>>>> void amdgpu_device_fini_sw(struct amdgpu_device *adev)  {
>>>> -	amdgpu_device_ip_fini(adev);
>>>> 	amdgpu_fence_driver_sw_fini(adev);
>>>> +	amdgpu_device_ip_fini(adev);
>>>> 	release_firmware(adev->firmware.gpu_info_fw);
>>>> 	adev->firmware.gpu_info_fw = NULL;
>>>> 	adev->accel_working = false;
>>>> --
>>>> 2.25.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-10-22 15:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20211021071750.2912140-1-lang.yu@amd.com>
2021-10-21  7:19 ` FW: [PATCH 1/3] drm/amdgpu: fix a potential memory leak in amdgpu_device_fini_sw() Yu, Lang
2021-10-21 15:18   ` Andrey Grodzovsky
2021-10-21 23:33     ` Yu, Lang
2021-10-22 15:09       ` Andrey Grodzovsky
     [not found] ` <20211021071750.2912140-3-lang.yu@amd.com>
2021-10-21  7:20   ` FW: [PATCH 3/3] drm/amdgpu: remove unnecessary NULL check in amdgpu_device.c Yu, Lang
     [not found] ` <20211021071750.2912140-2-lang.yu@amd.com>
2021-10-21  7:20   ` FW: [PATCH 2/3] drm/amdgpu: use some wrapper functions in amdgpu_device_fini_sw() Yu, Lang
     [not found] ` <840b06aa-f0ec-0093-9243-5636a3c5a98f@amd.com>
2021-10-21  7:29   ` [PATCH 1/3] drm/amdgpu: fix a potential memory leak " Yu, Lang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.