All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/radeon: use writel to avoid gcc optimization
@ 2020-12-25  3:54 ` Chen Li
  2020-12-25  3:58   ` [PATCH v2 2/2] drm/amdgpu: use GTT for uvd_get_create/destory_msg Chen Li
  2021-01-04 11:48   ` [PATCH v2 1/2] drm/radeon: use writel to avoid gcc optimization Christian König
  0 siblings, 2 replies; 3+ messages in thread
From: Chen Li @ 2020-12-25  3:54 UTC (permalink / raw)
  To: dri-devel, Christian König

When using e8860(gcn1) on arm64, the kernel crashed on drm/radeon:

[   11.240414] pc : __memset+0x4c/0x188
[   11.244101] lr : radeon_uvd_get_create_msg+0x114/0x1d0 [radeon]
[   11.249995] sp : ffff00000d7eb700
[   11.253295] x29: ffff00000d7eb700 x28: ffff8001f632a868
[   11.258585] x27: 0000000000040000 x26: ffff00000de00000
[   11.263875] x25: 0000000000000125 x24: 0000000000000001
[   11.269168] x23: 0000000000000000 x22: 0000000000000005
[   11.274459] x21: ffff00000df24000 x20: ffff8001f74b4000
[   11.279753] x19: 0000000000124000 x18: 0000000000000020
[   11.285043] x17: 0000000000000000 x16: 0000000000000000
[   11.290336] x15: ffff000009309000 x14: ffffffffffffffff
[   11.290340] x13: ffff0000094b6f88 x12: ffff0000094b6bd2
[   11.290343] x11: ffff00000d7eb700 x10: ffff00000d7eb700
[   11.306246] x9 : ffff00000d7eb700 x8 : ffff00000df2402c
[   11.306254] x7 : 0000000000000000 x6 : ffff0000094b626a
[   11.306257] x5 : 0000000000000000 x4 : 0000000000000004
[   11.306262] x3 : ffffffffffffffff x2 : 0000000000000fd4
[   11.306265] x1 : 0000000000000000 x0 : ffff00000df2402c
[   11.306272] Call trace:
[   11.306316]  __memset+0x4c/0x188
[   11.306638]  uvd_v1_0_ib_test+0x70/0x1c0 [radeon]
[   11.306758]  radeon_ib_ring_tests+0x54/0xe0 [radeon]
[   11.309961] IPv6: ADDRCONF(NETDEV_UP): enp5s0f0: link is not ready
[   11.354628]  radeon_device_init+0x53c/0xbdc [radeon]
[   11.354693]  radeon_driver_load_kms+0x6c/0x1b0 [radeon]
[   11.364788]  drm_dev_register+0x130/0x1c0
[   11.364794]  drm_get_pci_dev+0x8c/0x14c
[   11.372704]  radeon_pci_probe+0xb0/0x110 [radeon]
[   11.372715]  local_pci_probe+0x3c/0xb0
[   11.381129]  pci_device_probe+0x114/0x1b0
[   11.385121]  really_probe+0x23c/0x400
[   11.388757]  driver_probe_device+0xdc/0x130
[   11.392921]  __driver_attach+0x128/0x150
[   11.396826]  bus_for_each_dev+0x70/0xbc
[   11.400643]  driver_attach+0x20/0x2c
[   11.404201]  bus_add_driver+0x160/0x260
[   11.408019]  driver_register+0x74/0x120
[   11.411837]  __pci_register_driver+0x40/0x50
[   11.416149]  radeon_init+0x78/0x1000 [radeon]
[   11.420489]  do_one_initcall+0x54/0x154
[   11.424310]  do_init_module+0x54/0x260
[   11.428041]  load_module+0x1ccc/0x20b0
[   11.431773]  __se_sys_finit_module+0xac/0x10c
[   11.436109]  __arm64_sys_finit_module+0x18/0x20
[   11.440622]  el0_svc_common+0x70/0x164
[   11.444353]  el0_svc_handler+0x2c/0x80
[   11.448084]  el0_svc+0x8/0xc
[   11.450954] Code: d65f03c0 cb0803e4 f2400c84 54000080 (a9001d07)

Obviously, the __memset call is generated by gcc(8.3.1). It optimizes
this for loop into memset. But this may break on some platforms which
cannot map device memory correctly. So, just invoke `writel` to handle this.
Signed-off-by: chenli <chenli@uniontech.com>
---
v1->v2:
* Convert the other assignments of msg as well.
* Casting types to avoid static checker warnings.

---
 drivers/gpu/drm/radeon/radeon_uvd.c | 34 ++++++++++++++---------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 39c1c339be7b..e576470402b9 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -791,19 +791,19 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
 		return r;
 
 	/* stitch together an UVD create msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000000);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
-	msg[4] = cpu_to_le32(0x00000000);
-	msg[5] = cpu_to_le32(0x00000000);
-	msg[6] = cpu_to_le32(0x00000000);
-	msg[7] = cpu_to_le32(0x00000780);
-	msg[8] = cpu_to_le32(0x00000440);
-	msg[9] = cpu_to_le32(0x00000000);
-	msg[10] = cpu_to_le32(0x01b37000);
+	writel(cpu_to_le32(0x00000de4), (void __iomem *)&msg[0]);
+	writel(0x0, (void __iomem *)&msg[1]);
+	writel(cpu_to_le32(handle), (void __iomem *)&msg[2]);
+	writel(0x0, (void __iomem *)&msg[3]);
+	writel(0x0, (void __iomem *)&msg[4]);
+	writel(0x0, (void __iomem *)&msg[5]);
+	writel(0x0, (void __iomem *)&msg[6]);
+	writel(cpu_to_le32(0x00000780), (void __iomem *)&msg[7]);
+	writel(cpu_to_le32(0x00000440), (void __iomem *)&msg[8]);
+	writel(0x0, (void __iomem *)&msg[9]);
+	writel(cpu_to_le32(0x01b37000), (void __iomem *)&msg[10]);
 	for (i = 11; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
+		writel(0x0, (void __iomem *)&msg[i]);
 
 	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
 	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
@@ -827,12 +827,12 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
 		return r;
 
 	/* stitch together an UVD destroy msg */
-	msg[0] = cpu_to_le32(0x00000de4);
-	msg[1] = cpu_to_le32(0x00000002);
-	msg[2] = cpu_to_le32(handle);
-	msg[3] = cpu_to_le32(0x00000000);
+	writel(cpu_to_le32(0x00000de4), (void __iomem *)&msg[0]);
+	writel(cpu_to_le32(0x00000002), (void __iomem *)&msg[1]);
+	writel(cpu_to_le32(handle), (void __iomem *)&msg[2]);
+	writel(0x0, (void __iomem *)&msg[3]);
 	for (i = 4; i < 1024; ++i)
-		msg[i] = cpu_to_le32(0x0);
+		writel(0x0, (void __iomem *)&msg[i]);
 
 	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
 	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
-- 
2.29.2



_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: use GTT for uvd_get_create/destory_msg
  2020-12-25  3:54 ` Chen Li
@ 2020-12-25  3:58   ` Chen Li
  2021-01-04 11:48   ` [PATCH v2 1/2] drm/radeon: use writel to avoid gcc optimization Christian König
  1 sibling, 0 replies; 3+ messages in thread
From: Chen Li @ 2020-12-25  3:58 UTC (permalink / raw)
  To: dri-devel, Christian König

On modern gpus, GTT (system memory) works as well here, and this may
also be a workaround for platforms which cannot map vram correctly.

Signed-off-by: chenli <chenli@uniontech.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
v1->v2: no change

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 8b989670ed66..e2ed4689118a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1170,7 +1170,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	int r, i;
 
 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
+				      AMDGPU_GEM_DOMAIN_GTT,
 				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
@@ -1202,7 +1202,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	int r, i;
 
 	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
+				      AMDGPU_GEM_DOMAIN_GTT,
 				      &bo, NULL, (void **)&msg);
 	if (r)
 		return r;
-- 
2.29.2



_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2 1/2] drm/radeon: use writel to avoid gcc optimization
  2020-12-25  3:54 ` Chen Li
  2020-12-25  3:58   ` [PATCH v2 2/2] drm/amdgpu: use GTT for uvd_get_create/destory_msg Chen Li
@ 2021-01-04 11:48   ` Christian König
  1 sibling, 0 replies; 3+ messages in thread
From: Christian König @ 2021-01-04 11:48 UTC (permalink / raw)
  To: Chen Li, dri-devel

Am 25.12.20 um 04:54 schrieb Chen Li:
> When using e8860(gcn1) on arm64, the kernel crashed on drm/radeon:
>
> [   11.240414] pc : __memset+0x4c/0x188
> [   11.244101] lr : radeon_uvd_get_create_msg+0x114/0x1d0 [radeon]
> [   11.249995] sp : ffff00000d7eb700
> [   11.253295] x29: ffff00000d7eb700 x28: ffff8001f632a868
> [   11.258585] x27: 0000000000040000 x26: ffff00000de00000
> [   11.263875] x25: 0000000000000125 x24: 0000000000000001
> [   11.269168] x23: 0000000000000000 x22: 0000000000000005
> [   11.274459] x21: ffff00000df24000 x20: ffff8001f74b4000
> [   11.279753] x19: 0000000000124000 x18: 0000000000000020
> [   11.285043] x17: 0000000000000000 x16: 0000000000000000
> [   11.290336] x15: ffff000009309000 x14: ffffffffffffffff
> [   11.290340] x13: ffff0000094b6f88 x12: ffff0000094b6bd2
> [   11.290343] x11: ffff00000d7eb700 x10: ffff00000d7eb700
> [   11.306246] x9 : ffff00000d7eb700 x8 : ffff00000df2402c
> [   11.306254] x7 : 0000000000000000 x6 : ffff0000094b626a
> [   11.306257] x5 : 0000000000000000 x4 : 0000000000000004
> [   11.306262] x3 : ffffffffffffffff x2 : 0000000000000fd4
> [   11.306265] x1 : 0000000000000000 x0 : ffff00000df2402c
> [   11.306272] Call trace:
> [   11.306316]  __memset+0x4c/0x188
> [   11.306638]  uvd_v1_0_ib_test+0x70/0x1c0 [radeon]
> [   11.306758]  radeon_ib_ring_tests+0x54/0xe0 [radeon]
> [   11.309961] IPv6: ADDRCONF(NETDEV_UP): enp5s0f0: link is not ready
> [   11.354628]  radeon_device_init+0x53c/0xbdc [radeon]
> [   11.354693]  radeon_driver_load_kms+0x6c/0x1b0 [radeon]
> [   11.364788]  drm_dev_register+0x130/0x1c0
> [   11.364794]  drm_get_pci_dev+0x8c/0x14c
> [   11.372704]  radeon_pci_probe+0xb0/0x110 [radeon]
> [   11.372715]  local_pci_probe+0x3c/0xb0
> [   11.381129]  pci_device_probe+0x114/0x1b0
> [   11.385121]  really_probe+0x23c/0x400
> [   11.388757]  driver_probe_device+0xdc/0x130
> [   11.392921]  __driver_attach+0x128/0x150
> [   11.396826]  bus_for_each_dev+0x70/0xbc
> [   11.400643]  driver_attach+0x20/0x2c
> [   11.404201]  bus_add_driver+0x160/0x260
> [   11.408019]  driver_register+0x74/0x120
> [   11.411837]  __pci_register_driver+0x40/0x50
> [   11.416149]  radeon_init+0x78/0x1000 [radeon]
> [   11.420489]  do_one_initcall+0x54/0x154
> [   11.424310]  do_init_module+0x54/0x260
> [   11.428041]  load_module+0x1ccc/0x20b0
> [   11.431773]  __se_sys_finit_module+0xac/0x10c
> [   11.436109]  __arm64_sys_finit_module+0x18/0x20
> [   11.440622]  el0_svc_common+0x70/0x164
> [   11.444353]  el0_svc_handler+0x2c/0x80
> [   11.448084]  el0_svc+0x8/0xc
> [   11.450954] Code: d65f03c0 cb0803e4 f2400c84 54000080 (a9001d07)
>
> Obviously, the __memset call is generated by gcc(8.3.1). It optimizes
> this for loop into memset. But this may break on some platforms which
> cannot map device memory correctly. So, just invoke `writel` to handle this.
> Signed-off-by: chenli <chenli@uniontech.com>
> ---
> v1->v2:
> * Convert the other assignments of msg as well.
> * Casting types to avoid static checker warnings.
>
> ---
>   drivers/gpu/drm/radeon/radeon_uvd.c | 34 ++++++++++++++---------------
>   1 file changed, 17 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
> index 39c1c339be7b..e576470402b9 100644
> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
> @@ -791,19 +791,19 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
>   		return r;
>   
>   	/* stitch together an UVD create msg */
> -	msg[0] = cpu_to_le32(0x00000de4);
> -	msg[1] = cpu_to_le32(0x00000000);
> -	msg[2] = cpu_to_le32(handle);
> -	msg[3] = cpu_to_le32(0x00000000);
> -	msg[4] = cpu_to_le32(0x00000000);
> -	msg[5] = cpu_to_le32(0x00000000);
> -	msg[6] = cpu_to_le32(0x00000000);
> -	msg[7] = cpu_to_le32(0x00000780);
> -	msg[8] = cpu_to_le32(0x00000440);
> -	msg[9] = cpu_to_le32(0x00000000);
> -	msg[10] = cpu_to_le32(0x01b37000);
> +	writel(cpu_to_le32(0x00000de4), (void __iomem *)&msg[0]);

I made the minor change to apply the __iomem attribute to the msg 
definition and avoid the casting here.

With that done I've pushed the series to our internal servers. It should 
show up in 5.12.

Thanks for the help,
Christian.

> +	writel(0x0, (void __iomem *)&msg[1]);
> +	writel(cpu_to_le32(handle), (void __iomem *)&msg[2]);
> +	writel(0x0, (void __iomem *)&msg[3]);
> +	writel(0x0, (void __iomem *)&msg[4]);
> +	writel(0x0, (void __iomem *)&msg[5]);
> +	writel(0x0, (void __iomem *)&msg[6]);
> +	writel(cpu_to_le32(0x00000780), (void __iomem *)&msg[7]);
> +	writel(cpu_to_le32(0x00000440), (void __iomem *)&msg[8]);
> +	writel(0x0, (void __iomem *)&msg[9]);
> +	writel(cpu_to_le32(0x01b37000), (void __iomem *)&msg[10]);
>   	for (i = 11; i < 1024; ++i)
> -		msg[i] = cpu_to_le32(0x0);
> +		writel(0x0, (void __iomem *)&msg[i]);
>   
>   	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
>   	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
> @@ -827,12 +827,12 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
>   		return r;
>   
>   	/* stitch together an UVD destroy msg */
> -	msg[0] = cpu_to_le32(0x00000de4);
> -	msg[1] = cpu_to_le32(0x00000002);
> -	msg[2] = cpu_to_le32(handle);
> -	msg[3] = cpu_to_le32(0x00000000);
> +	writel(cpu_to_le32(0x00000de4), (void __iomem *)&msg[0]);
> +	writel(cpu_to_le32(0x00000002), (void __iomem *)&msg[1]);
> +	writel(cpu_to_le32(handle), (void __iomem *)&msg[2]);
> +	writel(0x0, (void __iomem *)&msg[3]);
>   	for (i = 4; i < 1024; ++i)
> -		msg[i] = cpu_to_le32(0x0);
> +		writel(0x0, (void __iomem *)&msg[i]);
>   
>   	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
>   	radeon_bo_unreserve(rdev->uvd.vcpu_bo);

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-01-01 15:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-25  3:54 [PATCH v2 1/2] drm/radeon: use writel to avoid gcc optimization Chen Li
2020-12-25  3:54 ` Chen Li
2020-12-25  3:58   ` [PATCH v2 2/2] drm/amdgpu: use GTT for uvd_get_create/destory_msg Chen Li
2021-01-04 11:48   ` [PATCH v2 1/2] drm/radeon: use writel to avoid gcc optimization Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.