All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: queue kfd interrupt work to different CPU
@ 2019-12-12 18:40 Philip Yang
  0 siblings, 0 replies; 4+ messages in thread
From: Philip Yang @ 2019-12-12 18:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

Because queue_work schedule the work on the same CPU the interrupt
handler is running, if there are many interrupts pending, it takes
longer time for work queue to start, or even worse system will hang.

v2: queue work to same NUMA node for better cache locality

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Eric Huang <JinhuiEric.Huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..1dad76a3f3c8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -822,6 +822,21 @@ static int kfd_resume(struct kfd_dev *kfd)
 	return err;
 }
 
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+				  struct work_struct *work)
+{
+	int cpu, new_cpu;
+
+	cpu = new_cpu = smp_processor_id();
+	do {
+		new_cpu = cpumask_next(new_cpu, cpu_online_mask);
+		if (cpu_to_node(new_cpu) == numa_node_id())
+			break;
+	} while (cpu != new_cpu);
+
+	queue_work_on(new_cpu, wq, work);
+}
+
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
@@ -844,7 +859,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 				   patched_ihre, &is_patched)
 	    && enqueue_ih_ring_entry(kfd,
 				     is_patched ? patched_ihre : ih_ring_entry))
-		queue_work(kfd->ih_wq, &kfd->interrupt_work);
+		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
 }
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] drm/amdkfd: queue kfd interrupt work to different CPU
@ 2019-12-12 19:13 Philip Yang
  0 siblings, 0 replies; 4+ messages in thread
From: Philip Yang @ 2019-12-12 19:13 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

Because queue_work schedule the work on the same CPU the interrupt
handler is running, if there are many interrupts pending, it takes
longer time for work queue to start, or even worse system will hang.

v2: queue work to same NUMA node for better cache locality
v3: handle cpumask_next wraparound case

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Eric Huang <JinhuiEric.Huang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..c6b6901bbda3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -822,6 +822,21 @@ static int kfd_resume(struct kfd_dev *kfd)
 	return err;
 }
 
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+				  struct work_struct *work)
+{
+	int cpu, new_cpu;
+
+	cpu = new_cpu = smp_processor_id();
+	do {
+		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+		if (cpu_to_node(new_cpu) == numa_node_id())
+			break;
+	} while (cpu != new_cpu);
+
+	queue_work_on(new_cpu, wq, work);
+}
+
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
@@ -844,7 +859,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 				   patched_ihre, &is_patched)
 	    && enqueue_ih_ring_entry(kfd,
 				     is_patched ? patched_ihre : ih_ring_entry))
-		queue_work(kfd->ih_wq, &kfd->interrupt_work);
+		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
 }
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdkfd: queue kfd interrupt work to different CPU
  2019-12-12 14:51 Philip Yang
@ 2019-12-12 15:07 ` Eric Huang
  0 siblings, 0 replies; 4+ messages in thread
From: Eric Huang @ 2019-12-12 15:07 UTC (permalink / raw)
  To: amd-gfx

It fixes cpu stuck issue in some extreme test cases.

Reviewed-by: Eric Huang <JinhuiEric.Huang@amd.com>

On 2019-12-12 9:51 a.m., Philip Yang wrote:
> Because queue_work schedule the work on the same CPU the interrupt
> handler is running, if there are many interrupts pending, it takes
> longer time for work queue to start, or even worse system will hang.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 209bfc849352..ee2a9bb1cb07 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -844,7 +844,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
>   				   patched_ihre, &is_patched)
>   	    && enqueue_ih_ring_entry(kfd,
>   				     is_patched ? patched_ihre : ih_ring_entry))
> -		queue_work(kfd->ih_wq, &kfd->interrupt_work);
> +		queue_work_on((smp_processor_id() + 1) % num_online_cpus(),
> +			       kfd->ih_wq, &kfd->interrupt_work);
>   
>   	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
>   }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] drm/amdkfd: queue kfd interrupt work to different CPU
@ 2019-12-12 14:51 Philip Yang
  2019-12-12 15:07 ` Eric Huang
  0 siblings, 1 reply; 4+ messages in thread
From: Philip Yang @ 2019-12-12 14:51 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

Because queue_work schedule the work on the same CPU the interrupt
handler is running, if there are many interrupts pending, it takes
longer time for work queue to start, or even worse system will hang.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 209bfc849352..ee2a9bb1cb07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -844,7 +844,8 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 				   patched_ihre, &is_patched)
 	    && enqueue_ih_ring_entry(kfd,
 				     is_patched ? patched_ihre : ih_ring_entry))
-		queue_work(kfd->ih_wq, &kfd->interrupt_work);
+		queue_work_on((smp_processor_id() + 1) % num_online_cpus(),
+			       kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
 }
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-12-12 19:13 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-12 18:40 [PATCH] drm/amdkfd: queue kfd interrupt work to different CPU Philip Yang
  -- strict thread matches above, loose matches on Subject: below --
2019-12-12 19:13 Philip Yang
2019-12-12 14:51 Philip Yang
2019-12-12 15:07 ` Eric Huang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.