* [RFC] dma-buf/fence-array: signal from wq
@ 2016-10-16 16:39 Rob Clark
  2016-10-16 17:49 ` Chris Wilson
  0 siblings, 1 reply; 5+ messages in thread
From: Rob Clark @ 2016-10-16 16:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Gustavo Padovan

This is the alternative approach for solving a deadlock situation with
array-fences.

Currently with fence-array, we have a potential deadlock situation.  If we
fence_add_callback() on an array-fence, the array-fence's lock is acquired
first, and in its ->enable_signaling() callback, it will install cb's on
its array-member fences, so the array-member's lock is acquired second.

But in the signal path, the array-member's lock is acquired first, and the
array-fence's lock acquired second.

To solve that, punt signaling the array-fence to a worker.
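
For reference, the two call paths and the lock order each takes (a
simplified reconstruction from the lockdep chains below; not verbatim
code):

  /* add-callback path: array-fence lock first, member lock second */
  fence_add_callback(&array->base, ...)
      spin_lock_irqsave(array->base.lock)             /* &array->lock */
      fence_array_enable_signaling()
          fence_add_callback(array->fences[i], ...)
              spin_lock_irqsave(member->lock)         /* e.g. obj->child_list_lock */

  /* signal path: member lock first, array-fence lock second */
  sw_sync_ioctl()
      spin_lock_irqsave(obj->child_list_lock)
      fence_signal_locked(member)
          fence_array_cb_func()
              fence_signal(&array->base)
                  spin_lock_irqsave(array->base.lock) /* &array->lock */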

lockdep splat:

 ======================================================
[ INFO: possible circular locking dependency detected ]
4.7.0-rc7+ #489 Not tainted
-------------------------------------------------------
surfaceflinger/2034 is trying to acquire lock:
 (&(&array->lock)->rlock){......}, at: [<ffff00000858cddc>] fence_signal+0x5c/0xf8

but task is already holding lock:
 (&(&obj->child_list_lock)->rlock){......}, at: [<ffff0000085902f8>] sw_sync_ioctl+0x228/0x3b0

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&(&obj->child_list_lock)->rlock){......}:
       [<ffff000008108924>] __lock_acquire+0x173c/0x18d8
       [<ffff000008108e0c>] lock_acquire+0x4c/0x68
       [<ffff000008ac6a6c>] _raw_spin_lock_irqsave+0x54/0x70
       [<ffff00000858d05c>] fence_add_callback+0x3c/0x100
       [<ffff00000858f100>] fence_array_enable_signaling+0x80/0x170
       [<ffff00000858d0d8>] fence_add_callback+0xb8/0x100
       [<ffff00000858f504>] sync_file_poll+0xd4/0xf0
       [<ffff0000081fd3a0>] do_sys_poll+0x220/0x438
       [<ffff0000081fd8d0>] SyS_ppoll+0x1b0/0x1d8
       [<ffff000008084f30>] el0_svc_naked+0x24/0x28

-> #0 (&(&array->lock)->rlock){......}:
       [<ffff000008104768>] print_circular_bug+0x80/0x2e0
       [<ffff0000081089ac>] __lock_acquire+0x17c4/0x18d8
       [<ffff000008108e0c>] lock_acquire+0x4c/0x68
       [<ffff000008ac6a6c>] _raw_spin_lock_irqsave+0x54/0x70
       [<ffff00000858cddc>] fence_signal+0x5c/0xf8
       [<ffff00000858f268>] fence_array_cb_func+0x78/0x88
       [<ffff00000858cb28>] fence_signal_locked+0x80/0xe0
       [<ffff0000085903c8>] sw_sync_ioctl+0x2f8/0x3b0
       [<ffff0000081faf6c>] do_vfs_ioctl+0xa4/0x790
       [<ffff0000081fb6e4>] SyS_ioctl+0x8c/0xa0
       [<ffff000008084f30>] el0_svc_naked+0x24/0x28

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&(&obj->child_list_lock)->rlock);
                               lock(&(&array->lock)->rlock);
                               lock(&(&obj->child_list_lock)->rlock);
  lock(&(&array->lock)->rlock);

 *** DEADLOCK ***

1 lock held by surfaceflinger/2034:
 #0:  (&(&obj->child_list_lock)->rlock){......}, at: [<ffff0000085902f8>] sw_sync_ioctl+0x228/0x3b0
---
 drivers/dma-buf/fence-array.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/dma-buf/fence-array.c b/drivers/dma-buf/fence-array.c
index f1989fc..446dfc2 100644
--- a/drivers/dma-buf/fence-array.c
+++ b/drivers/dma-buf/fence-array.c
@@ -19,8 +19,11 @@
 
 #include <linux/export.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/fence-array.h>
 
+static struct workqueue_struct *fence_array_wq;
+
 static void fence_array_cb_func(struct fence *f, struct fence_cb *cb);
 
 static const char *fence_array_get_driver_name(struct fence *fence)
@@ -33,6 +36,13 @@ static const char *fence_array_get_timeline_name(struct fence *fence)
 	return "unbound";
 }
 
+static void signal_worker(struct work_struct *w)
+{
+	struct fence_array *array =
+		container_of(w, struct fence_array, signal_worker);
+	fence_signal(&array->base);
+}
+
 static void fence_array_cb_func(struct fence *f, struct fence_cb *cb)
 {
 	struct fence_array_cb *array_cb =
@@ -40,7 +50,7 @@ static void fence_array_cb_func(struct fence *f, struct fence_cb *cb)
 	struct fence_array *array = array_cb->array;
 
 	if (atomic_dec_and_test(&array->num_pending))
-		fence_signal(&array->base);
+		queue_work(fence_array_wq, &array->signal_worker);
 	fence_put(&array->base);
 }
 
@@ -140,6 +150,25 @@ struct fence_array *fence_array_create(int num_fences, struct fence **fences,
 	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
 	array->fences = fences;
 
+	INIT_WORK(&array->signal_worker, signal_worker);
+
 	return array;
 }
 EXPORT_SYMBOL(fence_array_create);
+
+static int __init fence_array_init(void)
+{
+	fence_array_wq = alloc_ordered_workqueue("fence-array", 0);
+	if (!fence_array_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+static void __exit fence_array_exit(void)
+{
+	flush_workqueue(fence_array_wq);
+	destroy_workqueue(fence_array_wq);
+}
+
+module_init(fence_array_init);
+module_exit(fence_array_exit);
-- 
2.7.4


* Re: [RFC] dma-buf/fence-array: signal from wq
  2016-10-16 16:39 [RFC] dma-buf/fence-array: signal from wq Rob Clark
@ 2016-10-16 17:49 ` Chris Wilson
  2016-10-16 18:29   ` Rob Clark
  0 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2016-10-16 17:49 UTC (permalink / raw)
  To: Rob Clark; +Cc: Gustavo Padovan, dri-devel

On Sun, Oct 16, 2016 at 12:39:35PM -0400, Rob Clark wrote:
> This is the alternative approach for solving a deadlock situation with
> array-fences.
> 
> Currently with fence-array, we have a potential deadlock situation.  If we
> fence_add_callback() on an array-fence, the array-fence's lock is acquired
> first, and in its ->enable_signaling() callback, it will install cb's on
> its array-member fences, so the array-member's lock is acquired second.
> 
> But in the signal path, the array-member's lock is acquired first, and the
> array-fence's lock acquired second.

I'm feeling this is the better approach, it puts the complexity inside
the trickster. However, I think it is better to move the wq to
enable-signaling rather than the fence_signal() cb. (Latency at the
start of the wait will hopefully be absorbed by the wait, but we cannot
hide latency at the end).
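
A rough illustrative sketch of that alternative (not the posted patch; it
assumes a hypothetical enable_worker work_struct added to struct
fence_array and initialized in fence_array_create(), with error handling
trimmed):

	static bool fence_array_enable_signaling(struct fence *fence)
	{
		struct fence_array *array = to_fence_array(fence);

		/* called with array->base.lock held; take no member locks here */
		fence_get(&array->base);	/* keep the array alive for the worker */
		queue_work(system_unbound_wq, &array->enable_worker);
		return true;
	}

	static void fence_array_enable_worker(struct work_struct *w)
	{
		struct fence_array *array =
			container_of(w, struct fence_array, enable_worker);
		struct fence_array_cb *cb = (void *)(&array[1]);
		unsigned i;

		/* install the member callbacks outside of array->base.lock */
		for (i = 0; i < array->num_fences; i++) {
			cb[i].array = array;
			fence_get(&array->base);
			if (fence_add_callback(array->fences[i], &cb[i].cb,
					       fence_array_cb_func)) {
				/* member already signaled */
				fence_put(&array->base);
				if (atomic_dec_and_test(&array->num_pending))
					fence_signal(&array->base);
			}
		}

		fence_put(&array->base);	/* ref taken in enable_signaling */
	}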

> +static int __init fence_array_init(void)
> +{
> +	fence_array_wq = alloc_ordered_workqueue("fence-array", 0);
> +	if (!fence_array_wq)
> +		return -ENOMEM;

Definitely doesn't want to be ordered, since the fences put onto the wq
should be independent and can work in parallel. system_unbound should
suffice.
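
For instance (a sketch, not the posted patch), fence_array_cb_func()
could simply do:

	if (atomic_dec_and_test(&array->num_pending))
		queue_work(system_unbound_wq, &array->signal_worker);

which would also drop the dedicated workqueue and the module init/exit
boilerplate.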
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC] dma-buf/fence-array: signal from wq
  2016-10-16 17:49 ` Chris Wilson
@ 2016-10-16 18:29   ` Rob Clark
  2016-10-16 18:54     ` Chris Wilson
  0 siblings, 1 reply; 5+ messages in thread
From: Rob Clark @ 2016-10-16 18:29 UTC (permalink / raw)
  To: Chris Wilson, Rob Clark, dri-devel, Maarten Lankhorst, Gustavo Padovan

On Sun, Oct 16, 2016 at 1:49 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sun, Oct 16, 2016 at 12:39:35PM -0400, Rob Clark wrote:
>> This is the alternative approach for solving a deadlock situation with
>> array-fences.
>>
>> Currently with fence-array, we have a potential deadlock situation.  If we
>> fence_add_callback() on an array-fence, the array-fence's lock is acquired
>> first, and in its ->enable_signaling() callback, it will install cb's on
>> its array-member fences, so the array-member's lock is acquired second.
>>
>> But in the signal path, the array-member's lock is acquired first, and the
>> array-fence's lock acquired second.
>
> I'm feeling this is the better approach, it puts the complexity inside
> the trickster. However, I think it is better to move the wq to
> enable-signaling rather than the fence_signal() cb. (Latency at the
> start of the wait will hopefully be absorbed by the wait, but we cannot
> hide latency at the end).

my spidey sense is telling me that would race with the fence getting signalled..

(although I probably also need to move the fence_put() from
fence_array_cb_func() to the worker fxn)

>> +static int __init fence_array_init(void)
>> +{
>> +     fence_array_wq = alloc_ordered_workqueue("fence-array", 0);
>> +     if (!fence_array_wq)
>> +             return -ENOMEM;
>
> Definitely doesn't want to be ordered, since the fences put onto the wq
> should be independent and can work in parallel. system_unbound should
> suffice.

ahh, yeah, that makes sense..

BR,
-R

> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC] dma-buf/fence-array: signal from wq
  2016-10-16 18:29   ` Rob Clark
@ 2016-10-16 18:54     ` Chris Wilson
  2016-10-26 15:54       ` Rob Clark
  0 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2016-10-16 18:54 UTC (permalink / raw)
  To: Rob Clark; +Cc: Gustavo Padovan, dri-devel

On Sun, Oct 16, 2016 at 02:29:51PM -0400, Rob Clark wrote:
> On Sun, Oct 16, 2016 at 1:49 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Sun, Oct 16, 2016 at 12:39:35PM -0400, Rob Clark wrote:
> >> This is the alternative approach for solving a deadlock situation with
> >> array-fences.
> >>
> >> Currently with fence-array, we have a potential deadlock situation.  If we
> >> fence_add_callback() on an array-fence, the array-fence's lock is acquired
> >> first, and in its ->enable_signaling() callback, it will install cb's on
> >> its array-member fences, so the array-member's lock is acquired second.
> >>
> >> But in the signal path, the array-member's lock is acquired first, and the
> >> array-fence's lock acquired second.
> >
> > I'm feeling this is the better approach, it puts the complexity inside
> > the trickster. However, I think it is better to move the wq to
> > enable-signaling rather than the fence_signal() cb. (Latency at the
> > start of the wait will hopefully be absorbed by the wait, but we cannot
> > hide latency at the end).
> 
> my spidey sense is telling me that would race with the fence getting signalled..

Which we don't care about, since we haven't yet added ourselves to the
cb_list of the fence so haven't yet adjusted the fence-array's pending
count and so haven't yet signaled the fence array. i.e. we can tolerate
enabling signaling on the fence array after all of its children are
signaled. Imo, better to have that latency upfront.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC] dma-buf/fence-array: signal from wq
  2016-10-16 18:54     ` Chris Wilson
@ 2016-10-26 15:54       ` Rob Clark
  0 siblings, 0 replies; 5+ messages in thread
From: Rob Clark @ 2016-10-26 15:54 UTC (permalink / raw)
  To: Chris Wilson, Rob Clark, dri-devel, Maarten Lankhorst, Gustavo Padovan

On Sun, Oct 16, 2016 at 2:54 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sun, Oct 16, 2016 at 02:29:51PM -0400, Rob Clark wrote:
>> On Sun, Oct 16, 2016 at 1:49 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Sun, Oct 16, 2016 at 12:39:35PM -0400, Rob Clark wrote:
>> >> This is the alternative approach for solving a deadlock situation with
>> >> array-fences.
>> >>
>> >> Currently with fence-array, we have a potential deadlock situation.  If we
>> >> fence_add_callback() on an array-fence, the array-fence's lock is acquired
>> >> first, and in its ->enable_signaling() callback, it will install cb's on
>> >> its array-member fences, so the array-member's lock is acquired second.
>> >>
>> >> But in the signal path, the array-member's lock is acquired first, and the
>> >> array-fence's lock acquired second.
>> >
>> > I'm feeling this is the better approach, it puts the complexity inside
>> > the trickster. However, I think it is better to move the wq to
>> > enable-signaling rather than the fence_signal() cb. (Latency at the
>> > start of the wait will hopefully be absorbed by the wait, but we cannot
>> > hide latency at the end).
>>
>> my spidey sense is telling me that would race with the fence getting signalled..
>
> Which we don't care about, since we haven't yet added ourselves to the
> cb_list of the fence so haven't yet adjusted the fence-array's pending
> count and so haven't yet signaled the fence array. i.e. we can tolerate
> enabling signaling on the fence array after all of its children are
> signaled. Imo, better to have that latency upfront.

btw, another fun one, and I think another argument for doing the wq on
the callback side so we can avoid the final fence_put() from the
callback:

---------
=============================================
[ INFO: possible recursive locking detected ]
4.7.0-rc7+ #548 Not tainted
---------------------------------------------
surfaceflinger/2038 is trying to acquire lock:
 (&(&obj->child_list_lock)->rlock){......}, at: [<ffff0000085908a8>] timeline_fence_release+0x38/0xa8

but task is already holding lock:
 (&(&obj->child_list_lock)->rlock){......}, at: [<ffff000008590b40>] sw_sync_ioctl+0x228/0x3b0

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&obj->child_list_lock)->rlock);
  lock(&(&obj->child_list_lock)->rlock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

1 lock held by surfaceflinger/2038:
 #0:  (&(&obj->child_list_lock)->rlock){......}, at: [<ffff000008590b40>] sw_sync_ioctl+0x228/0x3b0

stack backtrace:
CPU: 3 PID: 2038 Comm: surfaceflinger Not tainted 4.7.0-rc7+ #548
Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
Call trace:
[<ffff000008088be8>] dump_backtrace+0x0/0x1a8
[<ffff000008088da4>] show_stack+0x14/0x20
[<ffff0000083b8234>] dump_stack+0xb4/0xf0
[<ffff0000081080f4>] __lock_acquire+0xf0c/0x18d8
[<ffff000008108e0c>] lock_acquire+0x4c/0x68
[<ffff000008ac72b4>] _raw_spin_lock_irqsave+0x54/0x70
[<ffff0000085908a8>] timeline_fence_release+0x38/0xa8
[<ffff00000858d758>] fence_release+0x28/0x50
[<ffff00000858f878>] fence_array_release+0x88/0xd0
[<ffff00000858d758>] fence_release+0x28/0x50
[<ffff00000858fa94>] fence_array_cb_func+0x64/0x88
[<ffff00000858d31c>] fence_signal_locked+0x8c/0x100
[<ffff000008590c10>] sw_sync_ioctl+0x2f8/0x3b0
[<ffff0000081faf6c>] do_vfs_ioctl+0xa4/0x790
[<ffff0000081fb6e4>] SyS_ioctl+0x8c/0xa0
[<ffff000008084f30>] el0_svc_naked+0x24/0x28
---------
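
Spelled out (reconstructed from the backtrace above): the callback's
fence_put() can drop the last reference on the array while the member's
lock is still held, and releasing the array in turn drops references on
its members:

  sw_sync_ioctl()
      spin_lock(obj->child_list_lock)
      fence_signal_locked(member)
          fence_array_cb_func()
              fence_put(&array->base)              /* last ref on the array */
                  fence_array_release()
                      fence_put(array->fences[i])  /* last ref on a member */
                          timeline_fence_release()
                              spin_lock(obj->child_list_lock)  /* already held */

Doing that final fence_put() from the worker instead would keep the
release path out of the member's lock.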
