All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
@ 2020-07-14 15:41 ` Bas Nieuwenhuizen
  0 siblings, 0 replies; 9+ messages in thread
From: Bas Nieuwenhuizen @ 2020-07-14 15:41 UTC (permalink / raw)
  To: dri-devel
  Cc: Bas Nieuwenhuizen, Sumit Semwal, Chris Wilson, Gustavo Padovan,
	Christian König, stable

Calltree:
  timeline_fence_release
  drm_sched_entity_wakeup
  dma_fence_signal_locked
  sync_timeline_signal
  sw_sync_ioctl

Releasing the reference to the fence in the fence signal callback
seems reasonable to me, so this patch avoids the locking issue in
sw_sync.

d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
fixed the recursive locking issue but caused an use-after-free. Later
d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
fixed the use-after-free but reintroduced the recursive locking issue.

In this attempt we avoid the use-after-free still because the release
function still always locks, and outside of the locking region in the
signal function we have properly refcounted references.

We furthermore also avoid the recurive lock by making sure that either:

1) We have a properly refcounted reference, preventing the signal from
   triggering the release function inside the locked region.
2) The refcount was already zero, and hence nobody will be able to trigger
   the release function from the signal function.

Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Christian König <christian.koenig@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 348b3a9170fa..30a482f75d56 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
 	struct sync_pt *pt, *next;
+	struct list_head ref_list;
 
 	trace_sync_timeline(obj);
 
+	INIT_LIST_HEAD(&ref_list);
+
 	spin_lock_irq(&obj->lock);
 
 	obj->value += inc;
@@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 		list_del_init(&pt->link);
 		rb_erase(&pt->node, &obj->pt_tree);
 
-		/*
-		 * A signal callback may release the last reference to this
-		 * fence, causing it to be freed. That operation has to be
-		 * last to avoid a use after free inside this loop, and must
-		 * be after we remove the fence from the timeline in order to
-		 * prevent deadlocking on timeline->lock inside
-		 * timeline_fence_release().
-		 */
+		/* We need to take a reference to avoid a release during
+		 * signalling (which can cause a recursive lock of obj->lock).
+		 * If refcount was already zero, another thread is already taking
+		 * care of destructing the fence, so the signal cannot release
+		 * it again and we hence will not have the recursive lock. */
+		if (dma_fence_get_rcu(&pt->base))
+			list_add_tail(&pt->link, &ref_list);
+
 		dma_fence_signal_locked(&pt->base);
 	}
 
 	spin_unlock_irq(&obj->lock);
+
+	list_for_each_entry_safe(pt, next, &ref_list, link) {
+		/* This needs to be cleared before release, otherwise the
+		 * timeline_fence_release function gets confused about also
+		 * removing the fence from the pt_tree. */
+		list_del_init(&pt->link);
+
+		dma_fence_put(&pt->base);
+	}
 }
 
 /**
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
@ 2020-07-14 15:41 ` Bas Nieuwenhuizen
  0 siblings, 0 replies; 9+ messages in thread
From: Bas Nieuwenhuizen @ 2020-07-14 15:41 UTC (permalink / raw)
  To: dri-devel; +Cc: Gustavo Padovan, stable, Chris Wilson, Christian König

Calltree:
  timeline_fence_release
  drm_sched_entity_wakeup
  dma_fence_signal_locked
  sync_timeline_signal
  sw_sync_ioctl

Releasing the reference to the fence in the fence signal callback
seems reasonable to me, so this patch avoids the locking issue in
sw_sync.

d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
fixed the recursive locking issue but caused an use-after-free. Later
d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
fixed the use-after-free but reintroduced the recursive locking issue.

In this attempt we avoid the use-after-free still because the release
function still always locks, and outside of the locking region in the
signal function we have properly refcounted references.

We furthermore also avoid the recurive lock by making sure that either:

1) We have a properly refcounted reference, preventing the signal from
   triggering the release function inside the locked region.
2) The refcount was already zero, and hence nobody will be able to trigger
   the release function from the signal function.

Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Christian König <christian.koenig@amd.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 348b3a9170fa..30a482f75d56 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
 	struct sync_pt *pt, *next;
+	struct list_head ref_list;
 
 	trace_sync_timeline(obj);
 
+	INIT_LIST_HEAD(&ref_list);
+
 	spin_lock_irq(&obj->lock);
 
 	obj->value += inc;
@@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 		list_del_init(&pt->link);
 		rb_erase(&pt->node, &obj->pt_tree);
 
-		/*
-		 * A signal callback may release the last reference to this
-		 * fence, causing it to be freed. That operation has to be
-		 * last to avoid a use after free inside this loop, and must
-		 * be after we remove the fence from the timeline in order to
-		 * prevent deadlocking on timeline->lock inside
-		 * timeline_fence_release().
-		 */
+		/* We need to take a reference to avoid a release during
+		 * signalling (which can cause a recursive lock of obj->lock).
+		 * If refcount was already zero, another thread is already taking
+		 * care of destructing the fence, so the signal cannot release
+		 * it again and we hence will not have the recursive lock. */
+		if (dma_fence_get_rcu(&pt->base))
+			list_add_tail(&pt->link, &ref_list);
+
 		dma_fence_signal_locked(&pt->base);
 	}
 
 	spin_unlock_irq(&obj->lock);
+
+	list_for_each_entry_safe(pt, next, &ref_list, link) {
+		/* This needs to be cleared before release, otherwise the
+		 * timeline_fence_release function gets confused about also
+		 * removing the fence from the pt_tree. */
+		list_del_init(&pt->link);
+
+		dma_fence_put(&pt->base);
+	}
 }
 
 /**
-- 
2.27.0

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
  2020-07-14 15:41 ` Bas Nieuwenhuizen
  (?)
@ 2020-07-14 16:25 ` Chris Wilson
  2020-07-14 18:17     ` Bas Nieuwenhuizen
  -1 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2020-07-14 16:25 UTC (permalink / raw)
  To: Bas Nieuwenhuizen, dri-devel
  Cc: Gustavo Padovan, Christian König, stable

Quoting Bas Nieuwenhuizen (2020-07-14 16:41:02)
> Calltree:
>   timeline_fence_release
>   drm_sched_entity_wakeup
>   dma_fence_signal_locked
>   sync_timeline_signal
>   sw_sync_ioctl
> 
> Releasing the reference to the fence in the fence signal callback
> seems reasonable to me, so this patch avoids the locking issue in
> sw_sync.
> 
> d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
> fixed the recursive locking issue but caused an use-after-free. Later
> d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> fixed the use-after-free but reintroduced the recursive locking issue.
> 
> In this attempt we avoid the use-after-free still because the release
> function still always locks, and outside of the locking region in the
> signal function we have properly refcounted references.
> 
> We furthermore also avoid the recurive lock by making sure that either:
> 
> 1) We have a properly refcounted reference, preventing the signal from
>    triggering the release function inside the locked region.
> 2) The refcount was already zero, and hence nobody will be able to trigger
>    the release function from the signal function.
> 
> Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Gustavo Padovan <gustavo@padovan.org>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> ---
>  drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
>  1 file changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> index 348b3a9170fa..30a482f75d56 100644
> --- a/drivers/dma-buf/sw_sync.c
> +++ b/drivers/dma-buf/sw_sync.c
> @@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
>  static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
>  {
>         struct sync_pt *pt, *next;
> +       struct list_head ref_list;
>  
>         trace_sync_timeline(obj);
>  
> +       INIT_LIST_HEAD(&ref_list);
> +
>         spin_lock_irq(&obj->lock);
>  
>         obj->value += inc;
> @@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
>                 list_del_init(&pt->link);
>                 rb_erase(&pt->node, &obj->pt_tree);
>  
> -               /*
> -                * A signal callback may release the last reference to this
> -                * fence, causing it to be freed. That operation has to be
> -                * last to avoid a use after free inside this loop, and must
> -                * be after we remove the fence from the timeline in order to
> -                * prevent deadlocking on timeline->lock inside
> -                * timeline_fence_release().
> -                */
> +               /* We need to take a reference to avoid a release during
> +                * signalling (which can cause a recursive lock of obj->lock).
> +                * If refcount was already zero, another thread is already taking
> +                * care of destructing the fence, so the signal cannot release
> +                * it again and we hence will not have the recursive lock. */

/*
 * Block commentary style:
 * https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
 */

> +               if (dma_fence_get_rcu(&pt->base))
> +                       list_add_tail(&pt->link, &ref_list);

Ok.

> +
>                 dma_fence_signal_locked(&pt->base);
>         }
>  
>         spin_unlock_irq(&obj->lock);
> +
> +       list_for_each_entry_safe(pt, next, &ref_list, link) {
> +               /* This needs to be cleared before release, otherwise the
> +                * timeline_fence_release function gets confused about also
> +                * removing the fence from the pt_tree. */
> +               list_del_init(&pt->link);
> +
> +               dma_fence_put(&pt->base);
> +       }

How serious is the problem of one fence callback freeing another pt?

Following the pattern here

	spin_lock(&obj->lock);
        list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
                if (!timeline_fence_signaled(&pt->base))
                        break;

		if (!dma_fence_get_rcu(&pt->base))
			continue; /* too late! */

                rb_erase(&pt->node, &obj->pt_tree);
                list_move_tail(&pt->link, &ref_list);
	}
	spin_unlock(&obj->lock);

        list_for_each_entry_safe(pt, next, &ref_list, link) {
		list_del_init(&pt->link);
		dma_fence_signal(&pt->base);
		dma_fence_put(&pt->base);
	}

Marginal extra cost for signaling along the debug sw_timeline for total
peace of mind.
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
  2020-07-14 16:25 ` Chris Wilson
@ 2020-07-14 18:17     ` Bas Nieuwenhuizen
  0 siblings, 0 replies; 9+ messages in thread
From: Bas Nieuwenhuizen @ 2020-07-14 18:17 UTC (permalink / raw)
  To: Chris Wilson
  Cc: ML dri-devel, Sumit Semwal, Gustavo Padovan,
	Christian König, stable

On Tue, Jul 14, 2020 at 6:26 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Bas Nieuwenhuizen (2020-07-14 16:41:02)
> > Calltree:
> >   timeline_fence_release
> >   drm_sched_entity_wakeup
> >   dma_fence_signal_locked
> >   sync_timeline_signal
> >   sw_sync_ioctl
> >
> > Releasing the reference to the fence in the fence signal callback
> > seems reasonable to me, so this patch avoids the locking issue in
> > sw_sync.
> >
> > d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
> > fixed the recursive locking issue but caused an use-after-free. Later
> > d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > fixed the use-after-free but reintroduced the recursive locking issue.
> >
> > In this attempt we avoid the use-after-free still because the release
> > function still always locks, and outside of the locking region in the
> > signal function we have properly refcounted references.
> >
> > We furthermore also avoid the recurive lock by making sure that either:
> >
> > 1) We have a properly refcounted reference, preventing the signal from
> >    triggering the release function inside the locked region.
> > 2) The refcount was already zero, and hence nobody will be able to trigger
> >    the release function from the signal function.
> >
> > Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > Cc: Sumit Semwal <sumit.semwal@linaro.org>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Gustavo Padovan <gustavo@padovan.org>
> > Cc: Christian König <christian.koenig@amd.com>
> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > ---
> >  drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
> >  1 file changed, 20 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> > index 348b3a9170fa..30a482f75d56 100644
> > --- a/drivers/dma-buf/sw_sync.c
> > +++ b/drivers/dma-buf/sw_sync.c
> > @@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
> >  static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> >  {
> >         struct sync_pt *pt, *next;
> > +       struct list_head ref_list;
> >
> >         trace_sync_timeline(obj);
> >
> > +       INIT_LIST_HEAD(&ref_list);
> > +
> >         spin_lock_irq(&obj->lock);
> >
> >         obj->value += inc;
> > @@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> >                 list_del_init(&pt->link);
> >                 rb_erase(&pt->node, &obj->pt_tree);
> >
> > -               /*
> > -                * A signal callback may release the last reference to this
> > -                * fence, causing it to be freed. That operation has to be
> > -                * last to avoid a use after free inside this loop, and must
> > -                * be after we remove the fence from the timeline in order to
> > -                * prevent deadlocking on timeline->lock inside
> > -                * timeline_fence_release().
> > -                */
> > +               /* We need to take a reference to avoid a release during
> > +                * signalling (which can cause a recursive lock of obj->lock).
> > +                * If refcount was already zero, another thread is already taking
> > +                * care of destructing the fence, so the signal cannot release
> > +                * it again and we hence will not have the recursive lock. */
>
> /*
>  * Block commentary style:
>  * https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
>  */
>
> > +               if (dma_fence_get_rcu(&pt->base))
> > +                       list_add_tail(&pt->link, &ref_list);
>
> Ok.
>
> > +
> >                 dma_fence_signal_locked(&pt->base);
> >         }
> >
> >         spin_unlock_irq(&obj->lock);
> > +
> > +       list_for_each_entry_safe(pt, next, &ref_list, link) {
> > +               /* This needs to be cleared before release, otherwise the
> > +                * timeline_fence_release function gets confused about also
> > +                * removing the fence from the pt_tree. */
> > +               list_del_init(&pt->link);
> > +
> > +               dma_fence_put(&pt->base);
> > +       }
>
> How serious is the problem of one fence callback freeing another pt?
>
> Following the pattern here
>
>         spin_lock(&obj->lock);
>         list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
>                 if (!timeline_fence_signaled(&pt->base))
>                         break;
>
>                 if (!dma_fence_get_rcu(&pt->base))
>                         continue; /* too late! */
>
>                 rb_erase(&pt->node, &obj->pt_tree);
>                 list_move_tail(&pt->link, &ref_list);
>         }
>         spin_unlock(&obj->lock);
>
>         list_for_each_entry_safe(pt, next, &ref_list, link) {
>                 list_del_init(&pt->link);
>                 dma_fence_signal(&pt->base);

Question is what the scope should be. Using this method we only take a
reference and avoid releasing the fences we're going to signal.
However the lock is on the timeline and dma_fence_signal already takes
the lock, so if the signal ends up releasing any of the other fences
in the timeline (e.g. the fences that are later than the new value),
then we are still going to have the recursive locking issue.

One solution to that would be splitting the lock into 2 locks: 1
protecting pt_list + pt_tree and 1 protecting value & serving as the
lock for the fences. Then we can only take the list lock in the
release function and dma_fence_signal takes the value lock. However,
then you still have issues like if a callback for fence A (when called
it holds the lock of A's timeline) adds a callback to fence B of the
same timeline (requires the fence lock) we still have a recursion
locking issue.

I'm not sure it is reasonable for the last use case to work, because I
only see it working with per-fence locks, but per-fence locks are
deadlock prone due to the order of taking links not really being
fixed.



>                 dma_fence_put(&pt->base);
>         }
>
> Marginal extra cost for signaling along the debug sw_timeline for total
> peace of mind.
> -Chris

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
@ 2020-07-14 18:17     ` Bas Nieuwenhuizen
  0 siblings, 0 replies; 9+ messages in thread
From: Bas Nieuwenhuizen @ 2020-07-14 18:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: Gustavo Padovan, stable, ML dri-devel, Christian König

On Tue, Jul 14, 2020 at 6:26 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Bas Nieuwenhuizen (2020-07-14 16:41:02)
> > Calltree:
> >   timeline_fence_release
> >   drm_sched_entity_wakeup
> >   dma_fence_signal_locked
> >   sync_timeline_signal
> >   sw_sync_ioctl
> >
> > Releasing the reference to the fence in the fence signal callback
> > seems reasonable to me, so this patch avoids the locking issue in
> > sw_sync.
> >
> > d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
> > fixed the recursive locking issue but caused an use-after-free. Later
> > d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > fixed the use-after-free but reintroduced the recursive locking issue.
> >
> > In this attempt we avoid the use-after-free still because the release
> > function still always locks, and outside of the locking region in the
> > signal function we have properly refcounted references.
> >
> > We furthermore also avoid the recurive lock by making sure that either:
> >
> > 1) We have a properly refcounted reference, preventing the signal from
> >    triggering the release function inside the locked region.
> > 2) The refcount was already zero, and hence nobody will be able to trigger
> >    the release function from the signal function.
> >
> > Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > Cc: Sumit Semwal <sumit.semwal@linaro.org>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Gustavo Padovan <gustavo@padovan.org>
> > Cc: Christian König <christian.koenig@amd.com>
> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > ---
> >  drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
> >  1 file changed, 20 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> > index 348b3a9170fa..30a482f75d56 100644
> > --- a/drivers/dma-buf/sw_sync.c
> > +++ b/drivers/dma-buf/sw_sync.c
> > @@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
> >  static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> >  {
> >         struct sync_pt *pt, *next;
> > +       struct list_head ref_list;
> >
> >         trace_sync_timeline(obj);
> >
> > +       INIT_LIST_HEAD(&ref_list);
> > +
> >         spin_lock_irq(&obj->lock);
> >
> >         obj->value += inc;
> > @@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> >                 list_del_init(&pt->link);
> >                 rb_erase(&pt->node, &obj->pt_tree);
> >
> > -               /*
> > -                * A signal callback may release the last reference to this
> > -                * fence, causing it to be freed. That operation has to be
> > -                * last to avoid a use after free inside this loop, and must
> > -                * be after we remove the fence from the timeline in order to
> > -                * prevent deadlocking on timeline->lock inside
> > -                * timeline_fence_release().
> > -                */
> > +               /* We need to take a reference to avoid a release during
> > +                * signalling (which can cause a recursive lock of obj->lock).
> > +                * If refcount was already zero, another thread is already taking
> > +                * care of destructing the fence, so the signal cannot release
> > +                * it again and we hence will not have the recursive lock. */
>
> /*
>  * Block commentary style:
>  * https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
>  */
>
> > +               if (dma_fence_get_rcu(&pt->base))
> > +                       list_add_tail(&pt->link, &ref_list);
>
> Ok.
>
> > +
> >                 dma_fence_signal_locked(&pt->base);
> >         }
> >
> >         spin_unlock_irq(&obj->lock);
> > +
> > +       list_for_each_entry_safe(pt, next, &ref_list, link) {
> > +               /* This needs to be cleared before release, otherwise the
> > +                * timeline_fence_release function gets confused about also
> > +                * removing the fence from the pt_tree. */
> > +               list_del_init(&pt->link);
> > +
> > +               dma_fence_put(&pt->base);
> > +       }
>
> How serious is the problem of one fence callback freeing another pt?
>
> Following the pattern here
>
>         spin_lock(&obj->lock);
>         list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
>                 if (!timeline_fence_signaled(&pt->base))
>                         break;
>
>                 if (!dma_fence_get_rcu(&pt->base))
>                         continue; /* too late! */
>
>                 rb_erase(&pt->node, &obj->pt_tree);
>                 list_move_tail(&pt->link, &ref_list);
>         }
>         spin_unlock(&obj->lock);
>
>         list_for_each_entry_safe(pt, next, &ref_list, link) {
>                 list_del_init(&pt->link);
>                 dma_fence_signal(&pt->base);

Question is what the scope should be. Using this method we only take a
reference and avoid releasing the fences we're going to signal.
However the lock is on the timeline and dma_fence_signal already takes
the lock, so if the signal ends up releasing any of the other fences
in the timeline (e.g. the fences that are later than the new value),
then we are still going to have the recursive locking issue.

One solution to that would be splitting the lock into 2 locks: 1
protecting pt_list + pt_tree and 1 protecting value & serving as the
lock for the fences. Then we can only take the list lock in the
release function and dma_fence_signal takes the value lock. However,
then you still have issues like if a callback for fence A (when called
it holds the lock of A's timeline) adds a callback to fence B of the
same timeline (requires the fence lock) we still have a recursion
locking issue.

I'm not sure it is reasonable for the last use case to work, because I
only see it working with per-fence locks, but per-fence locks are
deadlock prone due to the order of taking links not really being
fixed.



>                 dma_fence_put(&pt->base);
>         }
>
> Marginal extra cost for signaling along the debug sw_timeline for total
> peace of mind.
> -Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
  2020-07-14 18:17     ` Bas Nieuwenhuizen
  (?)
@ 2020-07-14 18:30     ` Chris Wilson
  2020-07-14 19:43       ` Chris Wilson
  -1 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2020-07-14 18:30 UTC (permalink / raw)
  To: Bas Nieuwenhuizen
  Cc: Gustavo Padovan, stable, ML dri-devel, Christian König

Quoting Bas Nieuwenhuizen (2020-07-14 19:17:21)
> On Tue, Jul 14, 2020 at 6:26 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Quoting Bas Nieuwenhuizen (2020-07-14 16:41:02)
> > > Calltree:
> > >   timeline_fence_release
> > >   drm_sched_entity_wakeup
> > >   dma_fence_signal_locked
> > >   sync_timeline_signal
> > >   sw_sync_ioctl
> > >
> > > Releasing the reference to the fence in the fence signal callback
> > > seems reasonable to me, so this patch avoids the locking issue in
> > > sw_sync.
> > >
> > > d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
> > > fixed the recursive locking issue but caused an use-after-free. Later
> > > d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > > fixed the use-after-free but reintroduced the recursive locking issue.
> > >
> > > In this attempt we avoid the use-after-free still because the release
> > > function still always locks, and outside of the locking region in the
> > > signal function we have properly refcounted references.
> > >
> > > We furthermore also avoid the recurive lock by making sure that either:
> > >
> > > 1) We have a properly refcounted reference, preventing the signal from
> > >    triggering the release function inside the locked region.
> > > 2) The refcount was already zero, and hence nobody will be able to trigger
> > >    the release function from the signal function.
> > >
> > > Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > > Cc: Sumit Semwal <sumit.semwal@linaro.org>
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Gustavo Padovan <gustavo@padovan.org>
> > > Cc: Christian König <christian.koenig@amd.com>
> > > Cc: <stable@vger.kernel.org>
> > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > > ---
> > >  drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
> > >  1 file changed, 20 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> > > index 348b3a9170fa..30a482f75d56 100644
> > > --- a/drivers/dma-buf/sw_sync.c
> > > +++ b/drivers/dma-buf/sw_sync.c
> > > @@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
> > >  static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> > >  {
> > >         struct sync_pt *pt, *next;
> > > +       struct list_head ref_list;
> > >
> > >         trace_sync_timeline(obj);
> > >
> > > +       INIT_LIST_HEAD(&ref_list);
> > > +
> > >         spin_lock_irq(&obj->lock);
> > >
> > >         obj->value += inc;
> > > @@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> > >                 list_del_init(&pt->link);
> > >                 rb_erase(&pt->node, &obj->pt_tree);
> > >
> > > -               /*
> > > -                * A signal callback may release the last reference to this
> > > -                * fence, causing it to be freed. That operation has to be
> > > -                * last to avoid a use after free inside this loop, and must
> > > -                * be after we remove the fence from the timeline in order to
> > > -                * prevent deadlocking on timeline->lock inside
> > > -                * timeline_fence_release().
> > > -                */
> > > +               /* We need to take a reference to avoid a release during
> > > +                * signalling (which can cause a recursive lock of obj->lock).
> > > +                * If refcount was already zero, another thread is already taking
> > > +                * care of destructing the fence, so the signal cannot release
> > > +                * it again and we hence will not have the recursive lock. */
> >
> > /*
> >  * Block commentary style:
> >  * https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
> >  */
> >
> > > +               if (dma_fence_get_rcu(&pt->base))
> > > +                       list_add_tail(&pt->link, &ref_list);
> >
> > Ok.
> >
> > > +
> > >                 dma_fence_signal_locked(&pt->base);
> > >         }
> > >
> > >         spin_unlock_irq(&obj->lock);
> > > +
> > > +       list_for_each_entry_safe(pt, next, &ref_list, link) {
> > > +               /* This needs to be cleared before release, otherwise the
> > > +                * timeline_fence_release function gets confused about also
> > > +                * removing the fence from the pt_tree. */
> > > +               list_del_init(&pt->link);
> > > +
> > > +               dma_fence_put(&pt->base);
> > > +       }
> >
> > How serious is the problem of one fence callback freeing another pt?
> >
> > Following the pattern here
> >
> >         spin_lock(&obj->lock);
> >         list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
> >                 if (!timeline_fence_signaled(&pt->base))
> >                         break;
> >
> >                 if (!dma_fence_get_rcu(&pt->base))
> >                         continue; /* too late! */
> >
> >                 rb_erase(&pt->node, &obj->pt_tree);
> >                 list_move_tail(&pt->link, &ref_list);
> >         }
> >         spin_unlock(&obj->lock);
> >
> >         list_for_each_entry_safe(pt, next, &ref_list, link) {
> >                 list_del_init(&pt->link);
> >                 dma_fence_signal(&pt->base);
> 
> Question is what the scope should be. Using this method we only take a
> reference and avoid releasing the fences we're going to signal.
> However the lock is on the timeline and dma_fence_signal already takes
> the lock, so if the signal ends up releasing any of the other fences
> in the timeline (e.g. the fences that are later than the new value),
> then we are still going to have the recursive locking issue.
> 
> One solution to that would be splitting the lock into 2 locks: 1
> protecting pt_list + pt_tree and 1 protecting value & serving as the
> lock for the fences. Then we can only take the list lock in the
> release function and dma_fence_signal takes the value lock. However,
> then you still have issues like if a callback for fence A (when called
> it holds the lock of A's timeline) adds a callback to fence B of the
> same timeline (requires the fence lock) we still have a recursion
> locking issue.
> 
> I'm not sure it is reasonable for the last use case to work, because I
> only see it working with per-fence locks, but per-fence locks are
> deadlock prone due to the order of taking links not really being
> fixed.

With the explicit reference you introduce for the signaler, is it not
then safe to revert d3c6dd1fb30d i.e. conditionally take the spinlock
inside release? We just have to remember to acquire the ref before
removing the pt from the list.
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal.
  2020-07-14 18:30     ` Chris Wilson
@ 2020-07-14 19:43       ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2020-07-14 19:43 UTC (permalink / raw)
  To: Bas Nieuwenhuizen
  Cc: Gustavo Padovan, stable, ML dri-devel, Christian König

Quoting Chris Wilson (2020-07-14 19:30:39)
> Quoting Bas Nieuwenhuizen (2020-07-14 19:17:21)
> > On Tue, Jul 14, 2020 at 6:26 PM Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > >
> > > Quoting Bas Nieuwenhuizen (2020-07-14 16:41:02)
> > > > Calltree:
> > > >   timeline_fence_release
> > > >   drm_sched_entity_wakeup
> > > >   dma_fence_signal_locked
> > > >   sync_timeline_signal
> > > >   sw_sync_ioctl
> > > >
> > > > Releasing the reference to the fence in the fence signal callback
> > > > seems reasonable to me, so this patch avoids the locking issue in
> > > > sw_sync.
> > > >
> > > > d3862e44daa7 ("dma-buf/sw-sync: Fix locking around sync_timeline lists")
> > > > fixed the recursive locking issue but caused an use-after-free. Later
> > > > d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > > > fixed the use-after-free but reintroduced the recursive locking issue.
> > > >
> > > > In this attempt we avoid the use-after-free still because the release
> > > > function still always locks, and outside of the locking region in the
> > > > signal function we have properly refcounted references.
> > > >
> > > > We furthermore also avoid the recurive lock by making sure that either:
> > > >
> > > > 1) We have a properly refcounted reference, preventing the signal from
> > > >    triggering the release function inside the locked region.
> > > > 2) The refcount was already zero, and hence nobody will be able to trigger
> > > >    the release function from the signal function.
> > > >
> > > > Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
> > > > Cc: Sumit Semwal <sumit.semwal@linaro.org>
> > > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: Gustavo Padovan <gustavo@padovan.org>
> > > > Cc: Christian König <christian.koenig@amd.com>
> > > > Cc: <stable@vger.kernel.org>
> > > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > > > ---
> > > >  drivers/dma-buf/sw_sync.c | 28 ++++++++++++++++++++--------
> > > >  1 file changed, 20 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
> > > > index 348b3a9170fa..30a482f75d56 100644
> > > > --- a/drivers/dma-buf/sw_sync.c
> > > > +++ b/drivers/dma-buf/sw_sync.c
> > > > @@ -192,9 +192,12 @@ static const struct dma_fence_ops timeline_fence_ops = {
> > > >  static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> > > >  {
> > > >         struct sync_pt *pt, *next;
> > > > +       struct list_head ref_list;
> > > >
> > > >         trace_sync_timeline(obj);
> > > >
> > > > +       INIT_LIST_HEAD(&ref_list);
> > > > +
> > > >         spin_lock_irq(&obj->lock);
> > > >
> > > >         obj->value += inc;
> > > > @@ -206,18 +209,27 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
> > > >                 list_del_init(&pt->link);
> > > >                 rb_erase(&pt->node, &obj->pt_tree);
> > > >
> > > > -               /*
> > > > -                * A signal callback may release the last reference to this
> > > > -                * fence, causing it to be freed. That operation has to be
> > > > -                * last to avoid a use after free inside this loop, and must
> > > > -                * be after we remove the fence from the timeline in order to
> > > > -                * prevent deadlocking on timeline->lock inside
> > > > -                * timeline_fence_release().
> > > > -                */
> > > > +               /* We need to take a reference to avoid a release during
> > > > +                * signalling (which can cause a recursive lock of obj->lock).
> > > > +                * If refcount was already zero, another thread is already taking
> > > > +                * care of destructing the fence, so the signal cannot release
> > > > +                * it again and we hence will not have the recursive lock. */
> > >
> > > /*
> > >  * Block commentary style:
> > >  * https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
> > >  */
> > >
> > > > +               if (dma_fence_get_rcu(&pt->base))
> > > > +                       list_add_tail(&pt->link, &ref_list);
> > >
> > > Ok.
> > >
> > > > +
> > > >                 dma_fence_signal_locked(&pt->base);
> > > >         }
> > > >
> > > >         spin_unlock_irq(&obj->lock);
> > > > +
> > > > +       list_for_each_entry_safe(pt, next, &ref_list, link) {
> > > > +               /* This needs to be cleared before release, otherwise the
> > > > +                * timeline_fence_release function gets confused about also
> > > > +                * removing the fence from the pt_tree. */
> > > > +               list_del_init(&pt->link);
> > > > +
> > > > +               dma_fence_put(&pt->base);
> > > > +       }
> > >
> > > How serious is the problem of one fence callback freeing another pt?
> > >
> > > Following the pattern here
> > >
> > >         spin_lock(&obj->lock);
> > >         list_for_each_entry_safe(pt, next, &obj->pt_list, link) {
> > >                 if (!timeline_fence_signaled(&pt->base))
> > >                         break;
> > >
> > >                 if (!dma_fence_get_rcu(&pt->base))
> > >                         continue; /* too late! */
> > >
> > >                 rb_erase(&pt->node, &obj->pt_tree);
> > >                 list_move_tail(&pt->link, &ref_list);
> > >         }
> > >         spin_unlock(&obj->lock);
> > >
> > >         list_for_each_entry_safe(pt, next, &ref_list, link) {
> > >                 list_del_init(&pt->link);
> > >                 dma_fence_signal(&pt->base);
> > 
> > Question is what the scope should be. Using this method we only take a
> > reference and avoid releasing the fences we're going to signal.
> > However the lock is on the timeline and dma_fence_signal already takes
> > the lock, so if the signal ends up releasing any of the other fences
> > in the timeline (e.g. the fences that are later than the new value),
> > then we are still going to have the recursive locking issue.
> > 
> > One solution to that would be splitting the lock into 2 locks: 1
> > protecting pt_list + pt_tree and 1 protecting value & serving as the
> > lock for the fences. Then we can only take the list lock in the
> > release function and dma_fence_signal takes the value lock. However,
> > then you still have issues like if a callback for fence A (when called
> > it holds the lock of A's timeline) adds a callback to fence B of the
> > same timeline (requires the fence lock) we still have a recursion
> > locking issue.
> > 
> > I'm not sure it is reasonable for the last use case to work, because I
> > only see it working with per-fence locks, but per-fence locks are
> > deadlock prone due to the order of taking links not really being
> > fixed.
> 
> With the explicit reference you introduce for the signaler, is it not
> then safe to revert d3c6dd1fb30d i.e. conditionally take the spinlock
> inside release? We just have to remember to acquire the ref before
> removing the pt from the list.

No, still only thinking about recursing down the same set of syncpt.
-Chris
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal
@ 2023-08-17 21:37 ` Rob Clark
  0 siblings, 0 replies; 9+ messages in thread
From: Rob Clark @ 2023-08-17 21:37 UTC (permalink / raw)
  To: dri-devel
  Cc: Rob Clark, Gustavo Padovan, open list, Chris Wilson,
	moderated list:DMA BUFFER SHARING FRAMEWORK, Sumit Semwal,
	Christian König, open list:SYNC FILE FRAMEWORK

From: Rob Clark <robdclark@chromium.org>

If a signal callback releases the sw_sync fence, that will trigger a
deadlock as the timeline_fence_release recurses onto the fence->lock
(used both for signaling and the the timeline tree).

To avoid that, temporarily hold an extra reference to the signalled
fences until after we drop the lock.

(This is an alternative implementation of https://patchwork.kernel.org/patch/11664717/
which avoids some potential UAF issues with the original patch.)

Reported-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/dma-buf/sw_sync.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 63f0aeb66db6..ceb6a0408624 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
  */
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
+	LIST_HEAD(signalled);
 	struct sync_pt *pt, *next;
 
 	trace_sync_timeline(obj);
@@ -203,9 +204,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 		if (!timeline_fence_signaled(&pt->base))
 			break;
 
+		dma_fence_get(&pt->base);
+
 		list_del_init(&pt->link);
 		rb_erase(&pt->node, &obj->pt_tree);
 
+		list_add_tail(&pt->link, &signalled);
+
 		/*
 		 * A signal callback may release the last reference to this
 		 * fence, causing it to be freed. That operation has to be
@@ -218,6 +223,11 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 	}
 
 	spin_unlock_irq(&obj->lock);
+
+	list_for_each_entry_safe(pt, next, &signalled, link) {
+		list_del(&pt->link);
+		dma_fence_put(&pt->base);
+	}
 }
 
 /**
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal
@ 2023-08-17 21:37 ` Rob Clark
  0 siblings, 0 replies; 9+ messages in thread
From: Rob Clark @ 2023-08-17 21:37 UTC (permalink / raw)
  To: dri-devel
  Cc: Rob Clark, Bas Nieuwenhuizen, Sumit Semwal, Gustavo Padovan,
	Christian König, Chris Wilson,
	open list:SYNC FILE FRAMEWORK,
	moderated list:DMA BUFFER SHARING FRAMEWORK, open list

From: Rob Clark <robdclark@chromium.org>

If a signal callback releases the sw_sync fence, that will trigger a
deadlock as the timeline_fence_release recurses onto the fence->lock
(used both for signaling and the the timeline tree).

To avoid that, temporarily hold an extra reference to the signalled
fences until after we drop the lock.

(This is an alternative implementation of https://patchwork.kernel.org/patch/11664717/
which avoids some potential UAF issues with the original patch.)

Reported-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Fixes: d3c6dd1fb30d ("dma-buf/sw_sync: Synchronize signal vs syncpt free")
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/dma-buf/sw_sync.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 63f0aeb66db6..ceb6a0408624 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
  */
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
+	LIST_HEAD(signalled);
 	struct sync_pt *pt, *next;
 
 	trace_sync_timeline(obj);
@@ -203,9 +204,13 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 		if (!timeline_fence_signaled(&pt->base))
 			break;
 
+		dma_fence_get(&pt->base);
+
 		list_del_init(&pt->link);
 		rb_erase(&pt->node, &obj->pt_tree);
 
+		list_add_tail(&pt->link, &signalled);
+
 		/*
 		 * A signal callback may release the last reference to this
 		 * fence, causing it to be freed. That operation has to be
@@ -218,6 +223,11 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 	}
 
 	spin_unlock_irq(&obj->lock);
+
+	list_for_each_entry_safe(pt, next, &signalled, link) {
+		list_del(&pt->link);
+		dma_fence_put(&pt->base);
+	}
 }
 
 /**
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-08-17 21:38 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-14 15:41 [PATCH] dma-buf/sw_sync: Avoid recursive lock during fence signal Bas Nieuwenhuizen
2020-07-14 15:41 ` Bas Nieuwenhuizen
2020-07-14 16:25 ` Chris Wilson
2020-07-14 18:17   ` Bas Nieuwenhuizen
2020-07-14 18:17     ` Bas Nieuwenhuizen
2020-07-14 18:30     ` Chris Wilson
2020-07-14 19:43       ` Chris Wilson
2023-08-17 21:37 Rob Clark
2023-08-17 21:37 ` Rob Clark

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.