dri-devel.lists.freedesktop.org archive mirror
* [PATCH] dma-buf: fix and rework dma_buf_poll v4
@ 2021-06-30 12:36 Christian König
  2021-06-30 14:07 ` Daniel Vetter
From: Christian König @ 2021-06-30 12:36 UTC (permalink / raw)
  To: dri-devel, daniel.vetter

Daniel pointed me towards this function, and there are multiple obvious
problems in its implementation.

First of all, the retry loop does not work as intended. In general, a retry
only makes sense if you grab the reference first and then check the sequence
values.
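
A minimal sketch of that pattern, for illustration only (this assumes the
dma_resv seqcount and RCU helpers as used by the code removed below; it is
not part of the patch):

    /* Illustrative only: take the reference under RCU first, then
     * validate the seqcount and retry if anything changed in between.
     */
    retry:
        seq = read_seqcount_begin(&resv->seq);
        rcu_read_lock();
        fence = rcu_dereference(resv->fence_excl);
        if (fence && !dma_fence_get_rcu(fence)) {
            rcu_read_unlock();
            goto retry;     /* fence was freed under us */
        }
        rcu_read_unlock();
        if (read_seqcount_retry(&resv->seq, seq)) {
            dma_fence_put(fence);   /* handles NULL */
            goto retry;     /* the fences changed while we looked */
        }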

Second, we should always wait for the exclusive fence as well.

It's also good practice to hold a reference while installing callbacks on
fences you don't own.
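
In code the safe pattern looks roughly like this (sketch only; on success
the callback owns the reference and must drop it, as dma_buf_poll_cb now
does below):

    dma_fence_get(fence);
    if (dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb))
        dma_fence_put(fence);   /* already signaled, no callback queued */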

And last, the whole implementation was unnecessarily complex and rather hard
to understand, which could lead to unexpected behavior of the IOCTL.

Fix all this by reworking the implementation from scratch, dropping the
whole RCU approach and taking the lock instead.

Only mildly tested and needs a thoughtful review of the code.

v2: fix the reference counting as well
v3: keep the excl fence handling as is for stable
v4: back to testing all fences, drop RCU

Signed-off-by: Christian König <christian.koenig@amd.com>
CC: stable@vger.kernel.org
---
 drivers/dma-buf/dma-buf.c | 132 +++++++++++++-------------------------
 include/linux/dma-buf.h   |   2 +-
 2 files changed, 46 insertions(+), 88 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index eadd1eaa2fb5..192c4d34704b 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -72,7 +72,7 @@ static void dma_buf_release(struct dentry *dentry)
 	 * If you hit this BUG() it means someone dropped their ref to the
 	 * dma-buf while still having pending operation to the buffer.
 	 */
-	BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
+	BUG_ON(dmabuf->cb_in.active || dmabuf->cb_out.active);
 
 	dmabuf->ops->release(dmabuf);
 
@@ -202,16 +202,19 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 	wake_up_locked_poll(dcb->poll, dcb->active);
 	dcb->active = 0;
 	spin_unlock_irqrestore(&dcb->poll->lock, flags);
+	dma_fence_put(fence);
 }
 
 static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 {
+	struct dma_buf_poll_cb_t *dcb;
 	struct dma_buf *dmabuf;
 	struct dma_resv *resv;
 	struct dma_resv_list *fobj;
-	struct dma_fence *fence_excl;
+	struct dma_fence *fence;
+	unsigned shared_count;
 	__poll_t events;
-	unsigned shared_count, seq;
+	int r, i;
 
 	dmabuf = file->private_data;
 	if (!dmabuf || !dmabuf->resv)
@@ -225,101 +228,56 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
 	if (!events)
 		return 0;
 
-retry:
-	seq = read_seqcount_begin(&resv->seq);
-	rcu_read_lock();
+	dcb = events & EPOLLOUT ? &dmabuf->cb_out : &dmabuf->cb_in;
+
+	/* Only queue a new one if we are not still waiting for the old one */
+	spin_lock_irq(&dmabuf->poll.lock);
+	if (dcb->active)
+		events = 0;
+	else
+		dcb->active = events;
+	spin_unlock_irq(&dmabuf->poll.lock);
+	if (!events)
+		return 0;
+
+	dma_resv_lock(resv, NULL);
 
-	fobj = rcu_dereference(resv->fence);
-	if (fobj)
+	fobj = dma_resv_get_list(resv);
+	if (fobj && events & EPOLLOUT)
 		shared_count = fobj->shared_count;
 	else
 		shared_count = 0;
-	fence_excl = rcu_dereference(resv->fence_excl);
-	if (read_seqcount_retry(&resv->seq, seq)) {
-		rcu_read_unlock();
-		goto retry;
-	}
 
-	if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
-		struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
-		__poll_t pevents = EPOLLIN;
-
-		if (shared_count == 0)
-			pevents |= EPOLLOUT;
-
-		spin_lock_irq(&dmabuf->poll.lock);
-		if (dcb->active) {
-			dcb->active |= pevents;
-			events &= ~pevents;
-		} else
-			dcb->active = pevents;
-		spin_unlock_irq(&dmabuf->poll.lock);
-
-		if (events & pevents) {
-			if (!dma_fence_get_rcu(fence_excl)) {
-				/* force a recheck */
-				events &= ~pevents;
-				dma_buf_poll_cb(NULL, &dcb->cb);
-			} else if (!dma_fence_add_callback(fence_excl, &dcb->cb,
-							   dma_buf_poll_cb)) {
-				events &= ~pevents;
-				dma_fence_put(fence_excl);
-			} else {
-				/*
-				 * No callback queued, wake up any additional
-				 * waiters.
-				 */
-				dma_fence_put(fence_excl);
-				dma_buf_poll_cb(NULL, &dcb->cb);
-			}
+	for (i = 0; i < shared_count; ++i) {
+		fence = rcu_dereference_protected(fobj->shared[i],
+						  dma_resv_held(resv));
+		dma_fence_get(fence);
+		r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
+		if (!r) {
+			/* Callback queued */
+			events = 0;
+			goto out;
 		}
+		dma_fence_put(fence);
 	}
 
-	if ((events & EPOLLOUT) && shared_count > 0) {
-		struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_shared;
-		int i;
-
-		/* Only queue a new callback if no event has fired yet */
-		spin_lock_irq(&dmabuf->poll.lock);
-		if (dcb->active)
-			events &= ~EPOLLOUT;
-		else
-			dcb->active = EPOLLOUT;
-		spin_unlock_irq(&dmabuf->poll.lock);
-
-		if (!(events & EPOLLOUT))
+	fence = dma_resv_get_excl(resv);
+	if (fence) {
+		dma_fence_get(fence);
+		r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
+		if (!r) {
+			/* Callback queued */
+			events = 0;
 			goto out;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
-
-			if (!dma_fence_get_rcu(fence)) {
-				/*
-				 * fence refcount dropped to zero, this means
-				 * that fobj has been freed
-				 *
-				 * call dma_buf_poll_cb and force a recheck!
-				 */
-				events &= ~EPOLLOUT;
-				dma_buf_poll_cb(NULL, &dcb->cb);
-				break;
-			}
-			if (!dma_fence_add_callback(fence, &dcb->cb,
-						    dma_buf_poll_cb)) {
-				dma_fence_put(fence);
-				events &= ~EPOLLOUT;
-				break;
-			}
-			dma_fence_put(fence);
 		}
-
-		/* No callback queued, wake up any additional waiters. */
-		if (i == shared_count)
-			dma_buf_poll_cb(NULL, &dcb->cb);
+		dma_fence_put(fence);
 	}
 
+	/* No callback queued, wake up any additional waiters. */
+	dma_buf_poll_cb(NULL, &dcb->cb);
+
 out:
-	rcu_read_unlock();
+	dma_resv_unlock(resv);
 	return events;
 }
 
@@ -562,8 +520,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	dmabuf->owner = exp_info->owner;
 	spin_lock_init(&dmabuf->name_lock);
 	init_waitqueue_head(&dmabuf->poll);
-	dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
-	dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
+	dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll;
+	dmabuf->cb_in.active = dmabuf->cb_out.active = 0;
 
 	if (!resv) {
 		resv = (struct dma_resv *)&dmabuf[1];
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index efdc56b9d95f..7e747ad54c81 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -329,7 +329,7 @@ struct dma_buf {
 		wait_queue_head_t *poll;
 
 		__poll_t active;
-	} cb_excl, cb_shared;
+	} cb_in, cb_out;
 };
 
 /**
-- 
2.25.1



* Re: [PATCH] dma-buf: fix and rework dma_buf_poll v4
  2021-06-30 12:36 [PATCH] dma-buf: fix and rework dma_buf_poll v4 Christian König
@ 2021-06-30 14:07 ` Daniel Vetter
  2021-06-30 14:22   ` Christian König
From: Daniel Vetter @ 2021-06-30 14:07 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Wed, Jun 30, 2021 at 2:36 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> [snip]
>
> Only mildly tested and needs a thoughtful review of the code.

prime_vgem.c has some basic stuff, but it might actually encode the
broken behaviour. Would be good to extend/fix that, I think, so we don't
entirely rely on review. We can't really build easily on top of the
testcase Jason created for import/export, since for implicit sync we
need some driver that attaches the fences for us.

There's also a vc4 one, but I guess that's less useful for us :-)

> [snip]
>
> +       for (i = 0; i < shared_count; ++i) {
> +               fence = rcu_dereference_protected(fobj->shared[i],
> +                                                 dma_resv_held(resv));
> +               dma_fence_get(fence);
> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
> +               if (!r) {
> +                       /* Callback queued */
> +                       events = 0;

Why do you clear events here? There's more than EPOLLIN/OUT, and I
think you can also set both (and then we should be able to queue
both). A few more testcases for this would be really good. I think
the old code handled all of that like I'd expect it to.
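
For reference, userspace really can ask for both at once; a minimal
poll(2) sketch (illustrative only, dmabuf_fd is an assumed exported
dma-buf fd):

    #include <poll.h>

    struct pollfd pfd = {
        .fd = dmabuf_fd,
        .events = POLLIN | POLLOUT,
    };
    /* with this patch, POLLOUT firing means all fences signaled */
    if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
        write_to_buffer();  /* assumed application helper */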

Cheers, Daniel


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


* Re: [PATCH] dma-buf: fix and rework dma_buf_poll v4
  2021-06-30 14:07 ` Daniel Vetter
@ 2021-06-30 14:22   ` Christian König
  2021-06-30 16:29     ` Daniel Vetter
From: Christian König @ 2021-06-30 14:22 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

Am 30.06.21 um 16:07 schrieb Daniel Vetter:
> On Wed, Jun 30, 2021 at 2:36 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> [snip]
>>
>> Only mildly tested and needs a thoughtful review of the code.
> prime_vgem.c has some basic stuff, but it might actually encode the
> broken behaviour. Would be good to extend/fix that, I think, so we don't
> entirely rely on review. We can't really build easily on top of the
> testcase Jason created for import/export, since for implicit sync we
> need some driver that attaches the fences for us.

My question is whether I can just send that to
intel-gfx@lists.freedesktop.org and the CI will pick it up.

>
> There's also a vc4 one, but I guess that's less useful for us :-)
>
>> [snip]
>> +       for (i = 0; i < shared_count; ++i) {
>> +               fence = rcu_dereference_protected(fobj->shared[i],
>> +                                                 dma_resv_held(resv));
>> +               dma_fence_get(fence);
>> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
>> +               if (!r) {
>> +                       /* Callback queued */
>> +                       events = 0;
> Why do you clear events here? There's more than EPOLLIN/OUT, and I
> think you can also set both (and then we should be able to queue
> both). A few more testcases for this would be really good. I think
> the old code handled all of that like I'd expect it to.

Yeah, that's exactly the stuff I was wondering about, since I'm not so
familiar with the poll interface.

Going to fix that.

Thanks,
Christian.




* Re: [PATCH] dma-buf: fix and rework dma_buf_poll v4
  2021-06-30 14:22   ` Christian König
@ 2021-06-30 16:29     ` Daniel Vetter
From: Daniel Vetter @ 2021-06-30 16:29 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Wed, Jun 30, 2021 at 4:22 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 30.06.21 um 16:07 schrieb Daniel Vetter:
> > On Wed, Jun 30, 2021 at 2:36 PM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> [snip]
> >>
> >> Only mildly tested and needs a thoughtful review of the code.
> > prime_vgem.c has some basic stuff, but it might actually encode the
> > broken behaviour. Would be good to extend/fix that, I think, so we don't
> > entirely rely on review. We can't really build easily on top of the
> > testcase Jason created for import/export, since for implicit sync we
> > need some driver that attaches the fences for us.
>
> My question is whether I can just send that to
> intel-gfx@lists.freedesktop.org and the CI will pick it up.

We do run all the prime_vgem tests.

Btw, if you do an igt patch, you can tell CI to run that igt patch
series together with your kernel submission. Pretty useful for hackery
like this; how it works is documented here:

https://intel-gfx-ci.01.org/test-with.html

>
> >
> > There's also a vc4 one, but I guess that's less useful for us :-)
> >
> >> [snip]
> >> +       for (i = 0; i < shared_count; ++i) {
> >> +               fence = rcu_dereference_protected(fobj->shared[i],
> >> +                                                 dma_resv_held(resv));
> >> +               dma_fence_get(fence);
> >> +               r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb);
> >> +               if (!r) {
> >> +                       /* Callback queued */
> >> +                       events = 0;
> > Why do you clear events here? There's more than EPOLLIN/OUT, and I
> > think you can also set both (and then we should be able to queue
> > both). A few more testcases for this would be really good. I think
> > the old code handled all of that like I'd expect it to.
>
> Yeah, that's exactly the stuff I was wondering about, since I'm not so
> familiar with the poll interface.

tbh I don't have much of a clue either; I think we need to decide this
with some testcases. The poll support in the kernel is an impressive
amount of magic, since it works for all variants of poll. At least I
think it's rather clever, and tricky to understand as an outsider ...
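
As a rough reminder of the contract (an illustrative sketch with assumed
device and helper names, not tied to this patch): a ->poll hook registers
on a waitqueue via poll_wait() and returns the currently-ready mask, and
the core re-invokes it after every wake-up on that queue.

    static __poll_t some_poll(struct file *file, poll_table *wait)
    {
        struct some_dev *dev = file->private_data;  /* assumed */
        __poll_t mask = 0;

        /* registers this waiter; works for poll, select and epoll */
        poll_wait(file, &dev->waitq, wait);
        if (some_dev_readable(dev))     /* assumed helper */
            mask |= EPOLLIN;
        if (some_dev_writable(dev))     /* assumed helper */
            mask |= EPOLLOUT;
        return mask;
    }
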
-Daniel


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

