qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation
@ 2019-07-29 21:34 Paolo Bonzini
  2019-07-29 21:46 ` John Snow
  2019-08-13 22:40 ` John Snow
  0 siblings, 2 replies; 6+ messages in thread
From: Paolo Bonzini @ 2019-07-29 21:34 UTC (permalink / raw)
  To: qemu-devel; +Cc: John Snow

dma_aio_cancel unschedules the BH if there is one, which corresponds
to the reschedule_dma case of dma_blk_cb.  This can stall the DMA
permanently, because dma_complete will never get invoked and therefore
nobody will ever invoke the original AIO callback in dbs->common.cb.

Fix this by invoking the callback (which is ensured to happen after
a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and
add assertions to check that the DMA state machine is indeed waiting
for dma_complete or reschedule_dma, but never both.

Reported-by: John Snow <jsnow@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 dma-helpers.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/dma-helpers.c b/dma-helpers.c
index 2d7e02d..d3871dc 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -90,6 +90,7 @@ static void reschedule_dma(void *opaque)
 {
     DMAAIOCB *dbs = (DMAAIOCB *)opaque;
 
+    assert(!dbs->acb && dbs->bh);
     qemu_bh_delete(dbs->bh);
     dbs->bh = NULL;
     dma_blk_cb(dbs, 0);
@@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
 {
     trace_dma_complete(dbs, ret, dbs->common.cb);
 
+    assert(!dbs->acb && !dbs->bh);
     dma_blk_unmap(dbs);
     if (dbs->common.cb) {
         dbs->common.cb(dbs->common.opaque, ret);
     }
     qemu_iovec_destroy(&dbs->iov);
-    if (dbs->bh) {
-        qemu_bh_delete(dbs->bh);
-        dbs->bh = NULL;
-    }
     qemu_aio_unref(dbs);
 }
 
@@ -179,14 +177,21 @@ static void dma_aio_cancel(BlockAIOCB *acb)
 
     trace_dma_aio_cancel(dbs);
 
+    assert(!(dbs->acb && dbs->bh));
     if (dbs->acb) {
+        /* This will invoke dma_blk_cb.  */
         blk_aio_cancel_async(dbs->acb);
+        return;
     }
+
     if (dbs->bh) {
         cpu_unregister_map_client(dbs->bh);
         qemu_bh_delete(dbs->bh);
         dbs->bh = NULL;
     }
+    if (dbs->common.cb) {
+        dbs->common.cb(dbs->common.opaque, -ECANCELED);
+    }
 }
 
 static AioContext *dma_get_aio_context(BlockAIOCB *acb)
-- 
1.8.3.1



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation
  2019-07-29 21:34 [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation Paolo Bonzini
@ 2019-07-29 21:46 ` John Snow
  2019-07-29 21:51   ` Paolo Bonzini
  2019-08-13 22:40 ` John Snow
  1 sibling, 1 reply; 6+ messages in thread
From: John Snow @ 2019-07-29 21:46 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: Qemu-block



On 7/29/19 5:34 PM, Paolo Bonzini wrote:
> dma_aio_cancel unschedules the BH if there is one, which corresponds
> to the reschedule_dma case of dma_blk_cb.  This can stall the DMA
> permanently, because dma_complete will never get invoked and therefore
> nobody will ever invoke the original AIO callback in dbs->common.cb.
> 
> Fix this by invoking the callback (which is ensured to happen after
> a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and
> add assertions to check that the DMA state machine is indeed waiting
> for dma_complete or reschedule_dma, but never both.
> 
> Reported-by: John Snow <jsnow@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  dma-helpers.c | 13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/dma-helpers.c b/dma-helpers.c
> index 2d7e02d..d3871dc 100644
> --- a/dma-helpers.c
> +++ b/dma-helpers.c
> @@ -90,6 +90,7 @@ static void reschedule_dma(void *opaque)
>  {
>      DMAAIOCB *dbs = (DMAAIOCB *)opaque;
>  
> +    assert(!dbs->acb && dbs->bh);
>      qemu_bh_delete(dbs->bh);
>      dbs->bh = NULL;
>      dma_blk_cb(dbs, 0);
> @@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
>  {
>      trace_dma_complete(dbs, ret, dbs->common.cb);
>  
> +    assert(!dbs->acb && !dbs->bh);
>      dma_blk_unmap(dbs);
>      if (dbs->common.cb) {
>          dbs->common.cb(dbs->common.opaque, ret);
>      }
>      qemu_iovec_destroy(&dbs->iov);
> -    if (dbs->bh) {
> -        qemu_bh_delete(dbs->bh);
> -        dbs->bh = NULL;
> -    }

Now presumably handled by dma_aio_cancel,

>      qemu_aio_unref(dbs);
>  }
>  
> @@ -179,14 +177,21 @@ static void dma_aio_cancel(BlockAIOCB *acb)
>  
>      trace_dma_aio_cancel(dbs);
>  
> +    assert(!(dbs->acb && dbs->bh));
>      if (dbs->acb) {
> +        /* This will invoke dma_blk_cb.  */

uhh, does it? this is maybe where I got lost reading this code.
Isn't dbs->acb going to be what was returned from e.g.
dma_blk_read_io_func, which ultimately uses blk_aio_em_aiocb_info, that
has no cancel callback?

I thought this was just going to NOP entirely. No?

>          blk_aio_cancel_async(dbs->acb);
> +        return;
>      }
> +
>      if (dbs->bh) {
>          cpu_unregister_map_client(dbs->bh);
>          qemu_bh_delete(dbs->bh);
>          dbs->bh = NULL;
>      }
> +    if (dbs->common.cb) {
> +        dbs->common.cb(dbs->common.opaque, -ECANCELED);
> +    }

Well, here at least I am now on terra-firma that we're going to call the
original callback with ECANCELED, which is a step towards code that
isn't surprising my sensibilities.

>  }
>  
>  static AioContext *dma_get_aio_context(BlockAIOCB *acb)
> 



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation
  2019-07-29 21:46 ` John Snow
@ 2019-07-29 21:51   ` Paolo Bonzini
  2019-07-29 22:00     ` John Snow
  0 siblings, 1 reply; 6+ messages in thread
From: Paolo Bonzini @ 2019-07-29 21:51 UTC (permalink / raw)
  To: John Snow, qemu-devel; +Cc: Qemu-block

On 29/07/19 23:46, John Snow wrote:
>> @@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
>>  {
>>      trace_dma_complete(dbs, ret, dbs->common.cb);
>>  
>> +    assert(!dbs->acb && !dbs->bh);
>>      dma_blk_unmap(dbs);
>>      if (dbs->common.cb) {
>>          dbs->common.cb(dbs->common.opaque, ret);
>>      }
>>      qemu_iovec_destroy(&dbs->iov);
>> -    if (dbs->bh) {
>> -        qemu_bh_delete(dbs->bh);
>> -        dbs->bh = NULL;
>> -    }
> 
> Now presumably handled by dma_aio_cancel,

No, it simply could never happen.  dma_complete is called here in dma_blk_cb:

    dbs->acb = NULL;
    dbs->offset += dbs->iov.size;

    if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
        dma_complete(dbs, ret);
        return;
    }

and the only way to reach that when dbs->bh becomes non-NULL is through 
reschedule_dma, which clears dbs->bh before invoking dma_blk_cb.

>>      if (dbs->acb) {
>> +        /* This will invoke dma_blk_cb.  */
> 
> uhh, does it?

Yes:

/* Async version of aio cancel. The caller is not blocked if the acb implements
 * cancel_async, otherwise we do nothing and let the request normally complete.
 * In either case the completion callback must be called. */

> this is maybe where I got lost reading this code.
> Isn't dbs->acb going to be what was returned from e.g.
> dma_blk_read_io_func, which ultimately uses blk_aio_em_aiocb_info, that
> has no cancel callback?

Right therefore the I/O will complete and the callback will be invoked.

> Well, here at least I am now on terra-firma that we're going to call the
> original callback with ECANCELED, which is a step towards code that
> isn't surprising my sensibilities.

Good. :)

Paolo


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation
  2019-07-29 21:51   ` Paolo Bonzini
@ 2019-07-29 22:00     ` John Snow
  0 siblings, 0 replies; 6+ messages in thread
From: John Snow @ 2019-07-29 22:00 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: Qemu-block



On 7/29/19 5:51 PM, Paolo Bonzini wrote:
> On 29/07/19 23:46, John Snow wrote:
>>> @@ -111,15 +112,12 @@ static void dma_complete(DMAAIOCB *dbs, int ret)
>>>  {
>>>      trace_dma_complete(dbs, ret, dbs->common.cb);
>>>  
>>> +    assert(!dbs->acb && !dbs->bh);
>>>      dma_blk_unmap(dbs);
>>>      if (dbs->common.cb) {
>>>          dbs->common.cb(dbs->common.opaque, ret);
>>>      }
>>>      qemu_iovec_destroy(&dbs->iov);
>>> -    if (dbs->bh) {
>>> -        qemu_bh_delete(dbs->bh);
>>> -        dbs->bh = NULL;
>>> -    }
>>
>> Now presumably handled by dma_aio_cancel,
> 
> No, it simply could never happen.  dma_complete is called here in dma_blk_cb:
> 
>     dbs->acb = NULL;
>     dbs->offset += dbs->iov.size;
> 
>     if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
>         dma_complete(dbs, ret);
>         return;
>     }
> 
> and the only way to reach that when dbs->bh becomes non-NULL is through 
> reschedule_dma, which clears dbs->bh before invoking dma_blk_cb.
> 
>>>      if (dbs->acb) {
>>> +        /* This will invoke dma_blk_cb.  */
>>
>> uhh, does it?
> 
> Yes:
> 
> /* Async version of aio cancel. The caller is not blocked if the acb implements
>  * cancel_async, otherwise we do nothing and let the request normally complete.
>  * In either case the completion callback must be called. */
> 

Right, right -- the comment can SAY anything it likes about what the
"contract" is ...

OK, so it's more as if EITHER the cancel callback will invoke
dma_blk_cb, OR the acb object there will ... eventually ... through
normal execution.

OK, ok, ok. Getting it, slowly, slowly.

I think this comment is confusing, actually -- because dma_blk_cb might
not actually get invoked in the stack below this call. We only know it
might.

>> this is maybe where I got lost reading this code.
>> Isn't dbs->acb going to be what was returned from e.g.
>> dma_blk_read_io_func, which ultimately uses blk_aio_em_aiocb_info, that
>> has no cancel callback?
> 
> Right therefore the I/O will complete and the callback will be invoked.
> 

From the other email:

***oh***.

>> Well, here at least I am now on terra-firma that we're going to call the
>> original callback with ECANCELED, which is a step towards code that
>> isn't surprising my sensibilities.
> 
> Good. :)
> 
> Paolo
> 

OK, this seems right to me then; the last puzzle piece is that Fam added
no-op returns for ECANCELED to the IDE originators of such DMA requests,
but now that I see the pathways beneath here I think it'd be /never/
right to ignore them.

If you cancel IDE's DMA out from under it, I think the IDE state machine
ought to treat it as an error, yes.

Thanks for the help, Paolo!
--js



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation
  2019-07-29 21:34 [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation Paolo Bonzini
  2019-07-29 21:46 ` John Snow
@ 2019-08-13 22:40 ` John Snow
  2019-08-13 22:49   ` John Snow
  1 sibling, 1 reply; 6+ messages in thread
From: John Snow @ 2019-08-13 22:40 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel



On 7/29/19 5:34 PM, Paolo Bonzini wrote:
> dma_aio_cancel unschedules the BH if there is one, which corresponds
> to the reschedule_dma case of dma_blk_cb.  This can stall the DMA
> permanently, because dma_complete will never get invoked and therefore
> nobody will ever invoke the original AIO callback in dbs->common.cb.
> 
> Fix this by invoking the callback (which is ensured to happen after
> a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and
> add assertions to check that the DMA state machine is indeed waiting
> for dma_complete or reschedule_dma, but never both.
> 
> Reported-by: John Snow <jsnow@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

No maintainer here, I guess; Paolo will you be pulling this or should I
do it as part of the other IDE fixes I need to make?

--js


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation
  2019-08-13 22:40 ` John Snow
@ 2019-08-13 22:49   ` John Snow
  0 siblings, 0 replies; 6+ messages in thread
From: John Snow @ 2019-08-13 22:49 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel



On 8/13/19 6:40 PM, John Snow wrote:
> 
> 
> On 7/29/19 5:34 PM, Paolo Bonzini wrote:
>> dma_aio_cancel unschedules the BH if there is one, which corresponds
>> to the reschedule_dma case of dma_blk_cb.  This can stall the DMA
>> permanently, because dma_complete will never get invoked and therefore
>> nobody will ever invoke the original AIO callback in dbs->common.cb.
>>
>> Fix this by invoking the callback (which is ensured to happen after
>> a bdrv_aio_cancel_async, or done manually in the dbs->bh case), and
>> add assertions to check that the DMA state machine is indeed waiting
>> for dma_complete or reschedule_dma, but never both.
>>
>> Reported-by: John Snow <jsnow@redhat.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> No maintainer here, I guess; Paolo will you be pulling this or should I
> do it as part of the other IDE fixes I need to make?
> 

Nevermind, I made a decision.

--js


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-08-13 22:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-29 21:34 [Qemu-devel] [PATCH] dma-helpers: ensure AIO callback is invoked after cancellation Paolo Bonzini
2019-07-29 21:46 ` John Snow
2019-07-29 21:51   ` Paolo Bonzini
2019-07-29 22:00     ` John Snow
2019-08-13 22:40 ` John Snow
2019-08-13 22:49   ` John Snow

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).