qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] hw/block/nvme: fix aer logic
@ 2020-10-19  6:54 Klaus Jensen
  2020-10-19 16:43 ` Keith Busch
  0 siblings, 1 reply; 3+ messages in thread
From: Klaus Jensen @ 2020-10-19  6:54 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen, Max Reitz,
	Klaus Jensen, Keith Busch, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Fix some flawed logic in the handling of event masking. Before this
patch the device would erroneously

  a) queue up events even though that event type is masked
  b) issue AERs for queued events in response to events getting cleared
  c) respond to new AERs with queued events even though the event was
     already cleared

Fix this by moving the mask check to nvme_enqueue_event() and replacing
the nvme_process_aers() call with a pruning of queued events when the
event type is cleared.

Fixes: 5d5a53302b95 ("hw/block/nvme: add support for the asynchronous event request command")
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Cc: Dmitry Fomichev <dmitry.fomichev@wdc.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c       | 22 ++++++++++++++--------
 hw/block/trace-events |  2 +-
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9d30ca69dcf1..b18a310d9271 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -805,12 +805,6 @@ static void nvme_process_aers(void *opaque)
             break;
         }
 
-        /* ignore if masked (cqe posted, but event not cleared) */
-        if (n->aer_mask & (1 << event->result.event_type)) {
-            trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
-            continue;
-        }
-
         QTAILQ_REMOVE(&n->aer_queue, event, entry);
         n->aer_queued--;
 
@@ -844,6 +838,12 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
         return;
     }
 
+    /* ignore if masked (cqe posted, but event not cleared) */
+    if (n->aer_mask & (1 << event_type)) {
+        trace_pci_nvme_aer_masked(event_type, n->aer_mask);
+        return;
+    }
+
     event = g_new(NvmeAsyncEvent, 1);
     event->result = (NvmeAerResult) {
         .event_type = event_type,
@@ -859,9 +859,15 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
 
 static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
 {
+    NvmeAsyncEvent *event, *next;
+
     n->aer_mask &= ~(1 << event_type);
-    if (!QTAILQ_EMPTY(&n->aer_queue)) {
-        nvme_process_aers(n);
+
+    QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+        if (event->result.event_type == event_type) {
+            QTAILQ_REMOVE(&n->aer_queue, event, entry);
+            n->aer_queued--;
+        }
     }
 }
 
diff --git a/hw/block/trace-events b/hw/block/trace-events
index cab9913b1f2d..11bad6ae6a11 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -67,7 +67,7 @@ pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRI
 pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
 pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
-pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
+pci_nvme_no_outstanding_aers(void) "no outstanding aers"
 pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
 pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64""
 pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64""
-- 
2.28.0



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] hw/block/nvme: fix aer logic
  2020-10-19  6:54 [PATCH] hw/block/nvme: fix aer logic Klaus Jensen
@ 2020-10-19 16:43 ` Keith Busch
  2020-10-19 17:48   ` Klaus Jensen
  0 siblings, 1 reply; 3+ messages in thread
From: Keith Busch @ 2020-10-19 16:43 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Dmitry Fomichev, Klaus Jensen,
	qemu-devel, Maxim Levitsky, Max Reitz

On Mon, Oct 19, 2020 at 08:54:16AM +0200, Klaus Jensen wrote:
> @@ -844,6 +838,12 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
>          return;
>      }
>  
> +    /* ignore if masked (cqe posted, but event not cleared) */
> +    if (n->aer_mask & (1 << event_type)) {
> +        trace_pci_nvme_aer_masked(event_type, n->aer_mask);
> +        return;
> +    }

The 'mask' means the host hasn't yet acknowledged the AER with the
appropriate log. The controller should continue to internally enqueue
subsequent events of this type, but suppress sending the notification
for them until the host unlatches the event type.

>      event = g_new(NvmeAsyncEvent, 1);
>      event->result = (NvmeAerResult) {
>          .event_type = event_type,
> @@ -859,9 +859,15 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
>  
>  static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
>  {
> +    NvmeAsyncEvent *event, *next;
> +
>      n->aer_mask &= ~(1 << event_type);
> -    if (!QTAILQ_EMPTY(&n->aer_queue)) {
> -        nvme_process_aers(n);
> +
> +    QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
> +        if (event->result.event_type == event_type) {
> +            QTAILQ_REMOVE(&n->aer_queue, event, entry);

Memory leaking the 'event'?

> +            n->aer_queued--;
> +        }
>      }
>  }


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] hw/block/nvme: fix aer logic
  2020-10-19 16:43 ` Keith Busch
@ 2020-10-19 17:48   ` Klaus Jensen
  0 siblings, 0 replies; 3+ messages in thread
From: Klaus Jensen @ 2020-10-19 17:48 UTC (permalink / raw)
  To: Keith Busch; +Cc: Kevin Wolf, Klaus Jensen, qemu-devel, qemu-block, Max Reitz

[-- Attachment #1: Type: text/plain, Size: 1798 bytes --]

On Oct 19 09:43, Keith Busch wrote:
> On Mon, Oct 19, 2020 at 08:54:16AM +0200, Klaus Jensen wrote:
> > @@ -844,6 +838,12 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
> >          return;
> >      }
> >  
> > +    /* ignore if masked (cqe posted, but event not cleared) */
> > +    if (n->aer_mask & (1 << event_type)) {
> > +        trace_pci_nvme_aer_masked(event_type, n->aer_mask);
> > +        return;
> > +    }
> 
> The 'mask' means the host hasn't yet acknowledged the AER with the
> appropriate log. The controller should continue to internally enqueue
> subsequent events of this type, but suppress sending the notification
> for them until the host unlatches the event type.
> 

Ugh. Looks like you are right. Again.

Notice events are definitely a good case for when we want to queue up
the events internally since the information corresponds to different log
pages but uses the same type.

> >      event = g_new(NvmeAsyncEvent, 1);
> >      event->result = (NvmeAerResult) {
> >          .event_type = event_type,
> > @@ -859,9 +859,15 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
> >  
> >  static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
> >  {
> > +    NvmeAsyncEvent *event, *next;
> > +
> >      n->aer_mask &= ~(1 << event_type);
> > -    if (!QTAILQ_EMPTY(&n->aer_queue)) {
> > -        nvme_process_aers(n);
> > +
> > +    QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
> > +        if (event->result.event_type == event_type) {
> > +            QTAILQ_REMOVE(&n->aer_queue, event, entry);
> 
> Memory leaking the 'event'?
> 

Thanks, good catch, but this change is also irrelevant now.

> > +            n->aer_queued--;
> > +        }
> >      }
> >  }
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-10-19 17:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-19  6:54 [PATCH] hw/block/nvme: fix aer logic Klaus Jensen
2020-10-19 16:43 ` Keith Busch
2020-10-19 17:48   ` Klaus Jensen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).