All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jinhao Fan <fanjinhao21s@ict.ac.cn>
To: qemu-devel@nongnu.org
Cc: its@irrelevant.dk, kbusch@kernel.org, stefanha@gmail.com,
	Jinhao Fan <fanjinhao21s@ict.ac.cn>,
	qemu-block@nongnu.org (open list:nvme)
Subject: [PATCH 2/4] hw/nvme: add option to (de)assert irq with eventfd
Date: Thu, 11 Aug 2022 23:37:37 +0800	[thread overview]
Message-ID: <20220811153739.3079672-3-fanjinhao21s@ict.ac.cn> (raw)
In-Reply-To: <20220811153739.3079672-1-fanjinhao21s@ict.ac.cn>

When the new option 'irq-eventfd' is turned on, the IO emulation code
signals an eventfd when it wants to (de)assert an irq. The main loop
eventfd handler does the actual irq (de)assertion.  This paves the way
for iothread support since QEMU's interrupt emulation is not thread
safe.

Asserting and deasserting irq with eventfd has some performance
implications. For small queue depth it increases request latency but
for large queue depth it effectively coalesces irqs.

Comparison (KIOPS):

QD            1   4  16  64
QEMU         38 123 210 329
irq-eventfd  32 106 240 364

Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
---
 hw/nvme/ctrl.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++--
 hw/nvme/nvme.h |  4 +++
 2 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index bd3350d7e0..8a1c5ce3e1 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1338,6 +1338,54 @@ static void nvme_update_cq_head(NvmeCQueue *cq)
     trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
 }
 
+static void nvme_assert_notifier_read(EventNotifier *e)
+{
+    NvmeCQueue *cq = container_of(e, NvmeCQueue, assert_notifier);
+    if (event_notifier_test_and_clear(e)) {
+        nvme_irq_assert(cq->ctrl, cq);
+    }
+}
+
+static void nvme_deassert_notifier_read(EventNotifier *e)
+{
+    NvmeCQueue *cq = container_of(e, NvmeCQueue, deassert_notifier);
+    if (event_notifier_test_and_clear(e)) {
+        nvme_irq_deassert(cq->ctrl, cq);
+    }
+}
+
+static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
+{
+    int ret;
+
+    ret = event_notifier_init(&cq->assert_notifier, 0);
+    if (ret < 0) {
+        goto fail_assert_handler;
+    }
+
+    event_notifier_set_handler(&cq->assert_notifier,
+                                nvme_assert_notifier_read);
+
+    if (!msix_enabled(&n->parent_obj)) {
+        ret = event_notifier_init(&cq->deassert_notifier, 0);
+        if (ret < 0) {
+            goto fail_deassert_handler;
+        }
+
+        event_notifier_set_handler(&cq->deassert_notifier,
+                                   nvme_deassert_notifier_read);
+    }
+
+    return;
+
+fail_deassert_handler:
+    event_notifier_set_handler(&cq->deassert_notifier, NULL);
+    event_notifier_cleanup(&cq->deassert_notifier);
+fail_assert_handler:
+    event_notifier_set_handler(&cq->assert_notifier, NULL);
+    event_notifier_cleanup(&cq->assert_notifier);
+}
+
 static void nvme_post_cqes(void *opaque)
 {
     NvmeCQueue *cq = opaque;
@@ -1382,7 +1430,23 @@ static void nvme_post_cqes(void *opaque)
                 n->cq_pending++;
             }
 
-            nvme_irq_assert(n, cq);
+            if (unlikely(cq->first_io_cqe)) {
+                /*
+                 * Initialize event notifier when first cqe is posted. For irqfd
+                 * support we need to register the MSI message in KVM. We
+                 * can not do this registration at CQ creation time because
+                 * Linux's NVMe driver changes the MSI message after CQ creation.
+                 */
+                cq->first_io_cqe = false;
+
+                nvme_init_irq_notifier(n, cq);
+            }
+
+            if (cq->assert_notifier.initialized) {
+                event_notifier_set(&cq->assert_notifier);
+            } else {
+                nvme_irq_assert(n, cq);
+            }
         }
     }
 }
@@ -4249,7 +4313,11 @@ static void nvme_cq_notifier(EventNotifier *e)
     if (cq->irq_enabled && cq->tail == cq->head) {
         n->cq_pending--;
         if (!msix_enabled(&n->parent_obj)) {
-            nvme_irq_deassert(n, cq);
+            if (cq->deassert_notifier.initialized) {
+                event_notifier_set(&cq->deassert_notifier);
+            } else {
+                nvme_irq_deassert(n, cq);
+            }
         }
     }
 
@@ -4706,6 +4774,14 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
         event_notifier_set_handler(&cq->notifier, NULL);
         event_notifier_cleanup(&cq->notifier);
     }
+    if (cq->assert_notifier.initialized) {
+        event_notifier_set_handler(&cq->assert_notifier, NULL);
+        event_notifier_cleanup(&cq->assert_notifier);
+    }
+    if (cq->deassert_notifier.initialized) {
+        event_notifier_set_handler(&cq->deassert_notifier, NULL);
+        event_notifier_cleanup(&cq->deassert_notifier);
+    }
     if (msix_enabled(&n->parent_obj)) {
         msix_vector_unuse(&n->parent_obj, cq->vector);
     }
@@ -4737,6 +4813,7 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
         }
 
         if (!msix_enabled(&n->parent_obj)) {
+            /* Do not use eventfd since this is always called in main loop */
             nvme_irq_deassert(n, cq);
         }
     }
@@ -4777,6 +4854,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
     }
     n->cq[cqid] = cq;
     cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+    cq->first_io_cqe = cqid != 0;
 }
 
 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -6926,7 +7004,11 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
         if (cq->irq_enabled && cq->tail == cq->head) {
             n->cq_pending--;
             if (!msix_enabled(&n->parent_obj)) {
-                nvme_irq_deassert(n, cq);
+                if (cq->deassert_notifier.initialized) {
+                    event_notifier_set(&cq->deassert_notifier);
+                } else {
+                    nvme_irq_deassert(n, cq);
+                }
             }
         }
     } else {
@@ -7675,6 +7757,7 @@ static Property nvme_props[] = {
     DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
     DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
     DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
+    DEFINE_PROP_BOOL("irq-eventfd", NvmeCtrl, params.irq_eventfd, false),
     DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
     DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
                      params.auto_transition_zones, true),
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 79f5c281c2..759d0ecd7c 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -398,6 +398,9 @@ typedef struct NvmeCQueue {
     uint64_t    ei_addr;
     QEMUTimer   *timer;
     EventNotifier notifier;
+    EventNotifier assert_notifier;
+    EventNotifier deassert_notifier;
+    bool        first_io_cqe;
     bool        ioeventfd_enabled;
     QTAILQ_HEAD(, NvmeSQueue) sq_list;
     QTAILQ_HEAD(, NvmeRequest) req_list;
@@ -422,6 +425,7 @@ typedef struct NvmeParams {
     bool     auto_transition_zones;
     bool     legacy_cmb;
     bool     ioeventfd;
+    bool     irq_eventfd;
     uint8_t  sriov_max_vfs;
     uint16_t sriov_vq_flexible;
     uint16_t sriov_vi_flexible;
-- 
2.25.1



  parent reply	other threads:[~2022-08-11 15:40 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-11 15:37 [PATCH 0/4] hw/nvme: add irqfd support Jinhao Fan
2022-08-11 15:37 ` [PATCH 1/4] hw/nvme: avoid unnecessary call to irq (de)assertion functions Jinhao Fan
2022-08-16 15:24   ` Stefan Hajnoczi
2022-08-17  5:42     ` Jinhao Fan
2022-08-11 15:37 ` Jinhao Fan [this message]
2022-08-16 11:20   ` [PATCH 2/4] hw/nvme: add option to (de)assert irq with eventfd Klaus Jensen
2022-08-17  5:36     ` Jinhao Fan
2022-08-23 10:58   ` Klaus Jensen
2022-08-11 15:37 ` [PATCH 3/4] hw/nvme: use irqfd to send interrupts Jinhao Fan
2022-08-11 15:37 ` [PATCH 4/4] hw/nvme: add MSI-x mask handlers for irqfd Jinhao Fan
2022-08-16 10:46   ` Klaus Jensen
2022-08-17  5:35     ` Jinhao Fan
2022-08-23 14:43     ` Jinhao Fan
2022-08-24 11:22       ` Klaus Jensen
2022-08-24 13:16         ` Jinhao Fan
2022-08-23 11:04   ` Klaus Jensen
2022-08-16  1:54 ` [PATCH 0/4] hw/nvme: add irqfd support Jinhao Fan
2022-08-24 20:15 ` Klaus Jensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220811153739.3079672-3-fanjinhao21s@ict.ac.cn \
    --to=fanjinhao21s@ict.ac.cn \
    --cc=its@irrelevant.dk \
    --cc=kbusch@kernel.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.