* [PATCH v2 0/3] hw/nvme: add irqfd support
@ 2022-08-25 7:47 Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd Jinhao Fan
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 7:47 UTC (permalink / raw)
To: qemu-devel; +Cc: its, kbusch, stefanha, Jinhao Fan
This patch series changes qemu-nvme's interrupt emulation to use event
notifiers, which can ensure thread-safe interrupt delivery when iothread
is used. In the first patch, I convert qemu-nvme's IO emulation
logic to send irq via eventfd, so that the actual assertion and
deassertion is always done in the main loop thread with BQL held. In the
second patch, support is added to send irq via KVM irqfd, bypassing
qemu's MSI-x emulation. In the last patch, I add MSI-x mask handlers
when irqfd is enabled so that qemu-nvme knows which vector is masked
even when qemu's MSI-x emulation is bypassed.
Changes since v1:
- Made nvme_irq_(de)assert wrappers around eventfd call and actual irq
assertion
- Dropped the previous first patch to avoid duplicate checks for
irq_enabled and msix_enabled
Jinhao Fan (3):
hw/nvme: support irq(de)assertion with eventfd
hw/nvme: use KVM irqfd when available
hw/nvme: add MSI-x mask handlers for irqfd
hw/nvme/ctrl.c | 264 ++++++++++++++++++++++++++++++++++++++++---
hw/nvme/nvme.h | 7 ++
hw/nvme/trace-events | 3 +
3 files changed, 257 insertions(+), 17 deletions(-)
--
2.25.1
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 7:47 [PATCH v2 0/3] hw/nvme: add irqfd support Jinhao Fan
@ 2022-08-25 7:47 ` Jinhao Fan
2022-08-25 9:33 ` Klaus Jensen
2022-08-25 7:47 ` [PATCH v2 2/3] hw/nvme: use KVM irqfd when available Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 3/3] hw/nvme: add MSI-x mask handlers for irqfd Jinhao Fan
2 siblings, 1 reply; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 7:47 UTC (permalink / raw)
To: qemu-devel; +Cc: its, kbusch, stefanha, Jinhao Fan, open list:nvme
When the new option 'irq-eventfd' is turned on, the IO emulation code
signals an eventfd when it wants to (de)assert an irq. The main loop
eventfd handler does the actual irq (de)assertion. This paves the way
for iothread support since QEMU's interrupt emulation is not thread
safe.
Asserting and deasserting irq with eventfd has some performance
implications. For small queue depth it increases request latency but
for large queue depth it effectively coalesces irqs.
Comparison (KIOPS):
QD 1 4 16 64
QEMU 38 123 210 329
irq-eventfd 32 106 240 364
Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
---
hw/nvme/ctrl.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------
hw/nvme/nvme.h | 4 ++
2 files changed, 123 insertions(+), 17 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 87aeba0564..6ecf6fafd9 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -526,34 +526,57 @@ static void nvme_irq_check(NvmeCtrl *n)
}
}
+static void nvme_irq_do_assert(NvmeCtrl *n, NvmeCQueue *cq)
+{
+ if (msix_enabled(&(n->parent_obj))) {
+ trace_pci_nvme_irq_msix(cq->vector);
+ msix_notify(&(n->parent_obj), cq->vector);
+ } else {
+ trace_pci_nvme_irq_pin();
+ assert(cq->vector < 32);
+ n->irq_status |= 1 << cq->vector;
+ nvme_irq_check(n);
+ }
+}
+
static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
{
if (cq->irq_enabled) {
- if (msix_enabled(&(n->parent_obj))) {
- trace_pci_nvme_irq_msix(cq->vector);
- msix_notify(&(n->parent_obj), cq->vector);
+ if (cq->assert_notifier.initialized) {
+ event_notifier_set(&cq->assert_notifier);
} else {
- trace_pci_nvme_irq_pin();
- assert(cq->vector < 32);
- n->irq_status |= 1 << cq->vector;
- nvme_irq_check(n);
+ nvme_irq_do_assert(n, cq);
}
} else {
trace_pci_nvme_irq_masked();
}
}
+static void nvme_irq_do_deassert(NvmeCtrl *n, NvmeCQueue *cq)
+{
+ if (msix_enabled(&(n->parent_obj))) {
+ return;
+ } else {
+ assert(cq->vector < 32);
+ if (!n->cq_pending) {
+ n->irq_status &= ~(1 << cq->vector);
+ }
+ nvme_irq_check(n);
+ }
+}
+
static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
{
if (cq->irq_enabled) {
- if (msix_enabled(&(n->parent_obj))) {
- return;
+ if (cq->deassert_notifier.initialized) {
+ /*
+ * Event notifier will only be initilized when MSI-X is in use,
+ * therefore no need to worry about extra eventfd syscall for
+ * pin-based interrupts.
+ */
+ event_notifier_set(&cq->deassert_notifier);
} else {
- assert(cq->vector < 32);
- if (!n->cq_pending) {
- n->irq_status &= ~(1 << cq->vector);
- }
- nvme_irq_check(n);
+ nvme_irq_do_deassert(n, cq);
}
}
}
@@ -1338,6 +1361,54 @@ static void nvme_update_cq_head(NvmeCQueue *cq)
trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
}
+static void nvme_assert_notifier_read(EventNotifier *e)
+{
+ NvmeCQueue *cq = container_of(e, NvmeCQueue, assert_notifier);
+ if (event_notifier_test_and_clear(e)) {
+ nvme_irq_do_assert(cq->ctrl, cq);
+ }
+}
+
+static void nvme_deassert_notifier_read(EventNotifier *e)
+{
+ NvmeCQueue *cq = container_of(e, NvmeCQueue, deassert_notifier);
+ if (event_notifier_test_and_clear(e)) {
+ nvme_irq_do_deassert(cq->ctrl, cq);
+ }
+}
+
+static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
+{
+ int ret;
+
+ ret = event_notifier_init(&cq->assert_notifier, 0);
+ if (ret < 0) {
+ goto fail_assert_handler;
+ }
+
+ event_notifier_set_handler(&cq->assert_notifier,
+ nvme_assert_notifier_read);
+
+ if (!msix_enabled(&n->parent_obj)) {
+ ret = event_notifier_init(&cq->deassert_notifier, 0);
+ if (ret < 0) {
+ goto fail_deassert_handler;
+ }
+
+ event_notifier_set_handler(&cq->deassert_notifier,
+ nvme_deassert_notifier_read);
+ }
+
+ return;
+
+fail_deassert_handler:
+ event_notifier_set_handler(&cq->deassert_notifier, NULL);
+ event_notifier_cleanup(&cq->deassert_notifier);
+fail_assert_handler:
+ event_notifier_set_handler(&cq->assert_notifier, NULL);
+ event_notifier_cleanup(&cq->assert_notifier);
+}
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -1377,8 +1448,25 @@ static void nvme_post_cqes(void *opaque)
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
- if (cq->irq_enabled && !pending) {
- n->cq_pending++;
+ if (cq->irq_enabled) {
+ if (!pending) {
+ n->cq_pending++;
+ }
+
+ if (unlikely(cq->first_io_cqe)) {
+ /*
+ * Initilize event notifier when first cqe is posted. For irqfd
+ * support we need to register the MSI message in KVM. We
+ * can not do this registration at CQ creation time because
+ * Linux's NVMe driver changes the MSI message after CQ creation.
+ */
+ cq->first_io_cqe = false;
+
+ if (n->params.irq_eventfd) {
+ nvme_init_irq_notifier(n, cq);
+ }
+ }
+
}
nvme_irq_assert(n, cq);
@@ -4705,6 +4793,14 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
event_notifier_set_handler(&cq->notifier, NULL);
event_notifier_cleanup(&cq->notifier);
}
+ if (cq->assert_notifier.initialized) {
+ event_notifier_set_handler(&cq->assert_notifier, NULL);
+ event_notifier_cleanup(&cq->assert_notifier);
+ }
+ if (cq->deassert_notifier.initialized) {
+ event_notifier_set_handler(&cq->deassert_notifier, NULL);
+ event_notifier_cleanup(&cq->deassert_notifier);
+ }
if (msix_enabled(&n->parent_obj)) {
msix_vector_unuse(&n->parent_obj, cq->vector);
}
@@ -4734,7 +4830,7 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
n->cq_pending--;
}
- nvme_irq_deassert(n, cq);
+ nvme_irq_do_deassert(n, cq);
trace_pci_nvme_del_cq(qid);
nvme_free_cq(cq, n);
return NVME_SUCCESS;
@@ -4772,6 +4868,11 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
}
n->cq[cqid] = cq;
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+ /*
+ * Only enable irqfd for IO queues since we always emulate admin queue
+ * in main loop thread
+ */
+ cq->first_io_cqe = cqid != 0;
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -7671,6 +7772,7 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
+ DEFINE_PROP_BOOL("irq-eventfd", NvmeCtrl, params.irq_eventfd, false),
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 79f5c281c2..759d0ecd7c 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -398,6 +398,9 @@ typedef struct NvmeCQueue {
uint64_t ei_addr;
QEMUTimer *timer;
EventNotifier notifier;
+ EventNotifier assert_notifier;
+ EventNotifier deassert_notifier;
+ bool first_io_cqe;
bool ioeventfd_enabled;
QTAILQ_HEAD(, NvmeSQueue) sq_list;
QTAILQ_HEAD(, NvmeRequest) req_list;
@@ -422,6 +425,7 @@ typedef struct NvmeParams {
bool auto_transition_zones;
bool legacy_cmb;
bool ioeventfd;
+ bool irq_eventfd;
uint8_t sriov_max_vfs;
uint16_t sriov_vq_flexible;
uint16_t sriov_vi_flexible;
--
2.25.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 2/3] hw/nvme: use KVM irqfd when available
2022-08-25 7:47 [PATCH v2 0/3] hw/nvme: add irqfd support Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd Jinhao Fan
@ 2022-08-25 7:47 ` Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 3/3] hw/nvme: add MSI-x mask handlers for irqfd Jinhao Fan
2 siblings, 0 replies; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 7:47 UTC (permalink / raw)
To: qemu-devel; +Cc: its, kbusch, stefanha, Jinhao Fan, open list:nvme
Use KVM's irqfd to send interrupts when possible. This approach is
thread safe. Moreover, it does not have the inter-thread communication
overhead of plain event notifiers since handler callbacks are called
in the same system call as irqfd write.
Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
---
hw/nvme/ctrl.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
hw/nvme/nvme.h | 1 +
2 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 6ecf6fafd9..74075f782f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -192,6 +192,7 @@
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
#include "sysemu/block-backend.h"
#include "sysemu/hostmem.h"
#include "hw/pci/msix.h"
@@ -1377,8 +1378,26 @@ static void nvme_deassert_notifier_read(EventNotifier *e)
}
}
+static int nvme_kvm_msix_vector_use(NvmeCtrl *n,
+ NvmeCQueue *cq,
+ uint32_t vector)
+{
+ int ret;
+
+ KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
+ ret = kvm_irqchip_add_msi_route(&c, vector, &n->parent_obj);
+ if (ret < 0) {
+ return ret;
+ }
+ kvm_irqchip_commit_route_changes(&c);
+ cq->virq = ret;
+ return 0;
+}
+
static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
{
+ bool with_irqfd = msix_enabled(&n->parent_obj) &&
+ kvm_msi_via_irqfd_enabled();
int ret;
ret = event_notifier_init(&cq->assert_notifier, 0);
@@ -1386,8 +1405,21 @@ static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
goto fail_assert_handler;
}
- event_notifier_set_handler(&cq->assert_notifier,
- nvme_assert_notifier_read);
+ if (with_irqfd) {
+ ret = nvme_kvm_msix_vector_use(n, cq, cq->vector);
+ if (ret < 0) {
+ goto fail_assert_handler;
+ }
+ ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier, NULL,
+ cq->virq);
+ if (ret < 0) {
+ goto fail_kvm;
+ }
+ } else {
+ event_notifier_set_handler(&cq->assert_notifier,
+ nvme_assert_notifier_read);
+ }
if (!msix_enabled(&n->parent_obj)) {
ret = event_notifier_init(&cq->deassert_notifier, 0);
@@ -1404,6 +1436,12 @@ static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
fail_deassert_handler:
event_notifier_set_handler(&cq->deassert_notifier, NULL);
event_notifier_cleanup(&cq->deassert_notifier);
+ if (with_irqfd) {
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &cq->assert_notifier,
+ cq->virq);
+fail_kvm:
+ kvm_irqchip_release_virq(kvm_state, cq->virq);
+ }
fail_assert_handler:
event_notifier_set_handler(&cq->assert_notifier, NULL);
event_notifier_cleanup(&cq->assert_notifier);
@@ -4783,6 +4821,8 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
{
+ bool with_irqfd = msix_enabled(&n->parent_obj) &&
+ kvm_msi_via_irqfd_enabled();
uint16_t offset = (cq->cqid << 3) + (1 << 2);
n->cq[cq->cqid] = NULL;
@@ -4794,6 +4834,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
event_notifier_cleanup(&cq->notifier);
}
if (cq->assert_notifier.initialized) {
+ if (with_irqfd) {
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ cq->virq);
+ kvm_irqchip_release_virq(kvm_state, cq->virq);
+ }
event_notifier_set_handler(&cq->assert_notifier, NULL);
event_notifier_cleanup(&cq->assert_notifier);
}
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 759d0ecd7c..85fd9cd0e2 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -396,6 +396,7 @@ typedef struct NvmeCQueue {
uint64_t dma_addr;
uint64_t db_addr;
uint64_t ei_addr;
+ int virq;
QEMUTimer *timer;
EventNotifier notifier;
EventNotifier assert_notifier;
--
2.25.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 3/3] hw/nvme: add MSI-x mask handlers for irqfd
2022-08-25 7:47 [PATCH v2 0/3] hw/nvme: add irqfd support Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 2/3] hw/nvme: use KVM irqfd when available Jinhao Fan
@ 2022-08-25 7:47 ` Jinhao Fan
2 siblings, 0 replies; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 7:47 UTC (permalink / raw)
To: qemu-devel; +Cc: its, kbusch, stefanha, Jinhao Fan, open list:nvme
When irqfd is enabled, we bypass QEMU's irq emulation and let KVM
directly assert the irq. However, KVM is not aware of the device's MSI-x
masking status. Add MSI-x mask bookkeeping in NVMe emulation and
detach the corresponding irqfd when that vector is masked.
Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
---
hw/nvme/ctrl.c | 82 ++++++++++++++++++++++++++++++++++++++++++++
hw/nvme/nvme.h | 2 ++
hw/nvme/trace-events | 3 ++
3 files changed, 87 insertions(+)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 74075f782f..30bbda7bb5 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -7493,10 +7493,84 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
return 0;
}
+static int nvme_vector_unmask(PCIDevice *pci_dev, unsigned vector,
+ MSIMessage msg)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+ int ret;
+
+ trace_pci_nvme_irq_unmask(vector, msg.address, msg.data);
+
+ for (uint32_t i = 1; i <= n->params.max_ioqpairs; i++) {
+ NvmeCQueue *cq = n->cq[i];
+ /*
+ * If this function is called, then irqfd must be available. Therefore,
+ * irqfd must be in use if cq->assert_notifier.initialized is true.
+ */
+ if (cq && cq->vector == vector && cq->assert_notifier.initialized) {
+ if (cq->msg.data != msg.data || cq->msg.address != msg.address) {
+ ret = kvm_irqchip_update_msi_route(kvm_state, cq->virq, msg,
+ pci_dev);
+ if (ret < 0) {
+ return ret;
+ }
+ kvm_irqchip_commit_routes(kvm_state);
+ cq->msg = msg;
+ }
+
+ ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ NULL, cq->virq);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void nvme_vector_mask(PCIDevice *pci_dev, unsigned vector)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+
+ trace_pci_nvme_irq_mask(vector);
+
+ for (uint32_t i = 1; i <= n->params.max_ioqpairs; i++) {
+ NvmeCQueue *cq = n->cq[i];
+ if (cq && cq->vector == vector && cq->assert_notifier.initialized) {
+ kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
+ &cq->assert_notifier,
+ cq->virq);
+ }
+ }
+}
+
+static void nvme_vector_poll(PCIDevice *pci_dev,
+ unsigned int vector_start,
+ unsigned int vector_end)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+
+ trace_pci_nvme_irq_poll(vector_start, vector_end);
+
+ for (uint32_t i = 1; i <= n->params.max_ioqpairs; i++) {
+ NvmeCQueue *cq = n->cq[i];
+ if (cq && cq->vector >= vector_start && cq->vector <= vector_end
+ && msix_is_masked(pci_dev, cq->vector)
+ && cq->assert_notifier.initialized) {
+ if (event_notifier_test_and_clear(&cq->assert_notifier)) {
+ msix_set_pending(pci_dev, i);
+ }
+ }
+ }
+}
static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
{
uint8_t *pci_conf = pci_dev->config;
+ bool with_irqfd = msix_enabled(&n->parent_obj) &&
+ kvm_msi_via_irqfd_enabled();
uint64_t bar_size;
unsigned msix_table_offset, msix_pba_offset;
int ret;
@@ -7549,6 +7623,13 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
}
}
+ if (with_irqfd) {
+ msix_set_vector_notifiers(pci_dev,
+ nvme_vector_unmask,
+ nvme_vector_mask,
+ nvme_vector_poll);
+ }
+
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
if (n->params.cmb_size_mb) {
@@ -7796,6 +7877,7 @@ static void nvme_exit(PCIDevice *pci_dev)
pcie_sriov_pf_exit(pci_dev);
}
+ msix_unset_vector_notifiers(pci_dev);
msix_uninit(pci_dev, &n->bar0, &n->bar0);
memory_region_del_subregion(&n->bar0, &n->iomem);
}
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 85fd9cd0e2..707a55ebfc 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -20,6 +20,7 @@
#include "qemu/uuid.h"
#include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
#include "hw/block/block.h"
#include "block/nvme.h"
@@ -401,6 +402,7 @@ typedef struct NvmeCQueue {
EventNotifier notifier;
EventNotifier assert_notifier;
EventNotifier deassert_notifier;
+ MSIMessage msg;
bool first_io_cqe;
bool ioeventfd_enabled;
QTAILQ_HEAD(, NvmeSQueue) sq_list;
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index fccb79f489..b11fcf4a65 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -2,6 +2,9 @@
pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
pci_nvme_irq_pin(void) "pulsing IRQ pin"
pci_nvme_irq_masked(void) "IRQ is masked"
+pci_nvme_irq_mask(uint32_t vector) "IRQ %u gets masked"
+pci_nvme_irq_unmask(uint32_t vector, uint64_t addr, uint32_t data) "IRQ %u gets unmasked, addr=0x%"PRIx64" data=0x%"PRIu32""
+pci_nvme_irq_poll(uint32_t vector_start, uint32_t vector_end) "IRQ poll, start=0x%"PRIu32" end=0x%"PRIu32""
pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
pci_nvme_dbbuf_config(uint64_t dbs_addr, uint64_t eis_addr) "dbs_addr=0x%"PRIx64" eis_addr=0x%"PRIx64""
pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
--
2.25.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 7:47 ` [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd Jinhao Fan
@ 2022-08-25 9:33 ` Klaus Jensen
2022-08-25 11:16 ` Jinhao Fan
0 siblings, 1 reply; 13+ messages in thread
From: Klaus Jensen @ 2022-08-25 9:33 UTC (permalink / raw)
To: Jinhao Fan; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
[-- Attachment #1: Type: text/plain, Size: 2291 bytes --]
On Aug 25 15:47, Jinhao Fan wrote:
> When the new option 'irq-eventfd' is turned on, the IO emulation code
> signals an eventfd when it want to (de)assert an irq. The main loop
> eventfd handler does the actual irq (de)assertion. This paves the way
> for iothread support since QEMU's interrupt emulation is not thread
> safe.
>
> Asserting and deasseting irq with eventfd has some performance
> implications. For small queue depth it increases request latency but
> for large queue depth it effectively coalesces irqs.
>
> Comparision (KIOPS):
>
> QD 1 4 16 64
> QEMU 38 123 210 329
> irq-eventfd 32 106 240 364
>
> Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
> ---
> hw/nvme/ctrl.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------
> hw/nvme/nvme.h | 4 ++
> 2 files changed, 123 insertions(+), 17 deletions(-)
>
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 87aeba0564..6ecf6fafd9 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -1377,8 +1448,25 @@ static void nvme_post_cqes(void *opaque)
> QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
> }
> if (cq->tail != cq->head) {
> - if (cq->irq_enabled && !pending) {
> - n->cq_pending++;
> + if (cq->irq_enabled) {
> + if (!pending) {
> + n->cq_pending++;
> + }
> +
> + if (unlikely(cq->first_io_cqe)) {
> + /*
> + * Initilize event notifier when first cqe is posted. For irqfd
> + * support we need to register the MSI message in KVM. We
> + * can not do this registration at CQ creation time because
> + * Linux's NVMe driver changes the MSI message after CQ creation.
> + */
> + cq->first_io_cqe = false;
> +
> + if (n->params.irq_eventfd) {
> + nvme_init_irq_notifier(n, cq);
> + }
> + }
I'm still a bit perplexed by this issue, so I just tried moving
nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
first_io_cqe thing. I did not observe any particular issues?
What bad behavior did you encounter, it seems to work fine to me?
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 9:33 ` Klaus Jensen
@ 2022-08-25 11:16 ` Jinhao Fan
2022-08-25 11:56 ` Klaus Jensen
0 siblings, 1 reply; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 11:16 UTC (permalink / raw)
To: Klaus Jensen; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
On 8/25/2022 5:33 PM, Klaus Jensen wrote:
> I'm still a bit perplexed by this issue, so I just tried moving
> nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
> first_io_cqe thing. I did not observe any particular issues?
>
> What bad behavior did you encounter, it seems to work fine to me
The kernel boots up and got stuck, waiting for interrupts. Then the
request times out and got retried three times. Finally the driver seems
to decide that the drive is down and continues to boot.
I added some prints during debugging and found that the MSI-X message
which got registered in KVM via kvm_irqchip_add_msi_route() is not the
same as the one actually used in msix_notify().
Are you sure you are using KVM's irqfd?
Here is a previous discussion with Keith [1].
[1]
https://lore.kernel.org/qemu-devel/YvKJk2dYiwomexFv@kbusch-mbp.dhcp.thefacebook.com/#t
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 11:16 ` Jinhao Fan
@ 2022-08-25 11:56 ` Klaus Jensen
2022-08-25 12:38 ` Klaus Jensen
2022-08-25 14:05 ` Jinhao Fan
0 siblings, 2 replies; 13+ messages in thread
From: Klaus Jensen @ 2022-08-25 11:56 UTC (permalink / raw)
To: Jinhao Fan; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
[-- Attachment #1: Type: text/plain, Size: 2330 bytes --]
On Aug 25 19:16, Jinhao Fan wrote:
> On 8/25/2022 5:33 PM, Klaus Jensen wrote:
> > I'm still a bit perplexed by this issue, so I just tried moving
> > nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
> > first_io_cqe thing. I did not observe any particular issues?
> >
> > What bad behavior did you encounter, it seems to work fine to me
>
> The kernel boots up and got stuck, waiting for interrupts. Then the request
> times out and got retried three times. Finally the driver seems to decide
> that the drive is down and continues to boot.
>
> I added some prints during debugging and found that the MSI-X message which
> got registered in KVM via kvm_irqchip_add_msi_route() is not the same as the
> one actually used in msix_notify().
>
> Are you sure you are using KVM's irqfd?
>
Pretty sure? Using "ioeventfd=on,irq-eventfd=on" on the controller.
And the following patch.
diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
index 30bbda7bb5ae..b2e41d3bd745 100644
--- i/hw/nvme/ctrl.c
+++ w/hw/nvme/ctrl.c
@@ -1490,21 +1490,6 @@ static void nvme_post_cqes(void *opaque)
if (!pending) {
n->cq_pending++;
}
-
- if (unlikely(cq->first_io_cqe)) {
- /*
- * Initilize event notifier when first cqe is posted. For irqfd
- * support we need to register the MSI message in KVM. We
- * can not do this registration at CQ creation time because
- * Linux's NVMe driver changes the MSI message after CQ creation.
- */
- cq->first_io_cqe = false;
-
- if (n->params.irq_eventfd) {
- nvme_init_irq_notifier(n, cq);
- }
- }
-
}
nvme_irq_assert(n, cq);
@@ -4914,11 +4899,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
}
n->cq[cqid] = cq;
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+
/*
* Only enable irqfd for IO queues since we always emulate admin queue
* in main loop thread
*/
- cq->first_io_cqe = cqid != 0;
+ if (cqid && n->params.irq_eventfd) {
+ nvme_init_irq_notifier(n, cq);
+ }
}
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 11:56 ` Klaus Jensen
@ 2022-08-25 12:38 ` Klaus Jensen
2022-08-25 13:09 ` Jinhao Fan
2022-08-25 14:05 ` Jinhao Fan
1 sibling, 1 reply; 13+ messages in thread
From: Klaus Jensen @ 2022-08-25 12:38 UTC (permalink / raw)
To: Jinhao Fan; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
[-- Attachment #1: Type: text/plain, Size: 4580 bytes --]
On Aug 25 13:56, Klaus Jensen wrote:
> On Aug 25 19:16, Jinhao Fan wrote:
> > On 8/25/2022 5:33 PM, Klaus Jensen wrote:
> > > I'm still a bit perplexed by this issue, so I just tried moving
> > > nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
> > > first_io_cqe thing. I did not observe any particular issues?
> > >
> > > What bad behavior did you encounter, it seems to work fine to me
> >
> > The kernel boots up and got stuck, waiting for interrupts. Then the request
> > times out and got retried three times. Finally the driver seems to decide
> > that the drive is down and continues to boot.
> >
> > I added some prints during debugging and found that the MSI-X message which
> > got registered in KVM via kvm_irqchip_add_msi_route() is not the same as the
> > one actually used in msix_notify().
> >
> > Are you sure you are using KVM's irqfd?
> >
>
> Pretty sure? Using "ioeventfd=on,irq-eventfd=on" on the controller.
>
> And the following patch.
>
>
> diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
> index 30bbda7bb5ae..b2e41d3bd745 100644
> --- i/hw/nvme/ctrl.c
> +++ w/hw/nvme/ctrl.c
> @@ -1490,21 +1490,6 @@ static void nvme_post_cqes(void *opaque)
> if (!pending) {
> n->cq_pending++;
> }
> -
> - if (unlikely(cq->first_io_cqe)) {
> - /*
> - * Initilize event notifier when first cqe is posted. For irqfd
> - * support we need to register the MSI message in KVM. We
> - * can not do this registration at CQ creation time because
> - * Linux's NVMe driver changes the MSI message after CQ creation.
> - */
> - cq->first_io_cqe = false;
> -
> - if (n->params.irq_eventfd) {
> - nvme_init_irq_notifier(n, cq);
> - }
> - }
> -
> }
>
> nvme_irq_assert(n, cq);
> @@ -4914,11 +4899,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> }
> n->cq[cqid] = cq;
> cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
> +
> /*
> * Only enable irqfd for IO queues since we always emulate admin queue
> * in main loop thread
> */
> - cq->first_io_cqe = cqid != 0;
> + if (cqid && n->params.irq_eventfd) {
> + nvme_init_irq_notifier(n, cq);
> + }
> }
>
>
From a trace, this is what I observe:
First, the queue is created and a virq (0) is assigned.
msix_table_mmio_write dev nvme hwaddr 0xc val 0x0 size 4
pci_nvme_mmio_write addr 0x1000 data 0x7 size 4
pci_nvme_mmio_doorbell_sq sqid 0 new_tail 7
pci_nvme_admin_cmd cid 4117 sqid 0 opc 0x5 opname 'NVME_ADM_CMD_CREATE_CQ'
pci_nvme_create_cq create completion queue, addr=0x104318000, cqid=1, vector=1, qsize=1023, qflags=3, ien=1
kvm_irqchip_add_msi_route dev nvme vector 1 virq 0
kvm_irqchip_commit_routes
pci_nvme_enqueue_req_completion cid 4117 cqid 0 dw0 0x0 dw1 0x0 status 0x0
pci_nvme_irq_msix raising MSI-X IRQ vector 0
pci_nvme_mmio_write addr 0x1004 data 0x7 size 4
pci_nvme_mmio_doorbell_cq cqid 0 new_head 7
We go on and the SQ is created as well.
pci_nvme_mmio_write addr 0x1000 data 0x8 size 4
pci_nvme_mmio_doorbell_sq sqid 0 new_tail 8
pci_nvme_admin_cmd cid 4118 sqid 0 opc 0x1 opname 'NVME_ADM_CMD_CREATE_SQ'
pci_nvme_create_sq create submission queue, addr=0x1049a0000, sqid=1, cqid=1, qsize=1023, qflags=1
pci_nvme_enqueue_req_completion cid 4118 cqid 0 dw0 0x0 dw1 0x0 status 0x0
pci_nvme_irq_msix raising MSI-X IRQ vector 0
pci_nvme_mmio_write addr 0x1004 data 0x8 size 4
pci_nvme_mmio_doorbell_cq cqid 0 new_head 8
Then i get a bunch of update_msi_routes, but the virq's are not related
to the nvme device.
However, I then assume we hit queue_request_irq() in the kernel and we
see the MSI-X table updated:
msix_table_mmio_write dev nvme hwaddr 0x1c val 0x1 size 4
msix_table_mmio_write dev nvme hwaddr 0x10 val 0xfee003f8 size 4
msix_table_mmio_write dev nvme hwaddr 0x14 val 0x0 size 4
msix_table_mmio_write dev nvme hwaddr 0x18 val 0x0 size 4
msix_table_mmio_write dev nvme hwaddr 0x1c val 0x0 size 4
kvm_irqchip_update_msi_route Updating MSI route virq=0
... other virq updates
kvm_irqchip_commit_routes
Notice the last trace line. The route for virq 0 is updated.
Looks to me that the virq route is implicitly updated with the new
message, no?
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 12:38 ` Klaus Jensen
@ 2022-08-25 13:09 ` Jinhao Fan
2022-08-25 13:59 ` Klaus Jensen
0 siblings, 1 reply; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 13:09 UTC (permalink / raw)
To: Klaus Jensen; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
[-- Attachment #1: Type: text/plain, Size: 4902 bytes --]
> 在 2022年8月25日,20:39,Klaus Jensen <its@irrelevant.dk> 写道:
>
> On Aug 25 13:56, Klaus Jensen wrote:
>>> On Aug 25 19:16, Jinhao Fan wrote:
>>> On 8/25/2022 5:33 PM, Klaus Jensen wrote:
>>>> I'm still a bit perplexed by this issue, so I just tried moving
>>>> nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
>>>> first_io_cqe thing. I did not observe any particular issues?
>>>>
>>>> What bad behavior did you encounter, it seems to work fine to me
>>>
>>> The kernel boots up and got stuck, waiting for interrupts. Then the request
>>> times out and got retried three times. Finally the driver seems to decide
>>> that the drive is down and continues to boot.
>>>
>>> I added some prints during debugging and found that the MSI-X message which
>>> got registered in KVM via kvm_irqchip_add_msi_route() is not the same as the
>>> one actually used in msix_notify().
>>>
>>> Are you sure you are using KVM's irqfd?
>>>
>>
>> Pretty sure? Using "ioeventfd=on,irq-eventfd=on" on the controller.
>>
>> And the following patch.
>>
>>
>> diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
>> index 30bbda7bb5ae..b2e41d3bd745 100644
>> --- i/hw/nvme/ctrl.c
>> +++ w/hw/nvme/ctrl.c
>> @@ -1490,21 +1490,6 @@ static void nvme_post_cqes(void *opaque)
>> if (!pending) {
>> n->cq_pending++;
>> }
>> -
>> - if (unlikely(cq->first_io_cqe)) {
>> - /*
>> - * Initilize event notifier when first cqe is posted. For irqfd
>> - * support we need to register the MSI message in KVM. We
>> - * can not do this registration at CQ creation time because
>> - * Linux's NVMe driver changes the MSI message after CQ creation.
>> - */
>> - cq->first_io_cqe = false;
>> -
>> - if (n->params.irq_eventfd) {
>> - nvme_init_irq_notifier(n, cq);
>> - }
>> - }
>> -
>> }
>>
>> nvme_irq_assert(n, cq);
>> @@ -4914,11 +4899,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
>> }
>> n->cq[cqid] = cq;
>> cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
>> +
>> /*
>> * Only enable irqfd for IO queues since we always emulate admin queue
>> * in main loop thread
>> */
>> - cq->first_io_cqe = cqid != 0;
>> + if (cqid && n->params.irq_eventfd) {
>> + nvme_init_irq_notifier(n, cq);
>> + }
>> }
>>
>>
>
> From a trace, this is what I observe:
>
> First, the queue is created and a virq (0) is assigned.
>
> msix_table_mmio_write dev nvme hwaddr 0xc val 0x0 size 4
> pci_nvme_mmio_write addr 0x1000 data 0x7 size 4
> pci_nvme_mmio_doorbell_sq sqid 0 new_tail 7
> pci_nvme_admin_cmd cid 4117 sqid 0 opc 0x5 opname 'NVME_ADM_CMD_CREATE_CQ'
> pci_nvme_create_cq create completion queue, addr=0x104318000, cqid=1, vector=1, qsize=1023, qflags=3, ien=1
> kvm_irqchip_add_msi_route dev nvme vector 1 virq 0
> kvm_irqchip_commit_routes
> pci_nvme_enqueue_req_completion cid 4117 cqid 0 dw0 0x0 dw1 0x0 status 0x0
> pci_nvme_irq_msix raising MSI-X IRQ vector 0
> pci_nvme_mmio_write addr 0x1004 data 0x7 size 4
> pci_nvme_mmio_doorbell_cq cqid 0 new_head 7
>
> We go on and the SQ is created as well.
>
> pci_nvme_mmio_write addr 0x1000 data 0x8 size 4
> pci_nvme_mmio_doorbell_sq sqid 0 new_tail 8
> pci_nvme_admin_cmd cid 4118 sqid 0 opc 0x1 opname 'NVME_ADM_CMD_CREATE_SQ'
> pci_nvme_create_sq create submission queue, addr=0x1049a0000, sqid=1, cqid=1, qsize=1023, qflags=1
> pci_nvme_enqueue_req_completion cid 4118 cqid 0 dw0 0x0 dw1 0x0 status 0x0
> pci_nvme_irq_msix raising MSI-X IRQ vector 0
> pci_nvme_mmio_write addr 0x1004 data 0x8 size 4
> pci_nvme_mmio_doorbell_cq cqid 0 new_head 8
>
>
> Then i get a bunch of update_msi_routes, but the virq's are not related
> to the nvme device.
>
> However, I then assume we hit queue_request_irq() in the kernel and we
> see the MSI-X table updated:
>
> msix_table_mmio_write dev nvme hwaddr 0x1c val 0x1 size 4
> msix_table_mmio_write dev nvme hwaddr 0x10 val 0xfee003f8 size 4
> msix_table_mmio_write dev nvme hwaddr 0x14 val 0x0 size 4
> msix_table_mmio_write dev nvme hwaddr 0x18 val 0x0 size 4
> msix_table_mmio_write dev nvme hwaddr 0x1c val 0x0 size 4
> kvm_irqchip_update_msi_route Updating MSI route virq=0
> ... other virq updates
> kvm_irqchip_commit_routes
>
> Notice the last trace line. The route for virq 0 is updated.
>
> Looks to me that the virq route is implicitly updated with the new
> message, no?
Could you try without the msix masking patch? I suspect our unmask function actually did the “implicit” update here.
>
[-- Attachment #2: signature.asc --]
[-- Type: application/octet-stream, Size: 489 bytes --]
-----BEGIN PGP SIGNATURE-----
iQEzBAEBCAAdFiEEUigzqnXi3OaiR2bATeGvMW1PDekFAmMHbTcACgkQTeGvMW1P
DempXwgAhefrwajftbdm0SLeL3isCwQYRl3qCekbUGw2cFlcblNyvn7NSj5sjtX3
1x7XcHv4LVGaRlE7b/r5RadVT938KFjs57Jxc0xzzPeo+tHT5B62Hw6lFZf4KT/d
kykxcggCuVS7Fvmg/iQgXuY5O3FgH+3NiUX8mc6Qgu8rQHBC1eD3u067sVS9K2Gr
Ky2StxAly6chRY8veARY9Go6jUZpD+N0nDRa8x9CP+wn19bqp8oq7DcanB3hvBv4
m30Y2QmzdIQge68gWpsP1x4ehVmn399/87TJz25tboPexWVEmkuHvSDZRU7HyoJ5
QOqqKYZ1Ioexxhd8cuMFvE9N3edPAg==
=Yhtw
-----END PGP SIGNATURE-----
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 13:09 ` Jinhao Fan
@ 2022-08-25 13:59 ` Klaus Jensen
2022-08-25 14:11 ` Jinhao Fan
0 siblings, 1 reply; 13+ messages in thread
From: Klaus Jensen @ 2022-08-25 13:59 UTC (permalink / raw)
To: Jinhao Fan; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
[-- Attachment #1: Type: text/plain, Size: 5977 bytes --]
On Aug 25 21:09, Jinhao Fan wrote:
>
>
>
> > 在 2022年8月25日,20:39,Klaus Jensen <its@irrelevant.dk> 写道:
> >
> > On Aug 25 13:56, Klaus Jensen wrote:
> >>> On Aug 25 19:16, Jinhao Fan wrote:
> >>> On 8/25/2022 5:33 PM, Klaus Jensen wrote:
> >>>> I'm still a bit perplexed by this issue, so I just tried moving
> >>>> nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
> >>>> first_io_cqe thing. I did not observe any particular issues?
> >>>>
> >>>> What bad behavior did you encounter, it seems to work fine to me
> >>>
> >>> The kernel boots up and got stuck, waiting for interrupts. Then the request
> >>> times out and got retried three times. Finally the driver seems to decide
> >>> that the drive is down and continues to boot.
> >>>
> >>> I added some prints during debugging and found that the MSI-X message which
> >>> got registered in KVM via kvm_irqchip_add_msi_route() is not the same as the
> >>> one actually used in msix_notify().
> >>>
> >>> Are you sure you are using KVM's irqfd?
> >>>
> >>
> >> Pretty sure? Using "ioeventfd=on,irq-eventfd=on" on the controller.
> >>
> >> And the following patch.
> >>
> >>
> >> diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
> >> index 30bbda7bb5ae..b2e41d3bd745 100644
> >> --- i/hw/nvme/ctrl.c
> >> +++ w/hw/nvme/ctrl.c
> >> @@ -1490,21 +1490,6 @@ static void nvme_post_cqes(void *opaque)
> >> if (!pending) {
> >> n->cq_pending++;
> >> }
> >> -
> >> - if (unlikely(cq->first_io_cqe)) {
> >> - /*
> >> - * Initilize event notifier when first cqe is posted. For irqfd
> >> - * support we need to register the MSI message in KVM. We
> >> - * can not do this registration at CQ creation time because
> >> - * Linux's NVMe driver changes the MSI message after CQ creation.
> >> - */
> >> - cq->first_io_cqe = false;
> >> -
> >> - if (n->params.irq_eventfd) {
> >> - nvme_init_irq_notifier(n, cq);
> >> - }
> >> - }
> >> -
> >> }
> >>
> >> nvme_irq_assert(n, cq);
> >> @@ -4914,11 +4899,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> >> }
> >> n->cq[cqid] = cq;
> >> cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
> >> +
> >> /*
> >> * Only enable irqfd for IO queues since we always emulate admin queue
> >> * in main loop thread
> >> */
> >> - cq->first_io_cqe = cqid != 0;
> >> + if (cqid && n->params.irq_eventfd) {
> >> + nvme_init_irq_notifier(n, cq);
> >> + }
> >> }
> >>
> >>
> >
> > From a trace, this is what I observe:
> >
> > First, the queue is created and a virq (0) is assigned.
> >
> > msix_table_mmio_write dev nvme hwaddr 0xc val 0x0 size 4
> > pci_nvme_mmio_write addr 0x1000 data 0x7 size 4
> > pci_nvme_mmio_doorbell_sq sqid 0 new_tail 7
> > pci_nvme_admin_cmd cid 4117 sqid 0 opc 0x5 opname 'NVME_ADM_CMD_CREATE_CQ'
> > pci_nvme_create_cq create completion queue, addr=0x104318000, cqid=1, vector=1, qsize=1023, qflags=3, ien=1
> > kvm_irqchip_add_msi_route dev nvme vector 1 virq 0
> > kvm_irqchip_commit_routes
> > pci_nvme_enqueue_req_completion cid 4117 cqid 0 dw0 0x0 dw1 0x0 status 0x0
> > pci_nvme_irq_msix raising MSI-X IRQ vector 0
> > pci_nvme_mmio_write addr 0x1004 data 0x7 size 4
> > pci_nvme_mmio_doorbell_cq cqid 0 new_head 7
> >
> > We go on and the SQ is created as well.
> >
> > pci_nvme_mmio_write addr 0x1000 data 0x8 size 4
> > pci_nvme_mmio_doorbell_sq sqid 0 new_tail 8
> > pci_nvme_admin_cmd cid 4118 sqid 0 opc 0x1 opname 'NVME_ADM_CMD_CREATE_SQ'
> > pci_nvme_create_sq create submission queue, addr=0x1049a0000, sqid=1, cqid=1, qsize=1023, qflags=1
> > pci_nvme_enqueue_req_completion cid 4118 cqid 0 dw0 0x0 dw1 0x0 status 0x0
> > pci_nvme_irq_msix raising MSI-X IRQ vector 0
> > pci_nvme_mmio_write addr 0x1004 data 0x8 size 4
> > pci_nvme_mmio_doorbell_cq cqid 0 new_head 8
> >
> >
> > Then i get a bunch of update_msi_routes, but the virq's are not related
> > to the nvme device.
> >
> > However, I then assume we hit queue_request_irq() in the kernel and we
> > see the MSI-X table updated:
> >
> > msix_table_mmio_write dev nvme hwaddr 0x1c val 0x1 size 4
> > msix_table_mmio_write dev nvme hwaddr 0x10 val 0xfee003f8 size 4
> > msix_table_mmio_write dev nvme hwaddr 0x14 val 0x0 size 4
> > msix_table_mmio_write dev nvme hwaddr 0x18 val 0x0 size 4
> > msix_table_mmio_write dev nvme hwaddr 0x1c val 0x0 size 4
> > kvm_irqchip_update_msi_route Updating MSI route virq=0
> > ... other virq updates
> > kvm_irqchip_commit_routes
> >
> > Notice the last trace line. The route for virq 0 is updated.
> >
> > Looks to me that the virq route is implicitly updated with the new
> > message, no?
>
> Could you try without the msix masking patch? I suspect our unmask function actually did the “implicit” update here.
>
> >
RIGHT.
target/i386/kvm/kvm.c:
5353 if (!notify_list_inited) {
5354 /* For the first time we do add route, add ourselves into
5355 * IOMMU's IEC notify list if needed. */
5356 X86IOMMUState *iommu = x86_iommu_get_default();
5357 if (iommu) {
5358 x86_iommu_iec_register_notifier(iommu,
5359 kvm_update_msi_routes_all,
5360 NULL);
5361 }
5362 notify_list_inited = true;
5363 }
If we have an IOMMU, then it all just works. I always run with a viommu
configured, so that is why I was not seeing the issue. The masking has
nothing to do with it.
I wonder if this can be made to work without the iommu as well...
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 11:56 ` Klaus Jensen
2022-08-25 12:38 ` Klaus Jensen
@ 2022-08-25 14:05 ` Jinhao Fan
1 sibling, 0 replies; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 14:05 UTC (permalink / raw)
To: Klaus Jensen; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
On 8/25/2022 7:56 PM, Klaus Jensen wrote:
> On Aug 25 19:16, Jinhao Fan wrote:
>> On 8/25/2022 5:33 PM, Klaus Jensen wrote:
>>> I'm still a bit perplexed by this issue, so I just tried moving
>>> nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
>>> first_io_cqe thing. I did not observe any particular issues?
>>>
>>> What bad behavior did you encounter, it seems to work fine to me
>>
>> The kernel boots up and got stuck, waiting for interrupts. Then the request
>> times out and got retried three times. Finally the driver seems to decide
>> that the drive is down and continues to boot.
>>
>> I added some prints during debugging and found that the MSI-X message which
>> got registered in KVM via kvm_irqchip_add_msi_route() is not the same as the
>> one actually used in msix_notify().
>>
>> Are you sure you are using KVM's irqfd?
>>
>
> Pretty sure? Using "ioeventfd=on,irq-eventfd=on" on the controller.
>
> And the following patch.
>
>
> diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
> index 30bbda7bb5ae..b2e41d3bd745 100644
> --- i/hw/nvme/ctrl.c
> +++ w/hw/nvme/ctrl.c
> @@ -1490,21 +1490,6 @@ static void nvme_post_cqes(void *opaque)
> if (!pending) {
> n->cq_pending++;
> }
> -
> - if (unlikely(cq->first_io_cqe)) {
> - /*
> - * Initilize event notifier when first cqe is posted. For irqfd
> - * support we need to register the MSI message in KVM. We
> - * can not do this registration at CQ creation time because
> - * Linux's NVMe driver changes the MSI message after CQ creation.
> - */
> - cq->first_io_cqe = false;
> -
> - if (n->params.irq_eventfd) {
> - nvme_init_irq_notifier(n, cq);
> - }
> - }
> -
> }
>
> nvme_irq_assert(n, cq);
> @@ -4914,11 +4899,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
> }
> n->cq[cqid] = cq;
> cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
> +
> /*
> * Only enable irqfd for IO queues since we always emulate admin queue
> * in main loop thread
> */
> - cq->first_io_cqe = cqid != 0;
> + if (cqid && n->params.irq_eventfd) {
> + nvme_init_irq_notifier(n, cq);
> + }
> }
>
>
Interesting. This patch does not work well for me. Here is the kernel
boot log related to NVMe:
[ 0.625096] nvme nvme0: 4/0/0 default/read/poll queues
[ 0.627386] IPI shorthand broadcast: enabled
[ 0.627534] nvme nvme0: Ignoring bogus Namespace Identifiers
[ 0.628167] sched_clock: Marking stable (528813377,
99314943)->(636909773, -8781453)
[ 0.629204] Spurious interrupt (vector 0xef) on CPU#0. Acked
....
[ 30.901423] nvme nvme0: I/O 320 (Read) QID 2 timeout, aborting
[ 30.902696] nvme nvme0: Abort status: 0x0
Notice the "spurious interrupt" line. I believe this means the MSI
message is incorrect.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-25 13:59 ` Klaus Jensen
@ 2022-08-25 14:11 ` Jinhao Fan
0 siblings, 0 replies; 13+ messages in thread
From: Jinhao Fan @ 2022-08-25 14:11 UTC (permalink / raw)
To: Klaus Jensen; +Cc: qemu-devel, kbusch, stefanha, open list:nvme
On 8/25/2022 9:59 PM, Klaus Jensen wrote:
> On Aug 25 21:09, Jinhao Fan wrote:
>>
>>
>>
>>> 在 2022年8月25日,20:39,Klaus Jensen <its@irrelevant.dk> 写道:
>>>
>>> On Aug 25 13:56, Klaus Jensen wrote:
>>>>> On Aug 25 19:16, Jinhao Fan wrote:
>>>>> On 8/25/2022 5:33 PM, Klaus Jensen wrote:
>>>>>> I'm still a bit perplexed by this issue, so I just tried moving
>>>>>> nvme_init_irq_notifier() to the end of nvme_init_cq() and removing this
>>>>>> first_io_cqe thing. I did not observe any particular issues?
>>>>>>
>>>>>> What bad behavior did you encounter, it seems to work fine to me
>>>>>
>>>>> The kernel boots up and got stuck, waiting for interrupts. Then the request
>>>>> times out and got retried three times. Finally the driver seems to decide
>>>>> that the drive is down and continues to boot.
>>>>>
>>>>> I added some prints during debugging and found that the MSI-X message which
>>>>> got registered in KVM via kvm_irqchip_add_msi_route() is not the same as the
>>>>> one actually used in msix_notify().
>>>>>
>>>>> Are you sure you are using KVM's irqfd?
>>>>>
>>>>
>>>> Pretty sure? Using "ioeventfd=on,irq-eventfd=on" on the controller.
>>>>
>>>> And the following patch.
>>>>
>>>>
>>>> diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
>>>> index 30bbda7bb5ae..b2e41d3bd745 100644
>>>> --- i/hw/nvme/ctrl.c
>>>> +++ w/hw/nvme/ctrl.c
>>>> @@ -1490,21 +1490,6 @@ static void nvme_post_cqes(void *opaque)
>>>> if (!pending) {
>>>> n->cq_pending++;
>>>> }
>>>> -
>>>> - if (unlikely(cq->first_io_cqe)) {
>>>> - /*
>>>> - * Initilize event notifier when first cqe is posted. For irqfd
>>>> - * support we need to register the MSI message in KVM. We
>>>> - * can not do this registration at CQ creation time because
>>>> - * Linux's NVMe driver changes the MSI message after CQ creation.
>>>> - */
>>>> - cq->first_io_cqe = false;
>>>> -
>>>> - if (n->params.irq_eventfd) {
>>>> - nvme_init_irq_notifier(n, cq);
>>>> - }
>>>> - }
>>>> -
>>>> }
>>>>
>>>> nvme_irq_assert(n, cq);
>>>> @@ -4914,11 +4899,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
>>>> }
>>>> n->cq[cqid] = cq;
>>>> cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
>>>> +
>>>> /*
>>>> * Only enable irqfd for IO queues since we always emulate admin queue
>>>> * in main loop thread
>>>> */
>>>> - cq->first_io_cqe = cqid != 0;
>>>> + if (cqid && n->params.irq_eventfd) {
>>>> + nvme_init_irq_notifier(n, cq);
>>>> + }
>>>> }
>>>>
>>>>
>>>
>>> From a trace, this is what I observe:
>>>
>>> First, the queue is created and a virq (0) is assigned.
>>>
>>> msix_table_mmio_write dev nvme hwaddr 0xc val 0x0 size 4
>>> pci_nvme_mmio_write addr 0x1000 data 0x7 size 4
>>> pci_nvme_mmio_doorbell_sq sqid 0 new_tail 7
>>> pci_nvme_admin_cmd cid 4117 sqid 0 opc 0x5 opname 'NVME_ADM_CMD_CREATE_CQ'
>>> pci_nvme_create_cq create completion queue, addr=0x104318000, cqid=1, vector=1, qsize=1023, qflags=3, ien=1
>>> kvm_irqchip_add_msi_route dev nvme vector 1 virq 0
>>> kvm_irqchip_commit_routes
>>> pci_nvme_enqueue_req_completion cid 4117 cqid 0 dw0 0x0 dw1 0x0 status 0x0
>>> pci_nvme_irq_msix raising MSI-X IRQ vector 0
>>> pci_nvme_mmio_write addr 0x1004 data 0x7 size 4
>>> pci_nvme_mmio_doorbell_cq cqid 0 new_head 7
>>>
>>> We go on and the SQ is created as well.
>>>
>>> pci_nvme_mmio_write addr 0x1000 data 0x8 size 4
>>> pci_nvme_mmio_doorbell_sq sqid 0 new_tail 8
>>> pci_nvme_admin_cmd cid 4118 sqid 0 opc 0x1 opname 'NVME_ADM_CMD_CREATE_SQ'
>>> pci_nvme_create_sq create submission queue, addr=0x1049a0000, sqid=1, cqid=1, qsize=1023, qflags=1
>>> pci_nvme_enqueue_req_completion cid 4118 cqid 0 dw0 0x0 dw1 0x0 status 0x0
>>> pci_nvme_irq_msix raising MSI-X IRQ vector 0
>>> pci_nvme_mmio_write addr 0x1004 data 0x8 size 4
>>> pci_nvme_mmio_doorbell_cq cqid 0 new_head 8
>>>
>>>
>>> Then i get a bunch of update_msi_routes, but the virq's are not related
>>> to the nvme device.
>>>
>>> However, I then assume we hit queue_request_irq() in the kernel and we
>>> see the MSI-X table updated:
>>>
>>> msix_table_mmio_write dev nvme hwaddr 0x1c val 0x1 size 4
>>> msix_table_mmio_write dev nvme hwaddr 0x10 val 0xfee003f8 size 4
>>> msix_table_mmio_write dev nvme hwaddr 0x14 val 0x0 size 4
>>> msix_table_mmio_write dev nvme hwaddr 0x18 val 0x0 size 4
>>> msix_table_mmio_write dev nvme hwaddr 0x1c val 0x0 size 4
>>> kvm_irqchip_update_msi_route Updating MSI route virq=0
>>> ... other virq updates
>>> kvm_irqchip_commit_routes
>>>
>>> Notice the last trace line. The route for virq 0 is updated.
>>>
>>> Looks to me that the virq route is implicitly updated with the new
>>> message, no?
>>
>> Could you try without the msix masking patch? I suspect our unmask function actually did the “implicit” update here.
>>
>>>
>
> RIGHT.
>
> target/i386/kvm/kvm.c:
>
> 5353 if (!notify_list_inited) {
> 5354 /* For the first time we do add route, add ourselves into
> 5355 * IOMMU's IEC notify list if needed. */
> 5356 X86IOMMUState *iommu = x86_iommu_get_default();
> 5357 if (iommu) {
> 5358 x86_iommu_iec_register_notifier(iommu,
> 5359 kvm_update_msi_routes_all,
> 5360 NULL);
> 5361 }
> 5362 notify_list_inited = true;
> 5363 }
>
> If we have an IOMMU, then it all just works. I always run with a viommu
> configured, so that is why I was not seeing the issue. The masking has
> nothing to do with it.
>
> I wonder if this can be made to work without the iommu as well...
Yes, I didn't use IOMMU in my tests. Is it possible that there is some
unstated requirement that irqfd only works with IOMMU enabled? :)
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd
2022-08-26 15:12 [PATCH v2 0/3] iothread and irqfd support Jinhao Fan
@ 2022-08-26 15:12 ` Jinhao Fan
0 siblings, 0 replies; 13+ messages in thread
From: Jinhao Fan @ 2022-08-26 15:12 UTC (permalink / raw)
To: qemu-devel
Cc: its, kbusch, stefanha, Jinhao Fan, Klaus Jensen, open list:nvme
When the new option 'irq-eventfd' is turned on, the IO emulation code
signals an eventfd when it want to (de)assert an irq. The main loop
eventfd handler does the actual irq (de)assertion. This paves the way
for iothread support since QEMU's interrupt emulation is not thread
safe.
Asserting and deasserting irq with eventfd has some performance
implications. For small queue depth it increases request latency but
for large queue depth it effectively coalesces irqs.
Comparison (KIOPS):
QD 1 4 16 64
QEMU 38 123 210 329
irq-eventfd 32 106 240 364
Signed-off-by: Jinhao Fan <fanjinhao21s@ict.ac.cn>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/nvme/ctrl.c | 120 ++++++++++++++++++++++++++++++++++++++++++-------
hw/nvme/nvme.h | 3 ++
2 files changed, 106 insertions(+), 17 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 87aeba0564..51792f3955 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -526,34 +526,57 @@ static void nvme_irq_check(NvmeCtrl *n)
}
}
+static void nvme_irq_do_assert(NvmeCtrl *n, NvmeCQueue *cq)
+{
+ if (msix_enabled(&(n->parent_obj))) {
+ trace_pci_nvme_irq_msix(cq->vector);
+ msix_notify(&(n->parent_obj), cq->vector);
+ } else {
+ trace_pci_nvme_irq_pin();
+ assert(cq->vector < 32);
+ n->irq_status |= 1 << cq->vector;
+ nvme_irq_check(n);
+ }
+}
+
static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
{
if (cq->irq_enabled) {
- if (msix_enabled(&(n->parent_obj))) {
- trace_pci_nvme_irq_msix(cq->vector);
- msix_notify(&(n->parent_obj), cq->vector);
+ if (cq->assert_notifier.initialized) {
+ event_notifier_set(&cq->assert_notifier);
} else {
- trace_pci_nvme_irq_pin();
- assert(cq->vector < 32);
- n->irq_status |= 1 << cq->vector;
- nvme_irq_check(n);
+ nvme_irq_do_assert(n, cq);
}
} else {
trace_pci_nvme_irq_masked();
}
}
+static void nvme_irq_do_deassert(NvmeCtrl *n, NvmeCQueue *cq)
+{
+ if (msix_enabled(&(n->parent_obj))) {
+ return;
+ } else {
+ assert(cq->vector < 32);
+ if (!n->cq_pending) {
+ n->irq_status &= ~(1 << cq->vector);
+ }
+ nvme_irq_check(n);
+ }
+}
+
static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
{
if (cq->irq_enabled) {
- if (msix_enabled(&(n->parent_obj))) {
- return;
+ if (cq->deassert_notifier.initialized) {
+ /*
+ * The deassert notifier will only be initialized when MSI-X is NOT
+ * in use. Therefore no need to worry about extra eventfd syscall
+ * for pin-based interrupts.
+ */
+ event_notifier_set(&cq->deassert_notifier);
} else {
- assert(cq->vector < 32);
- if (!n->cq_pending) {
- n->irq_status &= ~(1 << cq->vector);
- }
- nvme_irq_check(n);
+ nvme_irq_do_deassert(n, cq);
}
}
}
@@ -1338,6 +1361,50 @@ static void nvme_update_cq_head(NvmeCQueue *cq)
trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
}
+static void nvme_assert_notifier_read(EventNotifier *e)
+{
+ NvmeCQueue *cq = container_of(e, NvmeCQueue, assert_notifier);
+ if (event_notifier_test_and_clear(e)) {
+ nvme_irq_do_assert(cq->ctrl, cq);
+ }
+}
+
+static void nvme_deassert_notifier_read(EventNotifier *e)
+{
+ NvmeCQueue *cq = container_of(e, NvmeCQueue, deassert_notifier);
+ if (event_notifier_test_and_clear(e)) {
+ nvme_irq_do_deassert(cq->ctrl, cq);
+ }
+}
+
+static void nvme_init_irq_notifier(NvmeCtrl *n, NvmeCQueue *cq)
+{
+ int ret;
+
+ ret = event_notifier_init(&cq->assert_notifier, 0);
+ if (ret < 0) {
+ return;
+ }
+
+ event_notifier_set_handler(&cq->assert_notifier,
+ nvme_assert_notifier_read);
+
+ if (!msix_enabled(&n->parent_obj)) {
+ ret = event_notifier_init(&cq->deassert_notifier, 0);
+ if (ret < 0) {
+ event_notifier_set_handler(&cq->assert_notifier, NULL);
+ event_notifier_cleanup(&cq->assert_notifier);
+
+ return;
+ }
+
+ event_notifier_set_handler(&cq->deassert_notifier,
+ nvme_deassert_notifier_read);
+ }
+
+ return;
+}
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -1377,8 +1444,10 @@ static void nvme_post_cqes(void *opaque)
QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
}
if (cq->tail != cq->head) {
- if (cq->irq_enabled && !pending) {
- n->cq_pending++;
+ if (cq->irq_enabled) {
+ if (!pending) {
+ n->cq_pending++;
+ }
}
nvme_irq_assert(n, cq);
@@ -4705,6 +4774,14 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
event_notifier_set_handler(&cq->notifier, NULL);
event_notifier_cleanup(&cq->notifier);
}
+ if (cq->assert_notifier.initialized) {
+ event_notifier_set_handler(&cq->assert_notifier, NULL);
+ event_notifier_cleanup(&cq->assert_notifier);
+ }
+ if (cq->deassert_notifier.initialized) {
+ event_notifier_set_handler(&cq->deassert_notifier, NULL);
+ event_notifier_cleanup(&cq->deassert_notifier);
+ }
if (msix_enabled(&n->parent_obj)) {
msix_vector_unuse(&n->parent_obj, cq->vector);
}
@@ -4734,7 +4811,7 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
n->cq_pending--;
}
- nvme_irq_deassert(n, cq);
+ nvme_irq_do_deassert(n, cq);
trace_pci_nvme_del_cq(qid);
nvme_free_cq(cq, n);
return NVME_SUCCESS;
@@ -4772,6 +4849,14 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
}
n->cq[cqid] = cq;
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+
+ /*
+ * Only enable irq eventfd for IO queues since we always emulate admin
+ * queue in main loop thread.
+ */
+ if (cqid && n->params.irq_eventfd) {
+ nvme_init_irq_notifier(n, cq);
+ }
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
@@ -7671,6 +7756,7 @@ static Property nvme_props[] = {
DEFINE_PROP_BOOL("use-intel-id", NvmeCtrl, params.use_intel_id, false),
DEFINE_PROP_BOOL("legacy-cmb", NvmeCtrl, params.legacy_cmb, false),
DEFINE_PROP_BOOL("ioeventfd", NvmeCtrl, params.ioeventfd, false),
+ DEFINE_PROP_BOOL("irq-eventfd", NvmeCtrl, params.irq_eventfd, false),
DEFINE_PROP_UINT8("zoned.zasl", NvmeCtrl, params.zasl, 0),
DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
params.auto_transition_zones, true),
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 79f5c281c2..4850d3e965 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -398,6 +398,8 @@ typedef struct NvmeCQueue {
uint64_t ei_addr;
QEMUTimer *timer;
EventNotifier notifier;
+ EventNotifier assert_notifier;
+ EventNotifier deassert_notifier;
bool ioeventfd_enabled;
QTAILQ_HEAD(, NvmeSQueue) sq_list;
QTAILQ_HEAD(, NvmeRequest) req_list;
@@ -422,6 +424,7 @@ typedef struct NvmeParams {
bool auto_transition_zones;
bool legacy_cmb;
bool ioeventfd;
+ bool irq_eventfd;
uint8_t sriov_max_vfs;
uint16_t sriov_vq_flexible;
uint16_t sriov_vi_flexible;
--
2.25.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
end of thread, other threads:[~2022-08-26 15:21 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-25 7:47 [PATCH v2 0/3] hw/nvme: add irqfd support Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd Jinhao Fan
2022-08-25 9:33 ` Klaus Jensen
2022-08-25 11:16 ` Jinhao Fan
2022-08-25 11:56 ` Klaus Jensen
2022-08-25 12:38 ` Klaus Jensen
2022-08-25 13:09 ` Jinhao Fan
2022-08-25 13:59 ` Klaus Jensen
2022-08-25 14:11 ` Jinhao Fan
2022-08-25 14:05 ` Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 2/3] hw/nvme: use KVM irqfd when available Jinhao Fan
2022-08-25 7:47 ` [PATCH v2 3/3] hw/nvme: add MSI-x mask handlers for irqfd Jinhao Fan
2022-08-26 15:12 [PATCH v2 0/3] iothread and irqfd support Jinhao Fan
2022-08-26 15:12 ` [PATCH v2 1/3] hw/nvme: support irq(de)assertion with eventfd Jinhao Fan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.