All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup
@ 2020-07-20 11:37 Klaus Jensen
  2020-07-20 11:37 ` [PATCH 01/16] hw/block/nvme: memset preallocated requests structures Klaus Jensen
                   ` (16 more replies)
  0 siblings, 17 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

This series consists of patches that refactors dma read/write and adds a
number of address mapping helper functions.

Based-on: <20200706061303.246057-1-its@irrelevant.dk>

Klaus Jensen (16):
  hw/block/nvme: memset preallocated requests structures
  hw/block/nvme: add mapping helpers
  hw/block/nvme: replace dma_acct with blk_acct equivalent
  hw/block/nvme: remove redundant has_sg member
  hw/block/nvme: refactor dma read/write
  hw/block/nvme: pass request along for tracing
  hw/block/nvme: add request mapping helper
  hw/block/nvme: verify validity of prp lists in the cmb
  hw/block/nvme: refactor request bounds checking
  hw/block/nvme: add check for mdts
  hw/block/nvme: be consistent about zeros vs zeroes
  hw/block/nvme: refactor NvmeRequest clearing
  hw/block/nvme: add a namespace reference in NvmeRequest
  hw/block/nvme: consolidate qsg/iov clearing
  hw/block/nvme: remove NvmeCmd parameter
  hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp

 block/nvme.c          |   4 +-
 hw/block/nvme.c       | 498 +++++++++++++++++++++++++++---------------
 hw/block/nvme.h       |   4 +-
 hw/block/trace-events |   4 +
 include/block/nvme.h  |   4 +-
 5 files changed, 331 insertions(+), 183 deletions(-)

-- 
2.27.0



^ permalink raw reply	[flat|nested] 58+ messages in thread

* [PATCH 01/16] hw/block/nvme: memset preallocated requests structures
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-20 11:37 ` [PATCH 02/16] hw/block/nvme: add mapping helpers Klaus Jensen
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

This is preparatory to subsequent patches that change how QSGs/IOVs are
handled. It is important that the qsg and iov members of the NvmeRequest
are initially zeroed.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 841c18920c4e..4d7b730a62b6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -653,7 +653,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
     sq->size = size;
     sq->cqid = cqid;
     sq->head = sq->tail = 0;
-    sq->io_req = g_new(NvmeRequest, sq->size);
+    sq->io_req = g_new0(NvmeRequest, sq->size);
 
     QTAILQ_INIT(&sq->req_list);
     QTAILQ_INIT(&sq->out_req_list);
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
  2020-07-20 11:37 ` [PATCH 01/16] hw/block/nvme: memset preallocated requests structures Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 13:57   ` Maxim Levitsky
                     ` (2 more replies)
  2020-07-20 11:37 ` [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent Klaus Jensen
                   ` (14 subsequent siblings)
  16 siblings, 3 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
use them in nvme_map_prp.

This fixes a bug where in the case of a CMB transfer, the device would
map to the buffer with a wrong length.

Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
 hw/block/trace-events |   2 +
 2 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 4d7b730a62b6..9b1a080cdc70 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -109,6 +109,11 @@ static uint16_t nvme_sqid(NvmeRequest *req)
     return le16_to_cpu(req->sq->sqid);
 }
 
+static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
+{
+    return &n->cmbuf[addr - n->ctrl_mem.addr];
+}
+
 static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 {
     hwaddr low = n->ctrl_mem.addr;
@@ -120,7 +125,7 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
     if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
-        memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
+        memcpy(buf, nvme_addr_to_cmb(n, addr), size);
         return;
     }
 
@@ -203,29 +208,91 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
     }
 }
 
+static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
+                                  size_t len)
+{
+    if (!len) {
+        return NVME_SUCCESS;
+    }
+
+    trace_pci_nvme_map_addr_cmb(addr, len);
+
+    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
+        return NVME_DATA_TRAS_ERROR;
+    }
+
+    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
+
+    return NVME_SUCCESS;
+}
+
+static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+                              hwaddr addr, size_t len)
+{
+    if (!len) {
+        return NVME_SUCCESS;
+    }
+
+    trace_pci_nvme_map_addr(addr, len);
+
+    if (nvme_addr_is_cmb(n, addr)) {
+        if (qsg && qsg->sg) {
+            return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+        }
+
+        assert(iov);
+
+        if (!iov->iov) {
+            qemu_iovec_init(iov, 1);
+        }
+
+        return nvme_map_addr_cmb(n, iov, addr, len);
+    }
+
+    if (iov && iov->iov) {
+        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
+    }
+
+    assert(qsg);
+
+    if (!qsg->sg) {
+        pci_dma_sglist_init(qsg, &n->parent_obj, 1);
+    }
+
+    qemu_sglist_add(qsg, addr, len);
+
+    return NVME_SUCCESS;
+}
+
 static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                              uint64_t prp2, uint32_t len, NvmeCtrl *n)
 {
     hwaddr trans_len = n->page_size - (prp1 % n->page_size);
     trans_len = MIN(len, trans_len);
     int num_prps = (len >> n->page_bits) + 1;
+    uint16_t status;
 
     if (unlikely(!prp1)) {
         trace_pci_nvme_err_invalid_prp();
         return NVME_INVALID_FIELD | NVME_DNR;
-    } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
-               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
-        qsg->nsg = 0;
+    }
+
+    if (nvme_addr_is_cmb(n, prp1)) {
         qemu_iovec_init(iov, num_prps);
-        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
     } else {
         pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
-        qemu_sglist_add(qsg, prp1, trans_len);
     }
+
+    status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
+    if (status) {
+        goto unmap;
+    }
+
     len -= trans_len;
     if (len) {
         if (unlikely(!prp2)) {
             trace_pci_nvme_err_invalid_prp2_missing();
+            status = NVME_INVALID_FIELD | NVME_DNR;
             goto unmap;
         }
         if (len > n->page_size) {
@@ -242,6 +309,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                 if (i == n->max_prp_ents - 1 && len > n->page_size) {
                     if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                         trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
+                        status = NVME_INVALID_FIELD | NVME_DNR;
                         goto unmap;
                     }
 
@@ -255,14 +323,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
 
                 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                     trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
+                    status = NVME_INVALID_FIELD | NVME_DNR;
                     goto unmap;
                 }
 
                 trans_len = MIN(len, n->page_size);
-                if (qsg->nsg){
-                    qemu_sglist_add(qsg, prp_ent, trans_len);
-                } else {
-                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
+                status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
+                if (status) {
+                    goto unmap;
                 }
                 len -= trans_len;
                 i++;
@@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
         } else {
             if (unlikely(prp2 & (n->page_size - 1))) {
                 trace_pci_nvme_err_invalid_prp2_align(prp2);
+                status = NVME_INVALID_FIELD | NVME_DNR;
                 goto unmap;
             }
-            if (qsg->nsg) {
-                qemu_sglist_add(qsg, prp2, len);
-            } else {
-                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
+            status = nvme_map_addr(n, qsg, iov, prp2, len);
+            if (status) {
+                goto unmap;
             }
         }
     }
     return NVME_SUCCESS;
 
- unmap:
-    qemu_sglist_destroy(qsg);
-    return NVME_INVALID_FIELD | NVME_DNR;
+unmap:
+    if (iov && iov->iov) {
+        qemu_iovec_destroy(iov);
+    }
+
+    if (qsg && qsg->sg) {
+        qemu_sglist_destroy(qsg);
+    }
+
+    return status;
 }
 
 static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 7b7303cab1dd..f3b2d004e078 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -33,6 +33,8 @@ pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
 pci_nvme_irq_pin(void) "pulsing IRQ pin"
 pci_nvme_irq_masked(void) "IRQ is masked"
 pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
+pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
+pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
 pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
 pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8""
 pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
  2020-07-20 11:37 ` [PATCH 01/16] hw/block/nvme: memset preallocated requests structures Klaus Jensen
  2020-07-20 11:37 ` [PATCH 02/16] hw/block/nvme: add mapping helpers Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:23   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 04/16] hw/block/nvme: remove redundant has_sg member Klaus Jensen
                   ` (13 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

The QSG isn't always initialized, so accounting could be wrong. Issue a
call to blk_acct_start instead with the size taken from the QSG or IOV
depending on the kind of I/O.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9b1a080cdc70..cb236d1c8c46 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -620,9 +620,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
-    dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
     if (req->qsg.nsg > 0) {
         req->has_sg = true;
+        block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
+                         acct);
         req->aiocb = is_write ?
             dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
                           nvme_rw_cb, req) :
@@ -630,6 +631,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
                          nvme_rw_cb, req);
     } else {
         req->has_sg = false;
+        block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
+                         acct);
         req->aiocb = is_write ?
             blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
                             req) :
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 04/16] hw/block/nvme: remove redundant has_sg member
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (2 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:29   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 05/16] hw/block/nvme: refactor dma read/write Klaus Jensen
                   ` (12 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Remove the has_sg member from NvmeRequest since it's redundant.

Also, make sure the request iov is destroyed at completion time.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 11 ++++++-----
 hw/block/nvme.h |  1 -
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index cb236d1c8c46..6a1a1626b87b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -548,16 +548,20 @@ static void nvme_rw_cb(void *opaque, int ret)
         block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
         req->status = NVME_INTERNAL_DEV_ERROR;
     }
-    if (req->has_sg) {
+
+    if (req->qsg.nalloc) {
         qemu_sglist_destroy(&req->qsg);
     }
+    if (req->iov.nalloc) {
+        qemu_iovec_destroy(&req->iov);
+    }
+
     nvme_enqueue_req_completion(cq, req);
 }
 
 static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     NvmeRequest *req)
 {
-    req->has_sg = false;
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
          BLOCK_ACCT_FLUSH);
     req->aiocb = blk_aio_flush(n->conf.blk, nvme_rw_cb, req);
@@ -583,7 +587,6 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
         return NVME_LBA_RANGE | NVME_DNR;
     }
 
-    req->has_sg = false;
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
                      BLOCK_ACCT_WRITE);
     req->aiocb = blk_aio_pwrite_zeroes(n->conf.blk, offset, count,
@@ -621,7 +624,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     }
 
     if (req->qsg.nsg > 0) {
-        req->has_sg = true;
         block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
                          acct);
         req->aiocb = is_write ?
@@ -630,7 +632,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
             dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
                          nvme_rw_cb, req);
     } else {
-        req->has_sg = false;
         block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
                          acct);
         req->aiocb = is_write ?
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 0b6a8ae66559..5519b5cc7686 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -22,7 +22,6 @@ typedef struct NvmeRequest {
     struct NvmeSQueue       *sq;
     BlockAIOCB              *aiocb;
     uint16_t                status;
-    bool                    has_sg;
     NvmeCqe                 cqe;
     BlockAcctCookie         acct;
     QEMUSGList              qsg;
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 05/16] hw/block/nvme: refactor dma read/write
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (3 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 04/16] hw/block/nvme: remove redundant has_sg member Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:35   ` Minwoo Im
  2020-07-29 17:35   ` Maxim Levitsky
  2020-07-20 11:37 ` [PATCH 06/16] hw/block/nvme: pass request along for tracing Klaus Jensen
                   ` (11 subsequent siblings)
  16 siblings, 2 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Refactor the nvme_dma_{read,write}_prp functions into a common function
taking a DMADirection parameter.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 88 ++++++++++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 45 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 6a1a1626b87b..d314a604db81 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -361,55 +361,50 @@ unmap:
     return status;
 }
 
-static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-                                   uint64_t prp1, uint64_t prp2)
+static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
+                             uint64_t prp1, uint64_t prp2, DMADirection dir)
 {
     QEMUSGList qsg;
     QEMUIOVector iov;
     uint16_t status = NVME_SUCCESS;
 
-    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
-        return NVME_INVALID_FIELD | NVME_DNR;
+    status = nvme_map_prp(&qsg, &iov, prp1, prp2, len, n);
+    if (status) {
+        return status;
     }
+
     if (qsg.nsg > 0) {
-        if (dma_buf_write(ptr, len, &qsg)) {
-            status = NVME_INVALID_FIELD | NVME_DNR;
+        uint64_t residual;
+
+        if (dir == DMA_DIRECTION_TO_DEVICE) {
+            residual = dma_buf_write(ptr, len, &qsg);
+        } else {
+            residual = dma_buf_read(ptr, len, &qsg);
         }
-        qemu_sglist_destroy(&qsg);
-    } else {
-        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
-            status = NVME_INVALID_FIELD | NVME_DNR;
-        }
-        qemu_iovec_destroy(&iov);
-    }
-    return status;
-}
 
-static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-    uint64_t prp1, uint64_t prp2)
-{
-    QEMUSGList qsg;
-    QEMUIOVector iov;
-    uint16_t status = NVME_SUCCESS;
-
-    trace_pci_nvme_dma_read(prp1, prp2);
-
-    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
-        return NVME_INVALID_FIELD | NVME_DNR;
-    }
-    if (qsg.nsg > 0) {
-        if (unlikely(dma_buf_read(ptr, len, &qsg))) {
+        if (unlikely(residual)) {
             trace_pci_nvme_err_invalid_dma();
             status = NVME_INVALID_FIELD | NVME_DNR;
         }
+
         qemu_sglist_destroy(&qsg);
     } else {
-        if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
+        size_t bytes;
+
+        if (dir == DMA_DIRECTION_TO_DEVICE) {
+            bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
+        } else {
+            bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
+        }
+
+        if (unlikely(bytes != len)) {
             trace_pci_nvme_err_invalid_dma();
             status = NVME_INVALID_FIELD | NVME_DNR;
         }
+
         qemu_iovec_destroy(&iov);
     }
+
     return status;
 }
 
@@ -840,8 +835,8 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
         nvme_clear_events(n, NVME_AER_TYPE_SMART);
     }
 
-    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
-                             prp2);
+    return nvme_dma_prp(n, (uint8_t *) &smart + off, trans_len, prp1, prp2,
+                        DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
@@ -862,8 +857,8 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
 
     trans_len = MIN(sizeof(fw_log) - off, buf_len);
 
-    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
-                             prp2);
+    return nvme_dma_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, prp2,
+                        DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
@@ -887,7 +882,8 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
 
     trans_len = MIN(sizeof(errlog) - off, buf_len);
 
-    return nvme_dma_read_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2);
+    return nvme_dma_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2,
+                        DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -1042,8 +1038,8 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
 
     trace_pci_nvme_identify_ctrl();
 
-    return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
-        prp1, prp2);
+    return nvme_dma_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1,
+                        prp2, DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
@@ -1062,8 +1058,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
 
     ns = &n->namespaces[nsid - 1];
 
-    return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
-        prp1, prp2);
+    return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
+                        prp2, DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
@@ -1098,7 +1094,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
             break;
         }
     }
-    ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2);
+    ret = nvme_dma_prp(n, (uint8_t *)list, data_len, prp1, prp2,
+                       DMA_DIRECTION_FROM_DEVICE);
     g_free(list);
     return ret;
 }
@@ -1139,7 +1136,8 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c)
     ns_descrs->uuid.hdr.nidl = NVME_NIDT_UUID_LEN;
     stl_be_p(&ns_descrs->uuid.v, nsid);
 
-    return nvme_dma_read_prp(n, list, NVME_IDENTIFY_DATA_SIZE, prp1, prp2);
+    return nvme_dma_prp(n, list, NVME_IDENTIFY_DATA_SIZE, prp1, prp2,
+                        DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
@@ -1220,8 +1218,8 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
 
     uint64_t timestamp = nvme_get_timestamp(n);
 
-    return nvme_dma_read_prp(n, (uint8_t *)&timestamp,
-                                 sizeof(timestamp), prp1, prp2);
+    return nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), prp1,
+                        prp2, DMA_DIRECTION_FROM_DEVICE);
 }
 
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -1352,8 +1350,8 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
 
-    ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
-                                sizeof(timestamp), prp1, prp2);
+    ret = nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), prp1,
+                       prp2, DMA_DIRECTION_TO_DEVICE);
     if (ret != NVME_SUCCESS) {
         return ret;
     }
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 06/16] hw/block/nvme: pass request along for tracing
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (4 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 05/16] hw/block/nvme: refactor dma read/write Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:49   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 07/16] hw/block/nvme: add request mapping helper Klaus Jensen
                   ` (10 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Pass along the NvmeRequest in various functions since it is very useful
for tracing.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c       | 67 +++++++++++++++++++++++++------------------
 hw/block/trace-events |  1 +
 2 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d314a604db81..f1e04608804b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -264,14 +264,18 @@ static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
-                             uint64_t prp2, uint32_t len, NvmeCtrl *n)
+static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
+                             uint64_t prp1, uint64_t prp2, uint32_t len,
+                             NvmeRequest *req)
 {
     hwaddr trans_len = n->page_size - (prp1 % n->page_size);
     trans_len = MIN(len, trans_len);
     int num_prps = (len >> n->page_bits) + 1;
     uint16_t status;
 
+    trace_pci_nvme_map_prp(nvme_cid(req), trans_len, len, prp1, prp2,
+                           num_prps);
+
     if (unlikely(!prp1)) {
         trace_pci_nvme_err_invalid_prp();
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -362,13 +366,14 @@ unmap:
 }
 
 static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
-                             uint64_t prp1, uint64_t prp2, DMADirection dir)
+                             uint64_t prp1, uint64_t prp2, DMADirection dir,
+                             NvmeRequest *req)
 {
     QEMUSGList qsg;
     QEMUIOVector iov;
     uint16_t status = NVME_SUCCESS;
 
-    status = nvme_map_prp(&qsg, &iov, prp1, prp2, len, n);
+    status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
     if (status) {
         return status;
     }
@@ -613,7 +618,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
         return NVME_LBA_RANGE | NVME_DNR;
     }
 
-    if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
+    if (nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, data_size, req)) {
         block_acct_invalid(blk_get_stats(n->conf.blk), acct);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
@@ -836,7 +841,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
     }
 
     return nvme_dma_prp(n, (uint8_t *) &smart + off, trans_len, prp1, prp2,
-                        DMA_DIRECTION_FROM_DEVICE);
+                        DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
@@ -858,7 +863,7 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
     trans_len = MIN(sizeof(fw_log) - off, buf_len);
 
     return nvme_dma_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, prp2,
-                        DMA_DIRECTION_FROM_DEVICE);
+                        DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
@@ -883,7 +888,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
     trans_len = MIN(sizeof(errlog) - off, buf_len);
 
     return nvme_dma_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2,
-                        DMA_DIRECTION_FROM_DEVICE);
+                        DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -1031,7 +1036,8 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
+                                   NvmeRequest *req)
 {
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);
@@ -1039,10 +1045,11 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
     trace_pci_nvme_identify_ctrl();
 
     return nvme_dma_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1,
-                        prp2, DMA_DIRECTION_FROM_DEVICE);
+                        prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
+                                 NvmeRequest *req)
 {
     NvmeNamespace *ns;
     uint32_t nsid = le32_to_cpu(c->nsid);
@@ -1059,10 +1066,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
     ns = &n->namespaces[nsid - 1];
 
     return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
-                        prp2, DMA_DIRECTION_FROM_DEVICE);
+                        prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c,
+                                     NvmeRequest *req)
 {
     static const int data_len = NVME_IDENTIFY_DATA_SIZE;
     uint32_t min_nsid = le32_to_cpu(c->nsid);
@@ -1095,12 +1103,13 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
         }
     }
     ret = nvme_dma_prp(n, (uint8_t *)list, data_len, prp1, prp2,
-                       DMA_DIRECTION_FROM_DEVICE);
+                       DMA_DIRECTION_FROM_DEVICE, req);
     g_free(list);
     return ret;
 }
 
-static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c,
+                                            NvmeRequest *req)
 {
     uint32_t nsid = le32_to_cpu(c->nsid);
     uint64_t prp1 = le64_to_cpu(c->prp1);
@@ -1137,22 +1146,22 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c)
     stl_be_p(&ns_descrs->uuid.v, nsid);
 
     return nvme_dma_prp(n, list, NVME_IDENTIFY_DATA_SIZE, prp1, prp2,
-                        DMA_DIRECTION_FROM_DEVICE);
+                        DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     NvmeIdentify *c = (NvmeIdentify *)cmd;
 
     switch (le32_to_cpu(c->cns)) {
     case NVME_ID_CNS_NS:
-        return nvme_identify_ns(n, c);
+        return nvme_identify_ns(n, c, req);
     case NVME_ID_CNS_CTRL:
-        return nvme_identify_ctrl(n, c);
+        return nvme_identify_ctrl(n, c, req);
     case NVME_ID_CNS_NS_ACTIVE_LIST:
-        return nvme_identify_nslist(n, c);
+        return nvme_identify_nslist(n, c, req);
     case NVME_ID_CNS_NS_DESCR_LIST:
-        return nvme_identify_ns_descr_list(n, c);
+        return nvme_identify_ns_descr_list(n, c, req);
     default:
         trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -1211,7 +1220,8 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
     return cpu_to_le64(ts.all);
 }
 
-static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
+                                           NvmeRequest *req)
 {
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
@@ -1219,7 +1229,7 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
     uint64_t timestamp = nvme_get_timestamp(n);
 
     return nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), prp1,
-                        prp2, DMA_DIRECTION_FROM_DEVICE);
+                        prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
@@ -1297,7 +1307,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         result = n->features.async_config;
         goto out;
     case NVME_TIMESTAMP:
-        return nvme_get_feature_timestamp(n, cmd);
+        return nvme_get_feature_timestamp(n, cmd, req);
     default:
         break;
     }
@@ -1343,7 +1353,8 @@ out:
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
+                                           NvmeRequest *req)
 {
     uint16_t ret;
     uint64_t timestamp;
@@ -1351,7 +1362,7 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
 
     ret = nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), prp1,
-                       prp2, DMA_DIRECTION_TO_DEVICE);
+                       prp2, DMA_DIRECTION_TO_DEVICE, req);
     if (ret != NVME_SUCCESS) {
         return ret;
     }
@@ -1453,7 +1464,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         n->features.async_config = dw11;
         break;
     case NVME_TIMESTAMP:
-        return nvme_set_feature_timestamp(n, cmd);
+        return nvme_set_feature_timestamp(n, cmd, req);
     default:
         return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
     }
@@ -1495,7 +1506,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     case NVME_ADM_CMD_CREATE_CQ:
         return nvme_create_cq(n, cmd);
     case NVME_ADM_CMD_IDENTIFY:
-        return nvme_identify(n, cmd);
+        return nvme_identify(n, cmd, req);
     case NVME_ADM_CMD_ABORT:
         return nvme_abort(n, cmd, req);
     case NVME_ADM_CMD_SET_FEATURES:
diff --git a/hw/block/trace-events b/hw/block/trace-events
index f3b2d004e078..6d0cd588c786 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -35,6 +35,7 @@ pci_nvme_irq_masked(void) "IRQ is masked"
 pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
 pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
 pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
+pci_nvme_map_prp(uint16_t cid, uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t prp2, int num_prps) "cid %"PRIu16" trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 0x%"PRIx64" num_prps %d"
 pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
 pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8""
 pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 07/16] hw/block/nvme: add request mapping helper
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (5 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 06/16] hw/block/nvme: pass request along for tracing Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:52   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb Klaus Jensen
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Introduce the nvme_map helper to remove some noise in the main nvme_rw
function.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index f1e04608804b..68c33a11c144 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -413,6 +413,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     return status;
 }
 
+static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
+                         NvmeRequest *req)
+{
+    uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
+
+    return nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
+}
+
 static void nvme_post_cqes(void *opaque)
 {
     NvmeCQueue *cq = opaque;
@@ -600,8 +609,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
     uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
     uint64_t slba = le64_to_cpu(rw->slba);
-    uint64_t prp1 = le64_to_cpu(rw->dptr.prp1);
-    uint64_t prp2 = le64_to_cpu(rw->dptr.prp2);
 
     uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
@@ -618,7 +625,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
         return NVME_LBA_RANGE | NVME_DNR;
     }
 
-    if (nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, data_size, req)) {
+    if (nvme_map(n, cmd, data_size, req)) {
         block_acct_invalid(blk_get_stats(n->conf.blk), acct);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (6 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 07/16] hw/block/nvme: add request mapping helper Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:54   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 09/16] hw/block/nvme: refactor request bounds checking Klaus Jensen
                   ` (8 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Before this patch the device already supported PRP lists in the CMB, but
it did not check for the validity of it nor announced the support in the
Identify Controller data structure LISTS field.

If some of the PRPs in a PRP list are in the CMB, then ALL entries must
be there. This patch makes sure that requirement is verified as well as
properly announcing support for PRP lists in the CMB.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 68c33a11c144..530f5155eac0 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -272,6 +272,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
     trans_len = MIN(len, trans_len);
     int num_prps = (len >> n->page_bits) + 1;
     uint16_t status;
+    bool prp_list_in_cmb = false;
 
     trace_pci_nvme_map_prp(nvme_cid(req), trans_len, len, prp1, prp2,
                            num_prps);
@@ -299,11 +300,16 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
             status = NVME_INVALID_FIELD | NVME_DNR;
             goto unmap;
         }
+
         if (len > n->page_size) {
             uint64_t prp_list[n->max_prp_ents];
             uint32_t nents, prp_trans;
             int i = 0;
 
+            if (nvme_addr_is_cmb(n, prp2)) {
+                prp_list_in_cmb = true;
+            }
+
             nents = (len + n->page_size - 1) >> n->page_bits;
             prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
             nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
@@ -317,6 +323,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
                         goto unmap;
                     }
 
+                    if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
+                        status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
+                        goto unmap;
+                    }
+
                     i = 0;
                     nents = (len + n->page_size - 1) >> n->page_bits;
                     prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
@@ -336,6 +347,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
                 if (status) {
                     goto unmap;
                 }
+
                 len -= trans_len;
                 i++;
             }
@@ -2144,7 +2156,7 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
 
     NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
     NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
-    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 1);
     NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
     NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
     NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 09/16] hw/block/nvme: refactor request bounds checking
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (7 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 15:56   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 10/16] hw/block/nvme: add check for mdts Klaus Jensen
                   ` (7 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Hoist bounds checking into its own function and check for wrap-around.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 530f5155eac0..35bc1a7b7e21 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -553,6 +553,18 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
     }
 }
 
+static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
+                                         uint64_t slba, uint32_t nlb)
+{
+    uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
+
+    if (unlikely(UINT64_MAX - slba < nlb || slba + nlb > nsze)) {
+        return NVME_LBA_RANGE | NVME_DNR;
+    }
+
+    return NVME_SUCCESS;
+}
+
 static void nvme_rw_cb(void *opaque, int ret)
 {
     NvmeRequest *req = opaque;
@@ -600,12 +612,14 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
     uint64_t offset = slba << data_shift;
     uint32_t count = nlb << data_shift;
+    uint16_t status;
 
     trace_pci_nvme_write_zeroes(nvme_cid(req), slba, nlb);
 
-    if (unlikely(slba + nlb > ns->id_ns.nsze)) {
+    status = nvme_check_bounds(n, ns, slba, nlb);
+    if (status) {
         trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
-        return NVME_LBA_RANGE | NVME_DNR;
+        return status;
     }
 
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
@@ -628,13 +642,15 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint64_t data_offset = slba << data_shift;
     int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
     enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
+    uint16_t status;
 
     trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
 
-    if (unlikely((slba + nlb) > ns->id_ns.nsze)) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+    status = nvme_check_bounds(n, ns, slba, nlb);
+    if (status) {
         trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
-        return NVME_LBA_RANGE | NVME_DNR;
+        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+        return status;
     }
 
     if (nvme_map(n, cmd, data_size, req)) {
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 10/16] hw/block/nvme: add check for mdts
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (8 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 09/16] hw/block/nvme: refactor request bounds checking Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:00   ` Minwoo Im
  2020-07-20 11:37 ` [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes Klaus Jensen
                   ` (6 subsequent siblings)
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Add 'mdts' device parameter to control the Maximum Data Transfer Size of
the controller and check that it is respected.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c       | 32 ++++++++++++++++++++++++++++++--
 hw/block/nvme.h       |  1 +
 hw/block/trace-events |  1 +
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 35bc1a7b7e21..10fe53873ae9 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -18,9 +18,10 @@
  * Usage: add options:
  *      -drive file=<file>,if=none,id=<drive_id>
  *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
- *              cmb_size_mb=<cmb_size_mb[optional]>, \
+ *              [cmb_size_mb=<cmb_size_mb>,] \
  *              [pmrdev=<mem_backend_file_id>,] \
- *              max_ioqpairs=<N[optional]>
+ *              [max_ioqpairs=<N>,] \
+ *              [mdts=<N>]
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -553,6 +554,17 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
     }
 }
 
+static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
+{
+    uint8_t mdts = n->params.mdts;
+
+    if (mdts && len > n->page_size << mdts) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    return NVME_SUCCESS;
+}
+
 static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
                                          uint64_t slba, uint32_t nlb)
 {
@@ -646,6 +658,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
 
     trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
 
+    status = nvme_check_mdts(n, data_size);
+    if (status) {
+        trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
+        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+        return status;
+    }
+
     status = nvme_check_bounds(n, ns, slba, nlb);
     if (status) {
         trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
@@ -938,6 +957,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint32_t numdl, numdu;
     uint64_t off, lpol, lpou;
     size_t   len;
+    uint16_t status;
 
     numdl = (dw10 >> 16);
     numdu = (dw11 & 0xffff);
@@ -953,6 +973,12 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
 
+    status = nvme_check_mdts(n, len);
+    if (status) {
+        trace_pci_nvme_err_mdts(nvme_cid(req), len);
+        return status;
+    }
+
     switch (lid) {
     case NVME_LOG_ERROR_INFO:
         return nvme_error_info(n, cmd, rae, len, off, req);
@@ -2275,6 +2301,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     id->ieee[0] = 0x00;
     id->ieee[1] = 0x02;
     id->ieee[2] = 0xb3;
+    id->mdts = n->params.mdts;
     id->ver = cpu_to_le32(NVME_SPEC_VER);
     id->oacs = cpu_to_le16(0);
 
@@ -2394,6 +2421,7 @@ static Property nvme_props[] = {
     DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
     DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
     DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
+    DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 5519b5cc7686..137cd8c2bf20 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -11,6 +11,7 @@ typedef struct NvmeParams {
     uint32_t cmb_size_mb;
     uint8_t  aerl;
     uint32_t aer_max_queued;
+    uint8_t  mdts;
 } NvmeParams;
 
 typedef struct NvmeAsyncEvent {
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 6d0cd588c786..5d7d4679650b 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -85,6 +85,7 @@ pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 
 # nvme traces for error conditions
+pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %"PRIu64""
 pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
 pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
 pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (9 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 10/16] hw/block/nvme: add check for mdts Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:01   ` Minwoo Im
  2020-07-29 17:39   ` Maxim Levitsky
  2020-07-20 11:37 ` [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing Klaus Jensen
                   ` (5 subsequent siblings)
  16 siblings, 2 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, Fam Zheng, qemu-block, Klaus Jensen, Max Reitz,
	Keith Busch, Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

The NVM Express specification generally uses 'zeroes' and not 'zeros',
so let us align with it.

Cc: Fam Zheng <fam@euphon.net>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 block/nvme.c         | 4 ++--
 hw/block/nvme.c      | 8 ++++----
 include/block/nvme.h | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index c1c4c07ac6cc..05485fdd1189 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -537,7 +537,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
                           s->page_size / sizeof(uint64_t) * s->page_size);
 
     oncs = le16_to_cpu(idctrl->oncs);
-    s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROS);
+    s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES);
     s->supports_discard = !!(oncs & NVME_ONCS_DSM);
 
     memset(resp, 0, 4096);
@@ -1201,7 +1201,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
     }
 
     NvmeCmd cmd = {
-        .opcode = NVME_CMD_WRITE_ZEROS,
+        .opcode = NVME_CMD_WRITE_ZEROES,
         .nsid = cpu_to_le32(s->nsid),
         .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
         .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 10fe53873ae9..e2932239c661 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -614,7 +614,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
+static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     NvmeRequest *req)
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
@@ -714,8 +714,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
         return nvme_flush(n, ns, cmd, req);
-    case NVME_CMD_WRITE_ZEROS:
-        return nvme_write_zeros(n, ns, cmd, req);
+    case NVME_CMD_WRITE_ZEROES:
+        return nvme_write_zeroes(n, ns, cmd, req);
     case NVME_CMD_WRITE:
     case NVME_CMD_READ:
         return nvme_rw(n, ns, cmd, req);
@@ -2328,7 +2328,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
-    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP |
+    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
                            NVME_ONCS_FEATURES);
 
     subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 370df7fc0570..65e68a82c897 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -460,7 +460,7 @@ enum NvmeIoCommands {
     NVME_CMD_READ               = 0x02,
     NVME_CMD_WRITE_UNCOR        = 0x04,
     NVME_CMD_COMPARE            = 0x05,
-    NVME_CMD_WRITE_ZEROS        = 0x08,
+    NVME_CMD_WRITE_ZEROES       = 0x08,
     NVME_CMD_DSM                = 0x09,
 };
 
@@ -838,7 +838,7 @@ enum NvmeIdCtrlOncs {
     NVME_ONCS_COMPARE       = 1 << 0,
     NVME_ONCS_WRITE_UNCORR  = 1 << 1,
     NVME_ONCS_DSM           = 1 << 2,
-    NVME_ONCS_WRITE_ZEROS   = 1 << 3,
+    NVME_ONCS_WRITE_ZEROES  = 1 << 3,
     NVME_ONCS_FEATURES      = 1 << 4,
     NVME_ONCS_RESRVATIONS   = 1 << 5,
     NVME_ONCS_TIMESTAMP     = 1 << 6,
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (10 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:04   ` Minwoo Im
  2020-07-29 17:47   ` Maxim Levitsky
  2020-07-20 11:37 ` [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest Klaus Jensen
                   ` (4 subsequent siblings)
  16 siblings, 2 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

Move clearing of the structure from "clear before use" to "clear
after use".

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index e2932239c661..431f26c2f589 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -209,6 +209,11 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
     }
 }
 
+static void nvme_req_clear(NvmeRequest *req)
+{
+    memset(&req->cqe, 0x0, sizeof(req->cqe));
+}
+
 static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
                                   size_t len)
 {
@@ -458,6 +463,7 @@ static void nvme_post_cqes(void *opaque)
         nvme_inc_cq_tail(cq);
         pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
             sizeof(req->cqe));
+        nvme_req_clear(req);
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
     }
     if (cq->tail != cq->head) {
@@ -1601,7 +1607,6 @@ static void nvme_process_sq(void *opaque)
         req = QTAILQ_FIRST(&sq->req_list);
         QTAILQ_REMOVE(&sq->req_list, req, entry);
         QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
-        memset(&req->cqe, 0, sizeof(req->cqe));
         req->cqe.cid = cmd.cid;
 
         status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (11 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:06   ` Minwoo Im
  2020-07-29 17:53   ` Maxim Levitsky
  2020-07-20 11:37 ` [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing Klaus Jensen
                   ` (3 subsequent siblings)
  16 siblings, 2 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

Instead of passing around the NvmeNamespace, add it as a member in the
NvmeRequest structure.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 21 ++++++++++-----------
 hw/block/nvme.h |  1 +
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 431f26c2f589..54cd20f1ce22 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -211,6 +211,7 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
 
 static void nvme_req_clear(NvmeRequest *req)
 {
+    req->ns = NULL;
     memset(&req->cqe, 0x0, sizeof(req->cqe));
 }
 
@@ -610,8 +611,7 @@ static void nvme_rw_cb(void *opaque, int ret)
     nvme_enqueue_req_completion(cq, req);
 }
 
-static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
-    NvmeRequest *req)
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
          BLOCK_ACCT_FLUSH);
@@ -620,10 +620,10 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
-    NvmeRequest *req)
+static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+    NvmeNamespace *ns = req->ns;
     const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
     uint64_t slba = le64_to_cpu(rw->slba);
@@ -647,10 +647,10 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
-    NvmeRequest *req)
+static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+    NvmeNamespace *ns = req->ns;
     uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
     uint64_t slba = le64_to_cpu(rw->slba);
 
@@ -706,7 +706,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
 
 static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
-    NvmeNamespace *ns;
     uint32_t nsid = le32_to_cpu(cmd->nsid);
 
     trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
@@ -716,15 +715,15 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    ns = &n->namespaces[nsid - 1];
+    req->ns = &n->namespaces[nsid - 1];
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
-        return nvme_flush(n, ns, cmd, req);
+        return nvme_flush(n, cmd, req);
     case NVME_CMD_WRITE_ZEROES:
-        return nvme_write_zeroes(n, ns, cmd, req);
+        return nvme_write_zeroes(n, cmd, req);
     case NVME_CMD_WRITE:
     case NVME_CMD_READ:
-        return nvme_rw(n, ns, cmd, req);
+        return nvme_rw(n, cmd, req);
     default:
         trace_pci_nvme_err_invalid_opc(cmd->opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 137cd8c2bf20..586fd3d62700 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -21,6 +21,7 @@ typedef struct NvmeAsyncEvent {
 
 typedef struct NvmeRequest {
     struct NvmeSQueue       *sq;
+    struct NvmeNamespace    *ns;
     BlockAIOCB              *aiocb;
     uint16_t                status;
     NvmeCqe                 cqe;
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (12 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:08   ` Minwoo Im
  2020-07-29 18:18   ` Maxim Levitsky
  2020-07-20 11:37 ` [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter Klaus Jensen
                   ` (2 subsequent siblings)
  16 siblings, 2 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

Always destroy the request qsg/iov at the end of request use.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 48 +++++++++++++++++-------------------------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 54cd20f1ce22..b53afdeb3fb6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -213,6 +213,14 @@ static void nvme_req_clear(NvmeRequest *req)
 {
     req->ns = NULL;
     memset(&req->cqe, 0x0, sizeof(req->cqe));
+
+    if (req->qsg.sg) {
+        qemu_sglist_destroy(&req->qsg);
+    }
+
+    if (req->iov.iov) {
+        qemu_iovec_destroy(&req->iov);
+    }
 }
 
 static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
@@ -297,15 +305,14 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
 
     status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
     if (status) {
-        goto unmap;
+        return status;
     }
 
     len -= trans_len;
     if (len) {
         if (unlikely(!prp2)) {
             trace_pci_nvme_err_invalid_prp2_missing();
-            status = NVME_INVALID_FIELD | NVME_DNR;
-            goto unmap;
+            return NVME_INVALID_FIELD | NVME_DNR;
         }
 
         if (len > n->page_size) {
@@ -326,13 +333,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
                 if (i == n->max_prp_ents - 1 && len > n->page_size) {
                     if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                         trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
-                        status = NVME_INVALID_FIELD | NVME_DNR;
-                        goto unmap;
+                        return NVME_INVALID_FIELD | NVME_DNR;
                     }
 
                     if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
-                        status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
-                        goto unmap;
+                        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
                     }
 
                     i = 0;
@@ -345,14 +350,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
 
                 if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
                     trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
-                    status = NVME_INVALID_FIELD | NVME_DNR;
-                    goto unmap;
+                    return NVME_INVALID_FIELD | NVME_DNR;
                 }
 
                 trans_len = MIN(len, n->page_size);
                 status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
                 if (status) {
-                    goto unmap;
+                    return status;
                 }
 
                 len -= trans_len;
@@ -361,27 +365,16 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
         } else {
             if (unlikely(prp2 & (n->page_size - 1))) {
                 trace_pci_nvme_err_invalid_prp2_align(prp2);
-                status = NVME_INVALID_FIELD | NVME_DNR;
-                goto unmap;
+                return NVME_INVALID_FIELD | NVME_DNR;
             }
             status = nvme_map_addr(n, qsg, iov, prp2, len);
             if (status) {
-                goto unmap;
+                return status;
             }
         }
     }
+
     return NVME_SUCCESS;
-
-unmap:
-    if (iov && iov->iov) {
-        qemu_iovec_destroy(iov);
-    }
-
-    if (qsg && qsg->sg) {
-        qemu_sglist_destroy(qsg);
-    }
-
-    return status;
 }
 
 static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
@@ -601,13 +594,6 @@ static void nvme_rw_cb(void *opaque, int ret)
         req->status = NVME_INTERNAL_DEV_ERROR;
     }
 
-    if (req->qsg.nalloc) {
-        qemu_sglist_destroy(&req->qsg);
-    }
-    if (req->iov.nalloc) {
-        qemu_iovec_destroy(&req->iov);
-    }
-
     nvme_enqueue_req_completion(cq, req);
 }
 
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (13 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:10   ` Minwoo Im
  2020-07-29 18:25   ` Maxim Levitsky
  2020-07-20 11:37 ` [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp Klaus Jensen
  2020-07-27  9:42 ` [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
  16 siblings, 2 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen

From: Klaus Jensen <k.jensen@samsung.com>

Keep a copy of the raw nvme command in the NvmeRequest and remove the
now redundant NvmeCmd parameter.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme.c | 177 +++++++++++++++++++++++++-----------------------
 hw/block/nvme.h |   1 +
 2 files changed, 93 insertions(+), 85 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b53afdeb3fb6..0b3dceccc89b 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -425,9 +425,9 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     return status;
 }
 
-static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
-                         NvmeRequest *req)
+static uint16_t nvme_map(NvmeCtrl *n, size_t len, NvmeRequest *req)
 {
+    NvmeCmd *cmd = &req->cmd;
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
 
@@ -597,7 +597,7 @@ static void nvme_rw_cb(void *opaque, int ret)
     nvme_enqueue_req_completion(cq, req);
 }
 
-static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
 {
     block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
          BLOCK_ACCT_FLUSH);
@@ -606,9 +606,9 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
 {
-    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
     NvmeNamespace *ns = req->ns;
     const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
@@ -633,9 +633,9 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 {
-    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
     NvmeNamespace *ns = req->ns;
     uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
     uint64_t slba = le64_to_cpu(rw->slba);
@@ -664,7 +664,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return status;
     }
 
-    if (nvme_map(n, cmd, data_size, req)) {
+    if (nvme_map(n, data_size, req)) {
         block_acct_invalid(blk_get_stats(n->conf.blk), acct);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
@@ -690,11 +690,12 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
-    uint32_t nsid = le32_to_cpu(cmd->nsid);
+    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
 
-    trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
+    trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
+                          req->cmd.opcode);
 
     if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
         trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces);
@@ -702,16 +703,16 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     }
 
     req->ns = &n->namespaces[nsid - 1];
-    switch (cmd->opcode) {
+    switch (req->cmd.opcode) {
     case NVME_CMD_FLUSH:
-        return nvme_flush(n, cmd, req);
+        return nvme_flush(n, req);
     case NVME_CMD_WRITE_ZEROES:
-        return nvme_write_zeroes(n, cmd, req);
+        return nvme_write_zeroes(n, req);
     case NVME_CMD_WRITE:
     case NVME_CMD_READ:
-        return nvme_rw(n, cmd, req);
+        return nvme_rw(n, req);
     default:
-        trace_pci_nvme_err_invalid_opc(cmd->opcode);
+        trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
     }
 }
@@ -727,10 +728,10 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
     }
 }
 
-static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
 {
-    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
-    NvmeRequest *req, *next;
+    NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
+    NvmeRequest *r, *next;
     NvmeSQueue *sq;
     NvmeCQueue *cq;
     uint16_t qid = le16_to_cpu(c->qid);
@@ -744,19 +745,19 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
 
     sq = n->sq[qid];
     while (!QTAILQ_EMPTY(&sq->out_req_list)) {
-        req = QTAILQ_FIRST(&sq->out_req_list);
-        assert(req->aiocb);
-        blk_aio_cancel(req->aiocb);
+        r = QTAILQ_FIRST(&sq->out_req_list);
+        assert(r->aiocb);
+        blk_aio_cancel(r->aiocb);
     }
     if (!nvme_check_cqid(n, sq->cqid)) {
         cq = n->cq[sq->cqid];
         QTAILQ_REMOVE(&cq->sq_list, sq, entry);
 
         nvme_post_cqes(cq);
-        QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
-            if (req->sq == sq) {
-                QTAILQ_REMOVE(&cq->req_list, req, entry);
-                QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+        QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) {
+            if (r->sq == sq) {
+                QTAILQ_REMOVE(&cq->req_list, r, entry);
+                QTAILQ_INSERT_TAIL(&sq->req_list, r, entry);
             }
         }
     }
@@ -793,10 +794,10 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
     n->sq[sqid] = sq;
 }
 
-static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeSQueue *sq;
-    NvmeCreateSq *c = (NvmeCreateSq *)cmd;
+    NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd;
 
     uint16_t cqid = le16_to_cpu(c->cqid);
     uint16_t sqid = le16_to_cpu(c->sqid);
@@ -831,10 +832,10 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
-                                uint32_t buf_len, uint64_t off,
-                                NvmeRequest *req)
+static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
+                                uint64_t off, NvmeRequest *req)
 {
+    NvmeCmd *cmd = &req->cmd;
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
     uint32_t nsid = le32_to_cpu(cmd->nsid);
@@ -889,10 +890,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
                         DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
-                                 uint64_t off, NvmeRequest *req)
+static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
+                                 NvmeRequest *req)
 {
     uint32_t trans_len;
+    NvmeCmd *cmd = &req->cmd;
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
     NvmeFwSlotInfoLog fw_log = {
@@ -911,11 +913,11 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
                         DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
-                                uint32_t buf_len, uint64_t off,
-                                NvmeRequest *req)
+static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
+                                uint64_t off, NvmeRequest *req)
 {
     uint32_t trans_len;
+    NvmeCmd *cmd = &req->cmd;
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
     NvmeErrorLog errlog;
@@ -936,8 +938,10 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
                         DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeCmd *cmd = &req->cmd;
+
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
     uint32_t dw12 = le32_to_cpu(cmd->cdw12);
@@ -972,11 +976,11 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     switch (lid) {
     case NVME_LOG_ERROR_INFO:
-        return nvme_error_info(n, cmd, rae, len, off, req);
+        return nvme_error_info(n, rae, len, off, req);
     case NVME_LOG_SMART_INFO:
-        return nvme_smart_info(n, cmd, rae, len, off, req);
+        return nvme_smart_info(n, rae, len, off, req);
     case NVME_LOG_FW_SLOT_INFO:
-        return nvme_fw_log_info(n, cmd, len, off, req);
+        return nvme_fw_log_info(n, len, off, req);
     default:
         trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -994,9 +998,9 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
     }
 }
 
-static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
 {
-    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
+    NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
     NvmeCQueue *cq;
     uint16_t qid = le16_to_cpu(c->qid);
 
@@ -1037,10 +1041,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
     cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
 }
 
-static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
+static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeCQueue *cq;
-    NvmeCreateCq *c = (NvmeCreateCq *)cmd;
+    NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd;
     uint16_t cqid = le16_to_cpu(c->cqid);
     uint16_t vector = le16_to_cpu(c->irq_vector);
     uint16_t qsize = le16_to_cpu(c->qsize);
@@ -1088,9 +1092,9 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
-                                   NvmeRequest *req)
+static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);
 
@@ -1100,10 +1104,10 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
                         prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
-                                 NvmeRequest *req)
+static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req)
 {
     NvmeNamespace *ns;
+    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
     uint32_t nsid = le32_to_cpu(c->nsid);
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);
@@ -1121,9 +1125,9 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
                         prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c,
-                                     NvmeRequest *req)
+static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
     static const int data_len = NVME_IDENTIFY_DATA_SIZE;
     uint32_t min_nsid = le32_to_cpu(c->nsid);
     uint64_t prp1 = le64_to_cpu(c->prp1);
@@ -1160,9 +1164,9 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c,
     return ret;
 }
 
-static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c,
-                                            NvmeRequest *req)
+static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
     uint32_t nsid = le32_to_cpu(c->nsid);
     uint64_t prp1 = le64_to_cpu(c->prp1);
     uint64_t prp2 = le64_to_cpu(c->prp2);
@@ -1201,28 +1205,28 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c,
                         DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
 {
-    NvmeIdentify *c = (NvmeIdentify *)cmd;
+    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
 
     switch (le32_to_cpu(c->cns)) {
     case NVME_ID_CNS_NS:
-        return nvme_identify_ns(n, c, req);
+        return nvme_identify_ns(n, req);
     case NVME_ID_CNS_CTRL:
-        return nvme_identify_ctrl(n, c, req);
+        return nvme_identify_ctrl(n, req);
     case NVME_ID_CNS_NS_ACTIVE_LIST:
-        return nvme_identify_nslist(n, c, req);
+        return nvme_identify_nslist(n, req);
     case NVME_ID_CNS_NS_DESCR_LIST:
-        return nvme_identify_ns_descr_list(n, c, req);
+        return nvme_identify_ns_descr_list(n, req);
     default:
         trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 }
 
-static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
 {
-    uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff;
+    uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff;
 
     req->cqe.result = 1;
     if (nvme_check_sqid(n, sqid)) {
@@ -1272,9 +1276,9 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
     return cpu_to_le64(ts.all);
 }
 
-static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
-                                           NvmeRequest *req)
+static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeCmd *cmd = &req->cmd;
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
 
@@ -1284,8 +1288,9 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
                         prp2, DMA_DIRECTION_FROM_DEVICE, req);
 }
 
-static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeCmd *cmd = &req->cmd;
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
     uint32_t nsid = le32_to_cpu(cmd->nsid);
@@ -1359,7 +1364,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         result = n->features.async_config;
         goto out;
     case NVME_TIMESTAMP:
-        return nvme_get_feature_timestamp(n, cmd, req);
+        return nvme_get_feature_timestamp(n, req);
     default:
         break;
     }
@@ -1405,11 +1410,11 @@ out:
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
-                                           NvmeRequest *req)
+static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
 {
     uint16_t ret;
     uint64_t timestamp;
+    NvmeCmd *cmd = &req->cmd;
     uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
     uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
 
@@ -1424,8 +1429,9 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
 {
+    NvmeCmd *cmd = &req->cmd;
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
     uint32_t nsid = le32_to_cpu(cmd->nsid);
@@ -1516,14 +1522,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         n->features.async_config = dw11;
         break;
     case NVME_TIMESTAMP:
-        return nvme_set_feature_timestamp(n, cmd, req);
+        return nvme_set_feature_timestamp(n, req);
     default:
         return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
     }
     return NVME_SUCCESS;
 }
 
-static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
 {
     trace_pci_nvme_aer(nvme_cid(req));
 
@@ -1542,33 +1548,33 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     return NVME_NO_COMPLETE;
 }
 
-static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
-    trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), cmd->opcode);
+    trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode);
 
-    switch (cmd->opcode) {
+    switch (req->cmd.opcode) {
     case NVME_ADM_CMD_DELETE_SQ:
-        return nvme_del_sq(n, cmd);
+        return nvme_del_sq(n, req);
     case NVME_ADM_CMD_CREATE_SQ:
-        return nvme_create_sq(n, cmd);
+        return nvme_create_sq(n, req);
     case NVME_ADM_CMD_GET_LOG_PAGE:
-        return nvme_get_log(n, cmd, req);
+        return nvme_get_log(n, req);
     case NVME_ADM_CMD_DELETE_CQ:
-        return nvme_del_cq(n, cmd);
+        return nvme_del_cq(n, req);
     case NVME_ADM_CMD_CREATE_CQ:
-        return nvme_create_cq(n, cmd);
+        return nvme_create_cq(n, req);
     case NVME_ADM_CMD_IDENTIFY:
-        return nvme_identify(n, cmd, req);
+        return nvme_identify(n, req);
     case NVME_ADM_CMD_ABORT:
-        return nvme_abort(n, cmd, req);
+        return nvme_abort(n, req);
     case NVME_ADM_CMD_SET_FEATURES:
-        return nvme_set_feature(n, cmd, req);
+        return nvme_set_feature(n, req);
     case NVME_ADM_CMD_GET_FEATURES:
-        return nvme_get_feature(n, cmd, req);
+        return nvme_get_feature(n, req);
     case NVME_ADM_CMD_ASYNC_EV_REQ:
-        return nvme_aer(n, cmd, req);
+        return nvme_aer(n, req);
     default:
-        trace_pci_nvme_err_invalid_admin_opc(cmd->opcode);
+        trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
     }
 }
@@ -1593,9 +1599,10 @@ static void nvme_process_sq(void *opaque)
         QTAILQ_REMOVE(&sq->req_list, req, entry);
         QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
         req->cqe.cid = cmd.cid;
+        memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
 
-        status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
-            nvme_admin_cmd(n, &cmd, req);
+        status = sq->sqid ? nvme_io_cmd(n, req) :
+            nvme_admin_cmd(n, req);
         if (status != NVME_NO_COMPLETE) {
             req->status = status;
             nvme_enqueue_req_completion(cq, req);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 586fd3d62700..52ba794f2e9a 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -25,6 +25,7 @@ typedef struct NvmeRequest {
     BlockAIOCB              *aiocb;
     uint16_t                status;
     NvmeCqe                 cqe;
+    NvmeCmd                 cmd;
     BlockAcctCookie         acct;
     QEMUSGList              qsg;
     QEMUIOVector            iov;
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (14 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter Klaus Jensen
@ 2020-07-20 11:37 ` Klaus Jensen
  2020-07-29 16:15   ` Minwoo Im
  2020-07-27  9:42 ` [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-20 11:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, Max Reitz, Keith Busch,
	Klaus Jensen, Maxim Levitsky

From: Klaus Jensen <k.jensen@samsung.com>

Since clean up of the request qsg/iov is now always done post-use, there
is no need to use a stack-allocated qsg/iov in nvme_dma_prp.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Acked-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 0b3dceccc89b..b6da5a9f3fc6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -381,45 +381,39 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                              uint64_t prp1, uint64_t prp2, DMADirection dir,
                              NvmeRequest *req)
 {
-    QEMUSGList qsg;
-    QEMUIOVector iov;
     uint16_t status = NVME_SUCCESS;
 
-    status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
+    status = nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
     if (status) {
         return status;
     }
 
-    if (qsg.nsg > 0) {
+    if (req->qsg.nsg > 0) {
         uint64_t residual;
 
         if (dir == DMA_DIRECTION_TO_DEVICE) {
-            residual = dma_buf_write(ptr, len, &qsg);
+            residual = dma_buf_write(ptr, len, &req->qsg);
         } else {
-            residual = dma_buf_read(ptr, len, &qsg);
+            residual = dma_buf_read(ptr, len, &req->qsg);
         }
 
         if (unlikely(residual)) {
             trace_pci_nvme_err_invalid_dma();
             status = NVME_INVALID_FIELD | NVME_DNR;
         }
-
-        qemu_sglist_destroy(&qsg);
     } else {
         size_t bytes;
 
         if (dir == DMA_DIRECTION_TO_DEVICE) {
-            bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
+            bytes = qemu_iovec_to_buf(&req->iov, 0, ptr, len);
         } else {
-            bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
+            bytes = qemu_iovec_from_buf(&req->iov, 0, ptr, len);
         }
 
         if (unlikely(bytes != len)) {
             trace_pci_nvme_err_invalid_dma();
             status = NVME_INVALID_FIELD | NVME_DNR;
         }
-
-        qemu_iovec_destroy(&iov);
     }
 
     return status;
-- 
2.27.0



^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup
  2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
                   ` (15 preceding siblings ...)
  2020-07-20 11:37 ` [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp Klaus Jensen
@ 2020-07-27  9:42 ` Klaus Jensen
  2020-07-27 20:44   ` Keith Busch
  16 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-27  9:42 UTC (permalink / raw)
  To: qemu-devel; +Cc: Kevin Wolf, Keith Busch, Klaus Jensen, qemu-block, Max Reitz

On Jul 20 13:37, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> This series consists of patches that refactors dma read/write and adds a
> number of address mapping helper functions.
> 
> Based-on: <20200706061303.246057-1-its@irrelevant.dk>
> 
> Klaus Jensen (16):
>   hw/block/nvme: memset preallocated requests structures
>   hw/block/nvme: add mapping helpers
>   hw/block/nvme: replace dma_acct with blk_acct equivalent
>   hw/block/nvme: remove redundant has_sg member
>   hw/block/nvme: refactor dma read/write
>   hw/block/nvme: pass request along for tracing
>   hw/block/nvme: add request mapping helper
>   hw/block/nvme: verify validity of prp lists in the cmb
>   hw/block/nvme: refactor request bounds checking
>   hw/block/nvme: add check for mdts
>   hw/block/nvme: be consistent about zeros vs zeroes
>   hw/block/nvme: refactor NvmeRequest clearing
>   hw/block/nvme: add a namespace reference in NvmeRequest
>   hw/block/nvme: consolidate qsg/iov clearing
>   hw/block/nvme: remove NvmeCmd parameter
>   hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp
> 
>  block/nvme.c          |   4 +-
>  hw/block/nvme.c       | 498 +++++++++++++++++++++++++++---------------
>  hw/block/nvme.h       |   4 +-
>  hw/block/trace-events |   4 +
>  include/block/nvme.h  |   4 +-
>  5 files changed, 331 insertions(+), 183 deletions(-)
> 
> -- 
> 2.27.0
> 

Gentle ping on this.

Requesting reviews on

  [02/16]: hw/block/nvme: add mapping helpers
  [11/16]: hw/block/nvme: be consistent about zeros vs zeroes
  [12/16]: hw/block/nvme: refactor NvmeRequest clearing
  [13/16]: hw/block/nvme: add a namespace reference in NvmeRequest
  [14/16]: hw/block/nvme: consolidate qsg/iov clearing
  [15/16]: hw/block/nvme: remove NvmeCmd parameter

I think only 02/16 has some meat on it.


Thanks,
Klaus


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup
  2020-07-27  9:42 ` [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
@ 2020-07-27 20:44   ` Keith Busch
  0 siblings, 0 replies; 58+ messages in thread
From: Keith Busch @ 2020-07-27 20:44 UTC (permalink / raw)
  To: Klaus Jensen; +Cc: Kevin Wolf, Klaus Jensen, qemu-devel, qemu-block, Max Reitz

On Mon, Jul 27, 2020 at 11:42:46AM +0200, Klaus Jensen wrote:
> On Jul 20 13:37, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > This series consists of patches that refactors dma read/write and adds a
> > number of address mapping helper functions.
> > 
> > Based-on: <20200706061303.246057-1-its@irrelevant.dk>
> > 
> > Klaus Jensen (16):
> >   hw/block/nvme: memset preallocated requests structures
> >   hw/block/nvme: add mapping helpers
> >   hw/block/nvme: replace dma_acct with blk_acct equivalent
> >   hw/block/nvme: remove redundant has_sg member
> >   hw/block/nvme: refactor dma read/write
> >   hw/block/nvme: pass request along for tracing
> >   hw/block/nvme: add request mapping helper
> >   hw/block/nvme: verify validity of prp lists in the cmb
> >   hw/block/nvme: refactor request bounds checking
> >   hw/block/nvme: add check for mdts
> >   hw/block/nvme: be consistent about zeros vs zeroes
> >   hw/block/nvme: refactor NvmeRequest clearing
> >   hw/block/nvme: add a namespace reference in NvmeRequest
> >   hw/block/nvme: consolidate qsg/iov clearing
> >   hw/block/nvme: remove NvmeCmd parameter
> >   hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp
> > 
> >  block/nvme.c          |   4 +-
> >  hw/block/nvme.c       | 498 +++++++++++++++++++++++++++---------------
> >  hw/block/nvme.h       |   4 +-
> >  hw/block/trace-events |   4 +
> >  include/block/nvme.h  |   4 +-
> >  5 files changed, 331 insertions(+), 183 deletions(-)
> > 
> > -- 
> > 2.27.0
> > 
> 
> Gentle ping on this.

I'll have free time to get back to this probably end of the week,
possibly early next week.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-20 11:37 ` [PATCH 02/16] hw/block/nvme: add mapping helpers Klaus Jensen
@ 2020-07-29 13:57   ` Maxim Levitsky
  2020-07-29 18:23     ` Klaus Jensen
  2020-07-29 15:19   ` Minwoo Im
  2020-07-29 20:40   ` Andrzej Jakowski
  2 siblings, 1 reply; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 13:57 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> use them in nvme_map_prp.
> 
> This fixes a bug where in the case of a CMB transfer, the device would
> map to the buffer with a wrong length.
> 
> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
>  hw/block/trace-events |   2 +
>  2 files changed, 94 insertions(+), 17 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 4d7b730a62b6..9b1a080cdc70 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -109,6 +109,11 @@ static uint16_t nvme_sqid(NvmeRequest *req)
>      return le16_to_cpu(req->sq->sqid);
>  }
>  
> +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> +{
> +    return &n->cmbuf[addr - n->ctrl_mem.addr];
I would add an assert here just in case we do out of bounds array access.
> +}
> +
>  static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  {
>      hwaddr low = n->ctrl_mem.addr;
> @@ -120,7 +125,7 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
>  {
>      if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
> -        memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
> +        memcpy(buf, nvme_addr_to_cmb(n, addr), size);
OK.
>          return;
>      }
>  
> @@ -203,29 +208,91 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>      }
>  }
>  
> +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> +                                  size_t len)
> +{
> +    if (!len) {
> +        return NVME_SUCCESS;
> +    }
> +
> +    trace_pci_nvme_map_addr_cmb(addr, len);
> +
> +    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
> +        return NVME_DATA_TRAS_ERROR;
> +    }
> +
> +    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
> +
> +    return NVME_SUCCESS;
> +}
OK
> +
> +static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> +                              hwaddr addr, size_t len)
> +{
> +    if (!len) {
> +        return NVME_SUCCESS;
> +    }
> +
> +    trace_pci_nvme_map_addr(addr, len);
> +
> +    if (nvme_addr_is_cmb(n, addr)) {
> +        if (qsg && qsg->sg) {
> +            return NVME_INVALID_USE_OF_CMB | NVME_DNR;
> +        }
> +
> +        assert(iov);
> +
> +        if (!iov->iov) {
> +            qemu_iovec_init(iov, 1);
> +        }
> +
> +        return nvme_map_addr_cmb(n, iov, addr, len);
> +    }
> +
> +    if (iov && iov->iov) {
> +        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
> +    }
> +
> +    assert(qsg);
> +
> +    if (!qsg->sg) {
> +        pci_dma_sglist_init(qsg, &n->parent_obj, 1);
> +    }
> +
> +    qemu_sglist_add(qsg, addr, len);
> +
> +    return NVME_SUCCESS;
> +}
OK
> +
>  static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>                               uint64_t prp2, uint32_t len, NvmeCtrl *n)
>  {
>      hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>      trans_len = MIN(len, trans_len);
>      int num_prps = (len >> n->page_bits) + 1;
> +    uint16_t status;
>  
>      if (unlikely(!prp1)) {
>          trace_pci_nvme_err_invalid_prp();
>          return NVME_INVALID_FIELD | NVME_DNR;
> -    } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
> -               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> -        qsg->nsg = 0;
> +    }
> +
> +    if (nvme_addr_is_cmb(n, prp1)) {
>          qemu_iovec_init(iov, num_prps);
> -        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
>      } else {
>          pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> -        qemu_sglist_add(qsg, prp1, trans_len);
>      }
> +
> +    status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
> +    if (status) {
> +        goto unmap;
> +    }
> +
>      len -= trans_len;
>      if (len) {
>          if (unlikely(!prp2)) {
>              trace_pci_nvme_err_invalid_prp2_missing();
> +            status = NVME_INVALID_FIELD | NVME_DNR;
>              goto unmap;
>          }
>          if (len > n->page_size) {
> @@ -242,6 +309,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>                  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>                      if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                          trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +                        status = NVME_INVALID_FIELD | NVME_DNR;
>                          goto unmap;
>                      }
>  
> @@ -255,14 +323,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>  
>                  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                      trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +                    status = NVME_INVALID_FIELD | NVME_DNR;
>                      goto unmap;
>                  }
>  
>                  trans_len = MIN(len, n->page_size);
> -                if (qsg->nsg){
> -                    qemu_sglist_add(qsg, prp_ent, trans_len);
> -                } else {
> -                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
> +                status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
> +                if (status) {
> +                    goto unmap;
>                  }
>                  len -= trans_len;
>                  i++;
> @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>          } else {
>              if (unlikely(prp2 & (n->page_size - 1))) {
>                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> +                status = NVME_INVALID_FIELD | NVME_DNR;
>                  goto unmap;
>              }
> -            if (qsg->nsg) {
> -                qemu_sglist_add(qsg, prp2, len);
> -            } else {
> -                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
> +            status = nvme_map_addr(n, qsg, iov, prp2, len);
> +            if (status) {
> +                goto unmap;
>              }
>          }
>      }
>      return NVME_SUCCESS;
>  
> - unmap:
> -    qemu_sglist_destroy(qsg);
> -    return NVME_INVALID_FIELD | NVME_DNR;
> +unmap:
> +    if (iov && iov->iov) {
> +        qemu_iovec_destroy(iov);
> +    }
> +
> +    if (qsg && qsg->sg) {
> +        qemu_sglist_destroy(qsg);
> +    }
> +
> +    return status;
>  }
>  
>  static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 7b7303cab1dd..f3b2d004e078 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -33,6 +33,8 @@ pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
>  pci_nvme_irq_pin(void) "pulsing IRQ pin"
>  pci_nvme_irq_masked(void) "IRQ is masked"
>  pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
> +pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
> +pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
>  pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
>  pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8""
>  pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""

Looks good. I could have missed something, but compared to older version of similiar code I reviewed, 
this looks much better and easy to t understand.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-20 11:37 ` [PATCH 02/16] hw/block/nvme: add mapping helpers Klaus Jensen
  2020-07-29 13:57   ` Maxim Levitsky
@ 2020-07-29 15:19   ` Minwoo Im
  2020-07-29 20:40   ` Andrzej Jakowski
  2 siblings, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:19 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

Hi Klaus,

On 20-07-20 13:37:34, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> use them in nvme_map_prp.
> 
> This fixes a bug where in the case of a CMB transfer, the device would
> map to the buffer with a wrong length.
> 
> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>

>  static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>                               uint64_t prp2, uint32_t len, NvmeCtrl *n)
>  {
>      hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>      trans_len = MIN(len, trans_len);
>      int num_prps = (len >> n->page_bits) + 1;
> +    uint16_t status;
>  
>      if (unlikely(!prp1)) {
>          trace_pci_nvme_err_invalid_prp();
>          return NVME_INVALID_FIELD | NVME_DNR;
> -    } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
> -               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> -        qsg->nsg = 0;
> +    }
> +
> +    if (nvme_addr_is_cmb(n, prp1)) {
>          qemu_iovec_init(iov, num_prps);
> -        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
>      } else {
>          pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> -        qemu_sglist_add(qsg, prp1, trans_len);
>      }
> +
> +    status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
> +    if (status) {
> +        goto unmap;
> +    }
> +
>      len -= trans_len;
>      if (len) {
>          if (unlikely(!prp2)) {
>              trace_pci_nvme_err_invalid_prp2_missing();
> +            status = NVME_INVALID_FIELD | NVME_DNR;
>              goto unmap;
>          }
>          if (len > n->page_size) {
> @@ -242,6 +309,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>                  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>                      if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                          trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +                        status = NVME_INVALID_FIELD | NVME_DNR;
>                          goto unmap;
>                      }
>  
> @@ -255,14 +323,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>  
>                  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                      trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +                    status = NVME_INVALID_FIELD | NVME_DNR;
>                      goto unmap;
>                  }
>  
>                  trans_len = MIN(len, n->page_size);
> -                if (qsg->nsg){
> -                    qemu_sglist_add(qsg, prp_ent, trans_len);
> -                } else {
> -                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
> +                status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
> +                if (status) {
> +                    goto unmap;
>                  }
>                  len -= trans_len;
>                  i++;
> @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>          } else {
>              if (unlikely(prp2 & (n->page_size - 1))) {
>                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> +                status = NVME_INVALID_FIELD | NVME_DNR;
>                  goto unmap;
>              }
> -            if (qsg->nsg) {
> -                qemu_sglist_add(qsg, prp2, len);
> -            } else {
> -                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
> +            status = nvme_map_addr(n, qsg, iov, prp2, len);
> +            if (status) {
> +                goto unmap;

I like these parts which is much better to read the codes.

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent
  2020-07-20 11:37 ` [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent Klaus Jensen
@ 2020-07-29 15:23   ` Minwoo Im
  0 siblings, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:23 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

Klaus,

On 20-07-20 13:37:35, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> The QSG isn't always initialized, so accounting could be wrong. Issue a
> call to blk_acct_start instead with the size taken from the QSG or IOV
> depending on the kind of I/O.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  hw/block/nvme.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9b1a080cdc70..cb236d1c8c46 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -620,9 +620,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
>  
> -    dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct);
>      if (req->qsg.nsg > 0) {
>          req->has_sg = true;
> +        block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->qsg.size,
> +                         acct);
>          req->aiocb = is_write ?
>              dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE,
>                            nvme_rw_cb, req) :
> @@ -630,6 +631,8 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>                           nvme_rw_cb, req);
>      } else {
>          req->has_sg = false;
> +        block_acct_start(blk_get_stats(n->conf.blk), &req->acct, req->iov.size,
> +                         acct);
>          req->aiocb = is_write ?
>              blk_aio_pwritev(n->conf.blk, data_offset, &req->iov, 0, nvme_rw_cb,
>                              req) :

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member
  2020-07-20 11:37 ` [PATCH 04/16] hw/block/nvme: remove redundant has_sg member Klaus Jensen
@ 2020-07-29 15:29   ` Minwoo Im
  2020-07-29 18:29     ` Klaus Jensen
       [not found]     ` <CGME20200729182946epcas2p1bef465a70c1a815654a07814aa379dc3@epcms2p5>
  0 siblings, 2 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:29 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

Klaus,

On 20-07-20 13:37:36, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Remove the has_sg member from NvmeRequest since it's redundant.
> 
> Also, make sure the request iov is destroyed at completion time.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  hw/block/nvme.c | 11 ++++++-----
>  hw/block/nvme.h |  1 -
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index cb236d1c8c46..6a1a1626b87b 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -548,16 +548,20 @@ static void nvme_rw_cb(void *opaque, int ret)
>          block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
>          req->status = NVME_INTERNAL_DEV_ERROR;
>      }
> -    if (req->has_sg) {
> +
> +    if (req->qsg.nalloc) {

Personally, I prefer has_xxx or is_xxx to check whether the request is
based on sg or iov as an inline function, but 'nalloc' is also fine to
figure out the meaning of purpose here.

>          qemu_sglist_destroy(&req->qsg);
>      }
> +    if (req->iov.nalloc) {
> +        qemu_iovec_destroy(&req->iov);
> +    }
> +

Maybe this can be in a separated commit?

Otherwise, It looks good to me.

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 05/16] hw/block/nvme: refactor dma read/write
  2020-07-20 11:37 ` [PATCH 05/16] hw/block/nvme: refactor dma read/write Klaus Jensen
@ 2020-07-29 15:35   ` Minwoo Im
  2020-07-29 17:35   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:35 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz,
	Keith Busch, Maxim Levitsky

Klaus,

On 20-07-20 13:37:37, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Refactor the nvme_dma_{read,write}_prp functions into a common function
> taking a DMADirection parameter.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/16] hw/block/nvme: pass request along for tracing
  2020-07-20 11:37 ` [PATCH 06/16] hw/block/nvme: pass request along for tracing Klaus Jensen
@ 2020-07-29 15:49   ` Minwoo Im
  2020-07-29 19:49     ` Klaus Jensen
  0 siblings, 1 reply; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:49 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz,
	Keith Busch, Maxim Levitsky

Klaus,

On 20-07-20 13:37:38, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Pass along the NvmeRequest in various functions since it is very useful
> for tracing.

One doubt here.
  This patch has put NvmeRequest argument to the nvme_map_prp() to trace
  the request's command id.  But can we just trace the cid before this
  kind of prp mapping, somewhere like nvme_process_sq() level.  Then we
  can figure out the tracing for the prp mapping is from which request.

Tracing for cid is definitely great, but feels like too much cost to
pass argument to trace 'cid' in the middle of the dma mapping stage.

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 07/16] hw/block/nvme: add request mapping helper
  2020-07-20 11:37 ` [PATCH 07/16] hw/block/nvme: add request mapping helper Klaus Jensen
@ 2020-07-29 15:52   ` Minwoo Im
  2020-07-29 18:31     ` Maxim Levitsky
  0 siblings, 1 reply; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:52 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

Klaus,

On 20-07-20 13:37:39, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Introduce the nvme_map helper to remove some noise in the main nvme_rw
> function.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  hw/block/nvme.c | 13 ++++++++++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index f1e04608804b..68c33a11c144 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -413,6 +413,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
>      return status;
>  }
>  
> +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
> +                         NvmeRequest *req)

Can we specify what is going to be mapped in this function? like
nvme_map_dptr?

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb
  2020-07-20 11:37 ` [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb Klaus Jensen
@ 2020-07-29 15:54   ` Minwoo Im
  0 siblings, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:54 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:40, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Before this patch the device already supported PRP lists in the CMB, but
> it did not check for the validity of it nor announced the support in the
> Identify Controller data structure LISTS field.
> 
> If some of the PRPs in a PRP list are in the CMB, then ALL entries must
> be there. This patch makes sure that requirement is verified as well as
> properly announcing support for PRP lists in the CMB.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 09/16] hw/block/nvme: refactor request bounds checking
  2020-07-20 11:37 ` [PATCH 09/16] hw/block/nvme: refactor request bounds checking Klaus Jensen
@ 2020-07-29 15:56   ` Minwoo Im
  0 siblings, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 15:56 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:41, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Hoist bounds checking into its own function and check for wrap-around.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 10/16] hw/block/nvme: add check for mdts
  2020-07-20 11:37 ` [PATCH 10/16] hw/block/nvme: add check for mdts Klaus Jensen
@ 2020-07-29 16:00   ` Minwoo Im
  2020-07-29 19:30     ` Klaus Jensen
  0 siblings, 1 reply; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:00 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:42, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add 'mdts' device parameter to control the Maximum Data Transfer Size of
> the controller and check that it is respected.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  hw/block/nvme.c       | 32 ++++++++++++++++++++++++++++++--
>  hw/block/nvme.h       |  1 +
>  hw/block/trace-events |  1 +
>  3 files changed, 32 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 35bc1a7b7e21..10fe53873ae9 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -18,9 +18,10 @@
>   * Usage: add options:
>   *      -drive file=<file>,if=none,id=<drive_id>
>   *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
> - *              cmb_size_mb=<cmb_size_mb[optional]>, \
> + *              [cmb_size_mb=<cmb_size_mb>,] \
>   *              [pmrdev=<mem_backend_file_id>,] \
> - *              max_ioqpairs=<N[optional]>
> + *              [max_ioqpairs=<N>,] \
> + *              [mdts=<N>]

Nitpick:
  cmb and ioqpairs-things could be in another thread. :)

>   *
>   * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
>   * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
> @@ -553,6 +554,17 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
>      }
>  }
>  
> +static inline uint16_t nvme_check_mdts(NvmeCtrl *n, size_t len)
> +{
> +    uint8_t mdts = n->params.mdts;
> +
> +    if (mdts && len > n->page_size << mdts) {
> +        return NVME_INVALID_FIELD | NVME_DNR;
> +    }
> +
> +    return NVME_SUCCESS;
> +}
> +
>  static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeNamespace *ns,
>                                           uint64_t slba, uint32_t nlb)
>  {
> @@ -646,6 +658,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>  
>      trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba);
>  
> +    status = nvme_check_mdts(n, data_size);
> +    if (status) {
> +        trace_pci_nvme_err_mdts(nvme_cid(req), data_size);
> +        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
> +        return status;
> +    }
> +
>      status = nvme_check_bounds(n, ns, slba, nlb);
>      if (status) {
>          trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
> @@ -938,6 +957,7 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      uint32_t numdl, numdu;
>      uint64_t off, lpol, lpou;
>      size_t   len;
> +    uint16_t status;
>  
>      numdl = (dw10 >> 16);
>      numdu = (dw11 & 0xffff);
> @@ -953,6 +973,12 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  
>      trace_pci_nvme_get_log(nvme_cid(req), lid, lsp, rae, len, off);
>  
> +    status = nvme_check_mdts(n, len);
> +    if (status) {
> +        trace_pci_nvme_err_mdts(nvme_cid(req), len);
> +        return status;
> +    }
> +
>      switch (lid) {
>      case NVME_LOG_ERROR_INFO:
>          return nvme_error_info(n, cmd, rae, len, off, req);
> @@ -2275,6 +2301,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
>      id->ieee[0] = 0x00;
>      id->ieee[1] = 0x02;
>      id->ieee[2] = 0xb3;
> +    id->mdts = n->params.mdts;
>      id->ver = cpu_to_le32(NVME_SPEC_VER);
>      id->oacs = cpu_to_le16(0);
>  
> @@ -2394,6 +2421,7 @@ static Property nvme_props[] = {
>      DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
>      DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
>      DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
> +    DEFINE_PROP_UINT8("mdts", NvmeCtrl, params.mdts, 7),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 5519b5cc7686..137cd8c2bf20 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -11,6 +11,7 @@ typedef struct NvmeParams {
>      uint32_t cmb_size_mb;
>      uint8_t  aerl;
>      uint32_t aer_max_queued;
> +    uint8_t  mdts;
>  } NvmeParams;
>  
>  typedef struct NvmeAsyncEvent {
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 6d0cd588c786..5d7d4679650b 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -85,6 +85,7 @@ pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
>  pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
>  
>  # nvme traces for error conditions
> +pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %"PRIu64""
>  pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
>  pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
>  pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
> -- 
> 2.27.0
> 
> 

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes
  2020-07-20 11:37 ` [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes Klaus Jensen
@ 2020-07-29 16:01   ` Minwoo Im
  2020-07-29 17:39   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:01 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, Fam Zheng, qemu-block, Klaus Jensen, qemu-devel,
	Max Reitz, Keith Busch

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing
  2020-07-20 11:37 ` [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing Klaus Jensen
@ 2020-07-29 16:04   ` Minwoo Im
  2020-07-29 17:47   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:04 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:44, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Move clearing of the structure from "clear before use" to "clear
> after use".

Yah, agree on this.

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest
  2020-07-20 11:37 ` [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest Klaus Jensen
@ 2020-07-29 16:06   ` Minwoo Im
  2020-07-29 17:53   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:06 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:45, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Instead of passing around the NvmeNamespace, add it as a member in the
> NvmeRequest structure.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing
  2020-07-20 11:37 ` [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing Klaus Jensen
@ 2020-07-29 16:08   ` Minwoo Im
  2020-07-29 18:18   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:08 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:46, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Always destroy the request qsg/iov at the end of request use.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 48 +++++++++++++++++-------------------------------
>  1 file changed, 17 insertions(+), 31 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 54cd20f1ce22..b53afdeb3fb6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -213,6 +213,14 @@ static void nvme_req_clear(NvmeRequest *req)
>  {
>      req->ns = NULL;
>      memset(&req->cqe, 0x0, sizeof(req->cqe));
> +
> +    if (req->qsg.sg) {
> +        qemu_sglist_destroy(&req->qsg);
> +    }
> +
> +    if (req->iov.iov) {
> +        qemu_iovec_destroy(&req->iov);
> +    }

Oh okay.  This looks like update for the previous patch in this series.
And I also agree on starting to make focus on nvme_req_clear() for
wrap-up.

Looks good to me.

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter
  2020-07-20 11:37 ` [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter Klaus Jensen
@ 2020-07-29 16:10   ` Minwoo Im
  2020-07-29 19:44     ` Klaus Jensen
  2020-07-29 18:25   ` Maxim Levitsky
  1 sibling, 1 reply; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:10 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:47, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Keep a copy of the raw nvme command in the NvmeRequest and remove the
> now redundant NvmeCmd parameter.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>

I would really have suggested this change from 13th patch!

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

Thanks,


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp
  2020-07-20 11:37 ` [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp Klaus Jensen
@ 2020-07-29 16:15   ` Minwoo Im
  2020-07-29 19:57     ` Klaus Jensen
  0 siblings, 1 reply; 58+ messages in thread
From: Minwoo Im @ 2020-07-29 16:15 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 20-07-20 13:37:48, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Since clean up of the request qsg/iov is now always done post-use, there
> is no need to use a stack-allocated qsg/iov in nvme_dma_prp.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Acked-by: Keith Busch <kbusch@kernel.org>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

> ---
>  hw/block/nvme.c | 18 ++++++------------
>  1 file changed, 6 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 0b3dceccc89b..b6da5a9f3fc6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -381,45 +381,39 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
>                               uint64_t prp1, uint64_t prp2, DMADirection dir,
>                               NvmeRequest *req)
>  {
> -    QEMUSGList qsg;
> -    QEMUIOVector iov;
>      uint16_t status = NVME_SUCCESS;
>  
> -    status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
> +    status = nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);

After this change, can we make nvme_map_prp() just receive
NvmeRequest *req without &req->qsg, &req->iov by retrieve them from
inside of the nvme_map_prp()?


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 05/16] hw/block/nvme: refactor dma read/write
  2020-07-20 11:37 ` [PATCH 05/16] hw/block/nvme: refactor dma read/write Klaus Jensen
  2020-07-29 15:35   ` Minwoo Im
@ 2020-07-29 17:35   ` Maxim Levitsky
  2020-07-29 18:38     ` Klaus Jensen
  1 sibling, 1 reply; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 17:35 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Refactor the nvme_dma_{read,write}_prp functions into a common function
> taking a DMADirection parameter.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> ---
>  hw/block/nvme.c | 88 ++++++++++++++++++++++++-------------------------
>  1 file changed, 43 insertions(+), 45 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 6a1a1626b87b..d314a604db81 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -361,55 +361,50 @@ unmap:
>      return status;
>  }
>  
> -static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> -                                   uint64_t prp1, uint64_t prp2)
> +static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> +                             uint64_t prp1, uint64_t prp2, DMADirection dir)
>  {
>      QEMUSGList qsg;
>      QEMUIOVector iov;
>      uint16_t status = NVME_SUCCESS;
>  
> -    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
> -        return NVME_INVALID_FIELD | NVME_DNR;
> +    status = nvme_map_prp(&qsg, &iov, prp1, prp2, len, n);
> +    if (status) {
> +        return status;
>      }
> +
>      if (qsg.nsg > 0) {
> -        if (dma_buf_write(ptr, len, &qsg)) {
> -            status = NVME_INVALID_FIELD | NVME_DNR;
> +        uint64_t residual;
> +
> +        if (dir == DMA_DIRECTION_TO_DEVICE) {
> +            residual = dma_buf_write(ptr, len, &qsg);
> +        } else {
> +            residual = dma_buf_read(ptr, len, &qsg);
>          }
> -        qemu_sglist_destroy(&qsg);
> -    } else {
> -        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
> -            status = NVME_INVALID_FIELD | NVME_DNR;
> -        }
> -        qemu_iovec_destroy(&iov);
> -    }
> -    return status;
> -}
>  
> -static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> -    uint64_t prp1, uint64_t prp2)
> -{
> -    QEMUSGList qsg;
> -    QEMUIOVector iov;
> -    uint16_t status = NVME_SUCCESS;
> -
> -    trace_pci_nvme_dma_read(prp1, prp2);
> -
> -    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
> -        return NVME_INVALID_FIELD | NVME_DNR;
> -    }
> -    if (qsg.nsg > 0) {
> -        if (unlikely(dma_buf_read(ptr, len, &qsg))) {
> +        if (unlikely(residual)) {
>              trace_pci_nvme_err_invalid_dma();
>              status = NVME_INVALID_FIELD | NVME_DNR;
>          }
> +
>          qemu_sglist_destroy(&qsg);
>      } else {
> -        if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
> +        size_t bytes;
> +
> +        if (dir == DMA_DIRECTION_TO_DEVICE) {
> +            bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
> +        } else {
> +            bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
> +        }
> +
> +        if (unlikely(bytes != len)) {
>              trace_pci_nvme_err_invalid_dma();
>              status = NVME_INVALID_FIELD | NVME_DNR;
>          }
> +
>          qemu_iovec_destroy(&iov);
>      }
> +
I know I reviewed this, but thinking now, why not to add an assert here
that we don't have both iov and qsg with data.

Best regards,
	Maxim Levitsky

>      return status;
>  }
>  
> @@ -840,8 +835,8 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
>          nvme_clear_events(n, NVME_AER_TYPE_SMART);
>      }
>  
> -    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> -                             prp2);
> +    return nvme_dma_prp(n, (uint8_t *) &smart + off, trans_len, prp1, prp2,
> +                        DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> @@ -862,8 +857,8 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
>  
>      trans_len = MIN(sizeof(fw_log) - off, buf_len);
>  
> -    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> -                             prp2);
> +    return nvme_dma_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, prp2,
> +                        DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
> @@ -887,7 +882,8 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
>  
>      trans_len = MIN(sizeof(errlog) - off, buf_len);
>  
> -    return nvme_dma_read_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2);
> +    return nvme_dma_prp(n, (uint8_t *)&errlog, trans_len, prp1, prp2,
> +                        DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> @@ -1042,8 +1038,8 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c)
>  
>      trace_pci_nvme_identify_ctrl();
>  
> -    return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
> -        prp1, prp2);
> +    return nvme_dma_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1,
> +                        prp2, DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
> @@ -1062,8 +1058,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
>  
>      ns = &n->namespaces[nsid - 1];
>  
> -    return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
> -        prp1, prp2);
> +    return nvme_dma_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns), prp1,
> +                        prp2, DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
> @@ -1098,7 +1094,8 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
>              break;
>          }
>      }
> -    ret = nvme_dma_read_prp(n, (uint8_t *)list, data_len, prp1, prp2);
> +    ret = nvme_dma_prp(n, (uint8_t *)list, data_len, prp1, prp2,
> +                       DMA_DIRECTION_FROM_DEVICE);
>      g_free(list);
>      return ret;
>  }
> @@ -1139,7 +1136,8 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c)
>      ns_descrs->uuid.hdr.nidl = NVME_NIDT_UUID_LEN;
>      stl_be_p(&ns_descrs->uuid.v, nsid);
>  
> -    return nvme_dma_read_prp(n, list, NVME_IDENTIFY_DATA_SIZE, prp1, prp2);
> +    return nvme_dma_prp(n, list, NVME_IDENTIFY_DATA_SIZE, prp1, prp2,
> +                        DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
> @@ -1220,8 +1218,8 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
>  
>      uint64_t timestamp = nvme_get_timestamp(n);
>  
> -    return nvme_dma_read_prp(n, (uint8_t *)&timestamp,
> -                                 sizeof(timestamp), prp1, prp2);
> +    return nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), prp1,
> +                        prp2, DMA_DIRECTION_FROM_DEVICE);
>  }
>  
>  static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> @@ -1352,8 +1350,8 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd)
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
> -    ret = nvme_dma_write_prp(n, (uint8_t *)&timestamp,
> -                                sizeof(timestamp), prp1, prp2);
> +    ret = nvme_dma_prp(n, (uint8_t *)&timestamp, sizeof(timestamp), prp1,
> +                       prp2, DMA_DIRECTION_TO_DEVICE);
>      if (ret != NVME_SUCCESS) {
>          return ret;
>      }




^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes
  2020-07-20 11:37 ` [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes Klaus Jensen
  2020-07-29 16:01   ` Minwoo Im
@ 2020-07-29 17:39   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 17:39 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Fam Zheng, qemu-block, Klaus Jensen, Max Reitz, Keith Busch

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> The NVM Express specification generally uses 'zeroes' and not 'zeros',
> so let us align with it.
> 
> Cc: Fam Zheng <fam@euphon.net>
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  block/nvme.c         | 4 ++--
>  hw/block/nvme.c      | 8 ++++----
>  include/block/nvme.h | 4 ++--
>  3 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/block/nvme.c b/block/nvme.c
> index c1c4c07ac6cc..05485fdd1189 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -537,7 +537,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
>                            s->page_size / sizeof(uint64_t) * s->page_size);
>  
>      oncs = le16_to_cpu(idctrl->oncs);
> -    s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROS);
> +    s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES);
>      s->supports_discard = !!(oncs & NVME_ONCS_DSM);
>  
>      memset(resp, 0, 4096);
> @@ -1201,7 +1201,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
>      }
>  
>      NvmeCmd cmd = {
> -        .opcode = NVME_CMD_WRITE_ZEROS,
> +        .opcode = NVME_CMD_WRITE_ZEROES,
>          .nsid = cpu_to_le32(s->nsid),
>          .cdw10 = cpu_to_le32((offset >> s->blkshift) & 0xFFFFFFFF),
>          .cdw11 = cpu_to_le32(((offset >> s->blkshift) >> 32) & 0xFFFFFFFF),
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 10fe53873ae9..e2932239c661 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -614,7 +614,7 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
> +static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      NvmeRequest *req)
>  {
>      NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
> @@ -714,8 +714,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      switch (cmd->opcode) {
>      case NVME_CMD_FLUSH:
>          return nvme_flush(n, ns, cmd, req);
> -    case NVME_CMD_WRITE_ZEROS:
> -        return nvme_write_zeros(n, ns, cmd, req);
> +    case NVME_CMD_WRITE_ZEROES:
> +        return nvme_write_zeroes(n, ns, cmd, req);
>      case NVME_CMD_WRITE:
>      case NVME_CMD_READ:
>          return nvme_rw(n, ns, cmd, req);
> @@ -2328,7 +2328,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
>      id->sqes = (0x6 << 4) | 0x6;
>      id->cqes = (0x4 << 4) | 0x4;
>      id->nn = cpu_to_le32(n->num_namespaces);
> -    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP |
> +    id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROES | NVME_ONCS_TIMESTAMP |
>                             NVME_ONCS_FEATURES);
>  
>      subnqn = g_strdup_printf("nqn.2019-08.org.qemu:%s", n->params.serial);
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 370df7fc0570..65e68a82c897 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -460,7 +460,7 @@ enum NvmeIoCommands {
>      NVME_CMD_READ               = 0x02,
>      NVME_CMD_WRITE_UNCOR        = 0x04,
>      NVME_CMD_COMPARE            = 0x05,
> -    NVME_CMD_WRITE_ZEROS        = 0x08,
> +    NVME_CMD_WRITE_ZEROES       = 0x08,
>      NVME_CMD_DSM                = 0x09,
>  };
>  
> @@ -838,7 +838,7 @@ enum NvmeIdCtrlOncs {
>      NVME_ONCS_COMPARE       = 1 << 0,
>      NVME_ONCS_WRITE_UNCORR  = 1 << 1,
>      NVME_ONCS_DSM           = 1 << 2,
> -    NVME_ONCS_WRITE_ZEROS   = 1 << 3,
> +    NVME_ONCS_WRITE_ZEROES  = 1 << 3,
>      NVME_ONCS_FEATURES      = 1 << 4,
>      NVME_ONCS_RESRVATIONS   = 1 << 5,
>      NVME_ONCS_TIMESTAMP     = 1 << 6,

Nothing against this.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Best regards,
	Maxim Levitsky



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing
  2020-07-20 11:37 ` [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing Klaus Jensen
  2020-07-29 16:04   ` Minwoo Im
@ 2020-07-29 17:47   ` Maxim Levitsky
  2020-07-29 19:02     ` Klaus Jensen
  1 sibling, 1 reply; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 17:47 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Move clearing of the structure from "clear before use" to "clear
> after use".
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index e2932239c661..431f26c2f589 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -209,6 +209,11 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>      }
>  }
>  
> +static void nvme_req_clear(NvmeRequest *req)
> +{
> +    memset(&req->cqe, 0x0, sizeof(req->cqe));
> +}
> +
>  static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
>                                    size_t len)
>  {
> @@ -458,6 +463,7 @@ static void nvme_post_cqes(void *opaque)
>          nvme_inc_cq_tail(cq);
>          pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
>              sizeof(req->cqe));
> +        nvme_req_clear(req);

Don't we need some barrier here to avoid reordering the writes?
pci_dma_write does seem to include a barrier prior to the write it does
but not afterward.

Also what is the motivation of switching the order?
I think somewhat that it is a good thing to clear a buffer,
before it is setup.


>          QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
>      }
>      if (cq->tail != cq->head) {
> @@ -1601,7 +1607,6 @@ static void nvme_process_sq(void *opaque)
>          req = QTAILQ_FIRST(&sq->req_list);
>          QTAILQ_REMOVE(&sq->req_list, req, entry);
>          QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
> -        memset(&req->cqe, 0, sizeof(req->cqe));
>          req->cqe.cid = cmd.cid;
>  
>          status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :


Best regards,
	Maxim Levitsky



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest
  2020-07-20 11:37 ` [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest Klaus Jensen
  2020-07-29 16:06   ` Minwoo Im
@ 2020-07-29 17:53   ` Maxim Levitsky
  1 sibling, 0 replies; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 17:53 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Instead of passing around the NvmeNamespace, add it as a member in the
> NvmeRequest structure.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 21 ++++++++++-----------
>  hw/block/nvme.h |  1 +
>  2 files changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 431f26c2f589..54cd20f1ce22 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -211,6 +211,7 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>  
>  static void nvme_req_clear(NvmeRequest *req)
>  {
> +    req->ns = NULL;
>      memset(&req->cqe, 0x0, sizeof(req->cqe));
>  }
>  
> @@ -610,8 +611,7 @@ static void nvme_rw_cb(void *opaque, int ret)
>      nvme_enqueue_req_completion(cq, req);
>  }
>  
> -static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
> -    NvmeRequest *req)
> +static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
>      block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
>           BLOCK_ACCT_FLUSH);
> @@ -620,10 +620,10 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
> -    NvmeRequest *req)
> +static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
>      NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
> +    NvmeNamespace *ns = req->ns;
>      const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>      const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
>      uint64_t slba = le64_to_cpu(rw->slba);
> @@ -647,10 +647,10 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
> -    NvmeRequest *req)
> +static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
>      NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
> +    NvmeNamespace *ns = req->ns;
>      uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
>      uint64_t slba = le64_to_cpu(rw->slba);
>  
> @@ -706,7 +706,6 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
>  
>  static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  {
> -    NvmeNamespace *ns;
>      uint32_t nsid = le32_to_cpu(cmd->nsid);
>  
>      trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
> @@ -716,15 +715,15 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          return NVME_INVALID_NSID | NVME_DNR;
>      }
>  
> -    ns = &n->namespaces[nsid - 1];
> +    req->ns = &n->namespaces[nsid - 1];
>      switch (cmd->opcode) {
>      case NVME_CMD_FLUSH:
> -        return nvme_flush(n, ns, cmd, req);
> +        return nvme_flush(n, cmd, req);
>      case NVME_CMD_WRITE_ZEROES:
> -        return nvme_write_zeroes(n, ns, cmd, req);
> +        return nvme_write_zeroes(n, cmd, req);
>      case NVME_CMD_WRITE:
>      case NVME_CMD_READ:
> -        return nvme_rw(n, ns, cmd, req);
> +        return nvme_rw(n, cmd, req);
>      default:
>          trace_pci_nvme_err_invalid_opc(cmd->opcode);
>          return NVME_INVALID_OPCODE | NVME_DNR;
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 137cd8c2bf20..586fd3d62700 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -21,6 +21,7 @@ typedef struct NvmeAsyncEvent {
>  
>  typedef struct NvmeRequest {
>      struct NvmeSQueue       *sq;
> +    struct NvmeNamespace    *ns;
>      BlockAIOCB              *aiocb;
>      uint16_t                status;
>      NvmeCqe                 cqe;

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing
  2020-07-20 11:37 ` [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing Klaus Jensen
  2020-07-29 16:08   ` Minwoo Im
@ 2020-07-29 18:18   ` Maxim Levitsky
  2020-07-29 19:49     ` Klaus Jensen
  1 sibling, 1 reply; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 18:18 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Always destroy the request qsg/iov at the end of request use.
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 48 +++++++++++++++++-------------------------------
>  1 file changed, 17 insertions(+), 31 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 54cd20f1ce22..b53afdeb3fb6 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -213,6 +213,14 @@ static void nvme_req_clear(NvmeRequest *req)
>  {
>      req->ns = NULL;
>      memset(&req->cqe, 0x0, sizeof(req->cqe));
> +
> +    if (req->qsg.sg) {
> +        qemu_sglist_destroy(&req->qsg);
> +    }
> +
> +    if (req->iov.iov) {
> +        qemu_iovec_destroy(&req->iov);
> +    }
>  }
>  
>  static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> @@ -297,15 +305,14 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>  
>      status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
>      if (status) {
> -        goto unmap;
> +        return status;
>      }
>  
>      len -= trans_len;
>      if (len) {
>          if (unlikely(!prp2)) {
>              trace_pci_nvme_err_invalid_prp2_missing();
> -            status = NVME_INVALID_FIELD | NVME_DNR;
> -            goto unmap;
> +            return NVME_INVALID_FIELD | NVME_DNR;
>          }
>  
>          if (len > n->page_size) {
> @@ -326,13 +333,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>                  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>                      if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                          trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> -                        status = NVME_INVALID_FIELD | NVME_DNR;
> -                        goto unmap;
> +                        return NVME_INVALID_FIELD | NVME_DNR;
>                      }
>  
>                      if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
> -                        status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
> -                        goto unmap;
> +                        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
>                      }
>  
>                      i = 0;
> @@ -345,14 +350,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>  
>                  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                      trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> -                    status = NVME_INVALID_FIELD | NVME_DNR;
> -                    goto unmap;
> +                    return NVME_INVALID_FIELD | NVME_DNR;
>                  }
>  
>                  trans_len = MIN(len, n->page_size);
>                  status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
>                  if (status) {
> -                    goto unmap;
> +                    return status;
>                  }
>  
>                  len -= trans_len;
> @@ -361,27 +365,16 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
>          } else {
>              if (unlikely(prp2 & (n->page_size - 1))) {
>                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> -                status = NVME_INVALID_FIELD | NVME_DNR;
> -                goto unmap;
> +                return NVME_INVALID_FIELD | NVME_DNR;
>              }
>              status = nvme_map_addr(n, qsg, iov, prp2, len);
>              if (status) {
> -                goto unmap;
> +                return status;
>              }
>          }
>      }
> +
>      return NVME_SUCCESS;
> -
> -unmap:
> -    if (iov && iov->iov) {
> -        qemu_iovec_destroy(iov);
> -    }
> -
> -    if (qsg && qsg->sg) {
> -        qemu_sglist_destroy(qsg);
> -    }
> -
> -    return status;
>  }
>  
>  static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> @@ -601,13 +594,6 @@ static void nvme_rw_cb(void *opaque, int ret)
>          req->status = NVME_INTERNAL_DEV_ERROR;
>      }
>  
> -    if (req->qsg.nalloc) {
> -        qemu_sglist_destroy(&req->qsg);
> -    }
> -    if (req->iov.nalloc) {
> -        qemu_iovec_destroy(&req->iov);
> -    }
> -
>      nvme_enqueue_req_completion(cq, req);
>  }
>  

This and former patch I guess answer my own question about why to clear the request after its cqe got posted.

Looks reasonable.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-29 13:57   ` Maxim Levitsky
@ 2020-07-29 18:23     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 18:23 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 16:57, Maxim Levitsky wrote:
> On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> > use them in nvme_map_prp.
> > 
> > This fixes a bug where in the case of a CMB transfer, the device would
> > map to the buffer with a wrong length.
> > 
> > Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> >  hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
> >  hw/block/trace-events |   2 +
> >  2 files changed, 94 insertions(+), 17 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 4d7b730a62b6..9b1a080cdc70 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -109,6 +109,11 @@ static uint16_t nvme_sqid(NvmeRequest *req)
> >      return le16_to_cpu(req->sq->sqid);
> >  }
> >  
> > +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> > +{
> > +    return &n->cmbuf[addr - n->ctrl_mem.addr];
> I would add an assert here just in case we do out of bounds array access.

We never end up in nvme_addr_to_cmb without nvme_addr_is_cmb checking
the bounds. But an assert cant hurt if someone decides to use it in
error.

I'll add it!


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter
  2020-07-20 11:37 ` [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter Klaus Jensen
  2020-07-29 16:10   ` Minwoo Im
@ 2020-07-29 18:25   ` Maxim Levitsky
  2020-07-29 20:00     ` Klaus Jensen
  1 sibling, 1 reply; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 18:25 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Keep a copy of the raw nvme command in the NvmeRequest and remove the
> now redundant NvmeCmd parameter.

Shouldn't you clear the req->cmd in nvme_req_clear too for consistency?

Other than that looks OK, but I might have missed something.

Best regards,
	Maxim Levitsky
> 
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c | 177 +++++++++++++++++++++++++-----------------------
>  hw/block/nvme.h |   1 +
>  2 files changed, 93 insertions(+), 85 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index b53afdeb3fb6..0b3dceccc89b 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -425,9 +425,9 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
>      return status;
>  }
>  
> -static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
> -                         NvmeRequest *req)
> +static uint16_t nvme_map(NvmeCtrl *n, size_t len, NvmeRequest *req)
>  {
> +    NvmeCmd *cmd = &req->cmd;
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
> @@ -597,7 +597,7 @@ static void nvme_rw_cb(void *opaque, int ret)
>      nvme_enqueue_req_completion(cq, req);
>  }
>  
> -static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_flush(NvmeCtrl *n, NvmeRequest *req)
>  {
>      block_acct_start(blk_get_stats(n->conf.blk), &req->acct, 0,
>           BLOCK_ACCT_FLUSH);
> @@ -606,9 +606,9 @@ static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
> +    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
>      NvmeNamespace *ns = req->ns;
>      const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
>      const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
> @@ -633,9 +633,9 @@ static uint16_t nvme_write_zeroes(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
> +    NvmeRwCmd *rw = (NvmeRwCmd *)&req->cmd;
>      NvmeNamespace *ns = req->ns;
>      uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
>      uint64_t slba = le64_to_cpu(rw->slba);
> @@ -664,7 +664,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          return status;
>      }
>  
> -    if (nvme_map(n, cmd, data_size, req)) {
> +    if (nvme_map(n, data_size, req)) {
>          block_acct_invalid(blk_get_stats(n->conf.blk), acct);
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
> @@ -690,11 +690,12 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    uint32_t nsid = le32_to_cpu(cmd->nsid);
> +    uint32_t nsid = le32_to_cpu(req->cmd.nsid);
>  
> -    trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req), cmd->opcode);
> +    trace_pci_nvme_io_cmd(nvme_cid(req), nsid, nvme_sqid(req),
> +                          req->cmd.opcode);
>  
>      if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
>          trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces);
> @@ -702,16 +703,16 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      }
>  
>      req->ns = &n->namespaces[nsid - 1];
> -    switch (cmd->opcode) {
> +    switch (req->cmd.opcode) {
>      case NVME_CMD_FLUSH:
> -        return nvme_flush(n, cmd, req);
> +        return nvme_flush(n, req);
>      case NVME_CMD_WRITE_ZEROES:
> -        return nvme_write_zeroes(n, cmd, req);
> +        return nvme_write_zeroes(n, req);
>      case NVME_CMD_WRITE:
>      case NVME_CMD_READ:
> -        return nvme_rw(n, cmd, req);
> +        return nvme_rw(n, req);
>      default:
> -        trace_pci_nvme_err_invalid_opc(cmd->opcode);
> +        trace_pci_nvme_err_invalid_opc(req->cmd.opcode);
>          return NVME_INVALID_OPCODE | NVME_DNR;
>      }
>  }
> @@ -727,10 +728,10 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
>      }
>  }
>  
> -static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
> +static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
> -    NvmeRequest *req, *next;
> +    NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
> +    NvmeRequest *r, *next;
>      NvmeSQueue *sq;
>      NvmeCQueue *cq;
>      uint16_t qid = le16_to_cpu(c->qid);
> @@ -744,19 +745,19 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
>  
>      sq = n->sq[qid];
>      while (!QTAILQ_EMPTY(&sq->out_req_list)) {
> -        req = QTAILQ_FIRST(&sq->out_req_list);
> -        assert(req->aiocb);
> -        blk_aio_cancel(req->aiocb);
> +        r = QTAILQ_FIRST(&sq->out_req_list);
> +        assert(r->aiocb);
> +        blk_aio_cancel(r->aiocb);
>      }
>      if (!nvme_check_cqid(n, sq->cqid)) {
>          cq = n->cq[sq->cqid];
>          QTAILQ_REMOVE(&cq->sq_list, sq, entry);
>  
>          nvme_post_cqes(cq);
> -        QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
> -            if (req->sq == sq) {
> -                QTAILQ_REMOVE(&cq->req_list, req, entry);
> -                QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
> +        QTAILQ_FOREACH_SAFE(r, &cq->req_list, entry, next) {
> +            if (r->sq == sq) {
> +                QTAILQ_REMOVE(&cq->req_list, r, entry);
> +                QTAILQ_INSERT_TAIL(&sq->req_list, r, entry);
>              }
>          }
>      }
> @@ -793,10 +794,10 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
>      n->sq[sqid] = sq;
>  }
>  
> -static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
> +static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeRequest *req)
>  {
>      NvmeSQueue *sq;
> -    NvmeCreateSq *c = (NvmeCreateSq *)cmd;
> +    NvmeCreateSq *c = (NvmeCreateSq *)&req->cmd;
>  
>      uint16_t cqid = le16_to_cpu(c->cqid);
>      uint16_t sqid = le16_to_cpu(c->sqid);
> @@ -831,10 +832,10 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
>      return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
> -                                uint32_t buf_len, uint64_t off,
> -                                NvmeRequest *req)
> +static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
>  {
> +    NvmeCmd *cmd = &req->cmd;
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>      uint32_t nsid = le32_to_cpu(cmd->nsid);
> @@ -889,10 +890,11 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
>                          DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> -                                 uint64_t off, NvmeRequest *req)
> +static uint16_t nvme_fw_log_info(NvmeCtrl *n, uint32_t buf_len, uint64_t off,
> +                                 NvmeRequest *req)
>  {
>      uint32_t trans_len;
> +    NvmeCmd *cmd = &req->cmd;
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>      NvmeFwSlotInfoLog fw_log = {
> @@ -911,11 +913,11 @@ static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
>                          DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
> -                                uint32_t buf_len, uint64_t off,
> -                                NvmeRequest *req)
> +static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
> +                                uint64_t off, NvmeRequest *req)
>  {
>      uint32_t trans_len;
> +    NvmeCmd *cmd = &req->cmd;
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>      NvmeErrorLog errlog;
> @@ -936,8 +938,10 @@ static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
>                          DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeCmd *cmd = &req->cmd;
> +
>      uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>      uint32_t dw12 = le32_to_cpu(cmd->cdw12);
> @@ -972,11 +976,11 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  
>      switch (lid) {
>      case NVME_LOG_ERROR_INFO:
> -        return nvme_error_info(n, cmd, rae, len, off, req);
> +        return nvme_error_info(n, rae, len, off, req);
>      case NVME_LOG_SMART_INFO:
> -        return nvme_smart_info(n, cmd, rae, len, off, req);
> +        return nvme_smart_info(n, rae, len, off, req);
>      case NVME_LOG_FW_SLOT_INFO:
> -        return nvme_fw_log_info(n, cmd, len, off, req);
> +        return nvme_fw_log_info(n, len, off, req);
>      default:
>          trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
>          return NVME_INVALID_FIELD | NVME_DNR;
> @@ -994,9 +998,9 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
>      }
>  }
>  
> -static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
> +static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
> +    NvmeDeleteQ *c = (NvmeDeleteQ *)&req->cmd;
>      NvmeCQueue *cq;
>      uint16_t qid = le16_to_cpu(c->qid);
>  
> @@ -1037,10 +1041,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
>      cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
>  }
>  
> -static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
> +static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req)
>  {
>      NvmeCQueue *cq;
> -    NvmeCreateCq *c = (NvmeCreateCq *)cmd;
> +    NvmeCreateCq *c = (NvmeCreateCq *)&req->cmd;
>      uint16_t cqid = le16_to_cpu(c->cqid);
>      uint16_t vector = le16_to_cpu(c->irq_vector);
>      uint16_t qsize = le16_to_cpu(c->qsize);
> @@ -1088,9 +1092,9 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
>      return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
> -                                   NvmeRequest *req)
> +static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
>      uint64_t prp1 = le64_to_cpu(c->prp1);
>      uint64_t prp2 = le64_to_cpu(c->prp2);
>  
> @@ -1100,10 +1104,10 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c,
>                          prp2, DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
> -                                 NvmeRequest *req)
> +static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req)
>  {
>      NvmeNamespace *ns;
> +    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
>      uint32_t nsid = le32_to_cpu(c->nsid);
>      uint64_t prp1 = le64_to_cpu(c->prp1);
>      uint64_t prp2 = le64_to_cpu(c->prp2);
> @@ -1121,9 +1125,9 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c,
>                          prp2, DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c,
> -                                     NvmeRequest *req)
> +static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
>      static const int data_len = NVME_IDENTIFY_DATA_SIZE;
>      uint32_t min_nsid = le32_to_cpu(c->nsid);
>      uint64_t prp1 = le64_to_cpu(c->prp1);
> @@ -1160,9 +1164,9 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c,
>      return ret;
>  }
>  
> -static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c,
> -                                            NvmeRequest *req)
> +static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
>      uint32_t nsid = le32_to_cpu(c->nsid);
>      uint64_t prp1 = le64_to_cpu(c->prp1);
>      uint64_t prp2 = le64_to_cpu(c->prp2);
> @@ -1201,28 +1205,28 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeIdentify *c,
>                          DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    NvmeIdentify *c = (NvmeIdentify *)cmd;
> +    NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
>  
>      switch (le32_to_cpu(c->cns)) {
>      case NVME_ID_CNS_NS:
> -        return nvme_identify_ns(n, c, req);
> +        return nvme_identify_ns(n, req);
>      case NVME_ID_CNS_CTRL:
> -        return nvme_identify_ctrl(n, c, req);
> +        return nvme_identify_ctrl(n, req);
>      case NVME_ID_CNS_NS_ACTIVE_LIST:
> -        return nvme_identify_nslist(n, c, req);
> +        return nvme_identify_nslist(n, req);
>      case NVME_ID_CNS_NS_DESCR_LIST:
> -        return nvme_identify_ns_descr_list(n, c, req);
> +        return nvme_identify_ns_descr_list(n, req);
>      default:
>          trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
>          return NVME_INVALID_FIELD | NVME_DNR;
>      }
>  }
>  
> -static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_abort(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff;
> +    uint16_t sqid = le32_to_cpu(req->cmd.cdw10) & 0xffff;
>  
>      req->cqe.result = 1;
>      if (nvme_check_sqid(n, sqid)) {
> @@ -1272,9 +1276,9 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n)
>      return cpu_to_le64(ts.all);
>  }
>  
> -static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
> -                                           NvmeRequest *req)
> +static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeCmd *cmd = &req->cmd;
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
> @@ -1284,8 +1288,9 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
>                          prp2, DMA_DIRECTION_FROM_DEVICE, req);
>  }
>  
> -static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeCmd *cmd = &req->cmd;
>      uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>      uint32_t nsid = le32_to_cpu(cmd->nsid);
> @@ -1359,7 +1364,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          result = n->features.async_config;
>          goto out;
>      case NVME_TIMESTAMP:
> -        return nvme_get_feature_timestamp(n, cmd, req);
> +        return nvme_get_feature_timestamp(n, req);
>      default:
>          break;
>      }
> @@ -1405,11 +1410,11 @@ out:
>      return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
> -                                           NvmeRequest *req)
> +static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeRequest *req)
>  {
>      uint16_t ret;
>      uint64_t timestamp;
> +    NvmeCmd *cmd = &req->cmd;
>      uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
>  
> @@ -1424,8 +1429,9 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
>      return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
>  {
> +    NvmeCmd *cmd = &req->cmd;
>      uint32_t dw10 = le32_to_cpu(cmd->cdw10);
>      uint32_t dw11 = le32_to_cpu(cmd->cdw11);
>      uint32_t nsid = le32_to_cpu(cmd->nsid);
> @@ -1516,14 +1522,14 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          n->features.async_config = dw11;
>          break;
>      case NVME_TIMESTAMP:
> -        return nvme_set_feature_timestamp(n, cmd, req);
> +        return nvme_set_feature_timestamp(n, req);
>      default:
>          return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
>      }
>      return NVME_SUCCESS;
>  }
>  
> -static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_aer(NvmeCtrl *n, NvmeRequest *req)
>  {
>      trace_pci_nvme_aer(nvme_cid(req));
>  
> @@ -1542,33 +1548,33 @@ static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>      return NVME_NO_COMPLETE;
>  }
>  
> -static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
> +static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
>  {
> -    trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), cmd->opcode);
> +    trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode);
>  
> -    switch (cmd->opcode) {
> +    switch (req->cmd.opcode) {
>      case NVME_ADM_CMD_DELETE_SQ:
> -        return nvme_del_sq(n, cmd);
> +        return nvme_del_sq(n, req);
>      case NVME_ADM_CMD_CREATE_SQ:
> -        return nvme_create_sq(n, cmd);
> +        return nvme_create_sq(n, req);
>      case NVME_ADM_CMD_GET_LOG_PAGE:
> -        return nvme_get_log(n, cmd, req);
> +        return nvme_get_log(n, req);
>      case NVME_ADM_CMD_DELETE_CQ:
> -        return nvme_del_cq(n, cmd);
> +        return nvme_del_cq(n, req);
>      case NVME_ADM_CMD_CREATE_CQ:
> -        return nvme_create_cq(n, cmd);
> +        return nvme_create_cq(n, req);
>      case NVME_ADM_CMD_IDENTIFY:
> -        return nvme_identify(n, cmd, req);
> +        return nvme_identify(n, req);
>      case NVME_ADM_CMD_ABORT:
> -        return nvme_abort(n, cmd, req);
> +        return nvme_abort(n, req);
>      case NVME_ADM_CMD_SET_FEATURES:
> -        return nvme_set_feature(n, cmd, req);
> +        return nvme_set_feature(n, req);
>      case NVME_ADM_CMD_GET_FEATURES:
> -        return nvme_get_feature(n, cmd, req);
> +        return nvme_get_feature(n, req);
>      case NVME_ADM_CMD_ASYNC_EV_REQ:
> -        return nvme_aer(n, cmd, req);
> +        return nvme_aer(n, req);
>      default:
> -        trace_pci_nvme_err_invalid_admin_opc(cmd->opcode);
> +        trace_pci_nvme_err_invalid_admin_opc(req->cmd.opcode);
>          return NVME_INVALID_OPCODE | NVME_DNR;
>      }
>  }
> @@ -1593,9 +1599,10 @@ static void nvme_process_sq(void *opaque)
>          QTAILQ_REMOVE(&sq->req_list, req, entry);
>          QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
>          req->cqe.cid = cmd.cid;
> +        memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
>  
> -        status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
> -            nvme_admin_cmd(n, &cmd, req);
> +        status = sq->sqid ? nvme_io_cmd(n, req) :
> +            nvme_admin_cmd(n, req);
>          if (status != NVME_NO_COMPLETE) {
>              req->status = status;
>              nvme_enqueue_req_completion(cq, req);
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 586fd3d62700..52ba794f2e9a 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -25,6 +25,7 @@ typedef struct NvmeRequest {
>      BlockAIOCB              *aiocb;
>      uint16_t                status;
>      NvmeCqe                 cqe;
> +    NvmeCmd                 cmd;
>      BlockAcctCookie         acct;
>      QEMUSGList              qsg;
>      QEMUIOVector            iov;




^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member
  2020-07-29 15:29   ` Minwoo Im
@ 2020-07-29 18:29     ` Klaus Jensen
       [not found]     ` <CGME20200729182946epcas2p1bef465a70c1a815654a07814aa379dc3@epcms2p5>
  1 sibling, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 18:29 UTC (permalink / raw)
  To: Minwoo Im
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 30 00:29, Minwoo Im wrote:
> Klaus,
> 

Hi Minwoo,

Thanks for the reviews and welcome to the party! :)

> On 20-07-20 13:37:36, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Remove the has_sg member from NvmeRequest since it's redundant.
> > 
> > Also, make sure the request iov is destroyed at completion time.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  hw/block/nvme.c | 11 ++++++-----
> >  hw/block/nvme.h |  1 -
> >  2 files changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index cb236d1c8c46..6a1a1626b87b 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -548,16 +548,20 @@ static void nvme_rw_cb(void *opaque, int ret)
> >          block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
> >          req->status = NVME_INTERNAL_DEV_ERROR;
> >      }
> > -    if (req->has_sg) {
> > +
> > +    if (req->qsg.nalloc) {
> 
> Personally, I prefer has_xxx or is_xxx to check whether the request is
> based on sg or iov as an inline function, but 'nalloc' is also fine to
> figure out the meaning of purpose here.
> 

What I really want to do is get rid of this duality with qsg and iovs at
some point. I kinda wanna get rid of the dma helpers and the qsg
entirely and do the DMA handling directly.

Maybe an `int flags` member in NvmeRequest would be better for this,
such as NVME_REQ_DMA etc.

> >          qemu_sglist_destroy(&req->qsg);
> >      }
> > +    if (req->iov.nalloc) {
> > +        qemu_iovec_destroy(&req->iov);
> > +    }
> > +
> 
> Maybe this can be in a separated commit?
> 

Yeah. I guess whenever a commit message includes "Also, ..." you really
should factor the change out ;)

I'll split it.

> Otherwise, It looks good to me.
> 
> Thanks,
> 

Does that mean I can add your R-b? :)


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 07/16] hw/block/nvme: add request mapping helper
  2020-07-29 15:52   ` Minwoo Im
@ 2020-07-29 18:31     ` Maxim Levitsky
  2020-07-29 19:22       ` Klaus Jensen
  0 siblings, 1 reply; 58+ messages in thread
From: Maxim Levitsky @ 2020-07-29 18:31 UTC (permalink / raw)
  To: Minwoo Im, Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Thu, 2020-07-30 at 00:52 +0900, Minwoo Im wrote:
> Klaus,
> 
> On 20-07-20 13:37:39, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Introduce the nvme_map helper to remove some noise in the main nvme_rw
> > function.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  hw/block/nvme.c | 13 ++++++++++---
> >  1 file changed, 10 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index f1e04608804b..68c33a11c144 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -413,6 +413,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> >      return status;
> >  }
> >  
> > +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
> > +                         NvmeRequest *req)
> 
> Can we specify what is going to be mapped in this function? like
> nvme_map_dptr?
I also once complained about the name, and I do like this idea!

Best regards,
	Maxim Levitsky

> 
> Thanks,
> 




^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 05/16] hw/block/nvme: refactor dma read/write
  2020-07-29 17:35   ` Maxim Levitsky
@ 2020-07-29 18:38     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 18:38 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 20:35, Maxim Levitsky wrote:
> On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Refactor the nvme_dma_{read,write}_prp functions into a common function
> > taking a DMADirection parameter.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  hw/block/nvme.c | 88 ++++++++++++++++++++++++-------------------------
> >  1 file changed, 43 insertions(+), 45 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 6a1a1626b87b..d314a604db81 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -361,55 +361,50 @@ unmap:
> >      return status;
> >  }
> >  
> > -static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> > -                                   uint64_t prp1, uint64_t prp2)
> > +static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> > +                             uint64_t prp1, uint64_t prp2, DMADirection dir)
> >  {
> >      QEMUSGList qsg;
> >      QEMUIOVector iov;
> >      uint16_t status = NVME_SUCCESS;
> >  
> > -    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
> > -        return NVME_INVALID_FIELD | NVME_DNR;
> > +    status = nvme_map_prp(&qsg, &iov, prp1, prp2, len, n);
> > +    if (status) {
> > +        return status;
> >      }
> > +
> >      if (qsg.nsg > 0) {
> > -        if (dma_buf_write(ptr, len, &qsg)) {
> > -            status = NVME_INVALID_FIELD | NVME_DNR;
> > +        uint64_t residual;
> > +
> > +        if (dir == DMA_DIRECTION_TO_DEVICE) {
> > +            residual = dma_buf_write(ptr, len, &qsg);
> > +        } else {
> > +            residual = dma_buf_read(ptr, len, &qsg);
> >          }
> > -        qemu_sglist_destroy(&qsg);
> > -    } else {
> > -        if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
> > -            status = NVME_INVALID_FIELD | NVME_DNR;
> > -        }
> > -        qemu_iovec_destroy(&iov);
> > -    }
> > -    return status;
> > -}
> >  
> > -static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> > -    uint64_t prp1, uint64_t prp2)
> > -{
> > -    QEMUSGList qsg;
> > -    QEMUIOVector iov;
> > -    uint16_t status = NVME_SUCCESS;
> > -
> > -    trace_pci_nvme_dma_read(prp1, prp2);
> > -
> > -    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
> > -        return NVME_INVALID_FIELD | NVME_DNR;
> > -    }
> > -    if (qsg.nsg > 0) {
> > -        if (unlikely(dma_buf_read(ptr, len, &qsg))) {
> > +        if (unlikely(residual)) {
> >              trace_pci_nvme_err_invalid_dma();
> >              status = NVME_INVALID_FIELD | NVME_DNR;
> >          }
> > +
> >          qemu_sglist_destroy(&qsg);
> >      } else {
> > -        if (unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) {
> > +        size_t bytes;
> > +
> > +        if (dir == DMA_DIRECTION_TO_DEVICE) {
> > +            bytes = qemu_iovec_to_buf(&iov, 0, ptr, len);
> > +        } else {
> > +            bytes = qemu_iovec_from_buf(&iov, 0, ptr, len);
> > +        }
> > +
> > +        if (unlikely(bytes != len)) {
> >              trace_pci_nvme_err_invalid_dma();
> >              status = NVME_INVALID_FIELD | NVME_DNR;
> >          }
> > +
> >          qemu_iovec_destroy(&iov);
> >      }
> > +
> I know I reviewed this, but thinking now, why not to add an assert here
> that we don't have both iov and qsg with data.
> 

Good point. I added it after the nvme_map_prp call.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing
  2020-07-29 17:47   ` Maxim Levitsky
@ 2020-07-29 19:02     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:02 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 20:47, Maxim Levitsky wrote:
> On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Move clearing of the structure from "clear before use" to "clear
> > after use".
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> >  hw/block/nvme.c | 7 ++++++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index e2932239c661..431f26c2f589 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -209,6 +209,11 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
> >      }
> >  }
> >  
> > +static void nvme_req_clear(NvmeRequest *req)
> > +{
> > +    memset(&req->cqe, 0x0, sizeof(req->cqe));
> > +}
> > +
> >  static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> >                                    size_t len)
> >  {
> > @@ -458,6 +463,7 @@ static void nvme_post_cqes(void *opaque)
> >          nvme_inc_cq_tail(cq);
> >          pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
> >              sizeof(req->cqe));
> > +        nvme_req_clear(req);
> 
> Don't we need some barrier here to avoid reordering the writes?
> pci_dma_write does seem to include a barrier prior to the write it does
> but not afterward.
> 


> Also what is the motivation of switching the order?

This was just preference. But I did not consider that I would be
breaking any DMA rules here.

> I think somewhat that it is a good thing to clear a buffer,
> before it is setup.
> 

I'll reverse my preference and keep the status quo since I have no
better motivation than personal preference.

The introduction of nvme_req_clear is just in preparation for
consolidating more cleanup here, but I'll drop this patch and introduce
nvme_req_clear later.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 07/16] hw/block/nvme: add request mapping helper
  2020-07-29 18:31     ` Maxim Levitsky
@ 2020-07-29 19:22       ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:22 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz,
	Minwoo Im, Keith Busch

On Jul 29 21:31, Maxim Levitsky wrote:
> On Thu, 2020-07-30 at 00:52 +0900, Minwoo Im wrote:
> > Klaus,
> > 
> > On 20-07-20 13:37:39, Klaus Jensen wrote:
> > > From: Klaus Jensen <k.jensen@samsung.com>
> > > 
> > > Introduce the nvme_map helper to remove some noise in the main nvme_rw
> > > function.
> > > 
> > > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > >  hw/block/nvme.c | 13 ++++++++++---
> > >  1 file changed, 10 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index f1e04608804b..68c33a11c144 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -413,6 +413,15 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> > >      return status;
> > >  }
> > >  
> > > +static uint16_t nvme_map(NvmeCtrl *n, NvmeCmd *cmd, size_t len,
> > > +                         NvmeRequest *req)
> > 
> > Can we specify what is going to be mapped in this function? like
> > nvme_map_dptr?
> I also once complained about the name, and I do like this idea!
> 

Hehe. I will change it ;)

Note that when I post support for metadata, it will have to change
again! Because then the function will be mapping both DPTR and MPTR.
But lets discuss naming when we get to that ;)


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 10/16] hw/block/nvme: add check for mdts
  2020-07-29 16:00   ` Minwoo Im
@ 2020-07-29 19:30     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:30 UTC (permalink / raw)
  To: Minwoo Im
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 30 01:00, Minwoo Im wrote:
> On 20-07-20 13:37:42, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Add 'mdts' device parameter to control the Maximum Data Transfer Size of
> > the controller and check that it is respected.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> > ---
> >  hw/block/nvme.c       | 32 ++++++++++++++++++++++++++++++--
> >  hw/block/nvme.h       |  1 +
> >  hw/block/trace-events |  1 +
> >  3 files changed, 32 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 35bc1a7b7e21..10fe53873ae9 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -18,9 +18,10 @@
> >   * Usage: add options:
> >   *      -drive file=<file>,if=none,id=<drive_id>
> >   *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
> > - *              cmb_size_mb=<cmb_size_mb[optional]>, \
> > + *              [cmb_size_mb=<cmb_size_mb>,] \
> >   *              [pmrdev=<mem_backend_file_id>,] \
> > - *              max_ioqpairs=<N[optional]>
> > + *              [max_ioqpairs=<N>,] \
> > + *              [mdts=<N>]
> 
> Nitpick:
>   cmb and ioqpairs-things could be in another thread. :)
> 

So, with that I wanted to align the way optional parameters was
described. And I actually messed it up anyway. I'll remove the "fixes"
and just keep the addition of mdts there. 


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter
  2020-07-29 16:10   ` Minwoo Im
@ 2020-07-29 19:44     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:44 UTC (permalink / raw)
  To: Minwoo Im
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 30 01:10, Minwoo Im wrote:
> On 20-07-20 13:37:47, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Keep a copy of the raw nvme command in the NvmeRequest and remove the
> > now redundant NvmeCmd parameter.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> 
> I would really have suggested this change from 13th patch!
> 
> Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>
> 

I squashed the two patches. If don't think it makes the match much
harder to review since the added namespace reference is a pretty small
change.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing
  2020-07-29 18:18   ` Maxim Levitsky
@ 2020-07-29 19:49     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:49 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 21:18, Maxim Levitsky wrote:
> On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Always destroy the request qsg/iov at the end of request use.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> >  hw/block/nvme.c | 48 +++++++++++++++++-------------------------------
> >  1 file changed, 17 insertions(+), 31 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 54cd20f1ce22..b53afdeb3fb6 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -213,6 +213,14 @@ static void nvme_req_clear(NvmeRequest *req)
> >  {
> >      req->ns = NULL;
> >      memset(&req->cqe, 0x0, sizeof(req->cqe));
> > +
> > +    if (req->qsg.sg) {
> > +        qemu_sglist_destroy(&req->qsg);
> > +    }
> > +
> > +    if (req->iov.iov) {
> > +        qemu_iovec_destroy(&req->iov);
> > +    }
> >  }
> >  
> >  static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> > @@ -297,15 +305,14 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> >  
> >      status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
> >      if (status) {
> > -        goto unmap;
> > +        return status;
> >      }
> >  
> >      len -= trans_len;
> >      if (len) {
> >          if (unlikely(!prp2)) {
> >              trace_pci_nvme_err_invalid_prp2_missing();
> > -            status = NVME_INVALID_FIELD | NVME_DNR;
> > -            goto unmap;
> > +            return NVME_INVALID_FIELD | NVME_DNR;
> >          }
> >  
> >          if (len > n->page_size) {
> > @@ -326,13 +333,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> >                  if (i == n->max_prp_ents - 1 && len > n->page_size) {
> >                      if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
> >                          trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> > -                        status = NVME_INVALID_FIELD | NVME_DNR;
> > -                        goto unmap;
> > +                        return NVME_INVALID_FIELD | NVME_DNR;
> >                      }
> >  
> >                      if (prp_list_in_cmb != nvme_addr_is_cmb(n, prp_ent)) {
> > -                        status = NVME_INVALID_USE_OF_CMB | NVME_DNR;
> > -                        goto unmap;
> > +                        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
> >                      }
> >  
> >                      i = 0;
> > @@ -345,14 +350,13 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> >  
> >                  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
> >                      trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> > -                    status = NVME_INVALID_FIELD | NVME_DNR;
> > -                    goto unmap;
> > +                    return NVME_INVALID_FIELD | NVME_DNR;
> >                  }
> >  
> >                  trans_len = MIN(len, n->page_size);
> >                  status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
> >                  if (status) {
> > -                    goto unmap;
> > +                    return status;
> >                  }
> >  
> >                  len -= trans_len;
> > @@ -361,27 +365,16 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> >          } else {
> >              if (unlikely(prp2 & (n->page_size - 1))) {
> >                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> > -                status = NVME_INVALID_FIELD | NVME_DNR;
> > -                goto unmap;
> > +                return NVME_INVALID_FIELD | NVME_DNR;
> >              }
> >              status = nvme_map_addr(n, qsg, iov, prp2, len);
> >              if (status) {
> > -                goto unmap;
> > +                return status;
> >              }
> >          }
> >      }
> > +
> >      return NVME_SUCCESS;
> > -
> > -unmap:
> > -    if (iov && iov->iov) {
> > -        qemu_iovec_destroy(iov);
> > -    }
> > -
> > -    if (qsg && qsg->sg) {
> > -        qemu_sglist_destroy(qsg);
> > -    }
> > -
> > -    return status;
> >  }
> >  
> >  static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> > @@ -601,13 +594,6 @@ static void nvme_rw_cb(void *opaque, int ret)
> >          req->status = NVME_INTERNAL_DEV_ERROR;
> >      }
> >  
> > -    if (req->qsg.nalloc) {
> > -        qemu_sglist_destroy(&req->qsg);
> > -    }
> > -    if (req->iov.nalloc) {
> > -        qemu_iovec_destroy(&req->iov);
> > -    }
> > -
> >      nvme_enqueue_req_completion(cq, req);
> >  }
> >  
> 
> This and former patch I guess answer my own question about why to clear the request after its cqe got posted.
> 
> Looks reasonable.
> 

I ended up with a compromise. I keep clearing as a "before-use" job, but
we don't want to keep the qsg and iovs hanging around until the request
gets reused, so I'm adding a nvme_req_exit() to free that memory when
the cqe has been posted.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 06/16] hw/block/nvme: pass request along for tracing
  2020-07-29 15:49   ` Minwoo Im
@ 2020-07-29 19:49     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:49 UTC (permalink / raw)
  To: Minwoo Im
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz,
	Keith Busch, Maxim Levitsky

On Jul 30 00:49, Minwoo Im wrote:
> Klaus,
> 
> On 20-07-20 13:37:38, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Pass along the NvmeRequest in various functions since it is very useful
> > for tracing.
> 
> One doubt here.
>   This patch has put NvmeRequest argument to the nvme_map_prp() to trace
>   the request's command id.  But can we just trace the cid before this
>   kind of prp mapping, somewhere like nvme_process_sq() level.  Then we
>   can figure out the tracing for the prp mapping is from which request.
> 
> Tracing for cid is definitely great, but feels like too much cost to
> pass argument to trace 'cid' in the middle of the dma mapping stage.
> 

Good point Minwoo.

I ended up dropping the patch and just replacing it with a patch that
adds tracing to nvme_map_prp.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp
  2020-07-29 16:15   ` Minwoo Im
@ 2020-07-29 19:57     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 19:57 UTC (permalink / raw)
  To: Minwoo Im
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 30 01:15, Minwoo Im wrote:
> On 20-07-20 13:37:48, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Since clean up of the request qsg/iov is now always done post-use, there
> > is no need to use a stack-allocated qsg/iov in nvme_dma_prp.
> > 
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > Acked-by: Keith Busch <kbusch@kernel.org>
> > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> 
> > ---
> >  hw/block/nvme.c | 18 ++++++------------
> >  1 file changed, 6 insertions(+), 12 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 0b3dceccc89b..b6da5a9f3fc6 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -381,45 +381,39 @@ static uint16_t nvme_dma_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> >                               uint64_t prp1, uint64_t prp2, DMADirection dir,
> >                               NvmeRequest *req)
> >  {
> > -    QEMUSGList qsg;
> > -    QEMUIOVector iov;
> >      uint16_t status = NVME_SUCCESS;
> >  
> > -    status = nvme_map_prp(n, &qsg, &iov, prp1, prp2, len, req);
> > +    status = nvme_map_prp(n, &req->qsg, &req->iov, prp1, prp2, len, req);
> 
> After this change, can we make nvme_map_prp() just receive
> NvmeRequest *req without &req->qsg, &req->iov by retrieve them from
> inside of the nvme_map_prp()?

Absolutely. I added a follow up patch to do this :)


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter
  2020-07-29 18:25   ` Maxim Levitsky
@ 2020-07-29 20:00     ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 20:00 UTC (permalink / raw)
  To: Maxim Levitsky
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 21:25, Maxim Levitsky wrote:
> On Mon, 2020-07-20 at 13:37 +0200, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Keep a copy of the raw nvme command in the NvmeRequest and remove the
> > now redundant NvmeCmd parameter.
> 
> Shouldn't you clear the req->cmd in nvme_req_clear too for consistency?

It always gets unconditionally overwritten with a memcpy in
nvme_process_sq, so we are not leaving anything dangling (like we would
do with the namespace reference because it's usually not initialized for
Admin commands)


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-20 11:37 ` [PATCH 02/16] hw/block/nvme: add mapping helpers Klaus Jensen
  2020-07-29 13:57   ` Maxim Levitsky
  2020-07-29 15:19   ` Minwoo Im
@ 2020-07-29 20:40   ` Andrzej Jakowski
  2020-07-29 21:24     ` Klaus Jensen
  2 siblings, 1 reply; 58+ messages in thread
From: Andrzej Jakowski @ 2020-07-29 20:40 UTC (permalink / raw)
  To: Klaus Jensen, qemu-devel
  Cc: Kevin Wolf, Klaus Jensen, Keith Busch, qemu-block, Max Reitz

On 7/20/20 4:37 AM, Klaus Jensen wrote:
> From: Klaus Jensen <k.jensen@samsung.com>
> 
> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> use them in nvme_map_prp.
> 
> This fixes a bug where in the case of a CMB transfer, the device would
> map to the buffer with a wrong length.
> 
> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> ---
>  hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
>  hw/block/trace-events |   2 +
>  2 files changed, 94 insertions(+), 17 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 4d7b730a62b6..9b1a080cdc70 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -109,6 +109,11 @@ static uint16_t nvme_sqid(NvmeRequest *req)
>      return le16_to_cpu(req->sq->sqid);
>  }
>  
> +static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
> +{
> +    return &n->cmbuf[addr - n->ctrl_mem.addr];
> +}
> +
>  static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  {
>      hwaddr low = n->ctrl_mem.addr;
> @@ -120,7 +125,7 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr)
>  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
>  {
>      if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
> -        memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size);
> +        memcpy(buf, nvme_addr_to_cmb(n, addr), size);
>          return;
>      }
>  
> @@ -203,29 +208,91 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>      }
>  }
>  
> +static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
> +                                  size_t len)
> +{
> +    if (!len) {
> +        return NVME_SUCCESS;
> +    }
> +
> +    trace_pci_nvme_map_addr_cmb(addr, len);
> +
> +    if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
> +        return NVME_DATA_TRAS_ERROR;
> +    }
> +
> +    qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
> +
> +    return NVME_SUCCESS;
> +}
> +
> +static uint16_t nvme_map_addr(NvmeCtrl *n, QEMUSGList *qsg, QEMUIOVector *iov,
> +                              hwaddr addr, size_t len)
> +{
> +    if (!len) {
> +        return NVME_SUCCESS;
> +    }
> +
> +    trace_pci_nvme_map_addr(addr, len);
> +
> +    if (nvme_addr_is_cmb(n, addr)) {
> +        if (qsg && qsg->sg) {
> +            return NVME_INVALID_USE_OF_CMB | NVME_DNR;
> +        }
> +
> +        assert(iov);
> +
> +        if (!iov->iov) {
> +            qemu_iovec_init(iov, 1);
> +        }
> +
> +        return nvme_map_addr_cmb(n, iov, addr, len);
> +    }
> +
> +    if (iov && iov->iov) {
> +        return NVME_INVALID_USE_OF_CMB | NVME_DNR;
> +    }
> +
> +    assert(qsg);
> +
> +    if (!qsg->sg) {
> +        pci_dma_sglist_init(qsg, &n->parent_obj, 1);
> +    }
> +
> +    qemu_sglist_add(qsg, addr, len);
> +
> +    return NVME_SUCCESS;
> +}
> +
>  static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>                               uint64_t prp2, uint32_t len, NvmeCtrl *n)
>  {
>      hwaddr trans_len = n->page_size - (prp1 % n->page_size);
>      trans_len = MIN(len, trans_len);
>      int num_prps = (len >> n->page_bits) + 1;
> +    uint16_t status;
>  
>      if (unlikely(!prp1)) {
>          trace_pci_nvme_err_invalid_prp();
>          return NVME_INVALID_FIELD | NVME_DNR;
> -    } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr &&
> -               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> -        qsg->nsg = 0;
> +    }
> +
> +    if (nvme_addr_is_cmb(n, prp1)) {
>          qemu_iovec_init(iov, num_prps);
> -        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
>      } else {
>          pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
> -        qemu_sglist_add(qsg, prp1, trans_len);
>      }
> +
> +    status = nvme_map_addr(n, qsg, iov, prp1, trans_len);
> +    if (status) {
> +        goto unmap;
> +    }
> +
>      len -= trans_len;
>      if (len) {
>          if (unlikely(!prp2)) {
>              trace_pci_nvme_err_invalid_prp2_missing();
> +            status = NVME_INVALID_FIELD | NVME_DNR;
>              goto unmap;
>          }
>          if (len > n->page_size) {
> @@ -242,6 +309,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>                  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>                      if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                          trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +                        status = NVME_INVALID_FIELD | NVME_DNR;
>                          goto unmap;
>                      }
>  
> @@ -255,14 +323,14 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>  
>                  if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>                      trace_pci_nvme_err_invalid_prplist_ent(prp_ent);
> +                    status = NVME_INVALID_FIELD | NVME_DNR;
>                      goto unmap;
>                  }
>  
>                  trans_len = MIN(len, n->page_size);
> -                if (qsg->nsg){
> -                    qemu_sglist_add(qsg, prp_ent, trans_len);
> -                } else {
> -                    qemu_iovec_add(iov, (void *)&n->cmbuf[prp_ent - n->ctrl_mem.addr], trans_len);
> +                status = nvme_map_addr(n, qsg, iov, prp_ent, trans_len);
> +                if (status) {
> +                    goto unmap;
>                  }
>                  len -= trans_len;
>                  i++;
> @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>          } else {
>              if (unlikely(prp2 & (n->page_size - 1))) {
>                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> +                status = NVME_INVALID_FIELD | NVME_DNR;
>                  goto unmap;
>              }
> -            if (qsg->nsg) {
> -                qemu_sglist_add(qsg, prp2, len);
> -            } else {
> -                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
> +            status = nvme_map_addr(n, qsg, iov, prp2, len);
> +            if (status) {
> +                goto unmap;
>              }
>          }
>      }
>      return NVME_SUCCESS;
>  
> - unmap:
> -    qemu_sglist_destroy(qsg);
> -    return NVME_INVALID_FIELD | NVME_DNR;
> +unmap:
> +    if (iov && iov->iov) {
> +        qemu_iovec_destroy(iov);
> +    }
> +
> +    if (qsg && qsg->sg) {
> +        qemu_sglist_destroy(qsg);
> +    }
> +
> +    return status;

I think it would make sense to move whole unmap block to a separate function.
That function could be called from here and after completing IO and would contain
unified deinitialization block - so no code repetitions would be necessary.
Other than that it looks good to me. Thx!

Reviewed-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>

>  }
>  
>  static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 7b7303cab1dd..f3b2d004e078 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -33,6 +33,8 @@ pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
>  pci_nvme_irq_pin(void) "pulsing IRQ pin"
>  pci_nvme_irq_masked(void) "IRQ is masked"
>  pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64""
> +pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
> +pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
>  pci_nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
>  pci_nvme_admin_cmd(uint16_t cid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" sqid %"PRIu16" opc 0x%"PRIx8""
>  pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
> 



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-29 20:40   ` Andrzej Jakowski
@ 2020-07-29 21:24     ` Klaus Jensen
  2020-07-29 21:51       ` Andrzej Jakowski
  0 siblings, 1 reply; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 21:24 UTC (permalink / raw)
  To: Andrzej Jakowski
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 13:40, Andrzej Jakowski wrote:
> On 7/20/20 4:37 AM, Klaus Jensen wrote:
> > From: Klaus Jensen <k.jensen@samsung.com>
> > 
> > Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> > use them in nvme_map_prp.
> > 
> > This fixes a bug where in the case of a CMB transfer, the device would
> > map to the buffer with a wrong length.
> > 
> > Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > ---
> >  hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
> >  hw/block/trace-events |   2 +
> >  2 files changed, 94 insertions(+), 17 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 4d7b730a62b6..9b1a080cdc70 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
> >          } else {
> >              if (unlikely(prp2 & (n->page_size - 1))) {
> >                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> > +                status = NVME_INVALID_FIELD | NVME_DNR;
> >                  goto unmap;
> >              }
> > -            if (qsg->nsg) {
> > -                qemu_sglist_add(qsg, prp2, len);
> > -            } else {
> > -                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
> > +            status = nvme_map_addr(n, qsg, iov, prp2, len);
> > +            if (status) {
> > +                goto unmap;
> >              }
> >          }
> >      }
> >      return NVME_SUCCESS;
> >  
> > - unmap:
> > -    qemu_sglist_destroy(qsg);
> > -    return NVME_INVALID_FIELD | NVME_DNR;
> > +unmap:
> > +    if (iov && iov->iov) {
> > +        qemu_iovec_destroy(iov);
> > +    }
> > +
> > +    if (qsg && qsg->sg) {
> > +        qemu_sglist_destroy(qsg);
> > +    }
> > +
> > +    return status;
> 
> I think it would make sense to move whole unmap block to a separate function.
> That function could be called from here and after completing IO and would contain
> unified deinitialization block - so no code repetitions would be necessary.
> Other than that it looks good to me. Thx!
> 
> Reviewed-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
> 
 
Hi Andrzej,

Thanks for the review :)

Yes, this is done in a later patch ("hw/block/nvme: consolidate qsg/iov
clearing"), but kept here to reduce churn.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-29 21:24     ` Klaus Jensen
@ 2020-07-29 21:51       ` Andrzej Jakowski
  2020-07-29 21:53         ` Klaus Jensen
  0 siblings, 1 reply; 58+ messages in thread
From: Andrzej Jakowski @ 2020-07-29 21:51 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On 7/29/20 2:24 PM, Klaus Jensen wrote:
> On Jul 29 13:40, Andrzej Jakowski wrote:
>> On 7/20/20 4:37 AM, Klaus Jensen wrote:
>>> From: Klaus Jensen <k.jensen@samsung.com>
>>>
>>> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
>>> use them in nvme_map_prp.
>>>
>>> This fixes a bug where in the case of a CMB transfer, the device would
>>> map to the buffer with a wrong length.
>>>
>>> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
>>> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
>>> ---
>>>  hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
>>>  hw/block/trace-events |   2 +
>>>  2 files changed, 94 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>>> index 4d7b730a62b6..9b1a080cdc70 100644
>>> --- a/hw/block/nvme.c
>>> +++ b/hw/block/nvme.c
>>> @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
>>>          } else {
>>>              if (unlikely(prp2 & (n->page_size - 1))) {
>>>                  trace_pci_nvme_err_invalid_prp2_align(prp2);
>>> +                status = NVME_INVALID_FIELD | NVME_DNR;
>>>                  goto unmap;
>>>              }
>>> -            if (qsg->nsg) {
>>> -                qemu_sglist_add(qsg, prp2, len);
>>> -            } else {
>>> -                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
>>> +            status = nvme_map_addr(n, qsg, iov, prp2, len);
>>> +            if (status) {
>>> +                goto unmap;
>>>              }
>>>          }
>>>      }
>>>      return NVME_SUCCESS;
>>>  
>>> - unmap:
>>> -    qemu_sglist_destroy(qsg);
>>> -    return NVME_INVALID_FIELD | NVME_DNR;
>>> +unmap:
>>> +    if (iov && iov->iov) {
>>> +        qemu_iovec_destroy(iov);
>>> +    }
>>> +
>>> +    if (qsg && qsg->sg) {
>>> +        qemu_sglist_destroy(qsg);
>>> +    }
>>> +
>>> +    return status;
>>
>> I think it would make sense to move whole unmap block to a separate function.
>> That function could be called from here and after completing IO and would contain
>> unified deinitialization block - so no code repetitions would be necessary.
>> Other than that it looks good to me. Thx!
>>
>> Reviewed-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
>>
>  
> Hi Andrzej,
> 
> Thanks for the review :)
> 
> Yes, this is done in a later patch ("hw/block/nvme: consolidate qsg/iov
> clearing"), but kept here to reduce churn.
> 
Yep, noticed that after sending email :)
Do you plan to submit second version of these patches incorporating some
of the feedback?



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 02/16] hw/block/nvme: add mapping helpers
  2020-07-29 21:51       ` Andrzej Jakowski
@ 2020-07-29 21:53         ` Klaus Jensen
  0 siblings, 0 replies; 58+ messages in thread
From: Klaus Jensen @ 2020-07-29 21:53 UTC (permalink / raw)
  To: Andrzej Jakowski
  Cc: Kevin Wolf, qemu-block, Klaus Jensen, qemu-devel, Max Reitz, Keith Busch

On Jul 29 14:51, Andrzej Jakowski wrote:
> On 7/29/20 2:24 PM, Klaus Jensen wrote:
> > On Jul 29 13:40, Andrzej Jakowski wrote:
> >> On 7/20/20 4:37 AM, Klaus Jensen wrote:
> >>> From: Klaus Jensen <k.jensen@samsung.com>
> >>>
> >>> Add nvme_map_addr, nvme_map_addr_cmb and nvme_addr_to_cmb helpers and
> >>> use them in nvme_map_prp.
> >>>
> >>> This fixes a bug where in the case of a CMB transfer, the device would
> >>> map to the buffer with a wrong length.
> >>>
> >>> Fixes: b2b2b67a00574 ("nvme: Add support for Read Data and Write Data in CMBs.")
> >>> Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> >>> ---
> >>>  hw/block/nvme.c       | 109 +++++++++++++++++++++++++++++++++++-------
> >>>  hw/block/trace-events |   2 +
> >>>  2 files changed, 94 insertions(+), 17 deletions(-)
> >>>
> >>> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> >>> index 4d7b730a62b6..9b1a080cdc70 100644
> >>> --- a/hw/block/nvme.c
> >>> +++ b/hw/block/nvme.c
> >>> @@ -270,20 +338,27 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
> >>>          } else {
> >>>              if (unlikely(prp2 & (n->page_size - 1))) {
> >>>                  trace_pci_nvme_err_invalid_prp2_align(prp2);
> >>> +                status = NVME_INVALID_FIELD | NVME_DNR;
> >>>                  goto unmap;
> >>>              }
> >>> -            if (qsg->nsg) {
> >>> -                qemu_sglist_add(qsg, prp2, len);
> >>> -            } else {
> >>> -                qemu_iovec_add(iov, (void *)&n->cmbuf[prp2 - n->ctrl_mem.addr], trans_len);
> >>> +            status = nvme_map_addr(n, qsg, iov, prp2, len);
> >>> +            if (status) {
> >>> +                goto unmap;
> >>>              }
> >>>          }
> >>>      }
> >>>      return NVME_SUCCESS;
> >>>  
> >>> - unmap:
> >>> -    qemu_sglist_destroy(qsg);
> >>> -    return NVME_INVALID_FIELD | NVME_DNR;
> >>> +unmap:
> >>> +    if (iov && iov->iov) {
> >>> +        qemu_iovec_destroy(iov);
> >>> +    }
> >>> +
> >>> +    if (qsg && qsg->sg) {
> >>> +        qemu_sglist_destroy(qsg);
> >>> +    }
> >>> +
> >>> +    return status;
> >>
> >> I think it would make sense to move whole unmap block to a separate function.
> >> That function could be called from here and after completing IO and would contain
> >> unified deinitialization block - so no code repetitions would be necessary.
> >> Other than that it looks good to me. Thx!
> >>
> >> Reviewed-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
> >>
> >  
> > Hi Andrzej,
> > 
> > Thanks for the review :)
> > 
> > Yes, this is done in a later patch ("hw/block/nvme: consolidate qsg/iov
> > clearing"), but kept here to reduce churn.
> > 
> Yep, noticed that after sending email :)
> Do you plan to submit second version of these patches incorporating some
> of the feedback?
> 

Yes, so you can defer your reviews for v2 ;)


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member
       [not found]     ` <CGME20200729182946epcas2p1bef465a70c1a815654a07814aa379dc3@epcms2p5>
@ 2020-07-30  0:34       ` Minwoo Im
  0 siblings, 0 replies; 58+ messages in thread
From: Minwoo Im @ 2020-07-30  0:34 UTC (permalink / raw)
  To: Klaus Jensen, Minwoo Im
  Cc: Kevin Wolf, qemu-block, Klaus Birkelund Jensen, qemu-devel,
	Max Reitz, Keith Busch, Minwoo Im

> -----Original Message-----
> From: Qemu-devel <qemu-devel-bounces+minwoo.im=samsung.com@nongnu.org> On
> Behalf Of Klaus Jensen
> Sent: Thursday, July 30, 2020 3:29 AM
> To: Minwoo Im <minwoo.im.dev@gmail.com>
> Cc: Kevin Wolf <kwolf@redhat.com>; qemu-block@nongnu.org; Klaus Jensen
> <k.jensen@samsung.com>; qemu-devel@nongnu.org; Max Reitz <mreitz@redhat.com>;
> Keith Busch <kbusch@kernel.org>
> Subject: Re: [PATCH 04/16] hw/block/nvme: remove redundant has_sg member
> 
> On Jul 30 00:29, Minwoo Im wrote:
> > Klaus,
> >
> 
> Hi Minwoo,
> 
> Thanks for the reviews and welcome to the party! :)
> 
> > On 20-07-20 13:37:36, Klaus Jensen wrote:
> > > From: Klaus Jensen <k.jensen@samsung.com>
> > >
> > > Remove the has_sg member from NvmeRequest since it's redundant.
> > >
> > > Also, make sure the request iov is destroyed at completion time.
> > >
> > > Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
> > > Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> > > ---
> > >  hw/block/nvme.c | 11 ++++++-----
> > >  hw/block/nvme.h |  1 -
> > >  2 files changed, 6 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > > index cb236d1c8c46..6a1a1626b87b 100644
> > > --- a/hw/block/nvme.c
> > > +++ b/hw/block/nvme.c
> > > @@ -548,16 +548,20 @@ static void nvme_rw_cb(void *opaque, int ret)
> > >          block_acct_failed(blk_get_stats(n->conf.blk), &req->acct);
> > >          req->status = NVME_INTERNAL_DEV_ERROR;
> > >      }
> > > -    if (req->has_sg) {
> > > +
> > > +    if (req->qsg.nalloc) {
> >
> > Personally, I prefer has_xxx or is_xxx to check whether the request is
> > based on sg or iov as an inline function, but 'nalloc' is also fine to
> > figure out the meaning of purpose here.
> >
> 
> What I really want to do is get rid of this duality with qsg and iovs at
> some point. I kinda wanna get rid of the dma helpers and the qsg
> entirely and do the DMA handling directly.
> 
> Maybe an `int flags` member in NvmeRequest would be better for this,
> such as NVME_REQ_DMA etc.

That looks better, but it looks like this is out of scope of this series.
Anyway, Please take my tag for this patch.

Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>

> 
> > >          qemu_sglist_destroy(&req->qsg);
> > >      }
> > > +    if (req->iov.nalloc) {
> > > +        qemu_iovec_destroy(&req->iov);
> > > +    }
> > > +
> >
> > Maybe this can be in a separated commit?
> >
> 
> Yeah. I guess whenever a commit message includes "Also, ..." you really
> should factor the change out ;)
> 
> I'll split it.
> 
> > Otherwise, It looks good to me.
> >
> > Thanks,
> >
> 
> Does that mean I can add your R-b? :)



^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2020-07-30  0:35 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-20 11:37 [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
2020-07-20 11:37 ` [PATCH 01/16] hw/block/nvme: memset preallocated requests structures Klaus Jensen
2020-07-20 11:37 ` [PATCH 02/16] hw/block/nvme: add mapping helpers Klaus Jensen
2020-07-29 13:57   ` Maxim Levitsky
2020-07-29 18:23     ` Klaus Jensen
2020-07-29 15:19   ` Minwoo Im
2020-07-29 20:40   ` Andrzej Jakowski
2020-07-29 21:24     ` Klaus Jensen
2020-07-29 21:51       ` Andrzej Jakowski
2020-07-29 21:53         ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 03/16] hw/block/nvme: replace dma_acct with blk_acct equivalent Klaus Jensen
2020-07-29 15:23   ` Minwoo Im
2020-07-20 11:37 ` [PATCH 04/16] hw/block/nvme: remove redundant has_sg member Klaus Jensen
2020-07-29 15:29   ` Minwoo Im
2020-07-29 18:29     ` Klaus Jensen
     [not found]     ` <CGME20200729182946epcas2p1bef465a70c1a815654a07814aa379dc3@epcms2p5>
2020-07-30  0:34       ` Minwoo Im
2020-07-20 11:37 ` [PATCH 05/16] hw/block/nvme: refactor dma read/write Klaus Jensen
2020-07-29 15:35   ` Minwoo Im
2020-07-29 17:35   ` Maxim Levitsky
2020-07-29 18:38     ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 06/16] hw/block/nvme: pass request along for tracing Klaus Jensen
2020-07-29 15:49   ` Minwoo Im
2020-07-29 19:49     ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 07/16] hw/block/nvme: add request mapping helper Klaus Jensen
2020-07-29 15:52   ` Minwoo Im
2020-07-29 18:31     ` Maxim Levitsky
2020-07-29 19:22       ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 08/16] hw/block/nvme: verify validity of prp lists in the cmb Klaus Jensen
2020-07-29 15:54   ` Minwoo Im
2020-07-20 11:37 ` [PATCH 09/16] hw/block/nvme: refactor request bounds checking Klaus Jensen
2020-07-29 15:56   ` Minwoo Im
2020-07-20 11:37 ` [PATCH 10/16] hw/block/nvme: add check for mdts Klaus Jensen
2020-07-29 16:00   ` Minwoo Im
2020-07-29 19:30     ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 11/16] hw/block/nvme: be consistent about zeros vs zeroes Klaus Jensen
2020-07-29 16:01   ` Minwoo Im
2020-07-29 17:39   ` Maxim Levitsky
2020-07-20 11:37 ` [PATCH 12/16] hw/block/nvme: refactor NvmeRequest clearing Klaus Jensen
2020-07-29 16:04   ` Minwoo Im
2020-07-29 17:47   ` Maxim Levitsky
2020-07-29 19:02     ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 13/16] hw/block/nvme: add a namespace reference in NvmeRequest Klaus Jensen
2020-07-29 16:06   ` Minwoo Im
2020-07-29 17:53   ` Maxim Levitsky
2020-07-20 11:37 ` [PATCH 14/16] hw/block/nvme: consolidate qsg/iov clearing Klaus Jensen
2020-07-29 16:08   ` Minwoo Im
2020-07-29 18:18   ` Maxim Levitsky
2020-07-29 19:49     ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 15/16] hw/block/nvme: remove NvmeCmd parameter Klaus Jensen
2020-07-29 16:10   ` Minwoo Im
2020-07-29 19:44     ` Klaus Jensen
2020-07-29 18:25   ` Maxim Levitsky
2020-07-29 20:00     ` Klaus Jensen
2020-07-20 11:37 ` [PATCH 16/16] hw/block/nvme: use preallocated qsg/iov in nvme_dma_prp Klaus Jensen
2020-07-29 16:15   ` Minwoo Im
2020-07-29 19:57     ` Klaus Jensen
2020-07-27  9:42 ` [PATCH 00/16] hw/block/nvme: dma handling and address mapping cleanup Klaus Jensen
2020-07-27 20:44   ` Keith Busch

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.