All of lore.kernel.org
 help / color / mirror / Atom feed
From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Eduardo Habkost <ehabkost@redhat.com>,
	qemu-block@nongnu.org, "Michael S. Tsirkin" <mst@redhat.com>,
	Klaus Jensen <k.jensen@samsung.com>,
	Max Reitz <mreitz@redhat.com>, Keith Busch <kbusch@kernel.org>,
	Klaus Jensen <its@irrelevant.dk>,
	Maxim Levitsky <mlevitsk@redhat.com>
Subject: [PATCH 02/17] hw/block/nvme: handle dma errors
Date: Fri,  4 Sep 2020 16:19:41 +0200	[thread overview]
Message-ID: <20200904141956.576630-3-its@irrelevant.dk> (raw)
In-Reply-To: <20200904141956.576630-1-its@irrelevant.dk>

From: Klaus Jensen <k.jensen@samsung.com>

Handling DMA errors gracefully is required for the device to pass the
block/011 test ("disable PCI device while doing I/O") in the blktests
suite.

With this patch the device passes the test by retrying "critical"
transfers (posting of completion entries and processing of submission
queue entries).

If DMA errors occur at any other point in the execution of the command
(say, while mapping the PRPs), the command is aborted with a Data
Transfer Error status code.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Acked-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
---
 hw/block/nvme.c       | 43 ++++++++++++++++++++++++++++++++-----------
 hw/block/trace-events |  2 ++
 include/block/nvme.h  |  2 +-
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 63078f600920..49bcdf31ced6 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -140,14 +140,14 @@ static inline void *nvme_addr_to_cmb(NvmeCtrl *n, hwaddr addr)
     return &n->cmbuf[addr - n->ctrl_mem.addr];
 }
 
-static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+static int nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
 {
     if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) {
         memcpy(buf, nvme_addr_to_cmb(n, addr), size);
-        return;
+        return 0;
     }
 
-    pci_dma_read(&n->parent_obj, addr, buf, size);
+    return pci_dma_read(&n->parent_obj, addr, buf, size);
 }
 
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
@@ -253,7 +253,7 @@ static uint16_t nvme_map_addr_cmb(NvmeCtrl *n, QEMUIOVector *iov, hwaddr addr,
     trace_pci_nvme_map_addr_cmb(addr, len);
 
     if (!nvme_addr_is_cmb(n, addr) || !nvme_addr_is_cmb(n, addr + len - 1)) {
-        return NVME_DATA_TRAS_ERROR;
+        return NVME_DATA_TRANSFER_ERROR;
     }
 
     qemu_iovec_add(iov, nvme_addr_to_cmb(n, addr), len);
@@ -307,6 +307,7 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
     int num_prps = (len >> n->page_bits) + 1;
     uint16_t status;
     bool prp_list_in_cmb = false;
+    int ret;
 
     QEMUSGList *qsg = &req->qsg;
     QEMUIOVector *iov = &req->iov;
@@ -347,7 +348,11 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
 
             nents = (len + n->page_size - 1) >> n->page_bits;
             prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-            nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+            ret = nvme_addr_read(n, prp2, (void *)prp_list, prp_trans);
+            if (ret) {
+                trace_pci_nvme_err_addr_read(prp2);
+                return NVME_DATA_TRANSFER_ERROR;
+            }
             while (len != 0) {
                 uint64_t prp_ent = le64_to_cpu(prp_list[i]);
 
@@ -364,8 +369,12 @@ static uint16_t nvme_map_prp(NvmeCtrl *n, uint64_t prp1, uint64_t prp2,
                     i = 0;
                     nents = (len + n->page_size - 1) >> n->page_bits;
                     prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
-                    nvme_addr_read(n, prp_ent, (void *)prp_list,
-                        prp_trans);
+                    ret = nvme_addr_read(n, prp_ent, (void *)prp_list,
+                                         prp_trans);
+                    if (ret) {
+                        trace_pci_nvme_err_addr_read(prp_ent);
+                        return NVME_DATA_TRANSFER_ERROR;
+                    }
                     prp_ent = le64_to_cpu(prp_list[i]);
                 }
 
@@ -457,6 +466,7 @@ static void nvme_post_cqes(void *opaque)
     NvmeCQueue *cq = opaque;
     NvmeCtrl *n = cq->ctrl;
     NvmeRequest *req, *next;
+    int ret;
 
     QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
         NvmeSQueue *sq;
@@ -466,15 +476,21 @@ static void nvme_post_cqes(void *opaque)
             break;
         }
 
-        QTAILQ_REMOVE(&cq->req_list, req, entry);
         sq = req->sq;
         req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
         req->cqe.sq_id = cpu_to_le16(sq->sqid);
         req->cqe.sq_head = cpu_to_le16(sq->head);
         addr = cq->dma_addr + cq->tail * n->cqe_size;
+        ret = pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
+                            sizeof(req->cqe));
+        if (ret) {
+            trace_pci_nvme_err_addr_write(addr);
+            timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                      500 * SCALE_MS);
+            break;
+        }
+        QTAILQ_REMOVE(&cq->req_list, req, entry);
         nvme_inc_cq_tail(cq);
-        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
-            sizeof(req->cqe));
         nvme_req_exit(req);
         QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
     }
@@ -1611,7 +1627,12 @@ static void nvme_process_sq(void *opaque)
 
     while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
         addr = sq->dma_addr + sq->head * n->sqe_size;
-        nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
+        if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
+            trace_pci_nvme_err_addr_read(addr);
+            timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                      500 * SCALE_MS);
+            break;
+        }
         nvme_inc_sq_head(sq);
 
         req = QTAILQ_FIRST(&sq->req_list);
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 72cf2d15cb8e..50d5702e6b80 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -86,6 +86,8 @@ pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 
 # nvme traces for error conditions
 pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %zu"
+pci_nvme_err_addr_read(uint64_t addr) "addr 0x%"PRIx64""
+pci_nvme_err_addr_write(uint64_t addr) "addr 0x%"PRIx64""
 pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
 pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
 pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 65e68a82c897..c8d0a3473f0d 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -630,7 +630,7 @@ enum NvmeStatusCodes {
     NVME_INVALID_OPCODE         = 0x0001,
     NVME_INVALID_FIELD          = 0x0002,
     NVME_CID_CONFLICT           = 0x0003,
-    NVME_DATA_TRAS_ERROR        = 0x0004,
+    NVME_DATA_TRANSFER_ERROR    = 0x0004,
     NVME_POWER_LOSS_ABORT       = 0x0005,
     NVME_INTERNAL_DEV_ERROR     = 0x0006,
     NVME_CMD_ABORT_REQ          = 0x0007,
-- 
2.28.0



  parent reply	other threads:[~2020-09-04 14:23 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-04 14:19 [PATCH 00/17] hw/block/nvme: multiple namespaces support Klaus Jensen
2020-09-04 14:19 ` [PATCH 01/17] pci: pass along the return value of dma_memory_rw Klaus Jensen
2020-09-04 14:19 ` Klaus Jensen [this message]
2020-09-07  2:34   ` [PATCH 02/17] hw/block/nvme: handle dma errors Philippe Mathieu-Daudé
2020-09-07  7:49     ` Klaus Jensen
2020-09-04 14:19 ` [PATCH 03/17] hw/block/nvme: commonize nvme_rw error handling Klaus Jensen
2020-09-04 14:19 ` [PATCH 04/17] hw/block/nvme: alignment style fixes Klaus Jensen
2020-09-07  2:29   ` Philippe Mathieu-Daudé
2020-09-04 14:19 ` [PATCH 05/17] hw/block/nvme: add a lba to bytes helper Klaus Jensen
2020-09-04 14:19 ` [PATCH 06/17] hw/block/nvme: fix endian conversion Klaus Jensen
2020-09-04 14:19 ` [PATCH 07/17] hw/block/nvme: add symbolic command name to trace events Klaus Jensen
2020-09-07  2:14   ` Philippe Mathieu-Daudé
2020-09-04 14:19 ` [PATCH 08/17] hw/block/nvme: refactor aio submission Klaus Jensen
2020-09-04 19:47   ` Keith Busch
2020-09-04 20:38     ` Klaus Jensen
2020-09-04 21:15       ` Keith Busch
2020-09-04 21:38         ` Klaus Jensen
2020-09-04 14:19 ` [PATCH 09/17] hw/block/nvme: default request status to success Klaus Jensen
2020-09-07  2:17   ` Philippe Mathieu-Daudé
2020-09-04 14:19 ` [PATCH 10/17] hw/block/nvme: support multiple parallel aios per request Klaus Jensen
2020-09-04 14:19 ` [PATCH 11/17] hw/block/nvme: harden cmb access Klaus Jensen
2020-09-04 14:19 ` [PATCH 12/17] hw/block/nvme: add support for scatter gather lists Klaus Jensen
2020-09-04 14:19 ` [PATCH 13/17] hw/block/nvme: add support for sgl bit bucket descriptor Klaus Jensen
2020-09-04 14:19 ` [PATCH 14/17] hw/block/nvme: refactor identify active namespace id list Klaus Jensen
2020-09-04 14:19 ` [PATCH 15/17] hw/block/nvme: support multiple namespaces Klaus Jensen
2020-09-04 14:19 ` [PATCH 16/17] pci: allocate pci id for nvme Klaus Jensen
2020-09-04 14:19 ` [PATCH 17/17] hw/block/nvme: change controller pci id Klaus Jensen
2020-09-07  2:28   ` Philippe Mathieu-Daudé
2020-09-07  7:23     ` Klaus Jensen
2020-09-07  8:36       ` Philippe Mathieu-Daudé
2020-09-07  8:58         ` Klaus Jensen
2020-09-07  9:20           ` Klaus Jensen
2020-09-07  9:33             ` Philippe Mathieu-Daudé
2020-09-07 10:37     ` Dr. David Alan Gilbert
2020-09-07 10:50       ` Klaus Jensen
2020-09-07 10:52         ` Dr. David Alan Gilbert
2020-09-07 11:02           ` Klaus Jensen
2020-09-08 15:39             ` Keith Busch
2020-09-04 16:12 ` [PATCH 00/17] hw/block/nvme: multiple namespaces support Philippe Mathieu-Daudé
2020-09-04 17:17   ` Klaus Jensen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200904141956.576630-3-its@irrelevant.dk \
    --to=its@irrelevant.dk \
    --cc=ehabkost@redhat.com \
    --cc=k.jensen@samsung.com \
    --cc=kbusch@kernel.org \
    --cc=kwolf@redhat.com \
    --cc=mlevitsk@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=mst@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.