From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Cc: Keith Busch <keith.busch@intel.com>,
	Kevin Wolf <kwolf@redhat.com>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>
Subject: [Qemu-devel] [PATCH 2/8] nvme: bump supported spec to 1.3
Date: Fri, 17 May 2019 10:42:28 +0200
Message-ID: <20190517084234.26923-3-klaus@birkelund.eu>
In-Reply-To: <20190517084234.26923-1-klaus@birkelund.eu>

Bump the supported NVMe version to 1.3. To do so, this patch adds a
number of missing 'Mandatory' features from the spec:

  * Support for returning a Namespace Identification Descriptor List in
    the Identify command (CNS 03h).
  * Support for the Asynchronous Event Request command.
  * Support for the Get Log Page command and the mandatory Error
    Information, SMART / Health Information, and Firmware Slot
    Information log pages.
  * Support for the Abort command.

As a side effect, this bump also fixes support for multiple namespaces.
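
The new commands can be exercised from a Linux guest with nvme-cli, for
example (assuming the controller enumerates as /dev/nvme0):

  # nvme smart-log /dev/nvme0
  # nvme error-log /dev/nvme0
  # nvme fw-log /dev/nvme0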

The implementations of the AER, Get Log Page, and Abort commands have been
imported and slightly modified from Keith's qemu-nvme tree[1]. Thanks!

  [1]: http://git.infradead.org/users/kbusch/qemu-nvme.git

Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
---
 hw/block/nvme.c       | 792 ++++++++++++++++++++++++++++++++++++------
 hw/block/nvme.h       |  31 +-
 hw/block/trace-events |  16 +-
 include/block/nvme.h  |  58 +++-
 4 files changed, 783 insertions(+), 114 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b689c0776e72..65dfc04f71e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -9,17 +9,35 @@
  */
 
 /**
- * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e
+ * Reference Specs: http://www.nvmexpress.org, 1.3d, 1.2, 1.1, 1.0e
  *
  *  http://www.nvmexpress.org/resources/
  */
 
 /**
  * Usage: add options:
- *      -drive file=<file>,if=none,id=<drive_id>
- *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
- *              cmb_size_mb=<cmb_size_mb[optional]>, \
- *              num_queues=<N[optional]>
+ *     -drive file=<file>,if=none,id=<drive_id>
+ *     -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
+ *
+ * The "file" option must point to a path to a real file that you will use as
+ * the backing storage for your NVMe device. It must be a non-zero length, as
+ * this will be the disk image that your nvme controller will use to carve up
+ * namespaces for storage.
+ *
+ * Note the "drive" option's "id" name must match the "device nvme" drive's
+ * name to link the block device used for backing storage to the nvme
+ * interface.
+ *
+ * Advanced optional options:
+ *
+ *   num_ns=<int>          : Namespaces to make out of the backing storage,
+ *                           Default:1
+ *   num_queues=<int>      : Number of possible IO Queues, Default:64
+ *   cmb_size_mb=<int>     : Size of CMB in MBs, Default:0
+ *
+ * Parameters will be verified against conflicting capabilities and attributes
+ * and fail to load if there is a conflict or a configuration the emulated
+ * device is unable to handle.
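+ *
+ * For example, a two-namespace controller backed by a single image might be
+ * configured with (file path and serial are placeholders):
+ *
+ *     -drive file=/path/to/nvme.img,if=none,id=nvm
+ *     -device nvme,drive=nvm,serial=deadbeef,num_ns=2,num_queues=8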
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -38,6 +56,12 @@
 #include "trace.h"
 #include "nvme.h"
 
+#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
+#define NVME_TEMPERATURE 0x143
+#define NVME_ELPE 3
+#define NVME_AERL 3
+#define NVME_OP_ABORTED 0xff
+
 #define NVME_GUEST_ERR(trace, fmt, ...) \
     do { \
         (trace_##trace)(__VA_ARGS__); \
@@ -57,6 +81,16 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
     }
 }
 
+static void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+{
+    if (n->cmbsz && addr >= n->ctrl_mem.addr &&
+                addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
+        memcpy((void *)&n->cmbuf[addr - n->ctrl_mem.addr], buf, size);
+        return;
+    }
+    pci_dma_write(&n->parent_obj, addr, buf, size);
+}
+
 static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
 {
     return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1;
@@ -244,6 +278,24 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
     return status;
 }
 
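+/*
+ * Post a single completion queue entry: fill in the CQE for @req at the
+ * current CQ tail in guest (or CMB) memory and return the request to its
+ * submission queue's free list.
+ */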
+static void nvme_post_cqe(NvmeCQueue *cq, NvmeRequest *req)
+{
+    NvmeCtrl *n = cq->ctrl;
+    NvmeSQueue *sq = req->sq;
+    NvmeCqe *cqe = &req->cqe;
+    uint8_t phase = cq->phase;
+    hwaddr addr;
+
+    addr = cq->dma_addr + cq->tail * n->cqe_size;
+    cqe->status = cpu_to_le16((req->status << 1) | phase);
+    cqe->sq_id = cpu_to_le16(sq->sqid);
+    cqe->sq_head = cpu_to_le16(sq->head);
+    nvme_addr_write(n, addr, (void *) cqe, sizeof(*cqe));
+    nvme_inc_cq_tail(cq);
+
+    QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+}
+
 static void nvme_post_cqes(void *opaque)
 {
     NvmeCQueue *cq = opaque;
@@ -251,24 +303,14 @@ static void nvme_post_cqes(void *opaque)
     NvmeRequest *req, *next;
 
     QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
-        NvmeSQueue *sq;
-        hwaddr addr;
-
         if (nvme_cq_full(cq)) {
             break;
         }
 
         QTAILQ_REMOVE(&cq->req_list, req, entry);
-        sq = req->sq;
-        req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
-        req->cqe.sq_id = cpu_to_le16(sq->sqid);
-        req->cqe.sq_head = cpu_to_le16(sq->head);
-        addr = cq->dma_addr + cq->tail * n->cqe_size;
-        nvme_inc_cq_tail(cq);
-        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
-            sizeof(req->cqe));
-        QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+        nvme_post_cqe(cq, req);
     }
+
     if (cq->tail != cq->head) {
         nvme_irq_assert(n, cq);
     }
@@ -277,11 +319,88 @@ static void nvme_post_cqes(void *opaque)
 static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
 {
     assert(cq->cqid == req->sq->cqid);
+
+    trace_nvme_enqueue_req_completion(req->cqe.cid, cq->cqid);
     QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
     QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
     timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
 }
 
+static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
+    uint8_t event_info, uint8_t log_page)
+{
+    NvmeAsyncEvent *event;
+
+    trace_nvme_enqueue_event(event_type, event_info, log_page);
+
+    /*
+     * Do not enqueue the event if something of this type is already queued.
+     * This bounds the size of the event queue and makes sure it does not grow
+     * indefinitely when events are not processed by the host (i.e. does not
+     * issue any AERs).
+     */
+    if (n->aer_mask_queued & (1 << event_type)) {
+        return;
+    }
+    n->aer_mask_queued |= (1 << event_type);
+
+    event = g_new(NvmeAsyncEvent, 1);
+    event->result = (NvmeAerResult) {
+        .event_type = event_type,
+        .event_info = event_info,
+        .log_page   = log_page,
+    };
+
+    QSIMPLEQ_INSERT_TAIL(&n->aer_queue, event, entry);
+
+    timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
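+/*
+ * Drain the asynchronous event queue: for each pending (unmasked) event,
+ * complete one outstanding AER request, until either no events or no
+ * outstanding AERs remain.
+ */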
+static void nvme_process_aers(void *opaque)
+{
+    NvmeCtrl *n = opaque;
+    NvmeRequest *req;
+    NvmeAerResult *result;
+    NvmeAsyncEvent *event, *next;
+
+    trace_nvme_process_aers();
+
+    QSIMPLEQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+        /* can't post cqe if there is nothing to complete */
+        if (!n->outstanding_aers) {
+            trace_nvme_no_outstanding_aers();
+            break;
+        }
+
+        /* ignore if masked (cqe posted, but event not cleared) */
+        if (n->aer_mask & (1 << event->result.event_type)) {
+            trace_nvme_aer_masked(event->result.event_type, n->aer_mask);
+            continue;
+        }
+
+        QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry);
+
+        n->aer_mask |= 1 << event->result.event_type;
+        n->aer_mask_queued &= ~(1 << event->result.event_type);
+        n->outstanding_aers--;
+
+        req = n->aer_reqs[n->outstanding_aers];
+
+        result = (NvmeAerResult *) &req->cqe.result;
+        result->event_type = event->result.event_type;
+        result->event_info = event->result.event_info;
+        result->log_page = event->result.log_page;
+        g_free(event);
+
+        req->status = NVME_SUCCESS;
+
+        trace_nvme_aer_post_cqe(result->event_type, result->event_info,
+            result->log_page);
+
+        nvme_enqueue_req_completion(&n->admin_cq, req);
+    }
+}
+
 static void nvme_rw_cb(void *opaque, int ret)
 {
     NvmeRequest *req = opaque;
@@ -318,7 +437,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
 {
     NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
     const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
-    const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+    const uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads;
     uint64_t slba = le64_to_cpu(rw->slba);
     uint32_t nlb  = le16_to_cpu(rw->nlb) + 1;
     uint64_t offset = slba << data_shift;
@@ -347,9 +466,9 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
     uint64_t prp2 = le64_to_cpu(rw->prp2);
 
     uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
-    uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+    uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads;
     uint64_t data_size = (uint64_t)nlb << data_shift;
-    uint64_t data_offset = slba << data_shift;
+    uint64_t data_offset = ns->blk_offset + (slba << data_shift);
     int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
     enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
 
@@ -391,8 +510,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     NvmeNamespace *ns;
     uint32_t nsid = le32_to_cpu(cmd->nsid);
 
-    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
-        trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+    if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+        trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
@@ -420,6 +539,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
     if (sq->sqid) {
         g_free(sq);
     }
+    n->qs_created--;
 }
 
 static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -486,6 +606,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
     cq = n->cq[cqid];
     QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
     n->sq[sqid] = sq;
+    n->qs_created++;
 }
 
 static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -535,6 +656,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
     if (cq->cqid) {
         g_free(cq);
     }
+    n->qs_created--;
 }
 
 static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -575,6 +697,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
     msix_vector_use(&n->parent_obj, cq->vector);
     n->cq[cqid] = cq;
     cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+    n->qs_created++;
 }
 
 static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -637,8 +760,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
 
     trace_nvme_identify_ns(nsid);
 
-    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
-        trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+    if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+        trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
@@ -648,7 +771,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
         prp1, prp2);
 }
 
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c)
 {
     static const int data_len = 4 * KiB;
     uint32_t min_nsid = le32_to_cpu(c->nsid);
@@ -658,10 +781,10 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
     uint16_t ret;
     int i, j = 0;
 
-    trace_nvme_identify_nslist(min_nsid);
+    trace_nvme_identify_ns_list(min_nsid);
 
     list = g_malloc0(data_len);
-    for (i = 0; i < n->num_namespaces; i++) {
+    for (i = 0; i < n->params.num_ns; i++) {
         if (i < min_nsid) {
             continue;
         }
@@ -675,6 +798,25 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
     return ret;
 }
 
+static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c)
+{
+    static const int data_len = sizeof(NvmeIdentifyNamespaceDescriptor) + 0x10;
+    uint32_t nsid = le32_to_cpu(c->nsid);
+    uint64_t prp1 = le64_to_cpu(c->prp1);
+    uint64_t prp2 = le64_to_cpu(c->prp2);
+    NvmeIdentifyNamespaceDescriptor *list;
+    uint16_t ret;
+
+    trace_nvme_identify_ns_descriptor_list(nsid);
+
+    list = g_malloc0(data_len);
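+
+    /*
+     * NIDT 0x3 identifies a Namespace UUID with NIDL 16 bytes; the
+     * identifier itself is left zeroed, as the emulated namespace has no
+     * persistent identifier.
+     */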
+    list->nidt = 0x3;
+    list->nidl = 0x10;
+
+    ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2);
+    g_free(list);
+    return ret;
+}
 
 static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
 {
@@ -686,7 +828,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
     case 0x01:
         return nvme_identify_ctrl(n, c);
     case 0x02:
-        return nvme_identify_nslist(n, c);
+        return nvme_identify_ns_list(n, c);
+    case 0x03:
+        return nvme_identify_ns_descriptor_list(n, cmd);
     default:
         trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -696,18 +840,49 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
     uint32_t result;
 
+    trace_nvme_getfeat(dw10);
+
     switch (dw10) {
+    case NVME_ARBITRATION:
+        result = cpu_to_le32(n->features.arbitration);
+        break;
+    case NVME_POWER_MANAGEMENT:
+        result = cpu_to_le32(n->features.power_mgmt);
+        break;
+    case NVME_TEMPERATURE_THRESHOLD:
+        result = cpu_to_le32(n->features.temp_thresh);
+        break;
+    case NVME_ERROR_RECOVERY:
+        result = cpu_to_le32(n->features.err_rec);
+        break;
     case NVME_VOLATILE_WRITE_CACHE:
         result = blk_enable_write_cache(n->conf.blk);
         trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
         break;
     case NVME_NUMBER_OF_QUEUES:
         result = cpu_to_le32((n->params.num_queues - 2) |
-            ((n->params.num_queues - 2) << 16));
+                            ((n->params.num_queues - 2) << 16));
         trace_nvme_getfeat_numq(result);
         break;
+    case NVME_INTERRUPT_COALESCING:
+        result = cpu_to_le32(n->features.int_coalescing);
+        break;
+    case NVME_INTERRUPT_VECTOR_CONF:
+        if ((dw11 & 0xffff) > n->params.num_queues) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
+        break;
+    case NVME_WRITE_ATOMICITY:
+        result = cpu_to_le32(n->features.write_atomicity);
+        break;
+    case NVME_ASYNCHRONOUS_EVENT_CONF:
+        result = cpu_to_le32(n->features.async_config);
+        break;
     default:
         trace_nvme_err_invalid_getfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -722,22 +897,244 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
 
+    trace_nvme_setfeat(dw10, dw11);
+
     switch (dw10) {
+    case NVME_TEMPERATURE_THRESHOLD:
+        n->features.temp_thresh = dw11;
+        if (n->features.temp_thresh <= n->temperature) {
+            nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
+                NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO);
+        }
+        break;
     case NVME_VOLATILE_WRITE_CACHE:
         blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
         break;
     case NVME_NUMBER_OF_QUEUES:
+        if (n->qs_created > 2) {
+            return NVME_CMD_SEQ_ERROR | NVME_DNR;
+        }
+
+        if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
         trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
                                 ((dw11 >> 16) & 0xFFFF) + 1,
                                 n->params.num_queues - 1,
                                 n->params.num_queues - 1);
         req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
-                                      ((n->params.num_queues - 2) << 16));
+                                     ((n->params.num_queues - 2) << 16));
         break;
+    case NVME_ASYNCHRONOUS_EVENT_CONF:
+        n->features.async_config = dw11;
+        break;
+    case NVME_ARBITRATION:
+    case NVME_POWER_MANAGEMENT:
+    case NVME_ERROR_RECOVERY:
+    case NVME_INTERRUPT_COALESCING:
+    case NVME_INTERRUPT_VECTOR_CONF:
+    case NVME_WRITE_ATOMICITY:
+        return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
     default:
         trace_nvme_err_invalid_setfeat(dw10);
         return NVME_INVALID_FIELD | NVME_DNR;
     }
+
+    return NVME_SUCCESS;
+}
+
+static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
+{
+    n->aer_mask &= ~(1 << event_type);
+    if (!QSIMPLEQ_EMPTY(&n->aer_queue)) {
+        timer_mod(n->aer_timer,
+            qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+    }
+}
+
+static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+    uint32_t buf_len, uint64_t off, NvmeRequest *req)
+{
+    uint32_t trans_len;
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+    if (off > sizeof(*n->elpes) * (NVME_ELPE + 1)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trans_len = MIN(sizeof(*n->elpes) * (NVME_ELPE + 1) - off, buf_len);
+
+    if (!rae) {
+        nvme_clear_events(n, NVME_AER_TYPE_ERROR);
+    }
+
+    return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1,
+        prp2);
+}
+
+static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+    uint32_t buf_len, uint64_t off, NvmeRequest *req)
+{
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+    uint32_t trans_len;
+    time_t current_ms;
+    NvmeSmartLog smart;
+
+    if (cmd->nsid != 0 && cmd->nsid != 0xffffffff) {
+        trace_nvme_err(req->cqe.cid, "smart log not supported for namespace",
+            NVME_INVALID_FIELD | NVME_DNR);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    if (off > sizeof(smart)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trans_len = MIN(sizeof(smart) - off, buf_len);
+
+    memset(&smart, 0x0, sizeof(smart));
+    smart.number_of_error_log_entries[0] = cpu_to_le64(0);
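+
+    /*
+     * Composite Temperature is a little-endian 16-bit value in Kelvin;
+     * NVME_TEMPERATURE (0x143) is 323 K, i.e. 50 degrees Celsius.
+     */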
+    smart.temperature[0] = n->temperature & 0xff;
+    smart.temperature[1] = (n->temperature >> 8) & 0xff;
+
+    if (n->features.temp_thresh <= n->temperature) {
+        smart.critical_warning |= NVME_SMART_TEMPERATURE;
+    }
+
+    current_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    smart.power_on_hours[0] = cpu_to_le64(
+        (((current_ms - n->starttime_ms) / 1000) / 60) / 60);
+
+    if (!rae) {
+        nvme_clear_events(n, NVME_AER_TYPE_SMART);
+    }
+
+    return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
+        prp2);
+}
+
+static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
+    uint64_t off, NvmeRequest *req)
+{
+    uint32_t trans_len;
+    uint64_t prp1 = le64_to_cpu(cmd->prp1);
+    uint64_t prp2 = le64_to_cpu(cmd->prp2);
+    NvmeFwSlotInfoLog fw_log;
+
+    if (off > sizeof(fw_log)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    memset(&fw_log, 0, sizeof(NvmeFwSlotInfoLog));
+
+    trans_len = MIN(sizeof(fw_log) - off, buf_len);
+
+    return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
+        prp2);
+}
+
+static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+    uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+    uint32_t dw12 = le32_to_cpu(cmd->cdw12);
+    uint32_t dw13 = le32_to_cpu(cmd->cdw13);
+    uint16_t lid = dw10 & 0xff;
+    uint8_t  rae = (dw10 >> 15) & 0x1;
+    uint32_t numdl, numdu, len;
+    uint64_t off, lpol, lpou;
+
+    numdl = (dw10 >> 16);
+    numdu = (dw11 & 0xffff);
+    lpol = dw12;
+    lpou = dw13;
+
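+    /*
+     * NUMD is a zeroes-based dword count split across NUMDL/NUMDU; the log
+     * page offset (LPOL/LPOU) is in bytes and must be dword aligned.
+     */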
+    len = (((numdu << 16) | numdl) + 1) << 2;
+    off = (lpou << 32ULL) | lpol;
+
+    if (off & 0x3) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    trace_nvme_get_log(req->cqe.cid, lid);
+
+    switch (lid) {
+    case NVME_LOG_ERROR_INFO:
+        return nvme_error_log_info(n, cmd, rae, len, off, req);
+    case NVME_LOG_SMART_INFO:
+        return nvme_smart_info(n, cmd, rae, len, off, req);
+    case NVME_LOG_FW_SLOT_INFO:
+        return nvme_fw_log_info(n, cmd, len, off, req);
+    default:
+        trace_nvme_err_invalid_log_page(req->cqe.cid, lid);
+        return NVME_INVALID_LOG_ID | NVME_DNR;
+    }
+}
+
+static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    trace_nvme_aer(req->cqe.cid);
+
+    if (n->outstanding_aers > NVME_AERL) {
+        trace_nvme_aer_aerl_exceeded();
+        return NVME_AER_LIMIT_EXCEEDED;
+    }
+
+    n->aer_reqs[n->outstanding_aers] = req;
+    timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+    n->outstanding_aers++;
+
+    return NVME_NO_COMPLETE;
+}
+
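+/*
+ * Abort a queued (not yet executing) command. On a successful abort, the
+ * submission queue entry is rewritten in place with the NVME_OP_ABORTED
+ * sentinel opcode so that nvme_process_sq skips it, and bit 0 of the Abort
+ * command's CDW0 result is cleared. If no matching command is found, bit 0
+ * remains set to indicate that nothing was aborted.
+ */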
+static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+    NvmeSQueue *sq;
+    NvmeRequest *new;
+    uint32_t index = 0;
+    uint16_t sqid = le32_to_cpu(cmd->cdw10) & 0xffff;
+    uint16_t cid = (le32_to_cpu(cmd->cdw10) >> 16) & 0xffff;
+
+    req->cqe.result = 1;
+    if (nvme_check_sqid(n, sqid)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    sq = n->sq[sqid];
+
+    /* only consider queued (and not executing) commands for abort */
+    while ((sq->head + index) % sq->size != sq->tail) {
+        NvmeCmd abort_cmd;
+        hwaddr addr;
+
+        addr = sq->dma_addr + ((sq->head + index) % sq->size) * n->sqe_size;
+
+        nvme_addr_read(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+        if (le16_to_cpu(abort_cmd.cid) == cid) {
+            req->cqe.result = 0;
+            new = QTAILQ_FIRST(&sq->req_list);
+            QTAILQ_REMOVE(&sq->req_list, new, entry);
+            QTAILQ_INSERT_TAIL(&sq->out_req_list, new, entry);
+
+            memset(&new->cqe, 0, sizeof(new->cqe));
+            new->cqe.cid = cpu_to_le16(cid);
+            new->status = NVME_CMD_ABORT_REQ;
+
+            abort_cmd.opcode = NVME_OP_ABORTED;
+            nvme_addr_write(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+
+            nvme_enqueue_req_completion(n->cq[sq->cqid], new);
+
+            return NVME_SUCCESS;
+        }
+
+        ++index;
+    }
+
     return NVME_SUCCESS;
 }
 
@@ -758,6 +1155,12 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return nvme_set_feature(n, cmd, req);
     case NVME_ADM_CMD_GET_FEATURES:
         return nvme_get_feature(n, cmd, req);
+    case NVME_ADM_CMD_GET_LOG_PAGE:
+        return nvme_get_log(n, cmd, req);
+    case NVME_ADM_CMD_ASYNC_EV_REQ:
+        return nvme_aer(n, cmd, req);
+    case NVME_ADM_CMD_ABORT:
+        return nvme_abort(n, cmd, req);
     default:
         trace_nvme_err_invalid_admin_opc(cmd->opcode);
         return NVME_INVALID_OPCODE | NVME_DNR;
@@ -780,6 +1183,10 @@ static void nvme_process_sq(void *opaque)
         nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
         nvme_inc_sq_head(sq);
 
+        if (cmd.opcode == NVME_OP_ABORTED) {
+            continue;
+        }
+
         req = QTAILQ_FIRST(&sq->req_list);
         QTAILQ_REMOVE(&sq->req_list, req, entry);
         QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
@@ -797,6 +1204,7 @@ static void nvme_process_sq(void *opaque)
 
 static void nvme_clear_ctrl(NvmeCtrl *n)
 {
+    NvmeAsyncEvent *event;
     int i;
 
     blk_drain(n->conf.blk);
@@ -812,8 +1220,19 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
         }
     }
 
+    if (n->aer_timer) {
+        timer_del(n->aer_timer);
+        timer_free(n->aer_timer);
+        n->aer_timer = NULL;
+    }
+    while ((event = QSIMPLEQ_FIRST(&n->aer_queue)) != NULL) {
+        QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry);
+        g_free(event);
+    }
+
     blk_flush(n->conf.blk);
     n->bar.cc = 0;
+    n->outstanding_aers = 0;
 }
 
 static int nvme_start_ctrl(NvmeCtrl *n)
@@ -906,6 +1325,9 @@ static int nvme_start_ctrl(NvmeCtrl *n)
     nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
         NVME_AQA_ASQS(n->bar.aqa) + 1);
 
+    n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n);
+    QSIMPLEQ_INIT(&n->aer_queue);
+
     return 0;
 }
 
@@ -1098,6 +1520,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            "completion queue doorbell write"
                            " for nonexistent queue,"
                            " sqid=%"PRIu32", ignoring", qid);
+
+            if (n->outstanding_aers) {
+                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                    NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+                    NVME_LOG_ERROR_INFO);
+            }
+
             return;
         }
 
@@ -1108,6 +1537,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            " beyond queue size, sqid=%"PRIu32","
                            " new_head=%"PRIu16", ignoring",
                            qid, new_head);
+
+            if (n->outstanding_aers) {
+                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                    NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+            }
+
             return;
         }
 
@@ -1136,6 +1571,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            "submission queue doorbell write"
                            " for nonexistent queue,"
                            " sqid=%"PRIu32", ignoring", qid);
+
+            if (n->outstanding_aers) {
+                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                    NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+                    NVME_LOG_ERROR_INFO);
+            }
+
             return;
         }
 
@@ -1146,6 +1588,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
                            " beyond queue size, sqid=%"PRIu32","
                            " new_tail=%"PRIu16", ignoring",
                            qid, new_tail);
+
+            if (n->outstanding_aers) {
+                nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+                    NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+            }
+
             return;
         }
 
@@ -1198,134 +1646,271 @@ static const MemoryRegionOps nvme_cmb_ops = {
     },
 };
 
-static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
 {
-    NvmeCtrl *n = NVME(pci_dev);
-    NvmeIdCtrl *id = &n->id_ctrl;
-
-    int i;
-    int64_t bs_size;
-    uint8_t *pci_conf;
-
-    if (!n->params.num_queues) {
-        error_setg(errp, "num_queues can't be zero");
-        return;
-    }
+    NvmeParams *params = &n->params;
 
     if (!n->conf.blk) {
-        error_setg(errp, "drive property not set");
-        return;
+        error_setg(errp, "nvme: block backend not configured");
+        return 1;
     }
 
-    bs_size = blk_getlength(n->conf.blk);
-    if (bs_size < 0) {
-        error_setg(errp, "could not get backing file size");
-        return;
+    if (!params->serial) {
+        error_setg(errp, "nvme: serial not configured");
+        return 1;
     }
 
-    if (!n->params.serial) {
-        error_setg(errp, "serial property not set");
-        return;
+    if (params->num_queues < 1 || params->num_queues > NVME_MAX_QS) {
+        error_setg(errp, "nvme: invalid queue configuration");
+        return 1;
     }
+
+    return 0;
+}
+
+static int nvme_init_blk(NvmeCtrl *n, Error **errp)
+{
     blkconf_blocksizes(&n->conf);
     if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
-                                       false, errp)) {
-        return;
+        false, errp)) {
+        return 1;
     }
 
-    pci_conf = pci_dev->config;
-    pci_conf[PCI_INTERRUPT_PIN] = 1;
-    pci_config_set_prog_interface(pci_dev->config, 0x2);
-    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
-    pcie_endpoint_cap_init(pci_dev, 0x80);
+    return 0;
+}
+
+static int nvme_init_state(NvmeCtrl *n, Error **errp)
+{
+    int64_t bs_size;
+    Error *local_err = NULL;
+
+    if (!n->params.serial) {
+        error_setg(errp, "serial property not set");
+        return 1;
+    }
+
+    if (nvme_check_constraints(n, &local_err)) {
+        error_propagate_prepend(errp, local_err,
+            "nvme_check_constraints failed");
+        return 1;
+    }
 
-    n->num_namespaces = 1;
     n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
-    n->ns_size = bs_size / (uint64_t)n->num_namespaces;
 
-    n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
+    bs_size = blk_getlength(n->conf.blk);
+    if (bs_size < 0) {
+        error_setg(errp, "could not get backing file size");
+        return 1;
+    }
+
+    n->ns_size = bs_size / (uint64_t) n->params.num_ns;
+
+    n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
     n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
+    n->elpes = g_new0(NvmeErrorLog, NVME_ELPE + 1);
+    n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1);
+    n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
+        sizeof(*n->features.int_vector_config));
 
-    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
-                          "nvme", n->reg_size);
+    return 0;
+}
+
+static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
+    NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+
+    NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+    NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
+    NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
+    NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+
+    n->cmbloc = n->bar.cmbloc;
+    n->cmbsz = n->bar.cmbsz;
+
+    n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+    memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
+                            "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+    pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
+        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
+        PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+}
+
+static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+    uint8_t *pci_conf = pci_dev->config;
+
+    pci_conf[PCI_INTERRUPT_PIN] = 1;
+    pci_config_set_prog_interface(pci_conf, 0x2);
+    pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+    pci_config_set_device_id(pci_conf, 0x5845);
+    pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+    pcie_endpoint_cap_init(pci_dev, 0x80);
+
+    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+        n->reg_size);
     pci_register_bar(pci_dev, 0,
         PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
         &n->iomem);
     msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
 
+    if (n->params.cmb_size_mb) {
+        nvme_init_cmb(n, pci_dev);
+    }
+}
+
+static void nvme_init_ctrl(NvmeCtrl *n)
+{
+    NvmeIdCtrl *id = &n->id_ctrl;
+    NvmeParams *params = &n->params;
+    uint8_t *pci_conf = n->parent_obj.config;
+
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
     strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
     strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
-    strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+    strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
     id->rab = 6;
     id->ieee[0] = 0x00;
     id->ieee[1] = 0x02;
     id->ieee[2] = 0xb3;
+    id->cmic = 0;
+    id->ver = cpu_to_le32(0x00010300);
     id->oacs = cpu_to_le16(0);
-    id->frmw = 7 << 1;
-    id->lpa = 1 << 0;
+    id->acl = 3;
+    id->aerl = NVME_AERL;
+    id->frmw = 7 << 1 | 1;
+    id->lpa = 1 << 2;
+    id->elpe = NVME_ELPE;
+    id->npss = 0;
     id->sqes = (0x6 << 4) | 0x6;
     id->cqes = (0x4 << 4) | 0x4;
-    id->nn = cpu_to_le32(n->num_namespaces);
+    id->nn = cpu_to_le32(params->num_ns);
     id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
+    id->fuses = cpu_to_le16(0);
+    id->fna = 0;
+    if (blk_enable_write_cache(n->conf.blk)) {
+        id->vwc = 1;
+    }
+    id->awun = cpu_to_le16(0);
+    id->awupf = cpu_to_le16(0);
+    id->sgls = cpu_to_le32(0);
+
+    strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:");
+    qemu_uuid_unparse(&qemu_uuid,
+        (char *) id->subnqn + strlen((char *) id->subnqn));
+
     id->psd[0].mp = cpu_to_le16(0x9c4);
     id->psd[0].enlat = cpu_to_le32(0x10);
     id->psd[0].exlat = cpu_to_le32(0x4);
-    if (blk_enable_write_cache(n->conf.blk)) {
-        id->vwc = 1;
+
+    n->temperature = NVME_TEMPERATURE;
+    n->features.temp_thresh = 0x14d;
+
+    for (int i = 0; i < n->params.num_queues; i++) {
+        n->features.int_vector_config[i] = i;
     }
 
     n->bar.cap = 0;
     NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
     NVME_CAP_SET_CQR(n->bar.cap, 1);
-    NVME_CAP_SET_AMS(n->bar.cap, 1);
     NVME_CAP_SET_TO(n->bar.cap, 0xf);
     NVME_CAP_SET_CSS(n->bar.cap, 1);
     NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
 
-    n->bar.vs = 0x00010200;
+    n->bar.vs = 0x00010300;
     n->bar.intmc = n->bar.intms = 0;
+}
 
-    if (n->params.cmb_size_mb) {
+static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns)
+{
+    return n->ns_size / nvme_ns_lbads_bytes(ns);
+}
 
-        NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
-        NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns)
+{
+    id_ns->nlbaf = 0;
+    id_ns->flbas = 0;
+    id_ns->mc = 0;
+    id_ns->dpc = 0;
+    id_ns->dps = 0;
+    id_ns->lbaf[0].lbads = BDRV_SECTOR_BITS;
+}
 
-        NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
-        NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
-        NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
-        NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+    NvmeIdNs *id_ns = &ns->id_ns;
 
-        n->cmbloc = n->bar.cmbloc;
-        n->cmbsz = n->bar.cmbsz;
+    nvme_ns_init_identify(n, id_ns);
 
-        n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
-        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
-                              "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
-        pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
-            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
-            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+    ns->ns_blks = nvme_ns_calc_blks(n, ns);
+    id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns->ns_blks);
 
+    return 0;
+}
+
+static int nvme_init_namespaces(NvmeCtrl *n, Error **errp)
+{
+    int64_t bs_size;
+    Error *local_err = NULL;
+    NvmeNamespace *ns;
+
+    n->namespaces = g_new0(NvmeNamespace, n->params.num_ns);
+
+    bs_size = blk_getlength(n->conf.blk);
+    if (bs_size < 0) {
+        error_setg_errno(errp, -bs_size, "blk_getlength");
+        return 1;
+    }
+
+    n->ns_size = bs_size / (uint64_t) n->params.num_ns;
+
+    for (int i = 0; i < n->params.num_ns; i++) {
+        ns = &n->namespaces[i];
+        ns->id = i + 1;
+        ns->blk_offset = i * n->ns_size;
+
+        if (nvme_init_namespace(n, ns, &local_err)) {
+            error_propagate_prepend(errp, local_err,
+                "nvme_init_namespace: ");
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+{
+    NvmeCtrl *n = NVME(pci_dev);
+    Error *local_err = NULL;
+
+    if (nvme_check_constraints(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
+        return;
+    }
+
+    if (nvme_init_blk(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
+        return;
+    }
+
+    if (nvme_init_state(n, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_init_state: ");
+        return;
     }
 
-    for (i = 0; i < n->num_namespaces; i++) {
-        NvmeNamespace *ns = &n->namespaces[i];
-        NvmeIdNs *id_ns = &ns->id_ns;
-        id_ns->nsfeat = 0;
-        id_ns->nlbaf = 0;
-        id_ns->flbas = 0;
-        id_ns->mc = 0;
-        id_ns->dpc = 0;
-        id_ns->dps = 0;
-        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
-        id_ns->ncap  = id_ns->nuse = id_ns->nsze =
-            cpu_to_le64(n->ns_size >>
-                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
+    nvme_init_pci(n, pci_dev);
+    nvme_init_ctrl(n);
+
+    if (nvme_init_namespaces(n, &local_err)) {
+        error_propagate_prepend(errp, local_err,
+            "nvme_init_namespaces: ");
+        return;
     }
 }
 
@@ -1337,6 +1922,9 @@ static void nvme_exit(PCIDevice *pci_dev)
     g_free(n->namespaces);
     g_free(n->cq);
     g_free(n->sq);
+    g_free(n->elpes);
+    g_free(n->aer_reqs);
+    g_free(n->features.int_vector_config);
 
     if (n->params.cmb_size_mb) {
         g_free(n->cmbuf);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8866373058f6..8925a05445da 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -6,11 +6,13 @@
 #define DEFINE_NVME_PROPERTIES(_state, _props) \
     DEFINE_PROP_STRING("serial", _state, _props.serial), \
     DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
-    DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64)
+    DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
+    DEFINE_PROP_UINT32("num_ns", _state, _props.num_ns, 1)
 
 typedef struct NvmeParams {
     char     *serial;
     uint32_t num_queues;
+    uint32_t num_ns;
     uint32_t cmb_size_mb;
 } NvmeParams;
 
@@ -63,6 +65,9 @@ typedef struct NvmeCQueue {
 
 typedef struct NvmeNamespace {
     NvmeIdNs        id_ns;
+    uint32_t        id;
+    uint64_t        ns_blks;
+    uint64_t        blk_offset;
 } NvmeNamespace;
 
 #define TYPE_NVME "nvme"
@@ -77,26 +82,48 @@ typedef struct NvmeCtrl {
     BlockConf    conf;
     NvmeParams   params;
 
+    uint64_t    starttime_ms;
+    uint16_t    temperature;
     uint32_t    page_size;
     uint16_t    page_bits;
     uint16_t    max_prp_ents;
     uint16_t    cqe_size;
     uint16_t    sqe_size;
     uint32_t    reg_size;
-    uint32_t    num_namespaces;
     uint32_t    max_q_ents;
     uint64_t    ns_size;
+    uint8_t     outstanding_aers;
     uint32_t    cmbsz;
     uint32_t    cmbloc;
     uint8_t     *cmbuf;
     uint64_t    irq_status;
+    uint32_t    qs_created;
 
+    QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue;
+    QEMUTimer   *aer_timer;
+    uint8_t     aer_mask;
+    uint8_t     aer_mask_queued;
+
+    NvmeErrorLog    *elpes;
+    NvmeRequest     **aer_reqs;
     NvmeNamespace   *namespaces;
     NvmeSQueue      **sq;
     NvmeCQueue      **cq;
     NvmeSQueue      admin_sq;
     NvmeCQueue      admin_cq;
+    NvmeFeatureVal  features;
     NvmeIdCtrl      id_ctrl;
 } NvmeCtrl;
 
+static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
+{
+    NvmeIdNs *id = &ns->id_ns;
+    return id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].lbads;
+}
+
+static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
+{
+    return 1 << nvme_ns_lbads(ns);
+}
+
 #endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b92039a5739f..abec518167d0 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -42,10 +42,22 @@ nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
 nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
 nvme_identify_ctrl(void) "identify controller"
 nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
-nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns_list(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns_descriptor_list(uint16_t ns) "identify namespace descriptor list, nsid=%"PRIu16""
+nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32""
+nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32""
 nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
 nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
 nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_get_log(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
+nvme_process_aers(void) "processing aers"
+nvme_aer(uint16_t cid) "cid %"PRIu16""
+nvme_aer_aerl_exceeded(void) "aerl exceeded"
+nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
+nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid) "cid %"PRIu16" cqid %"PRIu16""
+nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
 nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
 nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
@@ -60,6 +72,7 @@ nvme_mmio_shutdown_set(void) "shutdown bit set"
 nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 
 # nvme traces for error conditions
+nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\" status 0x%"PRIx16""
 nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
 nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
 nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
@@ -85,6 +98,7 @@ nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion q
 nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
 nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
 nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
+nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
 nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
 nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
 nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 849a6f3fa346..5a169e7ed7ac 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest {
     NVME_AER_TYPE_SMART                     = 1,
     NVME_AER_TYPE_IO_SPECIFIC               = 6,
     NVME_AER_TYPE_VENDOR_SPECIFIC           = 7,
-    NVME_AER_INFO_ERR_INVALID_SQ            = 0,
-    NVME_AER_INFO_ERR_INVALID_DB            = 1,
+    NVME_AER_INFO_ERR_INVALID_DB_REGISTER   = 0,
+    NVME_AER_INFO_ERR_INVALID_DB_VALUE      = 1,
     NVME_AER_INFO_ERR_DIAG_FAIL             = 2,
     NVME_AER_INFO_ERR_PERS_INTERNAL_ERR     = 3,
     NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR    = 4,
@@ -445,7 +445,8 @@ enum NvmeStatusCodes {
     NVME_FW_REQ_RESET           = 0x010b,
     NVME_INVALID_QUEUE_DEL      = 0x010c,
     NVME_FID_NOT_SAVEABLE       = 0x010d,
-    NVME_FID_NOT_NSID_SPEC      = 0x010f,
+    NVME_FEAT_NOT_CHANGEABLE    = 0x010e,
+    NVME_FEAT_NOT_NSID_SPEC     = 0x010f,
     NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
     NVME_CONFLICTING_ATTRS      = 0x0180,
     NVME_INVALID_PROT_INFO      = 0x0181,
@@ -462,6 +463,13 @@ enum NvmeStatusCodes {
     NVME_NO_COMPLETE            = 0xffff,
 };
 
+typedef struct NvmeIdentifyNamespaceDescriptor {
+    uint8_t nidt;
+    uint8_t nidl;
+    uint8_t rsvd[2];
+    uint8_t nid[];
+} NvmeIdentifyNamespaceDescriptor;
+
 typedef struct NvmeFwSlotInfoLog {
     uint8_t     afi;
     uint8_t     reserved1[7];
@@ -543,7 +551,15 @@ typedef struct NvmeIdCtrl {
     uint8_t     ieee[3];
     uint8_t     cmic;
     uint8_t     mdts;
-    uint8_t     rsvd255[178];
+    uint16_t    cntlid;
+    uint32_t    ver;
+    uint16_t    rtd3r;
+    uint32_t    rtd3e;
+    uint32_t    oaes;
+    uint32_t    ctratt;
+    uint8_t     rsvd111[12];
+    uint8_t     fguid[16];
+    uint8_t     rsvd255[128];
     uint16_t    oacs;
     uint8_t     acl;
     uint8_t     aerl;
@@ -551,10 +567,28 @@ typedef struct NvmeIdCtrl {
     uint8_t     lpa;
     uint8_t     elpe;
     uint8_t     npss;
-    uint8_t     rsvd511[248];
+    uint8_t     avscc;
+    uint8_t     apsta;
+    uint16_t    wctemp;
+    uint16_t    cctemp;
+    uint16_t    mtfa;
+    uint32_t    hmpre;
+    uint32_t    hmmin;
+    uint8_t     tnvmcap[16];
+    uint8_t     unvmcap[16];
+    uint32_t    rpmbs;
+    uint16_t    edstt;
+    uint8_t     dsto;
+    uint8_t     fwug;
+    uint16_t    kas;
+    uint16_t    hctma;
+    uint16_t    mntmt;
+    uint16_t    mxtmt;
+    uint32_t    sanicap;
+    uint8_t     rsvd511[180];
     uint8_t     sqes;
     uint8_t     cqes;
-    uint16_t    rsvd515;
+    uint16_t    maxcmd;
     uint32_t    nn;
     uint16_t    oncs;
     uint16_t    fuses;
@@ -562,8 +596,14 @@ typedef struct NvmeIdCtrl {
     uint8_t     vwc;
     uint16_t    awun;
     uint16_t    awupf;
-    uint8_t     rsvd703[174];
-    uint8_t     rsvd2047[1344];
+    uint8_t     nvscc;
+    uint8_t     rsvd531;
+    uint16_t    acwu;
+    uint16_t    rsvd535;
+    uint32_t    sgls;
+    uint8_t     rsvd767[228];
+    uint8_t     subnqn[256];
+    uint8_t     rsvd2047[1024];
     NvmePSD     psd[32];
     uint8_t     vs[1024];
 } NvmeIdCtrl;
@@ -637,7 +677,7 @@ typedef struct NvmeRangeType {
 
 typedef struct NvmeLBAF {
     uint16_t    ms;
-    uint8_t     ds;
+    uint8_t     lbads;
     uint8_t     rp;
 } NvmeLBAF;
 
-- 
2.21.0


