From: Klaus Birkelund Jensen <klaus@birkelund.eu>
To: qemu-block@nongnu.org
Cc: Keith Busch <keith.busch@intel.com>,
Kevin Wolf <kwolf@redhat.com>,
qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>
Subject: [Qemu-devel] [PATCH 2/8] nvme: bump supported spec to 1.3
Date: Fri, 17 May 2019 10:42:28 +0200
Message-ID: <20190517084234.26923-3-klaus@birkelund.eu>
In-Reply-To: <20190517084234.26923-1-klaus@birkelund.eu>
Bump the supported NVMe version to 1.3. To do so, this patch adds a
number of missing 'Mandatory' features from the spec:
* Support for returning a Namespace Identification Descriptor List in
the Identify command (CNS 03h).
* Support for the Asynchronous Event Request command.
* Support for the Get Log Page command and the mandatory Error
Information, Smart / Health Information and Firmware Slot
Information log pages.
* Support for the Abort command.
As a side effect, this bump also fixes support for multiple namespaces: the
backing storage is now split evenly into one contiguous region per namespace.
The implementation of AER, Get Log Page and Abort commands has been
imported and slightly modified from Keith's qemu-nvme tree[1]. Thanks!
[1]: http://git.infradead.org/users/kbusch/qemu-nvme.git
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
---
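Not part of the patch, but a note for reviewers: below is a minimal,
stand-alone sketch of how the backing image is carved into namespaces and how
a per-namespace byte offset is derived for an I/O, mirroring the new
ns_size/blk_offset logic. The backing size, LBADS value and starting LBA are
made-up example values.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t bs_size = 8ULL << 30;  /* assumed backing file size: 8 GiB */
        uint32_t num_ns  = 4;           /* the new num_ns device parameter */
        uint8_t  lbads   = 9;           /* LBADS of the default LBA format (512 byte blocks) */

        uint64_t ns_size = bs_size / num_ns;  /* bytes per namespace */
        uint64_t ns_blks = ns_size >> lbads;  /* blocks per namespace (reported as NSZE) */

        for (uint32_t i = 0; i < num_ns; i++) {
            uint64_t blk_offset  = (uint64_t) i * ns_size;         /* start of nsid i + 1 */
            uint64_t slba        = 0x100;                          /* example starting LBA */
            uint64_t data_offset = blk_offset + (slba << lbads);   /* byte offset of the I/O */

            printf("nsid %" PRIu32 ": blk_offset=%#" PRIx64 " nsze=%" PRIu64
                   " slba=%#" PRIx64 " -> byte offset %#" PRIx64 "\n",
                   i + 1, blk_offset, ns_blks, slba, data_offset);
        }

        return 0;
    }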
hw/block/nvme.c | 792 ++++++++++++++++++++++++++++++++++++------
hw/block/nvme.h | 31 +-
hw/block/trace-events | 16 +-
include/block/nvme.h | 58 +++-
4 files changed, 783 insertions(+), 114 deletions(-)
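Also for reference (again, not part of the patch): a small sketch of how Get
Log Page assembles the 0's based dword length (NUMDL/NUMDU) and the byte
offset (LPOL/LPOU) from command dwords 10 through 13, as nvme_get_log() does
below. The dword values used here are hypothetical.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* hypothetical command dwords for a Smart / Health Information request */
        uint32_t dw10 = (0x7fu << 16) | (1u << 15) | 0x02;  /* NUMDL=0x7f, RAE=1, LID=0x02 */
        uint32_t dw11 = 0;                                  /* NUMDU */
        uint32_t dw12 = 0;                                  /* LPOL (log page offset, lower) */
        uint32_t dw13 = 0;                                  /* LPOU (log page offset, upper) */

        uint16_t lid   = dw10 & 0xff;
        uint8_t  rae   = (dw10 >> 15) & 0x1;  /* retain asynchronous event */
        uint32_t numdl = dw10 >> 16;
        uint32_t numdu = dw11 & 0xffff;

        /* the dword count is 0's based; multiply by 4 to get the length in bytes */
        uint32_t len = (((numdu << 16) | numdl) + 1) << 2;
        uint64_t off = ((uint64_t) dw13 << 32) | dw12;

        printf("lid=%#" PRIx16 " rae=%" PRIu8 " len=%" PRIu32 " off=%" PRIu64 "\n",
               lid, rae, len, off);

        return 0;
    }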
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index b689c0776e72..65dfc04f71e5 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -9,17 +9,35 @@
*/
/**
- * Reference Specs: http://www.nvmexpress.org, 1.2, 1.1, 1.0e
+ * Reference Specs: http://www.nvmexpress.org, 1.3d, 1.2, 1.1, 1.0e
*
* http://www.nvmexpress.org/resources/
*/
/**
* Usage: add options:
- * -drive file=<file>,if=none,id=<drive_id>
- * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
- * cmb_size_mb=<cmb_size_mb[optional]>, \
- * num_queues=<N[optional]>
+ * -drive file=<file>,if=none,id=<drive_id>
+ * -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
+ *
+ * The "file" option must point to a path to a real file that you will use as
+ * the backing storage for your NVMe device. It must be a non-zero length, as
+ * this will be the disk image that your nvme controller will use to carve up
+ * namespaces for storage.
+ *
+ * Note the "drive" option's "id" name must match the "device nvme" drive's
+ * name to link the block device used for backing storage to the nvme
+ * interface.
+ *
+ * Advanced optional options:
+ *
+ * num_ns=<int>      : Number of namespaces to carve out of the backing
+ *                     storage. Default: 1
+ * num_queues=<int>  : Number of possible I/O queues. Default: 64
+ * cmb_size_mb=<int> : Size of the CMB in MB. Default: 0
+ *
+ * Parameters are verified against conflicting capabilities and attributes;
+ * the device will fail to load if there is a conflict or if the configuration
+ * is one the emulated device cannot handle.
*
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
@@ -38,6 +56,12 @@
#include "trace.h"
#include "nvme.h"
+#define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
+#define NVME_TEMPERATURE 0x143
+#define NVME_ELPE 3
+#define NVME_AERL 3
+#define NVME_OP_ABORTED 0xff
+
#define NVME_GUEST_ERR(trace, fmt, ...) \
do { \
(trace_##trace)(__VA_ARGS__); \
@@ -57,6 +81,16 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
}
}
+static void nvme_addr_write(NvmeCtrl *n, hwaddr addr, void *buf, int size)
+{
+ if (n->cmbsz && addr >= n->ctrl_mem.addr &&
+ addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) {
+ memcpy((void *)&n->cmbuf[addr - n->ctrl_mem.addr], buf, size);
+ return;
+ }
+ pci_dma_write(&n->parent_obj, addr, buf, size);
+}
+
static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
{
return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1;
@@ -244,6 +278,24 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
return status;
}
+static void nvme_post_cqe(NvmeCQueue *cq, NvmeRequest *req)
+{
+ NvmeCtrl *n = cq->ctrl;
+ NvmeSQueue *sq = req->sq;
+ NvmeCqe *cqe = &req->cqe;
+ uint8_t phase = cq->phase;
+ hwaddr addr;
+
+ addr = cq->dma_addr + cq->tail * n->cqe_size;
+ cqe->status = cpu_to_le16((req->status << 1) | phase);
+ cqe->sq_id = cpu_to_le16(sq->sqid);
+ cqe->sq_head = cpu_to_le16(sq->head);
+ nvme_addr_write(n, addr, (void *) cqe, sizeof(*cqe));
+ nvme_inc_cq_tail(cq);
+
+ QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+}
+
static void nvme_post_cqes(void *opaque)
{
NvmeCQueue *cq = opaque;
@@ -251,24 +303,14 @@ static void nvme_post_cqes(void *opaque)
NvmeRequest *req, *next;
QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
- NvmeSQueue *sq;
- hwaddr addr;
-
if (nvme_cq_full(cq)) {
break;
}
QTAILQ_REMOVE(&cq->req_list, req, entry);
- sq = req->sq;
- req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
- req->cqe.sq_id = cpu_to_le16(sq->sqid);
- req->cqe.sq_head = cpu_to_le16(sq->head);
- addr = cq->dma_addr + cq->tail * n->cqe_size;
- nvme_inc_cq_tail(cq);
- pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
- sizeof(req->cqe));
- QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
+ nvme_post_cqe(cq, req);
}
+
if (cq->tail != cq->head) {
nvme_irq_assert(n, cq);
}
@@ -277,11 +319,88 @@ static void nvme_post_cqes(void *opaque)
static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
assert(cq->cqid == req->sq->cqid);
+
+ trace_nvme_enqueue_req_completion(req->cqe.cid, cq->cqid);
QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}
+static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
+ uint8_t event_info, uint8_t log_page)
+{
+ NvmeAsyncEvent *event;
+
+ trace_nvme_enqueue_event(event_type, event_info, log_page);
+
+ /*
+ * Do not enqueue the event if something of this type is already queued.
+ * This bounds the size of the event queue and makes sure it does not grow
+ * indefinitely when events are not processed by the host (i.e. the host
+ * does not issue any AERs).
+ */
+ if (n->aer_mask_queued & (1 << event_type)) {
+ return;
+ }
+ n->aer_mask_queued |= (1 << event_type);
+
+ event = g_new(NvmeAsyncEvent, 1);
+ event->result = (NvmeAerResult) {
+ .event_type = event_type,
+ .event_info = event_info,
+ .log_page = log_page,
+ };
+
+ QSIMPLEQ_INSERT_TAIL(&n->aer_queue, event, entry);
+
+ timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
+static void nvme_process_aers(void *opaque)
+{
+ NvmeCtrl *n = opaque;
+ NvmeRequest *req;
+ NvmeAerResult *result;
+ NvmeAsyncEvent *event, *next;
+
+ trace_nvme_process_aers();
+
+ QSIMPLEQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+ /* can't post cqe if there is nothing to complete */
+ if (!n->outstanding_aers) {
+ trace_nvme_no_outstanding_aers();
+ break;
+ }
+
+ /* ignore if masked (cqe posted, but event not cleared) */
+ if (n->aer_mask & (1 << event->result.event_type)) {
+ trace_nvme_aer_masked(event->result.event_type, n->aer_mask);
+ continue;
+ }
+
+ QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry);
+
+ n->aer_mask |= 1 << event->result.event_type;
+ n->aer_mask_queued &= ~(1 << event->result.event_type);
+ n->outstanding_aers--;
+
+ req = n->aer_reqs[n->outstanding_aers];
+
+ result = (NvmeAerResult *) &req->cqe.result;
+ result->event_type = event->result.event_type;
+ result->event_info = event->result.event_info;
+ result->log_page = event->result.log_page;
+ g_free(event);
+
+ req->status = NVME_SUCCESS;
+
+ trace_nvme_aer_post_cqe(result->event_type, result->event_info,
+ result->log_page);
+
+ nvme_enqueue_req_completion(&n->admin_cq, req);
+ }
+}
+
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
@@ -318,7 +437,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
{
NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
- const uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+ const uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads;
uint64_t slba = le64_to_cpu(rw->slba);
uint32_t nlb = le16_to_cpu(rw->nlb) + 1;
uint64_t offset = slba << data_shift;
@@ -347,9 +466,9 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
uint64_t prp2 = le64_to_cpu(rw->prp2);
uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
- uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
+ uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads;
uint64_t data_size = (uint64_t)nlb << data_shift;
- uint64_t data_offset = slba << data_shift;
+ uint64_t data_offset = ns->blk_offset + (slba << data_shift);
int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
@@ -391,8 +510,8 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
NvmeNamespace *ns;
uint32_t nsid = le32_to_cpu(cmd->nsid);
- if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
- trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+ if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+ trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
return NVME_INVALID_NSID | NVME_DNR;
}
@@ -420,6 +539,7 @@ static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
if (sq->sqid) {
g_free(sq);
}
+ n->qs_created--;
}
static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -486,6 +606,7 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
cq = n->cq[cqid];
QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
n->sq[sqid] = sq;
+ n->qs_created++;
}
static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -535,6 +656,7 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
if (cq->cqid) {
g_free(cq);
}
+ n->qs_created--;
}
static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -575,6 +697,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
msix_vector_use(&n->parent_obj, cq->vector);
n->cq[cqid] = cq;
cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
+ n->qs_created++;
}
static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
@@ -637,8 +760,8 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
trace_nvme_identify_ns(nsid);
- if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
- trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+ if (unlikely(nsid == 0 || nsid > n->params.num_ns)) {
+ trace_nvme_err_invalid_ns(nsid, n->params.num_ns);
return NVME_INVALID_NSID | NVME_DNR;
}
@@ -648,7 +771,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c)
prp1, prp2);
}
-static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
+static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeIdentify *c)
{
static const int data_len = 4 * KiB;
uint32_t min_nsid = le32_to_cpu(c->nsid);
@@ -658,10 +781,10 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
uint16_t ret;
int i, j = 0;
- trace_nvme_identify_nslist(min_nsid);
+ trace_nvme_identify_ns_list(min_nsid);
list = g_malloc0(data_len);
- for (i = 0; i < n->num_namespaces; i++) {
+ for (i = 0; i < n->params.num_ns; i++) {
if (i < min_nsid) {
continue;
}
@@ -675,6 +798,25 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c)
return ret;
}
+static uint16_t nvme_identify_ns_descriptor_list(NvmeCtrl *n, NvmeCmd *c)
+{
+ static const int data_len = sizeof(NvmeIdentifyNamespaceDescriptor) + 0x10;
+ uint32_t nsid = le32_to_cpu(c->nsid);
+ uint64_t prp1 = le64_to_cpu(c->prp1);
+ uint64_t prp2 = le64_to_cpu(c->prp2);
+ NvmeIdentifyNamespaceDescriptor *list;
+ uint16_t ret;
+
+ trace_nvme_identify_ns_descriptor_list(nsid);
+
+ list = g_malloc0(data_len);
+ list->nidt = 0x3;
+ list->nidl = 0x10;
+
+ ret = nvme_dma_read_prp(n, (uint8_t *) list, data_len, prp1, prp2);
+ g_free(list);
+ return ret;
+}
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
{
@@ -686,7 +828,9 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
case 0x01:
return nvme_identify_ctrl(n, c);
case 0x02:
- return nvme_identify_nslist(n, c);
+ return nvme_identify_ns_list(n, c);
+ case 0x03:
+ return nvme_identify_ns_descriptor_list(n, cmd);
default:
trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns));
return NVME_INVALID_FIELD | NVME_DNR;
@@ -696,18 +840,49 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+ uint32_t dw11 = le32_to_cpu(cmd->cdw11);
uint32_t result;
+ trace_nvme_getfeat(dw10);
+
switch (dw10) {
+ case NVME_ARBITRATION:
+ result = cpu_to_le32(n->features.arbitration);
+ break;
+ case NVME_POWER_MANAGEMENT:
+ result = cpu_to_le32(n->features.power_mgmt);
+ break;
+ case NVME_TEMPERATURE_THRESHOLD:
+ result = cpu_to_le32(n->features.temp_thresh);
+ break;
+ case NVME_ERROR_RECOVERY:
+ result = cpu_to_le32(n->features.err_rec);
+ break;
case NVME_VOLATILE_WRITE_CACHE:
result = blk_enable_write_cache(n->conf.blk);
trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
break;
case NVME_NUMBER_OF_QUEUES:
result = cpu_to_le32((n->params.num_queues - 2) |
- ((n->params.num_queues - 2) << 16));
+ ((n->params.num_queues - 2) << 16));
trace_nvme_getfeat_numq(result);
break;
+ case NVME_INTERRUPT_COALESCING:
+ result = cpu_to_le32(n->features.int_coalescing);
+ break;
+ case NVME_INTERRUPT_VECTOR_CONF:
+ if ((dw11 & 0xffff) > n->params.num_queues) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ result = cpu_to_le32(n->features.int_vector_config[dw11 & 0xffff]);
+ break;
+ case NVME_WRITE_ATOMICITY:
+ result = cpu_to_le32(n->features.write_atomicity);
+ break;
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ result = cpu_to_le32(n->features.async_config);
+ break;
default:
trace_nvme_err_invalid_getfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
@@ -722,22 +897,244 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
uint32_t dw10 = le32_to_cpu(cmd->cdw10);
uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+ trace_nvme_setfeat(dw10, dw11);
+
switch (dw10) {
+ case NVME_TEMPERATURE_THRESHOLD:
+ n->features.temp_thresh = dw11;
+ if (n->features.temp_thresh <= n->temperature) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
+ NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO);
+ }
+ break;
case NVME_VOLATILE_WRITE_CACHE:
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
break;
case NVME_NUMBER_OF_QUEUES:
+ if (n->qs_created > 2) {
+ return NVME_CMD_SEQ_ERROR | NVME_DNR;
+ }
+
+ if ((dw11 & 0xffff) == 0xffff || ((dw11 >> 16) & 0xffff) == 0xffff) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1,
((dw11 >> 16) & 0xFFFF) + 1,
n->params.num_queues - 1,
n->params.num_queues - 1);
req->cqe.result = cpu_to_le32((n->params.num_queues - 2) |
- ((n->params.num_queues - 2) << 16));
+ ((n->params.num_queues - 2) << 16));
break;
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ n->features.async_config = dw11;
+ break;
+ case NVME_ARBITRATION:
+ case NVME_POWER_MANAGEMENT:
+ case NVME_ERROR_RECOVERY:
+ case NVME_INTERRUPT_COALESCING:
+ case NVME_INTERRUPT_VECTOR_CONF:
+ case NVME_WRITE_ATOMICITY:
+ return NVME_FEAT_NOT_CHANGABLE | NVME_DNR;
default:
trace_nvme_err_invalid_setfeat(dw10);
return NVME_INVALID_FIELD | NVME_DNR;
}
+
+ return NVME_SUCCESS;
+}
+
+static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
+{
+ n->aer_mask &= ~(1 << event_type);
+ if (!QSIMPLEQ_EMPTY(&n->aer_queue)) {
+ timer_mod(n->aer_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+ }
+}
+
+static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+ uint32_t buf_len, uint64_t off, NvmeRequest *req)
+{
+ uint32_t trans_len;
+ uint64_t prp1 = le64_to_cpu(cmd->prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+ if (off > sizeof(*n->elpes) * (NVME_ELPE + 1)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ trans_len = MIN(sizeof(*n->elpes) * (NVME_ELPE + 1) - off, buf_len);
+
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_ERROR);
+ }
+
+ return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1,
+ prp2);
+}
+
+static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+ uint32_t buf_len, uint64_t off, NvmeRequest *req)
+{
+ uint64_t prp1 = le64_to_cpu(cmd->prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->prp2);
+
+ uint32_t trans_len;
+ time_t current_ms;
+ NvmeSmartLog smart;
+
+ if (cmd->nsid != 0 && cmd->nsid != 0xffffffff) {
+ trace_nvme_err(req->cqe.cid, "smart log not supported for namespace",
+ NVME_INVALID_FIELD | NVME_DNR);
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ if (off > sizeof(smart)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ trans_len = MIN(sizeof(smart) - off, buf_len);
+
+ memset(&smart, 0x0, sizeof(smart));
+ smart.number_of_error_log_entries[0] = cpu_to_le64(0);
+ smart.temperature[0] = n->temperature & 0xff;
+ smart.temperature[1] = (n->temperature >> 8) & 0xff;
+
+ if (n->features.temp_thresh <= n->temperature) {
+ smart.critical_warning |= NVME_SMART_TEMPERATURE;
+ }
+
+ current_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+ smart.power_on_hours[0] = cpu_to_le64(
+ (((current_ms - n->starttime_ms) / 1000) / 60) / 60);
+
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_SMART);
+ }
+
+ return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
+ prp2);
+}
+
+static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
+ uint64_t off, NvmeRequest *req)
+{
+ uint32_t trans_len;
+ uint64_t prp1 = le64_to_cpu(cmd->prp1);
+ uint64_t prp2 = le64_to_cpu(cmd->prp2);
+ NvmeFwSlotInfoLog fw_log;
+
+ if (off > sizeof(fw_log)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ memset(&fw_log, 0, sizeof(NvmeFwSlotInfoLog));
+
+ trans_len = MIN(sizeof(fw_log) - off, buf_len);
+
+ return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
+ prp2);
+}
+
+static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+ uint32_t dw10 = le32_to_cpu(cmd->cdw10);
+ uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+ uint32_t dw12 = le32_to_cpu(cmd->cdw12);
+ uint32_t dw13 = le32_to_cpu(cmd->cdw13);
+ uint16_t lid = dw10 & 0xff;
+ uint8_t rae = (dw10 >> 15) & 0x1;
+ uint32_t numdl, numdu, len;
+ uint64_t off, lpol, lpou;
+
+ numdl = (dw10 >> 16);
+ numdu = (dw11 & 0xffff);
+ lpol = dw12;
+ lpou = dw13;
+
+ len = (((numdu << 16) | numdl) + 1) << 2;
+ off = (lpou << 32ULL) | lpol;
+
+ if (off & 0x3) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ trace_nvme_get_log(req->cqe.cid, lid);
+
+ switch (lid) {
+ case NVME_LOG_ERROR_INFO:
+ return nvme_error_log_info(n, cmd, rae, len, off, req);
+ case NVME_LOG_SMART_INFO:
+ return nvme_smart_info(n, cmd, rae, len, off, req);
+ case NVME_LOG_FW_SLOT_INFO:
+ return nvme_fw_log_info(n, cmd, len, off, req);
+ default:
+ trace_nvme_err_invalid_log_page(req->cqe.cid, lid);
+ return NVME_INVALID_LOG_ID | NVME_DNR;
+ }
+}
+
+static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+ trace_nvme_aer(req->cqe.cid);
+
+ if (n->outstanding_aers > NVME_AERL) {
+ trace_nvme_aer_aerl_exceeded();
+ return NVME_AER_LIMIT_EXCEEDED;
+ }
+
+ n->aer_reqs[n->outstanding_aers] = req;
+ timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+ n->outstanding_aers++;
+
+ return NVME_NO_COMPLETE;
+}
+
+static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+ NvmeSQueue *sq;
+ NvmeRequest *new;
+ uint32_t index = 0;
+ uint16_t sqid = cmd->cdw10 & 0xffff;
+ uint16_t cid = (cmd->cdw10 >> 16) & 0xffff;
+
+ req->cqe.result = 1;
+ if (nvme_check_sqid(n, sqid)) {
+ return NVME_INVALID_FIELD | NVME_DNR;
+ }
+
+ sq = n->sq[sqid];
+
+ /* only consider queued (and not executing) commands for abort */
+ while ((sq->head + index) % sq->size != sq->tail) {
+ NvmeCmd abort_cmd;
+ hwaddr addr;
+
+ addr = sq->dma_addr + ((sq->head + index) % sq->size) * n->sqe_size;
+
+ nvme_addr_read(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+ if (abort_cmd.cid == cid) {
+ req->cqe.result = 0;
+ new = QTAILQ_FIRST(&sq->req_list);
+ QTAILQ_REMOVE(&sq->req_list, new, entry);
+ QTAILQ_INSERT_TAIL(&sq->out_req_list, new, entry);
+
+ memset(&new->cqe, 0, sizeof(new->cqe));
+ new->cqe.cid = cid;
+ new->status = NVME_CMD_ABORT_REQ;
+
+ abort_cmd.opcode = NVME_OP_ABORTED;
+ nvme_addr_write(n, addr, (void *) &abort_cmd, sizeof(abort_cmd));
+
+ nvme_enqueue_req_completion(n->cq[sq->cqid], new);
+
+ return NVME_SUCCESS;
+ }
+
+ ++index;
+ }
+
return NVME_SUCCESS;
}
@@ -758,6 +1155,12 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
return nvme_set_feature(n, cmd, req);
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
+ case NVME_ADM_CMD_GET_LOG_PAGE:
+ return nvme_get_log(n, cmd, req);
+ case NVME_ADM_CMD_ASYNC_EV_REQ:
+ return nvme_aer(n, cmd, req);
+ case NVME_ADM_CMD_ABORT:
+ return nvme_abort(n, cmd, req);
default:
trace_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
@@ -780,6 +1183,10 @@ static void nvme_process_sq(void *opaque)
nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
nvme_inc_sq_head(sq);
+ if (cmd.opcode == NVME_OP_ABORTED) {
+ continue;
+ }
+
req = QTAILQ_FIRST(&sq->req_list);
QTAILQ_REMOVE(&sq->req_list, req, entry);
QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
@@ -797,6 +1204,7 @@ static void nvme_process_sq(void *opaque)
static void nvme_clear_ctrl(NvmeCtrl *n)
{
+ NvmeAsyncEvent *event;
int i;
blk_drain(n->conf.blk);
@@ -812,8 +1220,19 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
}
}
+ if (n->aer_timer) {
+ timer_del(n->aer_timer);
+ timer_free(n->aer_timer);
+ n->aer_timer = NULL;
+ }
+ while ((event = QSIMPLEQ_FIRST(&n->aer_queue)) != NULL) {
+ QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry);
+ g_free(event);
+ }
+
blk_flush(n->conf.blk);
n->bar.cc = 0;
+ n->outstanding_aers = 0;
}
static int nvme_start_ctrl(NvmeCtrl *n)
@@ -906,6 +1325,9 @@ static int nvme_start_ctrl(NvmeCtrl *n)
nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
NVME_AQA_ASQS(n->bar.aqa) + 1);
+ n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n);
+ QSIMPLEQ_INIT(&n->aer_queue);
+
return 0;
}
@@ -1098,6 +1520,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
"completion queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1108,6 +1537,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
" beyond queue size, sqid=%"PRIu32","
" new_head=%"PRIu16", ignoring",
qid, new_head);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1136,6 +1571,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
"submission queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1146,6 +1588,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
" beyond queue size, sqid=%"PRIu32","
" new_tail=%"PRIu16", ignoring",
qid, new_tail);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO);
+ }
+
return;
}
@@ -1198,134 +1646,271 @@ static const MemoryRegionOps nvme_cmb_ops = {
},
};
-static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
{
- NvmeCtrl *n = NVME(pci_dev);
- NvmeIdCtrl *id = &n->id_ctrl;
-
- int i;
- int64_t bs_size;
- uint8_t *pci_conf;
-
- if (!n->params.num_queues) {
- error_setg(errp, "num_queues can't be zero");
- return;
- }
+ NvmeParams *params = &n->params;
if (!n->conf.blk) {
- error_setg(errp, "drive property not set");
- return;
+ error_setg(errp, "nvme: block backend not configured");
+ return 1;
}
- bs_size = blk_getlength(n->conf.blk);
- if (bs_size < 0) {
- error_setg(errp, "could not get backing file size");
- return;
+ if (!params->serial) {
+ error_setg(errp, "nvme: serial not configured");
+ return 1;
}
- if (!n->params.serial) {
- error_setg(errp, "serial property not set");
- return;
+ if ((params->num_queues < 1 || params->num_queues > NVME_MAX_QS)) {
+ error_setg(errp, "nvme: invalid queue configuration");
+ return 1;
}
+
+ return 0;
+}
+
+static int nvme_init_blk(NvmeCtrl *n, Error **errp)
+{
blkconf_blocksizes(&n->conf);
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
- false, errp)) {
- return;
+ false, errp)) {
+ return 1;
}
- pci_conf = pci_dev->config;
- pci_conf[PCI_INTERRUPT_PIN] = 1;
- pci_config_set_prog_interface(pci_dev->config, 0x2);
- pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
- pcie_endpoint_cap_init(pci_dev, 0x80);
+ return 0;
+}
+
+static int nvme_init_state(NvmeCtrl *n, Error **errp)
+{
+ int64_t bs_size;
+ Error *local_err = NULL;
+
+ if (!n->params.serial) {
+ error_setg(errp, "serial property not set");
+ return 1;
+ }
+
+ if (nvme_check_constraints(n, &local_err)) {
+ error_propagate_prepend(errp, local_err,
+ "nvme_check_constraints failed");
+ return 1;
+ }
- n->num_namespaces = 1;
n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
- n->ns_size = bs_size / (uint64_t)n->num_namespaces;
- n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
+ bs_size = blk_getlength(n->conf.blk);
+ if (bs_size < 0) {
+ error_setg(errp, "could not get backing file size");
+ return 1;
+ }
+
+ n->ns_size = bs_size / (uint64_t) n->params.num_ns;
+
+ n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
+ n->elpes = g_new0(NvmeErrorLog, NVME_ELPE + 1);
+ n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1);
+ n->features.int_vector_config = g_malloc0_n(n->params.num_queues,
+ sizeof(*n->features.int_vector_config));
- memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
- "nvme", n->reg_size);
+ return 0;
+}
+
+static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+ NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
+ NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+
+ NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
+ NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
+ NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
+ NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
+ NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
+ NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2);
+ NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+
+ n->cmbloc = n->bar.cmbloc;
+ n->cmbsz = n->bar.cmbsz;
+
+ n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+ memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
+ "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
+ pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+}
+
+static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
+{
+ uint8_t *pci_conf = pci_dev->config;
+
+ pci_conf[PCI_INTERRUPT_PIN] = 1;
+ pci_config_set_prog_interface(pci_conf, 0x2);
+ pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
+ pci_config_set_device_id(pci_conf, 0x5845);
+ pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS);
+ pcie_endpoint_cap_init(pci_dev, 0x80);
+
+ memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme",
+ n->reg_size);
pci_register_bar(pci_dev, 0,
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
&n->iomem);
msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL);
+ if (n->params.cmb_size_mb) {
+ nvme_init_cmb(n, pci_dev);
+ }
+}
+
+static void nvme_init_ctrl(NvmeCtrl *n)
+{
+ NvmeIdCtrl *id = &n->id_ctrl;
+ NvmeParams *params = &n->params;
+ uint8_t *pci_conf = n->parent_obj.config;
+
id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
- strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' ');
+ strpadcpy((char *)id->sn, sizeof(id->sn), params->serial, ' ');
id->rab = 6;
id->ieee[0] = 0x00;
id->ieee[1] = 0x02;
id->ieee[2] = 0xb3;
+ id->cmic = 0;
+ id->ver = cpu_to_le32(0x00010300);
id->oacs = cpu_to_le16(0);
- id->frmw = 7 << 1;
- id->lpa = 1 << 0;
+ id->acl = 3;
+ id->aerl = NVME_AERL;
+ id->frmw = 7 << 1 | 1;
+ id->lpa = 1 << 2;
+ id->elpe = NVME_ELPE;
+ id->npss = 0;
id->sqes = (0x6 << 4) | 0x6;
id->cqes = (0x4 << 4) | 0x4;
- id->nn = cpu_to_le32(n->num_namespaces);
+ id->nn = cpu_to_le32(params->num_ns);
id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS);
+ id->fuses = cpu_to_le16(0);
+ id->fna = 0;
+ if (blk_enable_write_cache(n->conf.blk)) {
+ id->vwc = 1;
+ }
+ id->awun = cpu_to_le16(0);
+ id->awupf = cpu_to_le16(0);
+ id->sgls = cpu_to_le32(0);
+
+ strcpy((char *) id->subnqn, "nqn.2014-08.org.nvmexpress:uuid:");
+ qemu_uuid_unparse(&qemu_uuid,
+ (char *) id->subnqn + strlen((char *) id->subnqn));
+
id->psd[0].mp = cpu_to_le16(0x9c4);
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
- if (blk_enable_write_cache(n->conf.blk)) {
- id->vwc = 1;
+
+ n->temperature = NVME_TEMPERATURE;
+ n->features.temp_thresh = 0x14d;
+
+ for (int i = 0; i < n->params.num_queues; i++) {
+ n->features.int_vector_config[i] = i;
}
n->bar.cap = 0;
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
NVME_CAP_SET_CQR(n->bar.cap, 1);
- NVME_CAP_SET_AMS(n->bar.cap, 1);
NVME_CAP_SET_TO(n->bar.cap, 0xf);
NVME_CAP_SET_CSS(n->bar.cap, 1);
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
- n->bar.vs = 0x00010200;
+ n->bar.vs = 0x00010300;
n->bar.intmc = n->bar.intms = 0;
+}
- if (n->params.cmb_size_mb) {
+static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns)
+{
+ return n->ns_size / nvme_ns_lbads_bytes(ns);
+}
- NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
- NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
+static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns)
+{
+ id_ns->nlbaf = 0;
+ id_ns->flbas = 0;
+ id_ns->mc = 0;
+ id_ns->dpc = 0;
+ id_ns->dps = 0;
+ id_ns->lbaf[0].lbads = BDRV_SECTOR_BITS;
+}
- NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1);
- NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0);
- NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0);
- NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1);
- NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1);
- NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */
- NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb);
+static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+ NvmeIdNs *id_ns = &ns->id_ns;
- n->cmbloc = n->bar.cmbloc;
- n->cmbsz = n->bar.cmbsz;
+ nvme_ns_init_identify(n, id_ns);
- n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
- memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n,
- "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz));
- pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc),
- PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
- PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
+ ns->ns_blks = nvme_ns_calc_blks(n, ns);
+ id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns->ns_blks);
+ return 0;
+}
+
+static int nvme_init_namespaces(NvmeCtrl *n, Error **errp)
+{
+ int64_t bs_size;
+ Error *local_err = NULL;
+ NvmeNamespace *ns;
+
+ n->namespaces = g_new0(NvmeNamespace, n->params.num_ns);
+
+ bs_size = blk_getlength(n->conf.blk);
+ if (bs_size < 0) {
+ error_setg_errno(errp, -bs_size, "blk_getlength");
+ return 1;
+ }
+
+ n->ns_size = bs_size / (uint64_t) n->params.num_ns;
+
+ for (int i = 0; i < n->params.num_ns; i++) {
+ ns = &n->namespaces[i];
+ ns->id = i + 1;
+ ns->blk_offset = i * n->ns_size;
+
+ if (nvme_init_namespace(n, ns, &local_err)) {
+ error_propagate_prepend(errp, local_err,
+ "nvme_init_namespace: ");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
+{
+ NvmeCtrl *n = NVME(pci_dev);
+ Error *local_err = NULL;
+
+ if (nvme_check_constraints(n, &local_err)) {
+ error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
+ return;
+ }
+
+ if (nvme_init_blk(n, &local_err)) {
+ error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
+ return;
+ }
+
+ if (nvme_init_state(n, &local_err)) {
+ error_propagate_prepend(errp, local_err, "nvme_init_state: ");
+ return;
}
- for (i = 0; i < n->num_namespaces; i++) {
- NvmeNamespace *ns = &n->namespaces[i];
- NvmeIdNs *id_ns = &ns->id_ns;
- id_ns->nsfeat = 0;
- id_ns->nlbaf = 0;
- id_ns->flbas = 0;
- id_ns->mc = 0;
- id_ns->dpc = 0;
- id_ns->dps = 0;
- id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
- id_ns->ncap = id_ns->nuse = id_ns->nsze =
- cpu_to_le64(n->ns_size >>
- id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
+ nvme_init_pci(n, pci_dev);
+ nvme_init_ctrl(n);
+
+ if (nvme_init_namespaces(n, &local_err)) {
+ error_propagate_prepend(errp, local_err,
+ "nvme_init_namespaces: ");
+ return;
}
}
@@ -1337,6 +1922,9 @@ static void nvme_exit(PCIDevice *pci_dev)
g_free(n->namespaces);
g_free(n->cq);
g_free(n->sq);
+ g_free(n->elpes);
+ g_free(n->aer_reqs);
+ g_free(n->features.int_vector_config);
if (n->params.cmb_size_mb) {
g_free(n->cmbuf);
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 8866373058f6..8925a05445da 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -6,11 +6,13 @@
#define DEFINE_NVME_PROPERTIES(_state, _props) \
DEFINE_PROP_STRING("serial", _state, _props.serial), \
DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
- DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64)
+ DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
+ DEFINE_PROP_UINT32("num_ns", _state, _props.num_ns, 1)
typedef struct NvmeParams {
char *serial;
uint32_t num_queues;
+ uint32_t num_ns;
uint32_t cmb_size_mb;
} NvmeParams;
@@ -63,6 +65,9 @@ typedef struct NvmeCQueue {
typedef struct NvmeNamespace {
NvmeIdNs id_ns;
+ uint32_t id;
+ uint64_t ns_blks;
+ uint64_t blk_offset;
} NvmeNamespace;
#define TYPE_NVME "nvme"
@@ -77,26 +82,48 @@ typedef struct NvmeCtrl {
BlockConf conf;
NvmeParams params;
+ uint64_t starttime_ms;
+ uint16_t temperature;
uint32_t page_size;
uint16_t page_bits;
uint16_t max_prp_ents;
uint16_t cqe_size;
uint16_t sqe_size;
uint32_t reg_size;
- uint32_t num_namespaces;
uint32_t max_q_ents;
uint64_t ns_size;
+ uint8_t outstanding_aers;
uint32_t cmbsz;
uint32_t cmbloc;
uint8_t *cmbuf;
uint64_t irq_status;
+ uint32_t qs_created;
+ QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue;
+ QEMUTimer *aer_timer;
+ uint8_t aer_mask;
+ uint8_t aer_mask_queued;
+
+ NvmeErrorLog *elpes;
+ NvmeRequest **aer_reqs;
NvmeNamespace *namespaces;
NvmeSQueue **sq;
NvmeCQueue **cq;
NvmeSQueue admin_sq;
NvmeCQueue admin_cq;
+ NvmeFeatureVal features;
NvmeIdCtrl id_ctrl;
} NvmeCtrl;
+static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
+{
+ NvmeIdNs *id = &ns->id_ns;
+ return id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].lbads;
+}
+
+static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
+{
+ return 1 << nvme_ns_lbads(ns);
+}
+
#endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index b92039a5739f..abec518167d0 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -42,10 +42,22 @@ nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16""
nvme_del_cq(uint16_t cqid) "deleted completion queue, sqid=%"PRIu16""
nvme_identify_ctrl(void) "identify controller"
nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16""
-nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns_list(uint16_t ns) "identify namespace list, nsid=%"PRIu16""
+nvme_identify_ns_descriptor_list(uint16_t ns) "identify namespace descriptor list, nsid=%"PRIu16""
+nvme_getfeat(uint32_t fid) "fid 0x%"PRIx32""
+nvme_setfeat(uint32_t fid, uint32_t val) "fid 0x%"PRIx32" val 0x%"PRIx32""
nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s"
nvme_getfeat_numq(int result) "get feature number of queues, result=%d"
nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d"
+nvme_get_log(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
+nvme_process_aers(void) "processing aers"
+nvme_aer(uint16_t cid) "cid %"PRIu16""
+nvme_aer_aerl_exceeded(void) "aerl exceeded"
+nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8""
+nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid) "cid %"PRIu16" cqid %"PRIu16""
+nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8""
+nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64""
nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64""
nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64""
@@ -60,6 +72,7 @@ nvme_mmio_shutdown_set(void) "shutdown bit set"
nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
# nvme traces for error conditions
+nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\" status 0x%"PRIx16""
nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size"
nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64""
nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
@@ -85,6 +98,7 @@ nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion q
nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16""
nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32""
nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32""
+nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16""
nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues"
nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues"
nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null"
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 849a6f3fa346..5a169e7ed7ac 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest {
NVME_AER_TYPE_SMART = 1,
NVME_AER_TYPE_IO_SPECIFIC = 6,
NVME_AER_TYPE_VENDOR_SPECIFIC = 7,
- NVME_AER_INFO_ERR_INVALID_SQ = 0,
- NVME_AER_INFO_ERR_INVALID_DB = 1,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1,
NVME_AER_INFO_ERR_DIAG_FAIL = 2,
NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3,
NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4,
@@ -445,7 +445,8 @@ enum NvmeStatusCodes {
NVME_FW_REQ_RESET = 0x010b,
NVME_INVALID_QUEUE_DEL = 0x010c,
NVME_FID_NOT_SAVEABLE = 0x010d,
- NVME_FID_NOT_NSID_SPEC = 0x010f,
+ NVME_FEAT_NOT_CHANGABLE = 0x010e,
+ NVME_FEAT_NOT_NSID_SPEC = 0x010f,
NVME_FW_REQ_SUSYSTEM_RESET = 0x0110,
NVME_CONFLICTING_ATTRS = 0x0180,
NVME_INVALID_PROT_INFO = 0x0181,
@@ -462,6 +463,13 @@ enum NvmeStatusCodes {
NVME_NO_COMPLETE = 0xffff,
};
+typedef struct NvmeIdentifyNamespaceDescriptor {
+ uint8_t nidt;
+ uint8_t nidl;
+ uint8_t rsvd[2];
+ uint8_t nid[];
+} NvmeIdentifyNamespaceDescriptor;
+
typedef struct NvmeFwSlotInfoLog {
uint8_t afi;
uint8_t reserved1[7];
@@ -543,7 +551,15 @@ typedef struct NvmeIdCtrl {
uint8_t ieee[3];
uint8_t cmic;
uint8_t mdts;
- uint8_t rsvd255[178];
+ uint16_t cntlid;
+ uint32_t ver;
+ uint16_t rtd3r;
+ uint32_t rtd3e;
+ uint32_t oaes;
+ uint32_t ctratt;
+ uint8_t rsvd111[12];
+ uint8_t fguid[16];
+ uint8_t rsvd255[128];
uint16_t oacs;
uint8_t acl;
uint8_t aerl;
@@ -551,10 +567,28 @@ typedef struct NvmeIdCtrl {
uint8_t lpa;
uint8_t elpe;
uint8_t npss;
- uint8_t rsvd511[248];
+ uint8_t avscc;
+ uint8_t apsta;
+ uint16_t wctemp;
+ uint16_t cctemp;
+ uint16_t mtfa;
+ uint32_t hmpre;
+ uint32_t hmmin;
+ uint8_t tnvmcap[16];
+ uint8_t unvmcap[16];
+ uint32_t rpmbs;
+ uint16_t edstt;
+ uint8_t dsto;
+ uint8_t fwug;
+ uint16_t kas;
+ uint16_t hctma;
+ uint16_t mntmt;
+ uint16_t mxtmt;
+ uint32_t sanicap;
+ uint8_t rsvd511[180];
uint8_t sqes;
uint8_t cqes;
- uint16_t rsvd515;
+ uint16_t maxcmd;
uint32_t nn;
uint16_t oncs;
uint16_t fuses;
@@ -562,8 +596,14 @@ typedef struct NvmeIdCtrl {
uint8_t vwc;
uint16_t awun;
uint16_t awupf;
- uint8_t rsvd703[174];
- uint8_t rsvd2047[1344];
+ uint8_t nvscc;
+ uint8_t rsvd531;
+ uint16_t acwu;
+ uint16_t rsvd535;
+ uint32_t sgls;
+ uint8_t rsvd767[228];
+ uint8_t subnqn[256];
+ uint8_t rsvd2047[1024];
NvmePSD psd[32];
uint8_t vs[1024];
} NvmeIdCtrl;
@@ -637,7 +677,7 @@ typedef struct NvmeRangeType {
typedef struct NvmeLBAF {
uint16_t ms;
- uint8_t ds;
+ uint8_t lbads;
uint8_t rp;
} NvmeLBAF;
--
2.21.0
Thread overview: 15+ messages
2019-05-17 8:42 [Qemu-devel] [PATCH 0/8] nvme: v1.3, sgls, metadata and new 'ocssd' device Klaus Birkelund Jensen
2019-05-17 8:42 ` [Qemu-devel] [PATCH 1/8] nvme: move device parameters to separate struct Klaus Birkelund Jensen
2019-05-17 8:42 ` Klaus Birkelund Jensen [this message]
2019-05-17 8:42 ` [Qemu-devel] [PATCH 3/8] nvme: simplify PRP mappings Klaus Birkelund Jensen
2019-05-17 8:42 ` [Qemu-devel] [PATCH 4/8] nvme: allow multiple i/o's per request Klaus Birkelund Jensen
2019-05-17 8:42 ` [Qemu-devel] [PATCH 5/8] nvme: add support for metadata Klaus Birkelund Jensen
2019-05-22 6:12 ` [Qemu-devel] [Qemu-block] " Klaus Birkelund
2019-05-17 8:42 ` [Qemu-devel] [PATCH 6/8] nvme: add support for scatter gather lists Klaus Birkelund Jensen
2019-05-17 8:42 ` [Qemu-devel] [PATCH 7/8] nvme: keep a copy of the NVMe command in request Klaus Birkelund Jensen
2019-05-17 8:42 ` [Qemu-devel] [PATCH 8/8] nvme: add an OpenChannel 2.0 NVMe device (ocssd) Klaus Birkelund Jensen
2019-05-20 16:45 ` Eric Blake
2019-05-20 17:33 ` Klaus Birkelund
2019-05-20 13:01 ` [Qemu-devel] [PATCH 0/8] nvme: v1.3, sgls, metadata and new 'ocssd' device Kevin Wolf
2019-05-20 13:32 ` Klaus Birkelund
[not found] ` <20190520193445.GA22742@apples.localdomain>
[not found] ` <20190521080115.GA4971@linux.fritz.box>
2019-05-21 20:14 ` Klaus Birkelund