All of lore.kernel.org
 help / color / mirror / Atom feed
From: Klaus Jensen <its@irrelevant.dk>
To: qemu-block@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>, Fam Zheng <fam@euphon.net>,
	Javier Gonzalez <javier@javigon.com>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>,
	Keith Busch <keith.busch@intel.com>,
	Paul Durrant <Paul.Durrant@citrix.com>,
	Stephen Bates <sbates@raithlin.com>
Subject: [PATCH v2 16/20] nvme: support multiple namespaces
Date: Tue, 15 Oct 2019 12:38:56 +0200	[thread overview]
Message-ID: <20191015103900.313928-17-its@irrelevant.dk> (raw)
In-Reply-To: <20191015103900.313928-1-its@irrelevant.dk>

This adds support for multiple namespaces by introducing a new 'nvme-ns'
device model. The nvme device creates a bus named from the device name
('id'). The nvme-ns devices then connect to this and registers
themselves with the nvme device.

This changes how an nvme device is created. Example with two namespaces:

  -drive file=nvme0n1.img,if=none,id=disk1
  -drive file=nvme0n2.img,if=none,id=disk2
  -device nvme,serial=deadbeef,id=nvme0
  -device nvme-ns,drive=disk1,bus=nvme0,nsid=1
  -device nvme-ns,drive=disk2,bus=nvme0,nsid=2

The drive property is kept on the nvme device to keep the change
backward compatible, but the property is now optional. Specifying a
drive for the nvme device will always create the namespace with nsid 1.

Signed-off-by: Klaus Jensen <klaus.jensen@cnexlabs.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/Makefile.objs |   2 +-
 hw/block/nvme-ns.c     | 139 +++++++++++++++++++++++++++
 hw/block/nvme-ns.h     |  58 +++++++++++
 hw/block/nvme.c        | 212 +++++++++++++++++++++++++----------------
 hw/block/nvme.h        |  51 +++++-----
 hw/block/trace-events  |   5 +-
 6 files changed, 352 insertions(+), 115 deletions(-)
 create mode 100644 hw/block/nvme-ns.c
 create mode 100644 hw/block/nvme-ns.h

diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs
index f5f643f0cc06..d44a2f4b780d 100644
--- a/hw/block/Makefile.objs
+++ b/hw/block/Makefile.objs
@@ -7,7 +7,7 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
 common-obj-$(CONFIG_XEN) += xen-block.o
 common-obj-$(CONFIG_ECC) += ecc.o
 common-obj-$(CONFIG_ONENAND) += onenand.o
-common-obj-$(CONFIG_NVME_PCI) += nvme.o
+common-obj-$(CONFIG_NVME_PCI) += nvme.o nvme-ns.o
 
 obj-$(CONFIG_SH4) += tc58128.o
 
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
new file mode 100644
index 000000000000..aa76bb63ef45
--- /dev/null
+++ b/hw/block/nvme-ns.c
@@ -0,0 +1,139 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qemu/log.h"
+#include "hw/block/block.h"
+#include "hw/pci/msix.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/qdev-core.h"
+
+#include "nvme.h"
+#include "nvme-ns.h"
+
+static int nvme_ns_init(NvmeNamespace *ns)
+{
+    NvmeIdNs *id_ns = &ns->id_ns;
+
+    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
+    id_ns->nuse = id_ns->ncap = id_ns->nsze =
+        cpu_to_le64(nvme_ns_nlbas(ns));
+
+    return 0;
+}
+
+static int nvme_ns_init_blk(NvmeNamespace *ns, NvmeIdCtrl *id, Error **errp)
+{
+    blkconf_blocksizes(&ns->conf);
+
+    if (!blkconf_apply_backend_options(&ns->conf,
+        blk_is_read_only(ns->conf.blk), false, errp)) {
+        return 1;
+    }
+
+    ns->size = blk_getlength(ns->conf.blk);
+    if (ns->size < 0) {
+        error_setg_errno(errp, -ns->size, "blk_getlength");
+        return 1;
+    }
+
+    if (!blk_enable_write_cache(ns->conf.blk)) {
+        id->vwc = 0;
+    }
+
+    return 0;
+}
+
+static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
+{
+    if (!ns->conf.blk) {
+        error_setg(errp, "block backend not configured");
+        return 1;
+    }
+
+    return 0;
+}
+
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (nvme_ns_check_constraints(ns, &local_err)) {
+        error_propagate_prepend(errp, local_err,
+            "nvme_ns_check_constraints: ");
+        return 1;
+    }
+
+    if (nvme_ns_init_blk(ns, &n->id_ctrl, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_ns_init_blk: ");
+        return 1;
+    }
+
+    nvme_ns_init(ns);
+    if (nvme_register_namespace(n, ns, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_register_namespace: ");
+        return 1;
+    }
+
+    return 0;
+}
+
+static void nvme_ns_realize(DeviceState *dev, Error **errp)
+{
+    NvmeNamespace *ns = NVME_NS(dev);
+    BusState *s = qdev_get_parent_bus(dev);
+    NvmeCtrl *n = NVME(s->parent);
+    Error *local_err = NULL;
+
+    if (nvme_ns_setup(n, ns, &local_err)) {
+        error_propagate_prepend(errp, local_err, "nvme_ns_setup: ");
+        return;
+    }
+}
+
+static Property nvme_ns_props[] = {
+    DEFINE_BLOCK_PROPERTIES(NvmeNamespace, conf),
+    DEFINE_NVME_NS_PROPERTIES(NvmeNamespace, params),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void nvme_ns_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+    dc->bus_type = TYPE_NVME_BUS;
+    dc->realize = nvme_ns_realize;
+    dc->props = nvme_ns_props;
+    dc->desc = "virtual nvme namespace";
+}
+
+static void nvme_ns_instance_init(Object *obj)
+{
+    NvmeNamespace *ns = NVME_NS(obj);
+    char *bootindex = g_strdup_printf("/namespace@%d,0", ns->params.nsid);
+
+    device_add_bootindex_property(obj, &ns->conf.bootindex, "bootindex",
+        bootindex, DEVICE(obj), &error_abort);
+
+    g_free(bootindex);
+}
+
+static const TypeInfo nvme_ns_info = {
+    .name = TYPE_NVME_NS,
+    .parent = TYPE_DEVICE,
+    .class_init = nvme_ns_class_init,
+    .instance_size = sizeof(NvmeNamespace),
+    .instance_init = nvme_ns_instance_init,
+};
+
+static void nvme_ns_register_types(void)
+{
+    type_register_static(&nvme_ns_info);
+}
+
+type_init(nvme_ns_register_types)
diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
new file mode 100644
index 000000000000..64dd054cf6a9
--- /dev/null
+++ b/hw/block/nvme-ns.h
@@ -0,0 +1,58 @@
+#ifndef NVME_NS_H
+#define NVME_NS_H
+
+#define TYPE_NVME_NS "nvme-ns"
+#define NVME_NS(obj) \
+    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
+
+#define DEFINE_NVME_NS_PROPERTIES(_state, _props) \
+    DEFINE_PROP_UINT32("nsid", _state, _props.nsid, 0)
+
+typedef struct NvmeNamespaceParams {
+    uint32_t nsid;
+} NvmeNamespaceParams;
+
+typedef struct NvmeNamespace {
+    DeviceState parent_obj;
+    BlockConf   conf;
+    int64_t     size;
+
+    NvmeIdNs            id_ns;
+    NvmeNamespaceParams params;
+} NvmeNamespace;
+
+static inline uint32_t nvme_nsid(NvmeNamespace *ns)
+{
+    if (ns) {
+        return ns->params.nsid;
+    }
+
+    return -1;
+}
+
+static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
+{
+    NvmeIdNs *id_ns = &ns->id_ns;
+    return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
+}
+
+static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
+{
+    return nvme_ns_lbaf(ns).ds;
+}
+
+static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
+{
+    return 1 << nvme_ns_lbads(ns);
+}
+
+static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns)
+{
+    return ns->size >> nvme_ns_lbads(ns);
+}
+
+typedef struct NvmeCtrl NvmeCtrl;
+
+int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
+
+#endif /* NVME_NS_H */
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 0a5cd079df9a..a23e9bc4e5ef 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -17,7 +17,8 @@
 /**
  * Usage: add options:
  *     -drive file=<file>,if=none,id=<drive_id>
- *     -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
+ *     -device nvme,serial=<serial>,id=nvme0
+ *     -device nvme-ns,drive=<drive_id>,bus=nvme0,nsid=1
  *
  * Advanced optional options:
  *
@@ -31,6 +32,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/units.h"
+#include "qemu/error-report.h"
 #include "hw/block/block.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci.h"
@@ -46,6 +48,7 @@
 #include "qemu/cutils.h"
 #include "trace.h"
 #include "nvme.h"
+#include "nvme-ns.h"
 
 #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE
 #define NVME_TEMPERATURE 0x143
@@ -882,11 +885,12 @@ static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
 
 static void nvme_rw_cb(NvmeRequest *req, void *opaque)
 {
+    NvmeNamespace *ns = req->ns;
     NvmeSQueue *sq = req->sq;
     NvmeCtrl *n = sq->ctrl;
     NvmeCQueue *cq = n->cq[sq->cqid];
 
-    trace_nvme_rw_cb(req->cid, req->cmd.nsid);
+    trace_nvme_rw_cb(req->cid, nvme_nsid(ns));
 
     nvme_enqueue_req_completion(cq, req);
 }
@@ -919,6 +923,7 @@ static void nvme_aio_cb(void *opaque, int ret)
         block_acct_failed(stats, acct);
 
         if (req) {
+            NvmeNamespace *ns = req->ns;
             NvmeRwCmd *rw = (NvmeRwCmd *) &req->cmd;
             NvmeSQueue *sq = req->sq;
             NvmeCtrl *n = sq->ctrl;
@@ -941,7 +946,7 @@ static void nvme_aio_cb(void *opaque, int ret)
                 aio->offset, nvme_aio_opc_str(aio), req, status);
 
             nvme_set_error_page(n, sq->sqid, cpu_to_le16(req->cid), status,
-                offsetof(NvmeRwCmd, slba), rw->slba, 1);
+                offsetof(NvmeRwCmd, slba), rw->slba, nvme_nsid(ns));
 
             error_setg_errno(&local_err, -ret, "aio failed");
             error_report_err(local_err);
@@ -1007,7 +1012,7 @@ static inline uint16_t nvme_check_bounds(NvmeCtrl *n, NvmeRequest *req)
     uint64_t nsze = le64_to_cpu(ns->id_ns.nsze);
 
     if (unlikely((req->slba + req->nlb) > nsze)) {
-        block_acct_invalid(blk_get_stats(n->conf.blk),
+        block_acct_invalid(blk_get_stats(ns->conf.blk),
             nvme_req_is_write(req) ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
         trace_nvme_err_invalid_lba_range(req->slba, req->nlb, nsze);
         return NVME_LBA_RANGE | NVME_DNR;
@@ -1042,7 +1047,9 @@ static uint16_t nvme_check_rw(NvmeCtrl *n, NvmeRequest *req)
 
 static uint16_t nvme_flush(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
-    NvmeAIO *aio = nvme_aio_new(n->conf.blk, 0x0, NULL, req, NULL);
+    NvmeNamespace *ns = req->ns;
+
+    NvmeAIO *aio = nvme_aio_new(ns->conf.blk, 0x0, NULL, req, NULL);
 
     nvme_req_register_aio(req, aio, NVME_AIO_OPC_FLUSH);
     nvme_aio(aio);
@@ -1064,19 +1071,18 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     req->slba = le64_to_cpu(rw->slba);
     req->nlb  = le16_to_cpu(rw->nlb) + 1;
 
-    trace_nvme_write_zeros(req->cid, le32_to_cpu(cmd->nsid), req->slba,
-        req->nlb);
+    trace_nvme_write_zeros(req->cid, nvme_nsid(ns), req->slba, req->nlb);
 
     status = nvme_check_bounds(n, req);
     if (unlikely(status)) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), BLOCK_ACCT_WRITE);
+        block_acct_invalid(blk_get_stats(ns->conf.blk), BLOCK_ACCT_WRITE);
         return status;
     }
 
     offset = req->slba << nvme_ns_lbads(ns);
     count = req->nlb << nvme_ns_lbads(ns);
 
-    aio = nvme_aio_new(n->conf.blk, offset, NULL, req, NULL);
+    aio = nvme_aio_new(ns->conf.blk, offset, NULL, req, NULL);
 
     aio->iov.size = count;
 
@@ -1098,22 +1104,23 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     req->nlb  = le16_to_cpu(rw->nlb) + 1;
     req->slba = le64_to_cpu(rw->slba);
 
-    trace_nvme_rw(nvme_req_is_write(req) ? "write" : "read", req->nlb,
-        req->nlb << nvme_ns_lbads(req->ns), req->slba);
+    trace_nvme_rw(req->cid, nvme_req_is_write(req) ? "write" : "read",
+        nvme_nsid(ns), req->nlb, req->nlb << nvme_ns_lbads(ns),
+        req->slba);
 
     status = nvme_check_rw(n, req);
     if (status) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+        block_acct_invalid(blk_get_stats(ns->conf.blk), acct);
         return status;
     }
 
     status = nvme_map(n, cmd, req);
     if (status) {
-        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+        block_acct_invalid(blk_get_stats(ns->conf.blk), acct);
         return status;
     }
 
-    nvme_rw_aio(n->conf.blk, req->slba << nvme_ns_lbads(ns), &req->qsg, req);
+    nvme_rw_aio(ns->conf.blk, req->slba << nvme_ns_lbads(ns), &req->qsg, req);
     nvme_req_set_cb(req, nvme_rw_cb, NULL);
 
     return NVME_NO_COMPLETE;
@@ -1125,13 +1132,13 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     trace_nvme_io_cmd(req->cid, nsid, le16_to_cpu(req->sq->sqid), cmd->opcode);
 
-    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+    req->ns = nvme_ns(n, nsid);
+
+    if (unlikely(!req->ns)) {
         trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    req->ns = &n->namespaces[nsid - 1];
-
     switch (cmd->opcode) {
     case NVME_CMD_FLUSH:
         return nvme_flush(n, cmd, req);
@@ -1296,19 +1303,25 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
     uint64_t units_read = 0, units_written = 0, read_commands = 0,
         write_commands = 0;
     NvmeSmartLog smart;
-    BlockAcctStats *s;
 
-    if (!nsid || (nsid != 0xffffffff && nsid > n->num_namespaces)) {
+    if (!nsid || (nsid != 0xffffffff && !nvme_ns(n, nsid))) {
         trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    s = blk_get_stats(n->conf.blk);
+    for (int i = 1; i <= n->num_namespaces; i++) {
+        NvmeNamespace *ns = nvme_ns(n, i);
+        if (!ns) {
+            continue;
+        }
 
-    units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
-    units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
-    read_commands = s->nr_ops[BLOCK_ACCT_READ];
-    write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
+        BlockAcctStats *s = blk_get_stats(ns->conf.blk);
+
+        units_read += s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
+        units_written += s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
+        read_commands += s->nr_ops[BLOCK_ACCT_READ];
+        write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
+    }
 
     if (off > sizeof(smart)) {
         return NVME_INVALID_FIELD | NVME_DNR;
@@ -1508,18 +1521,16 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
 static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
-    NvmeNamespace *ns;
     uint32_t nsid = le32_to_cpu(cmd->nsid);
+    NvmeNamespace *ns = nvme_ns(n, nsid);
 
     trace_nvme_identify_ns(nsid);
 
-    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+    if (unlikely(!ns)) {
         trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
         return NVME_INVALID_NSID | NVME_DNR;
     }
 
-    ns = &n->namespaces[nsid - 1];
-
     return nvme_dma_read(n, (uint8_t *) &ns->id_ns, sizeof(ns->id_ns), cmd,
         req);
 }
@@ -1536,11 +1547,11 @@ static uint16_t nvme_identify_ns_list(NvmeCtrl *n, NvmeCmd *cmd,
     trace_nvme_identify_ns_list(min_nsid);
 
     list = g_malloc0(data_len);
-    for (i = 0; i < n->num_namespaces; i++) {
-        if (i < min_nsid) {
+    for (i = 1; i <= n->num_namespaces; i++) {
+        if (i <= min_nsid || !nvme_ns(n, i)) {
             continue;
         }
-        list[j++] = cpu_to_le32(i + 1);
+        list[j++] = cpu_to_le32(i);
         if (j == data_len / sizeof(uint32_t)) {
             break;
         }
@@ -1569,7 +1580,7 @@ static uint16_t nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeCmd *cmd,
 
     trace_nvme_identify_ns_descr_list(nsid);
 
-    if (unlikely(nsid == 0 || nsid > n->num_namespaces)) {
+    if (unlikely(!nvme_ns(n, nsid))) {
         trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
         return NVME_INVALID_NSID | NVME_DNR;
     }
@@ -1666,9 +1677,11 @@ static uint16_t nvme_get_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
 
 static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
+    NvmeNamespace *ns;
+
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
-    uint32_t result;
+    uint32_t result, nsid;
 
     trace_nvme_getfeat(dw10);
 
@@ -1686,7 +1699,15 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         result = cpu_to_le32(n->features.err_rec);
         break;
     case NVME_VOLATILE_WRITE_CACHE:
-        result = blk_enable_write_cache(n->conf.blk);
+        nsid = le32_to_cpu(req->cmd.nsid);
+        ns = nvme_ns(n, nsid);
+
+        if (unlikely(!ns)) {
+            trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+            return NVME_INVALID_NSID | NVME_DNR;
+        }
+
+        result = blk_enable_write_cache(ns->conf.blk);
         trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled");
         break;
     case NVME_NUMBER_OF_QUEUES:
@@ -1740,8 +1761,11 @@ static uint16_t nvme_set_feature_timestamp(NvmeCtrl *n, NvmeCmd *cmd,
 
 static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 {
+    NvmeNamespace *ns;
+
     uint32_t dw10 = le32_to_cpu(cmd->cdw10);
     uint32_t dw11 = le32_to_cpu(cmd->cdw11);
+    uint32_t nsid;
 
     trace_nvme_setfeat(dw10, dw11);
 
@@ -1757,7 +1781,15 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         break;
 
     case NVME_VOLATILE_WRITE_CACHE:
-        blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
+        nsid = le32_to_cpu(req->cmd.nsid);
+        ns = nvme_ns(n, nsid);
+
+        if (unlikely(!ns)) {
+            trace_nvme_err_invalid_ns(nsid, n->num_namespaces);
+            return NVME_INVALID_NSID | NVME_DNR;
+        }
+
+        blk_set_enable_write_cache(ns->conf.blk, dw11 & 1);
         break;
     case NVME_NUMBER_OF_QUEUES:
         if (n->qs_created > 2) {
@@ -1932,9 +1964,17 @@ static void nvme_process_sq(void *opaque)
 
 static void nvme_clear_ctrl(NvmeCtrl *n)
 {
+    NvmeNamespace *ns;
     int i;
 
-    blk_drain(n->conf.blk);
+    for (i = 1; i <= n->num_namespaces; i++) {
+        ns = nvme_ns(n, i);
+        if (!ns) {
+            continue;
+        }
+
+        blk_drain(ns->conf.blk);
+    }
 
     for (i = 0; i < n->params.num_queues; i++) {
         if (n->sq[i] != NULL) {
@@ -1961,7 +2001,15 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
 
     n->outstanding_aers = 0;
 
-    blk_flush(n->conf.blk);
+    for (i = 1; i <= n->num_namespaces; i++) {
+        ns = nvme_ns(n, i);
+        if (!ns) {
+            continue;
+        }
+
+        blk_flush(ns->conf.blk);
+    }
+
     n->bar.cc = 0;
 }
 
@@ -2382,8 +2430,8 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
 {
     NvmeParams *params = &n->params;
 
-    if (!n->conf.blk) {
-        error_setg(errp, "nvme: block backend not configured");
+    if (!n->namespace.conf.blk && !n->parent_obj.qdev.id) {
+        error_setg(errp, "nvme: invalid 'id' parameter");
         return 1;
     }
 
@@ -2400,22 +2448,10 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
     return 0;
 }
 
-static int nvme_init_blk(NvmeCtrl *n, Error **errp)
-{
-    blkconf_blocksizes(&n->conf);
-    if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
-        false, errp)) {
-        return 1;
-    }
-
-    return 0;
-}
-
 static void nvme_init_state(NvmeCtrl *n)
 {
-    n->num_namespaces = 1;
+    n->num_namespaces = 0;
     n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4);
-    n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
     n->sq = g_new0(NvmeSQueue *, n->params.num_queues);
     n->cq = g_new0(NvmeCQueue *, n->params.num_queues);
     n->elpes = g_new0(NvmeErrorLog, n->params.elpe + 1);
@@ -2507,12 +2543,7 @@ static void nvme_init_ctrl(NvmeCtrl *n)
     id->cqes = (0x4 << 4) | 0x4;
     id->nn = cpu_to_le32(n->num_namespaces);
     id->oncs = cpu_to_le16(NVME_ONCS_WRITE_ZEROS | NVME_ONCS_TIMESTAMP);
-
-
-    if (blk_enable_write_cache(n->conf.blk)) {
-        id->vwc = 1;
-    }
-
+    id->vwc = 1;
     id->sgls = cpu_to_le32(0x1);
 
     strcpy((char *) id->subnqn, "nqn.2019-08.org.qemu:");
@@ -2533,22 +2564,25 @@ static void nvme_init_ctrl(NvmeCtrl *n)
     n->bar.intmc = n->bar.intms = 0;
 }
 
-static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
+int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
 {
-    int64_t bs_size;
-    NvmeIdNs *id_ns = &ns->id_ns;
+    uint32_t nsid = nvme_nsid(ns);
+
+    if (nsid == 0 || nsid > NVME_MAX_NAMESPACES) {
+        error_setg(errp, "invalid nsid");
+        return 1;
+    }
 
-    bs_size = blk_getlength(n->conf.blk);
-    if (bs_size < 0) {
-        error_setg_errno(errp, -bs_size, "blk_getlength");
+    if (n->namespaces[nsid - 1]) {
+        error_setg(errp, "nsid must be unique");
         return 1;
     }
 
-    id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
-    n->ns_size = bs_size;
+    trace_nvme_register_namespace(nsid);
 
-    id_ns->ncap = id_ns->nuse = id_ns->nsze =
-        cpu_to_le64(nvme_ns_nlbas(n, ns));
+    n->namespaces[nsid - 1] = ns;
+    n->num_namespaces = MAX(n->num_namespaces, nsid);
+    n->id_ctrl.nn = cpu_to_le32(n->num_namespaces);
 
     return 0;
 }
@@ -2556,30 +2590,31 @@ static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
     NvmeCtrl *n = NVME(pci_dev);
+    NvmeNamespace *ns;
     Error *local_err = NULL;
-    int i;
 
     if (nvme_check_constraints(n, &local_err)) {
         error_propagate_prepend(errp, local_err, "nvme_check_constraints: ");
         return;
     }
 
+    qbus_create_inplace(&n->bus, sizeof(NvmeBus), TYPE_NVME_BUS,
+        &pci_dev->qdev, n->parent_obj.qdev.id);
+
     nvme_init_state(n);
+    nvme_init_pci(n, pci_dev);
+    nvme_init_ctrl(n);
 
-    if (nvme_init_blk(n, &local_err)) {
-        error_propagate_prepend(errp, local_err, "nvme_init_blk: ");
-        return;
-    }
+    /* setup a namespace if the controller drive property was given */
+    if (n->namespace.conf.blk) {
+        ns = &n->namespace;
+        ns->params.nsid = 1;
 
-    for (i = 0; i < n->num_namespaces; i++) {
-        if (nvme_init_namespace(n, &n->namespaces[i], &local_err)) {
-            error_propagate_prepend(errp, local_err, "nvme_init_namespace: ");
+        if (nvme_ns_setup(n, ns, &local_err)) {
+            error_propagate_prepend(errp, local_err, "nvme_ns_setup: ");
             return;
         }
     }
-
-    nvme_init_pci(n, pci_dev);
-    nvme_init_ctrl(n);
 }
 
 static void nvme_exit(PCIDevice *pci_dev)
@@ -2601,7 +2636,7 @@ static void nvme_exit(PCIDevice *pci_dev)
 }
 
 static Property nvme_props[] = {
-    DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
+    DEFINE_BLOCK_PROPERTIES(NvmeCtrl, namespace.conf),
     DEFINE_NVME_PROPERTIES(NvmeCtrl, params),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -2633,26 +2668,35 @@ static void nvme_instance_init(Object *obj)
 {
     NvmeCtrl *s = NVME(obj);
 
-    device_add_bootindex_property(obj, &s->conf.bootindex,
-                                  "bootindex", "/namespace@1,0",
-                                  DEVICE(obj), &error_abort);
+    if (s->namespace.conf.blk) {
+        device_add_bootindex_property(obj, &s->namespace.conf.bootindex,
+                                      "bootindex", "/namespace@1,0",
+                                      DEVICE(obj), &error_abort);
+    }
 }
 
 static const TypeInfo nvme_info = {
     .name          = TYPE_NVME,
     .parent        = TYPE_PCI_DEVICE,
     .instance_size = sizeof(NvmeCtrl),
-    .class_init    = nvme_class_init,
     .instance_init = nvme_instance_init,
+    .class_init    = nvme_class_init,
     .interfaces = (InterfaceInfo[]) {
         { INTERFACE_PCIE_DEVICE },
         { }
     },
 };
 
+static const TypeInfo nvme_bus_info = {
+    .name = TYPE_NVME_BUS,
+    .parent = TYPE_BUS,
+    .instance_size = sizeof(NvmeBus),
+};
+
 static void nvme_register_types(void)
 {
     type_register_static(&nvme_info);
+    type_register_static(&nvme_bus_info);
 }
 
 type_init(nvme_register_types)
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 2f6706899de1..c7da74625bc9 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -2,6 +2,9 @@
 #define HW_NVME_H
 
 #include "block/nvme.h"
+#include "nvme-ns.h"
+
+#define NVME_MAX_NAMESPACES 256
 
 #define DEFINE_NVME_PROPERTIES(_state, _props) \
     DEFINE_PROP_STRING("serial", _state, _props.serial), \
@@ -72,7 +75,6 @@ static inline const char *nvme_aio_opc_str(NvmeAIO *aio)
 #define NVME_REQ_TRANSFER_MASK 0x3
 
 typedef struct NvmeSQueue    NvmeSQueue;
-typedef struct NvmeNamespace NvmeNamespace;
 typedef void NvmeRequestCompletionFunc(NvmeRequest *req, void *opaque);
 
 struct NvmeRequest {
@@ -122,7 +124,7 @@ static inline bool nvme_req_is_cmb(NvmeRequest *req)
     return (req->flags & NVME_REQ_TRANSFER_MASK) == NVME_REQ_TRANSFER_CMB;
 }
 
-static void nvme_req_set_cmb(NvmeRequest *req)
+static inline void nvme_req_set_cmb(NvmeRequest *req)
 {
     req->flags = NVME_REQ_TRANSFER_CMB;
 }
@@ -173,9 +175,12 @@ struct NvmeCQueue {
     QTAILQ_HEAD(, NvmeRequest) req_list;
 };
 
-struct NvmeNamespace {
-    NvmeIdNs        id_ns;
-};
+#define TYPE_NVME_BUS "nvme-bus"
+#define NVME_BUS(obj) OBJECT_CHECK(NvmeBus, (obj), TYPE_NVME_BUS)
+
+typedef struct NvmeBus {
+    BusState parent_bus;
+} NvmeBus;
 
 #define TYPE_NVME "nvme"
 #define NVME(obj) \
@@ -186,8 +191,8 @@ typedef struct NvmeCtrl {
     MemoryRegion iomem;
     MemoryRegion ctrl_mem;
     NvmeBar      bar;
-    BlockConf    conf;
     NvmeParams   params;
+    NvmeBus      bus;
 
     uint32_t    page_size;
     uint16_t    page_bits;
@@ -197,7 +202,6 @@ typedef struct NvmeCtrl {
     uint32_t    reg_size;
     uint32_t    num_namespaces;
     uint32_t    max_q_ents;
-    uint64_t    ns_size;
     uint8_t     outstanding_aers;
     uint32_t    cmbsz;
     uint32_t    cmbloc;
@@ -217,7 +221,8 @@ typedef struct NvmeCtrl {
     NvmeRequest **aer_reqs;
     QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
 
-    NvmeNamespace   *namespaces;
+    NvmeNamespace   namespace;
+    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES];
     NvmeSQueue      **sq;
     NvmeCQueue      **cq;
     NvmeSQueue      admin_sq;
@@ -227,6 +232,15 @@ typedef struct NvmeCtrl {
     NvmeIdCtrl      id_ctrl;
 } NvmeCtrl;
 
+static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
+{
+    if (!nsid) {
+        return NULL;
+    }
+
+    return n->namespaces[nsid - 1];
+}
+
 static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
 {
     return req->sq->ctrl;
@@ -238,25 +252,6 @@ static inline bool nvme_is_error(uint16_t status, uint16_t err)
     return (status & 0xfff) == err;
 }
 
-static inline NvmeLBAF nvme_ns_lbaf(NvmeNamespace *ns)
-{
-    NvmeIdNs *id_ns = &ns->id_ns;
-    return id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)];
-}
-
-static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
-{
-    return nvme_ns_lbaf(ns).ds;
-}
-
-static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
-{
-    return 1 << nvme_ns_lbads(ns);
-}
-
-static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns)
-{
-    return n->ns_size >> nvme_ns_lbads(ns);
-}
+int nvme_register_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
 
 #endif /* HW_NVME_H */
diff --git a/hw/block/trace-events b/hw/block/trace-events
index 59d42a3b8c39..95c24f514754 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -29,6 +29,7 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int t
 
 # nvme.c
 # nvme traces for successful events
+nvme_register_namespace(uint32_t nsid) "nsid %"PRIu32""
 nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
 nvme_irq_pin(void) "pulsing IRQ pin"
 nvme_irq_masked(void) "IRQ is masked"
@@ -38,7 +39,7 @@ nvme_map_sgl(uint16_t cid, uint8_t typ, uint32_t nlb, uint64_t len) "cid %"PRIu1
 nvme_req_register_aio(uint16_t cid, void *aio, const char *blkname, uint64_t offset, uint64_t count, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" count %"PRIu64" opc \"%s\" req %p"
 nvme_aio_cb(uint16_t cid, void *aio, const char *blkname, uint64_t offset, const char *opc, void *req) "cid %"PRIu16" aio %p blk \"%s\" offset %"PRIu64" opc \"%s\" req %p"
 nvme_io_cmd(uint16_t cid, uint32_t nsid, uint16_t sqid, uint8_t opcode) "cid %"PRIu16" nsid %"PRIu32" sqid %"PRIu16" opc 0x%"PRIx8""
-nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64""
+nvme_rw(uint16_t cid, const char *verb, uint32_t nsid, uint32_t nlb, uint64_t count, uint64_t lba) "cid %"PRIu16" %s nsid %"PRIu32" nlb %"PRIu32" count %"PRIu64" lba 0x%"PRIx64""
 nvme_rw_cb(uint16_t cid, uint32_t nsid) "cid %"PRIu16" nsid %"PRIu32""
 nvme_write_zeros(uint16_t cid, uint32_t nsid, uint64_t slba, uint32_t nlb) "cid %"PRIu16" nsid %"PRIu32" slba %"PRIu64" nlb %"PRIu32""
 nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
@@ -90,7 +91,7 @@ nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not pa
 nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64""
 nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred"
 nvme_err_invalid_prp(void) "invalid PRP"
-nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u"
+nvme_err_invalid_ns(uint32_t nsid, uint32_t nn) "nsid %"PRIu32" nn %"PRIu32""
 nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
 nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
 nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
-- 
2.23.0



  parent reply	other threads:[~2019-10-15 10:59 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-15 10:38 [PATCH v2 00/20] nvme: support NVMe v1.3d, SGLs and multiple namespaces Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 01/20] nvme: remove superfluous breaks Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 02/20] nvme: move device parameters to separate struct Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 03/20] nvme: add missing fields in the identify controller data structure Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 04/20] nvme: populate the mandatory subnqn and ver fields Klaus Jensen
2019-11-12 15:04   ` Beata Michalska
2019-11-13  6:16     ` Klaus Birkelund
2019-10-15 10:38 ` [PATCH v2 05/20] nvme: allow completion queues in the cmb Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 06/20] nvme: add support for the abort command Klaus Jensen
2019-11-12 15:04   ` Beata Michalska
2019-11-13  6:12     ` Klaus Birkelund
2019-11-15 11:56       ` Beata Michalska
2019-11-18  8:49         ` Klaus Birkelund
2019-10-15 10:38 ` [PATCH v2 07/20] nvme: refactor device realization Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 08/20] nvme: add support for the get log page command Klaus Jensen
2019-11-12 15:04   ` Beata Michalska
2019-11-19 20:01     ` Klaus Birkelund
2019-10-15 10:38 ` [PATCH v2 09/20] nvme: add support for the asynchronous event request command Klaus Jensen
2019-11-12 15:04   ` Beata Michalska
2019-11-19 19:51     ` Klaus Birkelund
2019-11-25 12:44       ` Beata Michalska
2019-10-15 10:38 ` [PATCH v2 10/20] nvme: add logging to error information log page Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 11/20] nvme: add missing mandatory features Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 12/20] nvme: bump supported specification version to 1.3 Klaus Jensen
2019-11-12 15:05   ` Beata Michalska
2019-11-18  9:48     ` Klaus Birkelund
2019-11-25 12:13       ` Beata Michalska
2019-11-26  8:40         ` Klaus Birkelund
2019-10-15 10:38 ` [PATCH v2 13/20] nvme: refactor prp mapping Klaus Jensen
2019-11-12 15:23   ` Beata Michalska
2019-11-20  9:39     ` Klaus Birkelund
2019-11-25 13:15       ` Beata Michalska
2019-10-15 10:38 ` [PATCH v2 14/20] nvme: allow multiple aios per command Klaus Jensen
2019-11-12 15:25   ` Beata Michalska
2019-11-21 11:57     ` Klaus Birkelund
2019-11-25 13:59       ` Beata Michalska
2019-10-15 10:38 ` [PATCH v2 15/20] nvme: add support for scatter gather lists Klaus Jensen
2019-11-12 15:25   ` Beata Michalska
2019-11-25  6:21     ` Klaus Birkelund
2019-11-25 14:10       ` Beata Michalska
2019-11-26  8:34         ` Klaus Birkelund
2019-10-15 10:38 ` Klaus Jensen [this message]
2019-10-15 10:38 ` [PATCH v2 17/20] nvme: bump controller pci device id Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 18/20] nvme: remove redundant NvmeCmd pointer parameter Klaus Jensen
2019-10-15 10:38 ` [PATCH v2 19/20] nvme: make lba data size configurable Klaus Jensen
2019-11-12 15:24   ` Beata Michalska
2019-11-13  7:13     ` Klaus Birkelund
2019-10-15 10:39 ` [PATCH v2 20/20] nvme: handle dma errors Klaus Jensen
2019-10-15 17:19 ` [PATCH v2 00/20] nvme: support NVMe v1.3d, SGLs and multiple namespaces no-reply
2019-10-15 17:26 ` no-reply
2019-10-16  6:29 ` Fam Zheng
2019-10-28  6:09 ` Klaus Birkelund

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191015103900.313928-17-its@irrelevant.dk \
    --to=its@irrelevant.dk \
    --cc=Paul.Durrant@citrix.com \
    --cc=fam@euphon.net \
    --cc=javier@javigon.com \
    --cc=keith.busch@intel.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=sbates@raithlin.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.