All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] add boot partitions support and read test case
       [not found] <CGME20210601144231epcas5p3b931fe9421737d104dc3c5be50c928f3@epcas5p3.samsung.com>
@ 2021-06-01 14:37 ` Gollu Appalanaidu
       [not found]   ` <CGME20210601144234epcas5p153e855ad673876cf67e57d4b539dc274@epcas5p1.samsung.com>
       [not found]   ` <CGME20210601144242epcas5p1292f656f45aebd1b9c75fe54797d6b46@epcas5p1.samsung.com>
  0 siblings, 2 replies; 13+ messages in thread
From: Gollu Appalanaidu @ 2021-06-01 14:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, mreitz, its, stefanha, kbusch

-v2:
Boot partitions write and read offset corrections

This series adds the boot partition feature as well as a test case for
reading the boot partition area.

Gollu Appalanaidu (2):
  hw/nvme: add support for boot partitions
  tests/qtest/nvme-test: add boot partition read test

 hw/nvme/ctrl.c          | 207 ++++++++++++++++++++++++++++++++++++++++
 hw/nvme/nvme.h          |   3 +
 hw/nvme/trace-events    |   7 ++
 include/block/nvme.h    |  75 ++++++++++++++-
 tests/qtest/nvme-test.c | 118 ++++++++++++++++++++++-
 5 files changed, 408 insertions(+), 2 deletions(-)

-- 
2.17.1



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2 1/2] hw/nvme: add support for boot partitions
       [not found]   ` <CGME20210601144234epcas5p153e855ad673876cf67e57d4b539dc274@epcas5p1.samsung.com>
@ 2021-06-01 14:37     ` Gollu Appalanaidu
  2021-06-01 17:19       ` Keith Busch
  2021-06-03  9:48       ` Stefan Hajnoczi
  0 siblings, 2 replies; 13+ messages in thread
From: Gollu Appalanaidu @ 2021-06-01 14:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, mreitz, its, stefanha, kbusch

NVMe Boot Partitions provide an area that may be read by the host
without initializing queues or even enabling the controller. This
allows platform initialization code to be stored on the NVMe
device instead of some separate medium.

This patch adds the read support for such an area, as well as support
for updating the boot partition contents from the host through the
FW Download and Commit commands.

Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
---
 hw/nvme/ctrl.c       | 207 +++++++++++++++++++++++++++++++++++++++++++
 hw/nvme/nvme.h       |   3 +
 hw/nvme/trace-events |   7 ++
 include/block/nvme.h |  75 +++++++++++++++-
 4 files changed, 291 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 0bcaf7192f..ab72091802 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -100,6 +100,12 @@
  *   the minimum memory page size (CAP.MPSMIN). The default value is 0 (i.e.
  *   defaulting to the value of `mdts`).
  *
+ * - `bootpart`
+ *   NVMe Boot Partitions provides an area that may be read by the host without
+ *   initializing queues or even enabling the controller. This 'bootpart' block
+ *   device stores platform initialization code. Its size shall be in 256 KiB
+ *   units.
+ *
  * nvme namespace device parameters
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * - `shared`
@@ -215,6 +221,8 @@ static const uint32_t nvme_cse_acs[256] = {
     [NVME_ADM_CMD_ASYNC_EV_REQ]     = NVME_CMD_EFF_CSUPP,
     [NVME_ADM_CMD_NS_ATTACHMENT]    = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
     [NVME_ADM_CMD_FORMAT_NVM]       = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
+    [NVME_ADM_CMD_DOWNLOAD_FW]      = NVME_CMD_EFF_CSUPP,
+    [NVME_ADM_CMD_COMMIT_FW]        = NVME_CMD_EFF_CSUPP,
 };
 
 static const uint32_t nvme_cse_iocs_none[256];
@@ -5145,6 +5153,112 @@ static uint16_t nvme_format(NvmeCtrl *n, NvmeRequest *req)
     return req->status;
 }
 
+struct nvme_bp_read_ctx {
+    NvmeCtrl *n;
+    QEMUSGList qsg;
+};
+
+static void nvme_bp_read_cb(void *opaque, int ret)
+{
+    struct nvme_bp_read_ctx *ctx = opaque;
+    NvmeCtrl *n = ctx->n;
+
+    trace_pci_nvme_bp_read_cb();
+
+    if (ret) {
+        NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
+        NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR);
+        goto free;
+    }
+
+    NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
+    NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_SUCCESS);
+
+free:
+    if (ctx->qsg.sg) {
+        qemu_sglist_destroy(&ctx->qsg);
+    }
+
+    g_free(ctx);
+}
+
+static void nvme_fw_commit_cb(void *opaque, int ret)
+{
+    NvmeRequest *req = opaque;
+
+    trace_pci_nvme_fw_commit_cb(nvme_cid(req));
+
+    if (ret) {
+        nvme_aio_err(req, ret);
+    }
+
+    nvme_enqueue_req_completion(nvme_cq(req), req);
+}
+
+static uint16_t nvme_fw_commit(NvmeCtrl *n, NvmeRequest *req)
+{
+    uint32_t dw10 = le32_to_cpu(req->cmd.cdw10);
+    uint8_t fwug = n->id_ctrl.fwug;
+    uint8_t fs = dw10 & 0x7;
+    uint8_t ca = (dw10 >> 3) & 0x7;
+    uint8_t bpid = dw10 >> 31;
+    int64_t offset = 0;
+
+    trace_pci_nvme_fw_commit(nvme_cid(req), dw10, fwug, fs, ca,
+                            bpid);
+
+    if (fs || ca == NVME_FW_CA_REPLACE) {
+        return NVME_INVALID_FW_SLOT | NVME_DNR;
+    }
+    /*
+     * current firmware commit command only support boot partions
+     * related commit actions
+     */
+    if (ca < NVME_FW_CA_REPLACE_BP) {
+        return NVME_FW_ACTIVATE_PROHIBITED | NVME_DNR;
+    }
+
+    if (ca == NVME_FW_CA_ACTIVATE_BP) {
+        NVME_BPINFO_CLEAR_ABPID(n->bar.bpinfo);
+        NVME_BPINFO_SET_ABPID(n->bar.bpinfo, bpid);
+        return NVME_SUCCESS;
+    }
+
+    if (bpid) {
+        offset = n->bp_size;
+    }
+
+    nvme_sg_init(n, &req->sg, false);
+    qemu_iovec_add(&req->sg.iov, n->bp_data, n->bp_size);
+
+    req->aiocb = blk_aio_pwritev(n->blk_bp, offset, &req->sg.iov, 0,
+                                 nvme_fw_commit_cb, req);
+
+    return NVME_NO_COMPLETE;
+}
+
+static uint16_t nvme_fw_download(NvmeCtrl *n, NvmeRequest *req)
+{
+    uint32_t numd = le32_to_cpu(req->cmd.cdw10);
+    uint32_t offset = le32_to_cpu(req->cmd.cdw11);
+    size_t len = 0;
+    uint16_t status = NVME_SUCCESS;
+
+    trace_pci_nvme_fw_download(nvme_cid(req), numd, offset, n->id_ctrl.fwug);
+
+    len = (numd + 1) << 2;
+    offset <<= 2;
+
+    if (len + offset > n->bp_size) {
+        trace_pci_nvme_fw_download_invalid_bp_size(offset, len, n->bp_size);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    status = nvme_h2c(n, n->bp_data + offset, len, req);
+
+    return status;
+}
+
 static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
 {
     trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), req->cmd.opcode,
@@ -5181,6 +5295,10 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeRequest *req)
         return nvme_get_feature(n, req);
     case NVME_ADM_CMD_ASYNC_EV_REQ:
         return nvme_aer(n, req);
+    case NVME_ADM_CMD_COMMIT_FW:
+        return nvme_fw_commit(n, req);
+    case NVME_ADM_CMD_DOWNLOAD_FW:
+        return nvme_fw_download(n, req);
     case NVME_ADM_CMD_NS_ATTACHMENT:
         return nvme_ns_attachment(n, req);
     case NVME_ADM_CMD_FORMAT_NVM:
@@ -5265,6 +5383,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
     n->qs_created = false;
 
     n->bar.cc = 0;
+    NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
 }
 
 static void nvme_ctrl_shutdown(NvmeCtrl *n)
@@ -5546,6 +5665,47 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
         NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
                        "invalid write to read only CMBSZ, ignored");
         return;
+    case 0x44:  /* BPRSEL */
+        n->bar.bprsel = data & 0xffffffff;
+        size_t bp_len = NVME_BPRSEL_BPRSZ(n->bar.bprsel) * 4 * KiB;
+        int64_t bp_offset = NVME_BPRSEL_BPROF(n->bar.bprsel) * 4 * KiB;
+        int64_t off = 0;
+        struct nvme_bp_read_ctx *ctx;
+
+        trace_pci_nvme_mmio_bprsel(data, n->bar.bprsel,
+                                   NVME_BPRSEL_BPID(n->bar.bpinfo),
+                                   bp_offset, bp_len);
+
+        if (bp_len + bp_offset > n->bp_size) {
+            NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
+            NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR);
+            return;
+        }
+
+        off = NVME_BPRSEL_BPID(n->bar.bpinfo) * n->bp_size + bp_offset;
+
+        NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
+        NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_READING);
+
+        ctx = g_new(struct nvme_bp_read_ctx, 1);
+
+        ctx->n = n;
+
+        pci_dma_sglist_init(&ctx->qsg, &n->parent_obj, 1);
+
+        qemu_sglist_add(&ctx->qsg, n->bar.bpmbl, bp_len);
+
+        dma_blk_read(n->blk_bp, &ctx->qsg, off , BDRV_SECTOR_SIZE,
+                     nvme_bp_read_cb, ctx);
+        return;
+    case 0x48:  /* BPMBL */
+        n->bar.bpmbl = size == 8 ? data :
+            (n->bar.bpmbl & ~0xffffffff) | (data & 0xfffff000);
+        trace_pci_nvme_mmio_bpmbl(data, n->bar.bpmbl);
+        return;
+    case 0x4c:  /* BPMBL hi */
+        n->bar.bpmbl = (n->bar.bpmbl & 0xffffffff) | (data << 32);
+        return;
     case 0x50:  /* CMBMSC */
         if (!NVME_CAP_CMBS(n->bar.cap)) {
             return;
@@ -6074,6 +6234,9 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     id->mdts = n->params.mdts;
     id->ver = cpu_to_le32(NVME_SPEC_VER);
     id->oacs = cpu_to_le16(NVME_OACS_NS_MGMT | NVME_OACS_FORMAT);
+    if (n->blk_bp) {
+        id->oacs |= NVME_OACS_FW;
+    }
     id->cntrltype = 0x1;
 
     /*
@@ -6135,9 +6298,44 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
     NVME_CAP_SET_CMBS(n->bar.cap, n->params.cmb_size_mb ? 1 : 0);
     NVME_CAP_SET_PMRS(n->bar.cap, n->pmr.dev ? 1 : 0);
+    NVME_CAP_SET_BPS(n->bar.cap, 0x1);
 
     n->bar.vs = NVME_SPEC_VER;
     n->bar.intmc = n->bar.intms = 0;
+
+    /* Boot Partition Information (BPINFO) */
+    n->bar.bpinfo = 0;
+}
+
+static int nvme_init_boot_partitions(NvmeCtrl *n, Error **errp)
+{
+    BlockBackend *blk = n->blk_bp;
+    uint64_t len, perm, shared_perm;
+    size_t bp_size;
+    int ret;
+
+    len = blk_getlength(blk);
+    if (len % (256 * KiB)) {
+        error_setg(errp, "boot partitions image size shall be"\
+                   " multiple of 256 KiB current size %lu", len);
+        return -1;
+    }
+
+    perm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
+    shared_perm = BLK_PERM_ALL;
+
+    ret = blk_set_perm(blk, perm, shared_perm, errp);
+    if (ret) {
+        return ret;
+    }
+
+    bp_size = len / (256 * KiB);
+    NVME_BPINFO_SET_BPSZ(n->bar.bpinfo, bp_size);
+    n->bp_size = bp_size * 128 * KiB;
+
+    n->bp_data = g_malloc(n->bp_size);
+
+    return 0;
 }
 
 static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
@@ -6207,6 +6405,13 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 
         nvme_attach_ns(n, ns);
     }
+
+    if (n->blk_bp) {
+        if (nvme_init_boot_partitions(n, errp)) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
 }
 
 static void nvme_exit(PCIDevice *pci_dev)
@@ -6229,6 +6434,7 @@ static void nvme_exit(PCIDevice *pci_dev)
     g_free(n->cq);
     g_free(n->sq);
     g_free(n->aer_reqs);
+    g_free(n->bp_data);
 
     if (n->params.cmb_size_mb) {
         g_free(n->cmb.buf);
@@ -6247,6 +6453,7 @@ static Property nvme_props[] = {
                      HostMemoryBackend *),
     DEFINE_PROP_LINK("subsys", NvmeCtrl, subsys, TYPE_NVME_SUBSYS,
                      NvmeSubsystem *),
+    DEFINE_PROP_DRIVE("bootpart", NvmeCtrl, blk_bp),
     DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial),
     DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0),
     DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index c55293f6d3..1f51e8907e 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -435,6 +435,9 @@ typedef struct NvmeCtrl {
     DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
 
     NvmeSubsystem   *subsys;
+    BlockBackend    *blk_bp;
+    uint8_t         *bp_data;
+    uint64_t        bp_size;
 
     NvmeNamespace   namespace;
     NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index ea33d0ccc3..11234b2dba 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -80,6 +80,11 @@ pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PR
 pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
 pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
+pci_nvme_fw_commit(uint16_t cid, uint32_t dw10, uint8_t fwug, uint8_t fs, uint8_t ca, uint8_t bpid) "cid %"PRIu16" dw10 %"PRIu32" fwug %"PRIu8" fs %"PRIu8" ca %"PRIu8" bpid %"PRIu8""
+pci_nvme_fw_download(uint16_t cid, uint32_t numd, uint32_t ofst, uint8_t fwug) "cid %"PRIu16" numd %"PRIu32" ofst %"PRIu32" fwug %"PRIu8""
+pci_nvme_fw_commit_cb(uint16_t cid) "cid %"PRIu16""
+pci_nvme_bp_read_cb(void) ""
+pci_nvme_fw_download_invalid_bp_size(uint32_t ofst, size_t len, uint64_t bp_size) "ofst %"PRIu32" len %zu bp_size %"PRIu64""
 pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16""
 pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
 pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 0x%"PRIx64" data 0x%"PRIx64" size %d"
@@ -96,6 +101,8 @@ pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin co
 pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded"
 pci_nvme_mmio_stopped(void) "cleared controller enable bit"
 pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
+pci_nvme_mmio_bpmbl(uint64_t data, uint64_t bpmbl) "wrote MMIO, boot partitions buffer location data=0x%"PRIx64", bpmbl=0x%"PRIx64""
+pci_nvme_mmio_bprsel(uint64_t data, uint8_t bp_id, uint64_t bp_off, uint64_t bp_size, uint64_t bprsel) "wrote MMIO, boot partitions read select data=0x%"PRIx64", bp_id=0x%"PRIx8" bp_off=0x%"PRIx64", bp_off=0x%"PRIx64", bprsel=0x%"PRIx64""
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
 pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 2b9d849023..4785687ab8 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -38,6 +38,7 @@ enum NvmeCapShift {
     CAP_DSTRD_SHIFT    = 32,
     CAP_NSSRS_SHIFT    = 36,
     CAP_CSS_SHIFT      = 37,
+    CAP_BPS_SHIFT      = 45,
     CAP_MPSMIN_SHIFT   = 48,
     CAP_MPSMAX_SHIFT   = 52,
     CAP_PMRS_SHIFT     = 56,
@@ -52,6 +53,7 @@ enum NvmeCapMask {
     CAP_DSTRD_MASK     = 0xf,
     CAP_NSSRS_MASK     = 0x1,
     CAP_CSS_MASK       = 0xff,
+    CAP_BPS_MASK       = 0x1,
     CAP_MPSMIN_MASK    = 0xf,
     CAP_MPSMAX_MASK    = 0xf,
     CAP_PMRS_MASK      = 0x1,
@@ -65,6 +67,7 @@ enum NvmeCapMask {
 #define NVME_CAP_DSTRD(cap) (((cap) >> CAP_DSTRD_SHIFT)  & CAP_DSTRD_MASK)
 #define NVME_CAP_NSSRS(cap) (((cap) >> CAP_NSSRS_SHIFT)  & CAP_NSSRS_MASK)
 #define NVME_CAP_CSS(cap)   (((cap) >> CAP_CSS_SHIFT)    & CAP_CSS_MASK)
+#define NVME_CAP_BPS(cap)   (((cap) >> CAP_BPS_SHIFT)    & CAP_BPS_MASK)
 #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
 #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
 #define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
@@ -84,6 +87,8 @@ enum NvmeCapMask {
                                                            << CAP_NSSRS_SHIFT)
 #define NVME_CAP_SET_CSS(cap, val)    (cap |= (uint64_t)(val & CAP_CSS_MASK)   \
                                                            << CAP_CSS_SHIFT)
+#define NVME_CAP_SET_BPS(cap, val)    (cap |= (uint64_t)(val & CAP_BPS_MASK)   \
+                                                           << CAP_BPS_SHIFT)
 #define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & CAP_MPSMIN_MASK)\
                                                            << CAP_MPSMIN_SHIFT)
 #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
@@ -495,6 +500,63 @@ enum NvmePmrmscMask {
 #define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
     (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
 
+enum NvmeBpReadStatus {
+    NVME_BPINFO_BRS_NOREAD  = 0x0,
+    NVME_BPINFO_BRS_READING = 0x1,
+    NVME_BPINFO_BRS_SUCCESS = 0x2,
+    NVME_BPINFO_BRS_ERROR   = 0x3,
+};
+
+enum NvmeBpInfoShift {
+    BPINFO_BPSZ_SHIFT   = 0,
+    BPINFO_BRS_SHIFT    = 24,
+    BPINFO_ABPID_SHIFT  = 31,
+};
+
+enum NvmeBpInfoMask {
+    BPINFO_BPSZ_MASK  = 0x7fff,
+    BPINFO_BRS_MASK   = 0x3,
+    BPINFO_ABPID_MASK = 0x1,
+};
+
+#define NVME_BPINFO_SET_BPSZ(bpinfo, val) \
+    (bpinfo |= (uint64_t)(val & BPINFO_BPSZ_MASK)  << BPINFO_BPSZ_SHIFT)
+#define NVME_BPINFO_SET_BRS(bpinfo, val)   \
+    (bpinfo |= (uint64_t)(val & BPINFO_BRS_MASK) << BPINFO_BRS_SHIFT)
+#define NVME_BPINFO_SET_ABPID(bpinfo, val)   \
+    (bpinfo |= (uint64_t)(val & BPINFO_ABPID_MASK) << BPINFO_ABPID_SHIFT)
+
+#define NVME_BPINFO_BPSZ(bpinfo)   \
+    ((bpinfo >> BPINFO_BPSZ_SHIFT) & BPINFO_BPSZ_MASK)
+#define NVME_BPINFO_BRS(bpinfo)   \
+    ((bpinfo >> BPINFO_BRS_SHIFT) & BPINFO_BRS_MASK)
+#define NVME_BPINFO_ABPID(bpinfo)   \
+    ((bpinfo >> BPINFO_ABPID_SHIFT) & BPINFO_ABPID_MASK)
+
+#define NVME_BPINFO_CLEAR_ABPID(bpinfo)  \
+    (bpinfo &= (uint64_t)(~(BPINFO_ABPID_MASK << BPINFO_ABPID_SHIFT)))
+#define NVME_BPINFO_CLEAR_BRS(bpinfo)   \
+    (bpinfo &= (uint64_t)(~(BPINFO_BRS_MASK << BPINFO_BRS_SHIFT)))
+
+enum NvmeBpReadSelectShift {
+    BPRSEL_BPRSZ_SHIFT  = 0,
+    BPRSEL_BPROF_SHIFT  = 10,
+    BPRSEL_BPID_SHIFT   = 31,
+};
+
+enum NvmeBpReadSelectMask {
+    BPRSEL_BPRSZ_MASK  = 0x3ff,
+    BPRSEL_BPROF_MASK  = 0xffff,
+    BPRSEL_BPID_MASK   = 0x1,
+};
+
+#define NVME_BPRSEL_BPRSZ(bprsel)   \
+    ((bprsel >> BPRSEL_BPRSZ_SHIFT) & BPRSEL_BPRSZ_MASK)
+#define NVME_BPRSEL_BPROF(bprsel)   \
+    ((bprsel >> BPRSEL_BPROF_SHIFT) & BPRSEL_BPROF_MASK)
+#define NVME_BPRSEL_BPID(bprsel)   \
+    ((bprsel >> BPRSEL_BPID_SHIFT) & BPRSEL_BPID_MASK)
+
 enum NvmeSglDescriptorType {
     NVME_SGL_DESCR_TYPE_DATA_BLOCK          = 0x0,
     NVME_SGL_DESCR_TYPE_BIT_BUCKET          = 0x1,
@@ -564,7 +626,7 @@ enum NvmeAdminCommands {
     NVME_ADM_CMD_SET_FEATURES   = 0x09,
     NVME_ADM_CMD_GET_FEATURES   = 0x0a,
     NVME_ADM_CMD_ASYNC_EV_REQ   = 0x0c,
-    NVME_ADM_CMD_ACTIVATE_FW    = 0x10,
+    NVME_ADM_CMD_COMMIT_FW      = 0x10,
     NVME_ADM_CMD_DOWNLOAD_FW    = 0x11,
     NVME_ADM_CMD_NS_ATTACHMENT  = 0x15,
     NVME_ADM_CMD_FORMAT_NVM     = 0x80,
@@ -846,6 +908,8 @@ enum NvmeStatusCodes {
     NVME_FEAT_NOT_CHANGEABLE    = 0x010e,
     NVME_FEAT_NOT_NS_SPEC       = 0x010f,
     NVME_FW_REQ_SUSYSTEM_RESET  = 0x0110,
+    NVME_FW_ACTIVATE_PROHIBITED = 0x0113,
+    NVME_BP_WRITE_PROHIBITED    = 0x011e,
     NVME_NS_ALREADY_ATTACHED    = 0x0118,
     NVME_NS_PRIVATE             = 0x0119,
     NVME_NS_NOT_ATTACHED        = 0x011a,
@@ -1107,6 +1171,15 @@ enum NvmeIdCtrlFrmw {
     NVME_FRMW_SLOT1_RO = 1 << 0,
 };
 
+enum NvmeFwCommitActions {
+    NVME_FW_CA_REPLACE                  = 0x00,
+    NVME_FW_CA_REPLACE_AND_ACTIVATE     = 0x01,
+    NVME_FW_CA_ACTIVATE                 = 0x02,
+    NVME_FW_CA_REPLACE_AND_ACTIVATE_NOW = 0x03,
+    NVME_FW_CA_REPLACE_BP               = 0x06,
+    NVME_FW_CA_ACTIVATE_BP              = 0x07,
+};
+
 enum NvmeIdCtrlLpa {
     NVME_LPA_NS_SMART = 1 << 0,
     NVME_LPA_CSE      = 1 << 1,
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 2/2] tests/qtest/nvme-test: add boot partition read test
       [not found]   ` <CGME20210601144242epcas5p1292f656f45aebd1b9c75fe54797d6b46@epcas5p1.samsung.com>
@ 2021-06-01 14:37     ` Gollu Appalanaidu
  2021-06-01 17:43       ` Klaus Jensen
  0 siblings, 1 reply; 13+ messages in thread
From: Gollu Appalanaidu @ 2021-06-01 14:37 UTC (permalink / raw)
  To: qemu-devel
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, mreitz, its, stefanha, kbusch

Add a test case for reading an NVMe Boot Partition without
enabling the controller.

Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
---
 tests/qtest/nvme-test.c | 118 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 117 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c
index d32c953a38..39d2e26f76 100644
--- a/tests/qtest/nvme-test.c
+++ b/tests/qtest/nvme-test.c
@@ -13,6 +13,19 @@
 #include "libqos/libqtest.h"
 #include "libqos/qgraph.h"
 #include "libqos/pci.h"
+#include "libqos/pci-pc.h"
+#include "libqos/malloc-pc.h"
+#include "libqos/malloc.h"
+#include "libqos/libqos.h"
+#include "include/block/nvme.h"
+#include "include/hw/pci/pci.h"
+
+#define NVME_BPINFO_BPSZ_UNITS  (128 * KiB)
+#define NVME_BRS_BPSZ_UNITS     (4 * KiB)
+#define NVME_BRS_READ_MAX_TIME  1000000
+#define TEST_IMAGE_SIZE         (2 * 128 * KiB)
+
+static char *t_path;
 
 typedef struct QNvme QNvme;
 
@@ -44,6 +57,13 @@ static void *nvme_create(void *pci_bus, QGuestAllocator *alloc, void *addr)
     return &nvme->obj;
 }
 
+static void drive_destroy(void *path)
+{
+    unlink(path);
+    g_free(path);
+    qos_invalidate_command_line();
+}
+
 /* This used to cause a NULL pointer dereference.  */
 static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc)
 {
@@ -66,12 +86,100 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc)
     g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211);
 }
 
+static void nvmetest_bp_read_test(void *obj, void *data, QGuestAllocator *alloc)
+{
+    uint16_t test_size = 32;
+    size_t bp_test_len = test_size * NVME_BRS_BPSZ_UNITS;
+    uint8_t *read_buf = g_malloc(bp_test_len);
+    uint8_t *cmp_buf = g_malloc(bp_test_len);
+    QNvme *nvme = obj;
+    QPCIDevice *pdev = &nvme->dev;
+    QPCIBar nvme_bar;
+    uint8_t brs = 0;
+    uint64_t sleep_time = 0;
+    uintptr_t guest_buf;
+    uint64_t buf_addr;
+
+    memset(cmp_buf, 0x42, bp_test_len);
+
+    guest_buf = guest_alloc(alloc, bp_test_len);
+    buf_addr = cpu_to_le64(guest_buf);
+
+    qpci_device_enable(pdev);
+    nvme_bar = qpci_iomap(pdev, 0, NULL);
+
+    /* BPINFO */
+    uint32_t bpinfo = qpci_io_readl(pdev, nvme_bar, 0x40);
+    uint16_t single_bp_size = bpinfo & BPINFO_BPSZ_MASK;
+    uint8_t active_bpid = bpinfo >> BPINFO_ABPID_SHIFT;
+    uint8_t read_select = (bpinfo >> BPINFO_BRS_SHIFT) & BPINFO_BRS_MASK;
+
+    g_assert_cmpint(single_bp_size, ==, 0x1);
+    g_assert_cmpint(active_bpid, ==, 0);
+    g_assert_cmpint(read_select, ==, NVME_BPINFO_BRS_NOREAD);
+
+    /* BPMBL */
+    uint64_t bpmbl = buf_addr;
+    uint32_t bpmbl_low = bpmbl & 0xffffffff;
+    uint32_t bpmbl_hi = (bpmbl >> 32) & 0xffffffff;
+    qpci_io_writel(pdev, nvme_bar, 0x48, bpmbl_low);
+    qpci_io_writel(pdev, nvme_bar, 0x4c, bpmbl_hi);
+
+    /* BPRSEL */
+    qpci_io_writel(pdev, nvme_bar, 0x44, 32);
+
+    while (true) {
+        usleep(1000);
+        sleep_time += 1000;
+        brs = qpci_io_readb(pdev, nvme_bar, 0x43) & BPINFO_BRS_MASK;
+        if (brs == NVME_BPINFO_BRS_SUCCESS || brs == NVME_BPINFO_BRS_ERROR ||
+            sleep_time == NVME_BRS_READ_MAX_TIME) {
+            break;
+        }
+    }
+    g_assert_cmpint(brs, ==, NVME_BPINFO_BRS_SUCCESS);
+
+    qtest_memread(pdev->bus->qts, guest_buf, read_buf, bp_test_len);
+    g_assert_cmpint(memcmp(cmp_buf, read_buf, bp_test_len), ==, 0);
+
+    g_free(cmp_buf);
+    g_free(read_buf);
+    g_test_queue_destroy(drive_destroy, t_path);
+}
+
 static void nvme_register_nodes(void)
 {
+    int fd;
+    FILE *fh;
+    uint16_t bpsz = 2;
+    size_t bp_len = NVME_BPINFO_BPSZ_UNITS * bpsz;
+    size_t ret;
+    uint8_t *pattern = g_malloc(bp_len);
+
+    t_path = g_strdup("/tmp/qtest.XXXXXX");
+
+    /* Create a temporary raw image */
+    fd = mkstemp(t_path);
+    g_assert_cmpint(fd, >=, 0);
+    ret = ftruncate(fd, TEST_IMAGE_SIZE);
+    g_assert_cmpint(ret, ==, 0);
+    close(fd);
+
+    memset(pattern, 0x42, bp_len);
+
+    fh = fopen(t_path, "w+");
+    ret = fwrite(pattern, NVME_BPINFO_BPSZ_UNITS, bpsz, fh);
+    g_assert_cmpint(ret, ==, bpsz);
+    fclose(fh);
+
+    char *bp_cmd_line = g_strdup_printf("-drive id=bp0,file=%s,if=none,"
+                                        "format=raw", t_path);
+
     QOSGraphEdgeOptions opts = {
         .extra_device_opts = "addr=04.0,drive=drv0,serial=foo",
         .before_cmd_line = "-drive id=drv0,if=none,file=null-co://,"
-                           "file.read-zeroes=on,format=raw",
+                           "file.read-zeroes=on,format=raw ",
+                           bp_cmd_line,
     };
 
     add_qpci_address(&opts, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) });
@@ -83,6 +191,14 @@ static void nvme_register_nodes(void)
     qos_add_test("oob-cmb-access", "nvme", nvmetest_oob_cmb_test, &(QOSGraphTestOptions) {
         .edge.extra_device_opts = "cmb_size_mb=2"
     });
+
+    qos_add_test("bp-read-access", "nvme", nvmetest_bp_read_test,
+                 &(QOSGraphTestOptions) {
+        .edge.extra_device_opts = "bootpart=bp0"
+    });
+
+    /* Clean Up */
+    g_free(pattern);
 }
 
 libqos_init(nvme_register_nodes);
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 14:37     ` [PATCH v2 1/2] hw/nvme: add support for boot partitions Gollu Appalanaidu
@ 2021-06-01 17:19       ` Keith Busch
  2021-06-01 17:41         ` Klaus Jensen
  2021-06-03  9:48       ` Stefan Hajnoczi
  1 sibling, 1 reply; 13+ messages in thread
From: Keith Busch @ 2021-06-01 17:19 UTC (permalink / raw)
  To: Gollu Appalanaidu
  Cc: fam, kwolf, qemu-block, qemu-devel, mreitz, stefanha, its

On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
> NVMe Boot Partitions provides an area that may be read by the host
> without initializing queues or even enabling the controller. This
> allows various platform initialization code to be stored on the NVMe
> device instead of some separete medium.
> 
> This patch adds the read support for such an area, as well as support
> for updating the boot partition contents from the host through the
> FW Download and Commit commands.

Please provide some details on what platform initialization sequence
running on QEMU is going to make use of this feature.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 17:19       ` Keith Busch
@ 2021-06-01 17:41         ` Klaus Jensen
  2021-06-01 17:46           ` Klaus Jensen
  2021-06-01 18:07           ` Keith Busch
  0 siblings, 2 replies; 13+ messages in thread
From: Klaus Jensen @ 2021-06-01 17:41 UTC (permalink / raw)
  To: Keith Busch
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, stefanha

[-- Attachment #1: Type: text/plain, Size: 1129 bytes --]

On Jun  1 10:19, Keith Busch wrote:
>On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
>> NVMe Boot Partitions provides an area that may be read by the host
>> without initializing queues or even enabling the controller. This
>> allows various platform initialization code to be stored on the NVMe
>> device instead of some separete medium.
>>
>> This patch adds the read support for such an area, as well as support
>> for updating the boot partition contents from the host through the
>> FW Download and Commit commands.
>
>Please provide some details on what platform initilization sequence
>running on QEMU is going to make use of this feature.
>

I totally get your reluctance to accept useless features like device 
self-test and ill-supported ones like write uncorrectable.

But I think this feature qualifies just fine for the device. It is 
useful for embedded development and while there might not be any qemu 
boards that wants to use this *right now*, it allows for 
experimentation. And this is a feature that actually *is* implemented by 
real products for embedded systems.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 2/2] tests/qtest/nvme-test: add boot partition read test
  2021-06-01 14:37     ` [PATCH v2 2/2] tests/qtest/nvme-test: add boot partition read test Gollu Appalanaidu
@ 2021-06-01 17:43       ` Klaus Jensen
  0 siblings, 0 replies; 13+ messages in thread
From: Klaus Jensen @ 2021-06-01 17:43 UTC (permalink / raw)
  To: Gollu Appalanaidu
  Cc: fam, kwolf, qemu-block, qemu-devel, mreitz, stefanha, kbusch

[-- Attachment #1: Type: text/plain, Size: 5764 bytes --]

On Jun  1 20:07, Gollu Appalanaidu wrote:
>Add a test case for reading an NVMe Boot Partition without
>enabling the controller.
>
>Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
>---
> tests/qtest/nvme-test.c | 118 +++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 117 insertions(+), 1 deletion(-)
>
>diff --git a/tests/qtest/nvme-test.c b/tests/qtest/nvme-test.c
>index d32c953a38..39d2e26f76 100644
>--- a/tests/qtest/nvme-test.c
>+++ b/tests/qtest/nvme-test.c
>@@ -13,6 +13,19 @@
> #include "libqos/libqtest.h"
> #include "libqos/qgraph.h"
> #include "libqos/pci.h"
>+#include "libqos/pci-pc.h"
>+#include "libqos/malloc-pc.h"
>+#include "libqos/malloc.h"
>+#include "libqos/libqos.h"
>+#include "include/block/nvme.h"
>+#include "include/hw/pci/pci.h"
>+
>+#define NVME_BPINFO_BPSZ_UNITS  (128 * KiB)
>+#define NVME_BRS_BPSZ_UNITS     (4 * KiB)
>+#define NVME_BRS_READ_MAX_TIME  1000000
>+#define TEST_IMAGE_SIZE         (2 * 128 * KiB)
>+
>+static char *t_path;
>
> typedef struct QNvme QNvme;
>
>@@ -44,6 +57,13 @@ static void *nvme_create(void *pci_bus, QGuestAllocator *alloc, void *addr)
>     return &nvme->obj;
> }
>
>+static void drive_destroy(void *path)
>+{
>+    unlink(path);
>+    g_free(path);
>+    qos_invalidate_command_line();
>+}
>+
> /* This used to cause a NULL pointer dereference.  */
> static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc)
> {
>@@ -66,12 +86,100 @@ static void nvmetest_oob_cmb_test(void *obj, void *data, QGuestAllocator *alloc)
>     g_assert_cmpint(qpci_io_readl(pdev, bar, cmb_bar_size - 1), !=, 0x44332211);
> }
>
>+static void nvmetest_bp_read_test(void *obj, void *data, QGuestAllocator *alloc)
>+{
>+    uint16_t test_size = 32;
>+    size_t bp_test_len = test_size * NVME_BRS_BPSZ_UNITS;
>+    uint8_t *read_buf = g_malloc(bp_test_len);
>+    uint8_t *cmp_buf = g_malloc(bp_test_len);
>+    QNvme *nvme = obj;
>+    QPCIDevice *pdev = &nvme->dev;
>+    QPCIBar nvme_bar;
>+    uint8_t brs = 0;
>+    uint64_t sleep_time = 0;
>+    uintptr_t guest_buf;
>+    uint64_t buf_addr;
>+
>+    memset(cmp_buf, 0x42, bp_test_len);

This one byte pattern is too simple and won't catch a lot of possible 
bugs.

The test case should use generate_pattern() (see 
tests/qtest/libqos/libqos.h).

>+
>+    guest_buf = guest_alloc(alloc, bp_test_len);
>+    buf_addr = cpu_to_le64(guest_buf);
>+
>+    qpci_device_enable(pdev);
>+    nvme_bar = qpci_iomap(pdev, 0, NULL);
>+
>+    /* BPINFO */
>+    uint32_t bpinfo = qpci_io_readl(pdev, nvme_bar, 0x40);
>+    uint16_t single_bp_size = bpinfo & BPINFO_BPSZ_MASK;
>+    uint8_t active_bpid = bpinfo >> BPINFO_ABPID_SHIFT;
>+    uint8_t read_select = (bpinfo >> BPINFO_BRS_SHIFT) & BPINFO_BRS_MASK;
>+
>+    g_assert_cmpint(single_bp_size, ==, 0x1);
>+    g_assert_cmpint(active_bpid, ==, 0);
>+    g_assert_cmpint(read_select, ==, NVME_BPINFO_BRS_NOREAD);
>+
>+    /* BPMBL */
>+    uint64_t bpmbl = buf_addr;
>+    uint32_t bpmbl_low = bpmbl & 0xffffffff;
>+    uint32_t bpmbl_hi = (bpmbl >> 32) & 0xffffffff;
>+    qpci_io_writel(pdev, nvme_bar, 0x48, bpmbl_low);
>+    qpci_io_writel(pdev, nvme_bar, 0x4c, bpmbl_hi);
>+
>+    /* BPRSEL */
>+    qpci_io_writel(pdev, nvme_bar, 0x44, 32);
>+
>+    while (true) {
>+        usleep(1000);
>+        sleep_time += 1000;
>+        brs = qpci_io_readb(pdev, nvme_bar, 0x43) & BPINFO_BRS_MASK;
>+        if (brs == NVME_BPINFO_BRS_SUCCESS || brs == NVME_BPINFO_BRS_ERROR ||
>+            sleep_time == NVME_BRS_READ_MAX_TIME) {
>+            break;
>+        }
>+    }
>+    g_assert_cmpint(brs, ==, NVME_BPINFO_BRS_SUCCESS);
>+
>+    qtest_memread(pdev->bus->qts, guest_buf, read_buf, bp_test_len);
>+    g_assert_cmpint(memcmp(cmp_buf, read_buf, bp_test_len), ==, 0);
>+
>+    g_free(cmp_buf);
>+    g_free(read_buf);
>+    g_test_queue_destroy(drive_destroy, t_path);
>+}
>+
> static void nvme_register_nodes(void)
> {
>+    int fd;
>+    FILE *fh;
>+    uint16_t bpsz = 2;
>+    size_t bp_len = NVME_BPINFO_BPSZ_UNITS * bpsz;
>+    size_t ret;
>+    uint8_t *pattern = g_malloc(bp_len);
>+
>+    t_path = g_strdup("/tmp/qtest.XXXXXX");
>+
>+    /* Create a temporary raw image */
>+    fd = mkstemp(t_path);
>+    g_assert_cmpint(fd, >=, 0);
>+    ret = ftruncate(fd, TEST_IMAGE_SIZE);
>+    g_assert_cmpint(ret, ==, 0);
>+    close(fd);
>+
>+    memset(pattern, 0x42, bp_len);
>+
>+    fh = fopen(t_path, "w+");
>+    ret = fwrite(pattern, NVME_BPINFO_BPSZ_UNITS, bpsz, fh);
>+    g_assert_cmpint(ret, ==, bpsz);
>+    fclose(fh);
>+
>+    char *bp_cmd_line = g_strdup_printf("-drive id=bp0,file=%s,if=none,"
>+                                        "format=raw", t_path);
>+
>     QOSGraphEdgeOptions opts = {
>         .extra_device_opts = "addr=04.0,drive=drv0,serial=foo",
>         .before_cmd_line = "-drive id=drv0,if=none,file=null-co://,"
>-                           "file.read-zeroes=on,format=raw",
>+                           "file.read-zeroes=on,format=raw ",
>+                           bp_cmd_line,
>     };
>
>     add_qpci_address(&opts, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) });
>@@ -83,6 +191,14 @@ static void nvme_register_nodes(void)
>     qos_add_test("oob-cmb-access", "nvme", nvmetest_oob_cmb_test, &(QOSGraphTestOptions) {
>         .edge.extra_device_opts = "cmb_size_mb=2"
>     });
>+
>+    qos_add_test("bp-read-access", "nvme", nvmetest_bp_read_test,
>+                 &(QOSGraphTestOptions) {
>+        .edge.extra_device_opts = "bootpart=bp0"
>+    });
>+
>+    /* Clean Up */
>+    g_free(pattern);
> }
>
> libqos_init(nvme_register_nodes);
>-- 
>2.17.1
>
>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 17:41         ` Klaus Jensen
@ 2021-06-01 17:46           ` Klaus Jensen
  2021-06-01 18:07           ` Keith Busch
  1 sibling, 0 replies; 13+ messages in thread
From: Klaus Jensen @ 2021-06-01 17:46 UTC (permalink / raw)
  To: Keith Busch
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, stefanha

[-- Attachment #1: Type: text/plain, Size: 1357 bytes --]

On Jun  1 19:41, Klaus Jensen wrote:
>On Jun  1 10:19, Keith Busch wrote:
>>On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
>>>NVMe Boot Partitions provides an area that may be read by the host
>>>without initializing queues or even enabling the controller. This
>>>allows various platform initialization code to be stored on the NVMe
>>>device instead of some separate medium.
>>>
>>>This patch adds the read support for such an area, as well as support
>>>for updating the boot partition contents from the host through the
>>>FW Download and Commit commands.
>>
>>Please provide some details on what platform initialization sequence
>>running on QEMU is going to make use of this feature.
>>
>
>I totally get your reluctance to accept useless features like device 
>self-test and ill-supported ones like write uncorrectable.
>
>But I think this feature qualifies just fine for the device. It is 
>useful for embedded development and while there might not be any qemu 
>boards that wants to use this *right now*, it allows for 
>experimentation. And this is a feature that actually *is* implemented 
>by real products for embedded systems.

And while the test-case does need more work, there actually *is* a test 
accompanying this. That's a higher standard than 98% of the features we 
have in this device :P

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 17:41         ` Klaus Jensen
  2021-06-01 17:46           ` Klaus Jensen
@ 2021-06-01 18:07           ` Keith Busch
  2021-06-01 19:07             ` Klaus Jensen
  1 sibling, 1 reply; 13+ messages in thread
From: Keith Busch @ 2021-06-01 18:07 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, stefanha

On Tue, Jun 01, 2021 at 07:41:34PM +0200, Klaus Jensen wrote:
> On Jun  1 10:19, Keith Busch wrote:
> > On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
> > > NVMe Boot Partitions provides an area that may be read by the host
> > > without initializing queues or even enabling the controller. This
> > > allows various platform initialization code to be stored on the NVMe
> > > device instead of some separate medium.
> > > 
> > > This patch adds the read support for such an area, as well as support
> > > for updating the boot partition contents from the host through the
> > > FW Download and Commit commands.
> > 
> > Please provide some details on what platform initialization sequence
> > running on QEMU is going to make use of this feature.
> > 
> 
> I totally get your reluctance to accept useless features like device
> self-test and ill-supported ones like write uncorrectable.
> 
> But I think this feature qualifies just fine for the device. It is useful
> for embedded development and while there might not be any qemu boards that
> wants to use this *right now*, it allows for experimentation. And this is a
> feature that actually *is* implemented by real products for embedded
> systems.

That wasn't my request, though. I am well aware of the feature and also
have hardware that implements it. It just sounds like you haven't
actually tested this feature under the protocol's intended use cases
inside this environment. I think that type of testing and a high level
description of it in the changelog ought to be part of acceptance
criteria.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 18:07           ` Keith Busch
@ 2021-06-01 19:07             ` Klaus Jensen
  2021-06-01 20:23               ` Keith Busch
  0 siblings, 1 reply; 13+ messages in thread
From: Klaus Jensen @ 2021-06-01 19:07 UTC (permalink / raw)
  To: Keith Busch
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, stefanha

[-- Attachment #1: Type: text/plain, Size: 1800 bytes --]

On Jun  1 11:07, Keith Busch wrote:
>On Tue, Jun 01, 2021 at 07:41:34PM +0200, Klaus Jensen wrote:
>> On Jun  1 10:19, Keith Busch wrote:
>> > On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
>> > > NVMe Boot Partitions provides an area that may be read by the host
>> > > without initializing queues or even enabling the controller. This
>> > > allows various platform initialization code to be stored on the NVMe
>> > > device instead of some separate medium.
>> > >
>> > > This patch adds the read support for such an area, as well as support
>> > > for updating the boot partition contents from the host through the
>> > > FW Download and Commit commands.
>> >
>> > Please provide some details on what platform initialization sequence
>> > running on QEMU is going to make use of this feature.
>> >
>>
>> I totally get your reluctance to accept useless features like device
>> self-test and ill-supported ones like write uncorrectable.
>>
>> But I think this feature qualifies just fine for the device. It is useful
>> for embedded development and while there might not be any qemu boards that
>> wants to use this *right now*, it allows for experimentation. And this is a
>> feature that actually *is* implemented by real products for embedded
>> systems.
>
>That wasn't my request, though. I am well aware of the feature and also
>have hardware that implements it. It just sounds like you haven't
>actually tested this feature under the protocol's intended use cases
>inside this environment. I think that type of testing and a high level
>description of it in the changelog ought to be part of acceptance
>criteria.
>

Alright, I see.

You'd like to see this tested by defining a new board that loads 
firmware over PCIe from the device?

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 19:07             ` Klaus Jensen
@ 2021-06-01 20:23               ` Keith Busch
  0 siblings, 0 replies; 13+ messages in thread
From: Keith Busch @ 2021-06-01 20:23 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, stefanha

On Tue, Jun 01, 2021 at 09:07:56PM +0200, Klaus Jensen wrote:
> On Jun  1 11:07, Keith Busch wrote:
> > On Tue, Jun 01, 2021 at 07:41:34PM +0200, Klaus Jensen wrote:
> > > On Jun  1 10:19, Keith Busch wrote:
> > > > On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
> > > > > NVMe Boot Partitions provides an area that may be read by the host
> > > > > without initializing queues or even enabling the controller. This
> > > > > allows various platform initialization code to be stored on the NVMe
> > > > > device instead of some separate medium.
> > > > >
> > > > > This patch adds the read support for such an area, as well as support
> > > > > for updating the boot partition contents from the host through the
> > > > > FW Download and Commit commands.
> > > >
> > > > Please provide some details on what platform initialization sequence
> > > > running on QEMU is going to make use of this feature.
> > > >
> > > 
> > > I totally get your reluctance to accept useless features like device
> > > self-test and ill-supported ones like write uncorrectable.
> > > 
> > > But I think this feature qualifies just fine for the device. It is useful
> > > for embedded development and while there might not be any qemu boards that
> > > wants to use this *right now*, it allows for experimentation. And this is a
> > > feature that actually *is* implemented by real products for embedded
> > > systems.
> > 
> > That wasn't my request, though. I am well aware of the feature and also
> > have hardware that implements it. It just sounds like you haven't
> > actually tested this feature under the protocol's intended use cases
> > inside this environment. I think that type of testing and a high level
> > description of it in the changelog ought to be part of acceptance
> > criteria.
> > 
> 
> Alright, I see.
> 
> You'd like to see this tested by defining a new board that loads firmware
> over PCIe from the device?

Yes, something like that.

When the feature was initially published, I took a very brief look at
how qemu could use it and concluded this wasn't very practical here. I
would be happy to know if there's any example platform that can use it,
though. That, to me, demonstrates sufficient value.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-01 14:37     ` [PATCH v2 1/2] hw/nvme: add support for boot partiotions Gollu Appalanaidu
  2021-06-01 17:19       ` Keith Busch
@ 2021-06-03  9:48       ` Stefan Hajnoczi
  2021-06-03 10:04         ` Klaus Jensen
  1 sibling, 1 reply; 13+ messages in thread
From: Stefan Hajnoczi @ 2021-06-03  9:48 UTC (permalink / raw)
  To: Gollu Appalanaidu; +Cc: fam, kwolf, qemu-block, qemu-devel, mreitz, its, kbusch

[-- Attachment #1: Type: text/plain, Size: 1854 bytes --]

On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
> @@ -5546,6 +5665,47 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
>          NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
>                         "invalid write to read only CMBSZ, ignored");
>          return;
> +    case 0x44:  /* BPRSEL */
> +        n->bar.bprsel = data & 0xffffffff;
> +        size_t bp_len = NVME_BPRSEL_BPRSZ(n->bar.bprsel) * 4 * KiB;
> +        int64_t bp_offset = NVME_BPRSEL_BPROF(n->bar.bprsel) * 4 * KiB;
> +        int64_t off = 0;
> +        struct nvme_bp_read_ctx *ctx;
> +
> +        trace_pci_nvme_mmio_bprsel(data, n->bar.bprsel,
> +                                   NVME_BPRSEL_BPID(n->bar.bpinfo),
> +                                   bp_offset, bp_len);
> +
> +        if (bp_len + bp_offset > n->bp_size) {
> +            NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
> +            NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR);
> +            return;
> +        }
> +
> +        off = NVME_BPRSEL_BPID(n->bar.bpinfo) * n->bp_size + bp_offset;
> +
> +        NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
> +        NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_READING);
> +
> +        ctx = g_new(struct nvme_bp_read_ctx, 1);
> +
> +        ctx->n = n;
> +
> +        pci_dma_sglist_init(&ctx->qsg, &n->parent_obj, 1);
> +
> +        qemu_sglist_add(&ctx->qsg, n->bar.bpmbl, bp_len);
> +
> +        dma_blk_read(n->blk_bp, &ctx->qsg, off , BDRV_SECTOR_SIZE,
> +                     nvme_bp_read_cb, ctx);

The returned BlockAIOCB is not stored. Two questions:

1. Can the guest allocate unbounded amounts of QEMU memory (struct
   nvme_bp_read_ctx) by repeatedly writing to this register?

2. What happens if the NVMe device is hot unplugged or reset while a
   boot partition read request is in flight?

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-03  9:48       ` Stefan Hajnoczi
@ 2021-06-03 10:04         ` Klaus Jensen
  2021-06-03 11:51           ` Stefan Hajnoczi
  0 siblings, 1 reply; 13+ messages in thread
From: Klaus Jensen @ 2021-06-03 10:04 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, kbusch

[-- Attachment #1: Type: text/plain, Size: 2826 bytes --]

On Jun  3 10:48, Stefan Hajnoczi wrote:
>On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
>> @@ -5546,6 +5665,47 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
>>          NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
>>                         "invalid write to read only CMBSZ, ignored");
>>          return;
>> +    case 0x44:  /* BPRSEL */
>> +        n->bar.bprsel = data & 0xffffffff;
>> +        size_t bp_len = NVME_BPRSEL_BPRSZ(n->bar.bprsel) * 4 * KiB;
>> +        int64_t bp_offset = NVME_BPRSEL_BPROF(n->bar.bprsel) * 4 * KiB;
>> +        int64_t off = 0;
>> +        struct nvme_bp_read_ctx *ctx;
>> +
>> +        trace_pci_nvme_mmio_bprsel(data, n->bar.bprsel,
>> +                                   NVME_BPRSEL_BPID(n->bar.bpinfo),
>> +                                   bp_offset, bp_len);
>> +
>> +        if (bp_len + bp_offset > n->bp_size) {
>> +            NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
>> +            NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR);
>> +            return;
>> +        }
>> +
>> +        off = NVME_BPRSEL_BPID(n->bar.bpinfo) * n->bp_size + bp_offset;
>> +
>> +        NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
>> +        NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_READING);
>> +
>> +        ctx = g_new(struct nvme_bp_read_ctx, 1);
>> +
>> +        ctx->n = n;
>> +
>> +        pci_dma_sglist_init(&ctx->qsg, &n->parent_obj, 1);
>> +
>> +        qemu_sglist_add(&ctx->qsg, n->bar.bpmbl, bp_len);
>> +
>> +        dma_blk_read(n->blk_bp, &ctx->qsg, off , BDRV_SECTOR_SIZE,
>> +                     nvme_bp_read_cb, ctx);
>
>The returned BlockAIOCB is not stored. Two questions:
>

Thanks for these comments Stefan, I've commented below how I think they
can be resolved.

>1. Can the guest allocate unbounded amounts of QEMU memory (struct
>   nvme_bp_read_ctx) by repeatedly writing to this register?
>

Yeah, the QSG should just be a member of the controller struct and then 
have that as the cb_arg to dma_blk_read. Then, the QSG can be 
initialized and the host address added when BPMBL is written instead of 
here.

We don't want the QSG to change while the read is in progress, so the 
write to BPMBL should check BPINFO.BRS and not do anything if the QSG is 
"in use". The spec says that the host *shall* not modify the registers 
when a read is in progress, so we can safely just ignore the write.

>2. What happens if the NVMe device is hot unplugged or reset while a
>   boot partition read request is in flight?

The spec also says that the host *shall* not reset or shutdown the 
controller while a read is in progress, but again, we need to protect 
QEMU so the aiocb must be saved on the controller struct and cancelled 
appropriately.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/2] hw/nvme: add support for boot partitions
  2021-06-03 10:04         ` Klaus Jensen
@ 2021-06-03 11:51           ` Stefan Hajnoczi
  0 siblings, 0 replies; 13+ messages in thread
From: Stefan Hajnoczi @ 2021-06-03 11:51 UTC (permalink / raw)
  To: Klaus Jensen
  Cc: fam, kwolf, qemu-block, Gollu Appalanaidu, qemu-devel, mreitz, kbusch

[-- Attachment #1: Type: text/plain, Size: 3533 bytes --]

On Thu, Jun 03, 2021 at 12:04:06PM +0200, Klaus Jensen wrote:
> On Jun  3 10:48, Stefan Hajnoczi wrote:
> > On Tue, Jun 01, 2021 at 08:07:48PM +0530, Gollu Appalanaidu wrote:
> > > @@ -5546,6 +5665,47 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
> > >          NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly,
> > >                         "invalid write to read only CMBSZ, ignored");
> > >          return;
> > > +    case 0x44:  /* BPRSEL */
> > > +        n->bar.bprsel = data & 0xffffffff;
> > > +        size_t bp_len = NVME_BPRSEL_BPRSZ(n->bar.bprsel) * 4 * KiB;
> > > +        int64_t bp_offset = NVME_BPRSEL_BPROF(n->bar.bprsel) * 4 * KiB;
> > > +        int64_t off = 0;
> > > +        struct nvme_bp_read_ctx *ctx;
> > > +
> > > +        trace_pci_nvme_mmio_bprsel(data, n->bar.bprsel,
> > > +                                   NVME_BPRSEL_BPID(n->bar.bpinfo),
> > > +                                   bp_offset, bp_len);
> > > +
> > > +        if (bp_len + bp_offset > n->bp_size) {
> > > +            NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
> > > +            NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_ERROR);
> > > +            return;
> > > +        }
> > > +
> > > +        off = NVME_BPRSEL_BPID(n->bar.bpinfo) * n->bp_size + bp_offset;
> > > +
> > > +        NVME_BPINFO_CLEAR_BRS(n->bar.bpinfo);
> > > +        NVME_BPINFO_SET_BRS(n->bar.bpinfo, NVME_BPINFO_BRS_READING);
> > > +
> > > +        ctx = g_new(struct nvme_bp_read_ctx, 1);
> > > +
> > > +        ctx->n = n;
> > > +
> > > +        pci_dma_sglist_init(&ctx->qsg, &n->parent_obj, 1);
> > > +
> > > +        qemu_sglist_add(&ctx->qsg, n->bar.bpmbl, bp_len);
> > > +
> > > +        dma_blk_read(n->blk_bp, &ctx->qsg, off , BDRV_SECTOR_SIZE,
> > > +                     nvme_bp_read_cb, ctx);
> > 
> > The returned BlockAIOCB is not stored. Two questions:
> > 
> 
> Thanks for these comments Stefan, I've commented below how I think they
> can be resolved.
> 
> > 1. Can the guest allocate unbounded amounts of QEMU memory (struct
> >   nvme_bp_read_ctx) by repeatedly writing to this register?
> > 
> 
> Yeah, the QSG should just be a member of the controller struct and then have
> that as the cb_arg to dma_blk_read. Then, the QSG can be initialized and the
> host address added when BPMBL is written instead of here.
> 
> We don't want the QSG to change while the read is in progress, so the write
> to BPMBL should check BPINFO.BRS and not do anything if the QSG is "in use".
> The spec says that the host *shall* not modify the registers when a read is
> in progress, so we can safely just ignore the write.
> 
> > 2. What happens if the NVMe device is hot unplugged or reset while a
> >   boot partition read request is in flight?
> 
> The spec also says that the host *shall* not reset or shutdown the
> controller while a read is in progress, but again, we need to protect QEMU
> so the aiocb must be saved on the controller struct and cancelled
> appropriately.

Sounds good.

There is documentation about secure coding practices in
docs/devel/secure-coding-practices.rst (especially the stuff specific to
device emulation is interesting). It's possible that -device nvme will
become common in production virtual machines and it's hard to address
stability and security after the code has been written. Thanks for
taking this feedback on board even though it may not be relevant to NVMe
test/bringup/prototyping use cases.

Stefan

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2021-06-03 11:52 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20210601144231epcas5p3b931fe9421737d104dc3c5be50c928f3@epcas5p3.samsung.com>
2021-06-01 14:37 ` [PATCH v2 0/2] add boot partitions support and read test case Gollu Appalanaidu
     [not found]   ` <CGME20210601144234epcas5p153e855ad673876cf67e57d4b539dc274@epcas5p1.samsung.com>
2021-06-01 14:37     ` [PATCH v2 1/2] hw/nvme: add support for boot partitions Gollu Appalanaidu
2021-06-01 17:19       ` Keith Busch
2021-06-01 17:41         ` Klaus Jensen
2021-06-01 17:46           ` Klaus Jensen
2021-06-01 18:07           ` Keith Busch
2021-06-01 19:07             ` Klaus Jensen
2021-06-01 20:23               ` Keith Busch
2021-06-03  9:48       ` Stefan Hajnoczi
2021-06-03 10:04         ` Klaus Jensen
2021-06-03 11:51           ` Stefan Hajnoczi
     [not found]   ` <CGME20210601144242epcas5p1292f656f45aebd1b9c75fe54797d6b46@epcas5p1.samsung.com>
2021-06-01 14:37     ` [PATCH v2 2/2] tests/qtest/nvme-test: add boot partition read test Gollu Appalanaidu
2021-06-01 17:43       ` Klaus Jensen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.