* [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
@ 2020-02-18 22:48 Andrzej Jakowski
  2020-02-19  1:07 ` no-reply
  2020-02-21 13:45 ` Stefan Hajnoczi
  0 siblings, 2 replies; 12+ messages in thread
From: Andrzej Jakowski @ 2020-02-18 22:48 UTC (permalink / raw)
  To: keith.busch, kwolf, mreitz; +Cc: Andrzej Jakowski, qemu-devel, qemu-block

This patch introduces support for the Persistent Memory Region (PMR) defined in
the NVMe 1.4 spec. The user can now specify a pmr_file which will be mmap'ed into
the QEMU address space and exposed in PCI BAR 2. The guest OS can perform MMIO
reads and writes to the PMR region, and the data stays persistent across system
reboots.

Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
---
 hw/block/nvme.c       | 145 ++++++++++++++++++++++++++++++++++-
 hw/block/nvme.h       |   5 ++
 hw/block/trace-events |   5 ++
 include/block/nvme.h  | 172 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 326 insertions(+), 1 deletion(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d28335cbf3..836cf8d426 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -19,10 +19,14 @@
  *      -drive file=<file>,if=none,id=<drive_id>
  *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
  *              cmb_size_mb=<cmb_size_mb[optional]>, \
+ *              [pmr_file=<pmr_file_path>,] \
  *              num_queues=<N[optional]>
  *
  * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
  * offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
+ *
+ * Only one of CMB or PMR can be enabled, due to the limited BAR indexes.
+ * pmr_file needs to be preallocated and its size must be a multiple of MiB.
  */
 
 #include "qemu/osdep.h"
@@ -1141,6 +1145,26 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
         NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
                        "invalid write to read only CMBSZ, ignored");
         return;
+    case 0xE00: /* PMRCAP */
+        NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
+                       "invalid write to PMRCAP register, ignored");
+        return;
+    case 0xE04: /* TODO PMRCTL */
+        break;
+    case 0xE08: /* PMRSTS */
+        NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
+                       "invalid write to PMRSTS register, ignored");
+        return;
+    case 0xE0C: /* PMREBS */
+        NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
+                       "invalid write to PMREBS register, ignored");
+        return;
+    case 0xE10: /* PMRSWTP */
+        NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
+                       "invalid write to PMRSWTP register, ignored");
+        return;
+    case 0xE14: /* TODO PMRMSC */
+         break;
     default:
         NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
                        "invalid MMIO write,"
@@ -1303,6 +1327,38 @@ static const MemoryRegionOps nvme_cmb_ops = {
     },
 };
 
+static void nvme_pmr_write(void *opaque, hwaddr addr, uint64_t data,
+    unsigned size)
+{
+    NvmeCtrl *n = (NvmeCtrl *)opaque;
+    stn_le_p(&n->pmrbuf[addr], size, data);
+}
+
+static uint64_t nvme_pmr_read(void *opaque, hwaddr addr, unsigned size)
+{
+    NvmeCtrl *n = (NvmeCtrl *)opaque;
+    if (!NVME_PMRCAP_PMRWBM(n->bar.pmrcap)) {
+        int ret;
+        ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
+        if (ret < 0) {
+            NVME_GUEST_ERR(nvme_ub_mmiowr_pmrread_barrier,
+                       "error while persisting data");
+        }
+    }
+    return ldn_le_p(&n->pmrbuf[addr], size);
+}
+
+static const MemoryRegionOps nvme_pmr_ops = {
+    .read = nvme_pmr_read,
+    .write = nvme_pmr_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+
 static void nvme_realize(PCIDevice *pci_dev, Error **errp)
 {
     NvmeCtrl *n = NVME(pci_dev);
@@ -1332,6 +1388,37 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
         error_setg(errp, "serial property not set");
         return;
     }
+
+    if (!n->cmb_size_mb && n->pmr_file) {
+        int fd;
+
+        n->f_pmr = fopen(n->pmr_file, "r+b");
+        if (!n->f_pmr) {
+            error_setg(errp, "pmr backend file open error");
+            return;
+        }
+
+        fseek(n->f_pmr, 0L, SEEK_END);
+        n->f_pmr_size = ftell(n->f_pmr);
+        fseek(n->f_pmr, 0L, SEEK_SET);
+
+        /* pmr file size needs to be a non-zero multiple of MiB */
+        if (!n->f_pmr_size || n->f_pmr_size % (1 << 20)) {
+            error_setg(errp, "pmr backend file size needs to be greater than "
+                             "0 and a multiple of MiB in size");
+            return;
+        }
+
+        fd = fileno(n->f_pmr);
+        n->pmrbuf = mmap(NULL, n->f_pmr_size,
+                         (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0);
+
+        if (n->pmrbuf == MAP_FAILED) {
+            error_setg(errp, "pmr backend file mmap error");
+            return;
+        }
+    }
+
     blkconf_blocksizes(&n->conf);
     if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
                                        false, errp)) {
@@ -1393,7 +1480,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
     n->bar.intmc = n->bar.intms = 0;
 
     if (n->cmb_size_mb) {
-
         NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2);
         NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0);
 
@@ -1415,6 +1501,52 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
             PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
             PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
 
+    } else if (n->pmr_file) {
+        /* Controller Capabilities register */
+        NVME_CAP_SET_PMRS(n->bar.cap, 1);
+
+        /* PMR Capabilities register */
+        n->bar.pmrcap = 0;
+        NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
+        NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
+        NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
+        NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
+        NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0);
+        NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
+        NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
+
+        /* PMR Control register */
+        n->bar.pmrctl = 0;
+        NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
+
+        /* PMR Status register */
+        n->bar.pmrsts = 0;
+        NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
+        NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
+        NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
+        NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
+
+        /* PMR Elasticity Buffer Size register */
+        n->bar.pmrebs = 0;
+        NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
+        NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
+        NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
+
+        /* PMR Sustained Write Throughput register */
+        n->bar.pmrswtp = 0;
+        NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
+        NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
+
+        /* PMR Memory Space Control register */
+        n->bar.pmrmsc = 0;
+        NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
+        NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
+
+        memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_pmr_ops, n,
+                              "nvme-pmr", n->f_pmr_size);
+        pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
+            PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
+            PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
     }
 
     for (i = 0; i < n->num_namespaces; i++) {
@@ -1445,11 +1577,22 @@ static void nvme_exit(PCIDevice *pci_dev)
     if (n->cmb_size_mb) {
         g_free(n->cmbuf);
     }
+
+    if (n->pmr_file) {
+        if (n->pmrbuf) {
+            munmap(n->pmrbuf, n->f_pmr_size);
+        }
+
+        if (n->f_pmr) {
+            fclose(n->f_pmr);
+        }
+    }
     msix_uninit_exclusive_bar(pci_dev);
 }
 
 static Property nvme_props[] = {
     DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
+    DEFINE_PROP_STRING("pmr_file", NvmeCtrl, pmr_file),
     DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
     DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
     DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 557194ee19..5ab734f96f 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -83,6 +83,11 @@ typedef struct NvmeCtrl {
     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
 
     char            *serial;
+    char            *pmr_file;
+    FILE            *f_pmr;
+    uint64_t        f_pmr_size;
+    uint8_t         *pmrbuf;
+
     NvmeNamespace   *namespaces;
     NvmeSQueue      **sq;
     NvmeCQueue      **cq;
diff --git a/hw/block/trace-events b/hw/block/trace-events
index c03e80c2c9..f633360f95 100644
--- a/hw/block/trace-events
+++ b/hw/block/trace-events
@@ -110,6 +110,11 @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
 nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
 nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
 nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
+nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
+nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
+nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
+nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
+nvme_ub_mmiowr_pmrread_barrier(void) "failed to persist data"
 nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
 nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
 nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 8fb941c653..374262d4b7 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -15,6 +15,13 @@ typedef struct NvmeBar {
     uint64_t    acq;
     uint32_t    cmbloc;
     uint32_t    cmbsz;
+    uint8_t     padding[3520]; /* not used by QEMU */
+    uint32_t    pmrcap;
+    uint32_t    pmrctl;
+    uint32_t    pmrsts;
+    uint32_t    pmrebs;
+    uint32_t    pmrswtp;
+    uint32_t    pmrmsc;
 } NvmeBar;
 
 enum NvmeCapShift {
@@ -27,6 +34,7 @@ enum NvmeCapShift {
     CAP_CSS_SHIFT      = 37,
     CAP_MPSMIN_SHIFT   = 48,
     CAP_MPSMAX_SHIFT   = 52,
+    CAP_PMR_SHIFT      = 56,
 };
 
 enum NvmeCapMask {
@@ -39,6 +47,7 @@ enum NvmeCapMask {
     CAP_CSS_MASK       = 0xff,
     CAP_MPSMIN_MASK    = 0xf,
     CAP_MPSMAX_MASK    = 0xf,
+    CAP_PMR_MASK       = 0x1,
 };
 
 #define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
@@ -69,6 +78,8 @@ enum NvmeCapMask {
                                                            << CAP_MPSMIN_SHIFT)
 #define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & CAP_MPSMAX_MASK)\
                                                             << CAP_MPSMAX_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val) (cap |= (uint64_t)(val & CAP_PMR_MASK)\
+                                                            << CAP_PMR_SHIFT)
 
 enum NvmeCcShift {
     CC_EN_SHIFT     = 0,
@@ -205,6 +216,167 @@ enum NvmeCmbszMask {
 #define NVME_CMBSZ_GETSIZE(cmbsz) \
     (NVME_CMBSZ_SZ(cmbsz) * (1 << (12 + 4 * NVME_CMBSZ_SZU(cmbsz))))
 
+enum NvmePmrcapShift {
+    PMRCAP_RDS_SHIFT   = 3,
+    PMRCAP_WDS_SHIFT   = 4,
+    PMRCAP_BIR_SHIFT   = 5,
+    PMRCAP_PMRTU_SHIFT   = 8,
+    PMRCAP_PMRWBM_SHIFT   = 10,
+    PMRCAP_PMRTO_SHIFT   = 16,
+    PMRCAP_CMSS_SHIFT   = 24,
+};
+
+enum NvmePmrcapMask {
+    PMRCAP_RDS_MASK   = 0x1,
+    PMRCAP_WDS_MASK   = 0x1,
+    PMRCAP_BIR_MASK   = 0x7,
+    PMRCAP_PMRTU_MASK   = 0x3,
+    PMRCAP_PMRWBM_MASK   = 0xf,
+    PMRCAP_PMRTO_MASK   = 0xff,
+    PMRCAP_CMSS_MASK   = 0x1,
+};
+
+#define NVME_PMRCAP_RDS(pmrcap)    \
+    ((pmrcap >> PMRCAP_RDS_SHIFT)   & PMRCAP_RDS_MASK)
+#define NVME_PMRCAP_WDS(pmrcap)    \
+    ((pmrcap >> PMRCAP_WDS_SHIFT)   & PMRCAP_WDS_MASK)
+#define NVME_PMRCAP_BIR(pmrcap)    \
+    ((pmrcap >> PMRCAP_BIR_SHIFT)   & PMRCAP_BIR_MASK)
+#define NVME_PMRCAP_PMRTU(pmrcap)    \
+    ((pmrcap >> PMRCAP_PMRTU_SHIFT)   & PMRCAP_PMRTU_MASK)
+#define NVME_PMRCAP_PMRWBM(pmrcap)    \
+    ((pmrcap >> PMRCAP_PMRWBM_SHIFT)   & PMRCAP_PMRWBM_MASK)
+#define NVME_PMRCAP_PMRTO(pmrcap)    \
+    ((pmrcap >> PMRCAP_PMRTO_SHIFT)   & PMRCAP_PMRTO_MASK)
+#define NVME_PMRCAP_CMSS(pmrcap)    \
+    ((pmrcap >> PMRCAP_CMSS_SHIFT)   & PMRCAP_CMSS_MASK)
+
+#define NVME_PMRCAP_SET_RDS(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_RDS_MASK) << PMRCAP_RDS_SHIFT)
+#define NVME_PMRCAP_SET_WDS(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_WDS_MASK) << PMRCAP_WDS_SHIFT)
+#define NVME_PMRCAP_SET_BIR(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_BIR_MASK) << PMRCAP_BIR_SHIFT)
+#define NVME_PMRCAP_SET_PMRTU(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_PMRTU_MASK) << PMRCAP_PMRTU_SHIFT)
+#define NVME_PMRCAP_SET_PMRWBM(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_PMRWBM_MASK) << PMRCAP_PMRWBM_SHIFT)
+#define NVME_PMRCAP_SET_PMRTO(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_PMRTO_MASK) << PMRCAP_PMRTO_SHIFT)
+#define NVME_PMRCAP_SET_CMSS(pmrcap, val)   \
+    (pmrcap |= (uint64_t)(val & PMRCAP_CMSS_MASK) << PMRCAP_CMSS_SHIFT)
+
+enum NvmePmrctlShift {
+    PMRCTL_EN_SHIFT   = 0,
+};
+
+enum NvmePmrctlMask {
+    PMRCTL_EN_MASK   = 0x1,
+};
+
+#define NVME_PMRCTL_EN(pmrctl)  ((pmrctl >> PMRCTL_EN_SHIFT)   & PMRCTL_EN_MASK)
+
+#define NVME_PMRCTL_SET_EN(pmrctl, val)   \
+    (pmrctl |= (uint64_t)(val & PMRCTL_EN_MASK) << PMRCTL_EN_SHIFT)
+
+enum NvmePmrstsShift {
+    PMRSTS_ERR_SHIFT   = 0,
+    PMRSTS_NRDY_SHIFT   = 8,
+    PMRSTS_HSTS_SHIFT   = 9,
+    PMRSTS_CBAI_SHIFT   = 12,
+};
+
+enum NvmePmrstsMask {
+    PMRSTS_ERR_MASK   = 0xff,
+    PMRSTS_NRDY_MASK   = 0x1,
+    PMRSTS_HSTS_MASK   = 0x7,
+    PMRSTS_CBAI_MASK   = 0x1,
+};
+
+#define NVME_PMRSTS_ERR(pmrsts)     \
+    ((pmrsts >> PMRSTS_ERR_SHIFT)   & PMRSTS_ERR_MASK)
+#define NVME_PMRSTS_NRDY(pmrsts)    \
+    ((pmrsts >> PMRSTS_NRDY_SHIFT)   & PMRSTS_NRDY_MASK)
+#define NVME_PMRSTS_HSTS(pmrsts)    \
+    ((pmrsts >> PMRSTS_HSTS_SHIFT)   & PMRSTS_HSTS_MASK)
+#define NVME_PMRSTS_CBAI(pmrsts)    \
+    ((pmrsts >> PMRSTS_CBAI_SHIFT)   & PMRSTS_CBAI_MASK)
+
+#define NVME_PMRSTS_SET_ERR(pmrsts, val)   \
+    (pmrsts |= (uint64_t)(val & PMRSTS_ERR_MASK) << PMRSTS_ERR_SHIFT)
+#define NVME_PMRSTS_SET_NRDY(pmrsts, val)   \
+    (pmrsts |= (uint64_t)(val & PMRSTS_NRDY_MASK) << PMRSTS_NRDY_SHIFT)
+#define NVME_PMRSTS_SET_HSTS(pmrsts, val)   \
+    (pmrsts |= (uint64_t)(val & PMRSTS_HSTS_MASK) << PMRSTS_HSTS_SHIFT)
+#define NVME_PMRSTS_SET_CBAI(pmrsts, val)   \
+    (pmrsts |= (uint64_t)(val & PMRSTS_CBAI_MASK) << PMRSTS_CBAI_SHIFT)
+
+enum NvmePmrebsShift {
+    PMREBS_PMRSZU_SHIFT   = 0,
+    PMREBS_RBB_SHIFT   = 4,
+    PMREBS_PMRWBZ_SHIFT   = 8,
+};
+
+enum NvmePmrebsMask {
+    PMREBS_PMRSZU_MASK   = 0xf,
+    PMREBS_RBB_MASK   = 0x1,
+    PMREBS_PMRWBZ_MASK   = 0xffffff,
+};
+
+#define NVME_PMREBS_PMRSZU(pmrebs)  \
+    ((pmrebs >> PMREBS_PMRSZU_SHIFT)   & PMREBS_PMRSZU_MASK)
+#define NVME_PMREBS_RBB(pmrebs)     \
+    ((pmrebs >> PMREBS_RBB_SHIFT)   & PMREBS_RBB_MASK)
+#define NVME_PMREBS_PMRWBZ(pmrebs)  \
+    ((pmrebs >> PMREBS_PMRWBZ_SHIFT)   & PMREBS_PMRWBZ_MASK)
+
+#define NVME_PMREBS_SET_PMRSZU(pmrebs, val)   \
+    (pmrebs |= (uint64_t)(val & PMREBS_PMRSZU_MASK) << PMREBS_PMRSZU_SHIFT)
+#define NVME_PMREBS_SET_RBB(pmrebs, val)   \
+    (pmrebs |= (uint64_t)(val & PMREBS_RBB_MASK) << PMREBS_RBB_SHIFT)
+#define NVME_PMREBS_SET_PMRWBZ(pmrebs, val)   \
+    (pmrebs |= (uint64_t)(val & PMREBS_PMRWBZ_MASK) << PMREBS_PMRWBZ_SHIFT)
+
+enum NvmePmrswtpShift {
+    PMRSWTP_PMRSWTU_SHIFT   = 0,
+    PMRSWTP_PMRSWTV_SHIFT   = 8,
+};
+
+enum NvmePmrswtpMask {
+    PMRSWTP_PMRSWTU_MASK   = 0xf,
+    PMRSWTP_PMRSWTV_MASK   = 0xffffff,
+};
+
+#define NVME_PMRSWTP_PMRSWTU(pmrswtp)   \
+    ((pmrswtp >> PMRSWTP_PMRSWTU_SHIFT)   & PMRSWTP_PMRSWTU_MASK)
+#define NVME_PMRSWTP_PMRSWTV(pmrswtp)   \
+    ((pmrswtp >> PMRSWTP_PMRSWTV_SHIFT)   & PMRSWTP_PMRSWTV_MASK)
+
+#define NVME_PMRSWTP_SET_PMRSWTU(pmrswtp, val)   \
+    (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTU_MASK) << PMRSWTP_PMRSWTU_SHIFT)
+#define NVME_PMRSWTP_SET_PMRSWTV(pmrswtp, val)   \
+    (pmrswtp |= (uint64_t)(val & PMRSWTP_PMRSWTV_MASK) << PMRSWTP_PMRSWTV_SHIFT)
+
+enum NvmePmrmscShift {
+    PMRMSC_CMSE_SHIFT   = 1,
+    PMRMSC_CBA_SHIFT   = 12,
+};
+
+enum NvmePmrmscMask {
+    PMRMSC_CMSE_MASK   = 0x1,
+    PMRMSC_CBA_MASK   = 0xfffffffffffff,
+};
+
+#define NVME_PMRMSC_CMSE(pmrmsc)    \
+    ((pmrmsc >> PMRMSC_CMSE_SHIFT)   & PMRMSC_CMSE_MASK)
+#define NVME_PMRMSC_CBA(pmrmsc)     \
+    ((pmrmsc >> PMRMSC_CBA_SHIFT)   & PMRMSC_CBA_MASK)
+
+#define NVME_PMRMSC_SET_CMSE(pmrmsc, val)   \
+    (pmrmsc |= (uint64_t)(val & PMRMSC_CMSE_MASK) << PMRMSC_CMSE_SHIFT)
+#define NVME_PMRMSC_SET_CBA(pmrmsc, val)   \
+    (pmrmsc |= (uint64_t)(val & PMRMSC_CBA_MASK) << PMRMSC_CBA_SHIFT)
+
 typedef struct NvmeCmd {
     uint8_t     opcode;
     uint8_t     fuse;
-- 
2.21.1




* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-18 22:48 [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec Andrzej Jakowski
@ 2020-02-19  1:07 ` no-reply
  2020-02-19 20:25   ` Andrzej Jakowski
  2020-02-21 13:45 ` Stefan Hajnoczi
  1 sibling, 1 reply; 12+ messages in thread
From: no-reply @ 2020-02-19  1:07 UTC (permalink / raw)
  To: andrzej.jakowski
  Cc: kwolf, qemu-block, qemu-devel, mreitz, keith.busch, andrzej.jakowski

Patchew URL: https://patchew.org/QEMU/20200218224811.30050-1-andrzej.jakowski@linux.intel.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===

  CC      hw/display/sii9022.o
  CC      hw/display/ssd0303.o
/tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_pmr_read':
/tmp/qemu-test/src/hw/block/nvme.c:1342:15: error: implicit declaration of function 'msync'; did you mean 'fsync'? [-Werror=implicit-function-declaration]
         ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
               ^~~~~
               fsync
/tmp/qemu-test/src/hw/block/nvme.c:1342:15: error: nested extern declaration of 'msync' [-Werror=nested-externs]
/tmp/qemu-test/src/hw/block/nvme.c:1342:47: error: 'MS_SYNC' undeclared (first use in this function)
         ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
                                               ^~~~~~~
/tmp/qemu-test/src/hw/block/nvme.c:1342:47: note: each undeclared identifier is reported only once for each function it appears in
/tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_realize':
/tmp/qemu-test/src/hw/block/nvme.c:1413:21: error: implicit declaration of function 'mmap'; did you mean 'max'? [-Werror=implicit-function-declaration]
         n->pmrbuf = mmap(NULL, n->f_pmr_size,
                     ^~~~
                     max
/tmp/qemu-test/src/hw/block/nvme.c:1413:21: error: nested extern declaration of 'mmap' [-Werror=nested-externs]
/tmp/qemu-test/src/hw/block/nvme.c:1414:27: error: 'PROT_READ' undeclared (first use in this function); did you mean 'OF_READ'?
                          (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0);
                           ^~~~~~~~~
                           OF_READ
/tmp/qemu-test/src/hw/block/nvme.c:1414:39: error: 'PROT_WRITE' undeclared (first use in this function); did you mean 'OF_WRITE'?
                          (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0);
                                       ^~~~~~~~~~
                                       OF_WRITE
/tmp/qemu-test/src/hw/block/nvme.c:1414:52: error: 'MAP_SHARED' undeclared (first use in this function); did you mean 'RAM_SHARED'?
                          (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0);
                                                    ^~~~~~~~~~
                                                    RAM_SHARED
/tmp/qemu-test/src/hw/block/nvme.c:1416:26: error: 'MAP_FAILED' undeclared (first use in this function); did you mean 'WAIT_FAILED'?
         if (n->pmrbuf == MAP_FAILED) {
                          ^~~~~~~~~~
                          WAIT_FAILED
/tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_exit':
/tmp/qemu-test/src/hw/block/nvme.c:1583:13: error: implicit declaration of function 'munmap' [-Werror=implicit-function-declaration]
             munmap(n->pmrbuf, n->f_pmr_size);
             ^~~~~~
/tmp/qemu-test/src/hw/block/nvme.c:1583:13: error: nested extern declaration of 'munmap' [-Werror=nested-externs]
cc1: all warnings being treated as errors
make: *** [/tmp/qemu-test/src/rules.mak:69: hw/block/nvme.o] Error 1
make: *** Waiting for unfinished jobs....
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 664, in <module>
---
    raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=058833b8d96e4c67a646a099f4118351', '-u', '1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-0kstbie3/src/docker-src.2020-02-18-20.04.28.2259:/var/tmp/qemu:z,ro', 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit status 2.
filter=--filter=label=com.qemu.instance.uuid=058833b8d96e4c67a646a099f4118351
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-0kstbie3/src'
make: *** [docker-run-test-mingw@fedora] Error 2

real    2m39.403s
user    0m8.170s


The full log is available at
http://patchew.org/logs/20200218224811.30050-1-andrzej.jakowski@linux.intel.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-devel@redhat.com


* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-19  1:07 ` no-reply
@ 2020-02-19 20:25   ` Andrzej Jakowski
  0 siblings, 0 replies; 12+ messages in thread
From: Andrzej Jakowski @ 2020-02-19 20:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: keith.busch, kwolf, qemu-block, mreitz

On 2/18/20 6:07 PM, no-reply@patchew.org wrote:
> === TEST SCRIPT BEGIN ===
> #! /bin/bash
> export ARCH=x86_64
> make docker-image-fedora V=1 NETWORK=1
> time make docker-test-mingw@fedora J=14 NETWORK=1
> === TEST SCRIPT END ===
> 
>   CC      hw/display/sii9022.o
>   CC      hw/display/ssd0303.o
> /tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_pmr_read':
> /tmp/qemu-test/src/hw/block/nvme.c:1342:15: error: implicit declaration of function 'msync'; did you mean 'fsync'? [-Werror=implicit-function-declaration]
>          ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
>                ^~~~~
>                fsync
> /tmp/qemu-test/src/hw/block/nvme.c:1342:15: error: nested extern declaration of 'msync' [-Werror=nested-externs]
> /tmp/qemu-test/src/hw/block/nvme.c:1342:47: error: 'MS_SYNC' undeclared (first use in this function)
>          ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
>                                                ^~~~~~~
> /tmp/qemu-test/src/hw/block/nvme.c:1342:47: note: each undeclared identifier is reported only once for each function it appears in
> /tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_realize':
> /tmp/qemu-test/src/hw/block/nvme.c:1413:21: error: implicit declaration of function 'mmap'; did you mean 'max'? [-Werror=implicit-function-declaration]
>          n->pmrbuf = mmap(NULL, n->f_pmr_size,
>                      ^~~~

This patch fails to cross-compile for the Windows environment.
I plan to submit a second version of this patch that will conditionally
support PMR only on Linux hosts. That should take care of this problem.

Do you see any better fix for that?
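
For illustration, one way the conditional support could look; this is only a
sketch (CONFIG_POSIX is QEMU's existing symbol for POSIX hosts, and the guard
actually chosen for v2 may differ):

/* hw/block/nvme.c (sketch): compile the mmap(2)/msync(2) based PMR code
 * only on POSIX hosts and reject the property elsewhere.
 */
#ifdef CONFIG_POSIX
#define NVME_PMR_SUPPORTED 1
#endif

/* in nvme_realize() */
if (n->pmr_file) {
#ifndef NVME_PMR_SUPPORTED
    error_setg(errp, "pmr_file is not supported on this host");
    return;
#else
    /* existing fopen()/mmap() setup from this patch */
#endif
}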



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-18 22:48 [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec Andrzej Jakowski
  2020-02-19  1:07 ` no-reply
@ 2020-02-21 13:45 ` Stefan Hajnoczi
  2020-02-21 15:36   ` Andrzej Jakowski
  2020-02-21 18:45   ` Dr. David Alan Gilbert
  1 sibling, 2 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2020-02-21 13:45 UTC (permalink / raw)
  To: Andrzej Jakowski
  Cc: kwolf, Haozhong Zhang, qemu-block, qemu-devel, mreitz,
	keith.busch, Zhang Yi, Junyan He, Dr. David Alan Gilbert


On Tue, Feb 18, 2020 at 03:48:11PM -0700, Andrzej Jakowski wrote:
> This patch introduces support for the Persistent Memory Region (PMR) defined in
> the NVMe 1.4 spec. The user can now specify a pmr_file which will be mmap'ed into
> the QEMU address space and exposed in PCI BAR 2. The guest OS can perform MMIO
> reads and writes to the PMR region, and the data stays persistent across system
> reboots.
> 
> Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
> ---
>  hw/block/nvme.c       | 145 ++++++++++++++++++++++++++++++++++-
>  hw/block/nvme.h       |   5 ++
>  hw/block/trace-events |   5 ++
>  include/block/nvme.h  | 172 ++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 326 insertions(+), 1 deletion(-)

NVDIMM folks, please take a look.  There seems to be commonality here.

Can this use -object memory-backend-file instead of manually opening and
mapping a file?

Also CCing David Gilbert because there is some similarity with the
vhost-user-fs's DAX Window feature where QEMU mmaps regions of files
into a BAR.

> @@ -1303,6 +1327,38 @@ static const MemoryRegionOps nvme_cmb_ops = {
>      },
>  };
>  
> +static void nvme_pmr_write(void *opaque, hwaddr addr, uint64_t data,
> +    unsigned size)
> +{
> +    NvmeCtrl *n = (NvmeCtrl *)opaque;
> +    stn_le_p(&n->pmrbuf[addr], size, data);
> +}
> +
> +static uint64_t nvme_pmr_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    NvmeCtrl *n = (NvmeCtrl *)opaque;
> +    if (!NVME_PMRCAP_PMRWBM(n->bar.pmrcap)) {
> +        int ret;
> +        ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
> > +        if (ret < 0) {
> +            NVME_GUEST_ERR(nvme_ub_mmiowr_pmrread_barrier,
> +                       "error while persisting data");
> +        }
> +    }

Why is msync(2) done on memory loads instead of stores?
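
For reference, a rough sketch of the memory-backend idea suggested above; the
pmrdev property name and its wiring are assumptions for illustration, not part
of this patch:

#include "sysemu/hostmem.h"

/* hypothetical NvmeCtrl field:  HostMemoryBackend *pmr_dev; */

/* property wiring, used e.g. as:
 *   -object memory-backend-file,id=pmr,share=on,mem-path=<file>,size=<size>
 *   -device nvme,...,pmrdev=pmr
 */
DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmr_dev, TYPE_MEMORY_BACKEND,
                 HostMemoryBackend *),

/* in nvme_realize(): expose the backend's memory region as BAR 2 */
if (n->pmr_dev) {
    MemoryRegion *mr = host_memory_backend_get_memory(n->pmr_dev);

    host_memory_backend_set_mapped(n->pmr_dev, true);
    pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_TYPE_64 |
                     PCI_BASE_ADDRESS_MEM_PREFETCH, mr);
}

The file handling, preallocation and share/DAX options would then come for free
from the backend object.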



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 13:45 ` Stefan Hajnoczi
@ 2020-02-21 15:36   ` Andrzej Jakowski
  2020-02-21 17:31     ` Stefan Hajnoczi
  2020-02-21 18:45   ` Dr. David Alan Gilbert
  1 sibling, 1 reply; 12+ messages in thread
From: Andrzej Jakowski @ 2020-02-21 15:36 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: kwolf, Haozhong Zhang, qemu-block, qemu-devel, mreitz,
	keith.busch, Zhang Yi, Junyan He, Dr. David Alan Gilbert

On 2/21/20 6:45 AM, Stefan Hajnoczi wrote:
> Why is msync(2) done on memory loads instead of stores?

This is my interpretation of NVMe spec wording with regards to PMRWBM field
which says:

"The completion of a memory read from any Persistent
Memory Region address ensures that all prior writes to the
Persistent Memory Region have completed and are
persistent."



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 15:36   ` Andrzej Jakowski
@ 2020-02-21 17:31     ` Stefan Hajnoczi
  2020-02-21 17:50       ` Dr. David Alan Gilbert
  2020-02-21 19:32       ` Stefan Hajnoczi
  0 siblings, 2 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2020-02-21 17:31 UTC (permalink / raw)
  To: Andrzej Jakowski
  Cc: Kevin Wolf, Haozhong Zhang, qemu block, qemu-devel, Max Reitz,
	Keith Busch, Zhang Yi, Junyan He, Dr. David Alan Gilbert

On Fri, Feb 21, 2020 at 3:36 PM Andrzej Jakowski
<andrzej.jakowski@linux.intel.com> wrote:
> On 2/21/20 6:45 AM, Stefan Hajnoczi wrote:
> > Why is msync(2) done on memory loads instead of stores?
>
> This is my interpretation of NVMe spec wording with regards to PMRWBM field
> which says:
>
> "The completion of a memory read from any Persistent
> Memory Region address ensures that all prior writes to the
> Persistent Memory Region have completed and are
> persistent."

Thanks, I haven't read the PMR section of the spec :).

A synchronous operation is bad for virtualization performance.  While
the sync may be a cheap operation in hardware, it can be arbitrarily
expensive with msync(2).  The vCPU will be stuck until msync(2)
completes on the host.

It's also a strange design choice since performance will suffer when
an unrelated read has to wait for writes to complete.  This is
especially problematic for multi-threaded applications or multi-core
systems where I guess this case is hit frequently.  Maybe it's so
cheap in hardware that it doesn't matter?  But then why didn't NVDIMM
use this mechanism?

If anyone knows the answer I'd be interested in learning.  But this
isn't a criticism of the patch - of course it needs to implement the
hardware spec and we can't change it.

Stefan



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 17:31     ` Stefan Hajnoczi
@ 2020-02-21 17:50       ` Dr. David Alan Gilbert
  2020-02-21 18:29         ` Stefan Hajnoczi
  2020-02-21 19:32       ` Stefan Hajnoczi
  1 sibling, 1 reply; 12+ messages in thread
From: Dr. David Alan Gilbert @ 2020-02-21 17:50 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Haozhong Zhang, qemu block, qemu-devel, Max Reitz,
	Keith Busch, Zhang Yi, Andrzej Jakowski, Junyan He

* Stefan Hajnoczi (stefanha@gmail.com) wrote:
> On Fri, Feb 21, 2020 at 3:36 PM Andrzej Jakowski
> <andrzej.jakowski@linux.intel.com> wrote:
> > On 2/21/20 6:45 AM, Stefan Hajnoczi wrote:
> > > Why is msync(2) done on memory loads instead of stores?
> >
> > This is my interpretation of NVMe spec wording with regards to PMRWBM field
> > which says:
> >
> > "The completion of a memory read from any Persistent
> > Memory Region address ensures that all prior writes to the
> > Persistent Memory Region have completed and are
> > persistent."
> 
> Thanks, I haven't read the PMR section of the spec :).
> 
> A synchronous operation is bad for virtualization performance.  While
> the sync may be a cheap operation in hardware, it can be arbitrarily
> expensive with msync(2).  The vCPU will be stuck until msync(2)
> completes on the host.
> 
> It's also a strange design choice since performance will suffer when
> an unrelated read has to wait for writes to complete.  This is
> especially problematic for multi-threaded applications or multi-core
> systems where I guess this case is hit frequently.  Maybe it's so
> cheap in hardware that it doesn't matter?  But then why didn't NVDIMM
> use this mechanism?
> 
> If anyone knows the answer I'd be interested in learning.  But this
> isn't a criticism of the patch - of course it needs to implement the
> hardware spec and we can't change it.

Is this coming from the underlying PCIe spec ?
In PCIe Base 4.0 Rev 0.3 Feb19-2014, section 2.4.1 Transaction ordering,
there's a Table 2-39 and entry B2a in that table is:


  'A Read Request must not pass a Posted Request unless B2b applies.'

and a posted request is defined as a 'Memory Write Request or a Message
Request'.

???

Dave

> Stefan
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK




* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 17:50       ` Dr. David Alan Gilbert
@ 2020-02-21 18:29         ` Stefan Hajnoczi
  0 siblings, 0 replies; 12+ messages in thread
From: Stefan Hajnoczi @ 2020-02-21 18:29 UTC (permalink / raw)
  To: Dr. David Alan Gilbert
  Cc: Kevin Wolf, Haozhong Zhang, qemu block, qemu-devel, Max Reitz,
	Keith Busch, Zhang Yi, Andrzej Jakowski, Junyan He


On Fri, Feb 21, 2020, 17:50 Dr. David Alan Gilbert <dgilbert@redhat.com>
wrote:

> * Stefan Hajnoczi (stefanha@gmail.com) wrote:
> > On Fri, Feb 21, 2020 at 3:36 PM Andrzej Jakowski
> > <andrzej.jakowski@linux.intel.com> wrote:
> > > On 2/21/20 6:45 AM, Stefan Hajnoczi wrote:
> > > > Why is msync(2) done on memory loads instead of stores?
> > >
> > > This is my interpretation of NVMe spec wording with regards to PMRWBM
> field
> > > which says:
> > >
> > > "The completion of a memory read from any Persistent
> > > Memory Region address ensures that all prior writes to the
> > > Persistent Memory Region have completed and are
> > > persistent."
> >
> > Thanks, I haven't read the PMR section of the spec :).
> >
> > A synchronous operation is bad for virtualization performance.  While
> > the sync may be a cheap operation in hardware, it can be arbitrarily
> > expensive with msync(2).  The vCPU will be stuck until msync(2)
> > completes on the host.
> >
> > It's also a strange design choice since performance will suffer when
> > an unrelated read has to wait for writes to complete.  This is
> > especially problematic for multi-threaded applications or multi-core
> > systems where I guess this case is hit frequently.  Maybe it's so
> > cheap in hardware that it doesn't matter?  But then why didn't NVDIMM
> > use this mechanism?
> >
> > If anyone knows the answer I'd be interested in learning.  But this
> > isn't a criticism of the patch - of course it needs to implement the
> > hardware spec and we can't change it.
>
> Is this coming from the underlying PCIe spec ?
> In PCIe Base 4.0 Rev 0.3 Feb19-2014, section 2.4.1 Transaction ordering,
> there's a Table 2-39 and entry B2a in that table is:
>
>
>   'A Read Request must not pass a Posted Request unless B2b applies.'
>
> and a posted request is defined as a 'Memory Write Request or a Message
> Request'.
>
> ???
>

No, that relates to transaction ordering in PCI, not data persistence in
PMR.  PMR can define whatever persistence semantics it wants.

The completion of the write transaction at the PCI level does not mean data
has to be persistent at the PMR level.

Stefan

>



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 13:45 ` Stefan Hajnoczi
  2020-02-21 15:36   ` Andrzej Jakowski
@ 2020-02-21 18:45   ` Dr. David Alan Gilbert
  2020-02-21 20:19     ` Andrzej Jakowski
  1 sibling, 1 reply; 12+ messages in thread
From: Dr. David Alan Gilbert @ 2020-02-21 18:45 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: kwolf, Haozhong Zhang, qemu-block, qemu-devel, mreitz,
	keith.busch, Zhang Yi, Andrzej Jakowski, Junyan He

* Stefan Hajnoczi (stefanha@gmail.com) wrote:
> On Tue, Feb 18, 2020 at 03:48:11PM -0700, Andrzej Jakowski wrote:
> > This patch introduces support for the Persistent Memory Region (PMR) defined in
> > the NVMe 1.4 spec. The user can now specify a pmr_file which will be mmap'ed into
> > the QEMU address space and exposed in PCI BAR 2. The guest OS can perform MMIO
> > reads and writes to the PMR region, and the data stays persistent across system
> > reboots.
> > 
> > Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
> > ---
> >  hw/block/nvme.c       | 145 ++++++++++++++++++++++++++++++++++-
> >  hw/block/nvme.h       |   5 ++
> >  hw/block/trace-events |   5 ++
> >  include/block/nvme.h  | 172 ++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 326 insertions(+), 1 deletion(-)
> 
> NVDIMM folks, please take a look.  There seems to be commonality here.
> 
> Can this use -object memory-backend-file instead of manually opening and
> mapping a file?
> 
> Also CCing David Gilbert because there is some similarity with the
> vhost-user-fs's DAX Window feature where QEMU mmaps regions of files
> into a BAR.

I guess the biggest difference here is that the read can have a side
effect; in my world I don't have to set read/write/endian ops - I just
map a chunk of memory and use memory_region_add_subregion, so all the
reads/writes are native reads/writes - I assume that would be a lot
faster. I guess it depends on whether NVME_PMRCAP_PMRWBM is something
constant you know early on; if you know that you don't need side effects
in the read path, you could use the same trick and avoid the IO ops
altogether.


Isn't there also a requirement that BARs are powers of two? Wouldn't
you need to ensure the PMR file is a power of 2 in size?

Dave

> > @@ -1303,6 +1327,38 @@ static const MemoryRegionOps nvme_cmb_ops = {
> >      },
> >  };
> >  
> > +static void nvme_pmr_write(void *opaque, hwaddr addr, uint64_t data,
> > +    unsigned size)
> > +{
> > +    NvmeCtrl *n = (NvmeCtrl *)opaque;
> > +    stn_le_p(&n->pmrbuf[addr], size, data);
> > +}
> > +
> > +static uint64_t nvme_pmr_read(void *opaque, hwaddr addr, unsigned size)
> > +{
> > +    NvmeCtrl *n = (NvmeCtrl *)opaque;
> > +    if (!NVME_PMRCAP_PMRWBM(n->bar.pmrcap)) {
> > +        int ret;
> > +        ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC);
> > +        if (ret < 0) {
> > +            NVME_GUEST_ERR(nvme_ub_mmiowr_pmrread_barrier,
> > +                       "error while persisting data");
> > +        }
> > +    }
> 
> Why is msync(2) done on memory loads instead of stores?


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
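
A minimal sketch of the direct-mapping idea described above, assuming the
pmrbuf/f_pmr_size fields from this patch (the helper name and the pmr_mem
MemoryRegion field are illustrative):

/* Map the mmap(2)ed backing buffer straight into BAR 2 as RAM, so guest
 * loads and stores bypass MemoryRegionOps callbacks entirely.
 */
static void nvme_pmr_map_bar(NvmeCtrl *n, PCIDevice *pci_dev)
{
    memory_region_init_ram_ptr(&n->pmr_mem, OBJECT(n), "nvme-pmr",
                               n->f_pmr_size, n->pmrbuf);
    pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
                     PCI_BASE_ADDRESS_SPACE_MEMORY |
                     PCI_BASE_ADDRESS_MEM_TYPE_64 |
                     PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmr_mem);
}

Persistence would then only be enforced at the points the spec requires (for
example on a PMRSTS read), rather than on every guest access.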




* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 17:31     ` Stefan Hajnoczi
  2020-02-21 17:50       ` Dr. David Alan Gilbert
@ 2020-02-21 19:32       ` Stefan Hajnoczi
  2020-02-21 20:20         ` Andrzej Jakowski
  1 sibling, 1 reply; 12+ messages in thread
From: Stefan Hajnoczi @ 2020-02-21 19:32 UTC (permalink / raw)
  To: Andrzej Jakowski
  Cc: Kevin Wolf, Haozhong Zhang, qemu block, qemu-devel, Max Reitz,
	Keith Busch, Zhang Yi, Junyan He, Dr. David Alan Gilbert

Hi Andrzej,
After having looked at the PMRWBM part of the spec, I think that the
Bit 1 mode should be implemented for slightly better performance.  Bit
0 mode is not well-suited to virtualization for the reasons I
mentioned in the previous email.

The spec describes Bit 1 mode as "The completion of a read to the
PMRSTS register shall ensure that all prior writes to the Persistent
Memory Region have completed and are persistent".

Stefan
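
For illustration, the Bit 1 behaviour could be wired up roughly as below; this
is a sketch against this patch, not a tested change, and the BAR0 read handler
is simplified:

/* in nvme_realize(): advertise "persist on PMRSTS read" (PMRWBM bit 1) */
NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);

/* in the BAR0 MMIO read path (nvme_mmio_read), before returning the
 * register value: flush the PMR backing file when PMRSTS is read.
 */
if (addr == 0xE08 /* PMRSTS */) {
    if (msync(n->pmrbuf, n->f_pmr_size, MS_SYNC) < 0) {
        NVME_GUEST_ERR(nvme_ub_mmiowr_pmrread_barrier,
                       "error while persisting data");
    }
}

The PMR BAR itself could then be mapped without per-access callbacks, since
ordinary reads no longer need to guarantee persistence.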



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 18:45   ` Dr. David Alan Gilbert
@ 2020-02-21 20:19     ` Andrzej Jakowski
  0 siblings, 0 replies; 12+ messages in thread
From: Andrzej Jakowski @ 2020-02-21 20:19 UTC (permalink / raw)
  To: Dr. David Alan Gilbert, Stefan Hajnoczi
  Cc: kwolf, Haozhong Zhang, qemu-block, qemu-devel, mreitz,
	keith.busch, Zhang Yi, Junyan He

On 2/21/20 11:45 AM, Dr. David Alan Gilbert wrote:
> Isn't there also a requirement that BARs are powers of two? Wouldn't
> you need to ensure the PMR file is a power of 2 in size?
> 
> Dave

Agree, need to make sure it is power of 2.
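
For illustration, a possible extra check in nvme_realize(); is_power_of_2() is
the existing helper from "qemu/host-utils.h":

if (!n->f_pmr_size || !is_power_of_2(n->f_pmr_size)) {
    error_setg(errp, "pmr backend file size must be a power of two");
    return;
}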



* Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
  2020-02-21 19:32       ` Stefan Hajnoczi
@ 2020-02-21 20:20         ` Andrzej Jakowski
  0 siblings, 0 replies; 12+ messages in thread
From: Andrzej Jakowski @ 2020-02-21 20:20 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Kevin Wolf, Haozhong Zhang, qemu block, qemu-devel, Max Reitz,
	Keith Busch, Zhang Yi, Junyan He, Dr. David Alan Gilbert

On 2/21/20 12:32 PM, Stefan Hajnoczi wrote:
> Hi Andrzej,
> After having looked at the PMRWBM part of the spec, I think that the
> Bit 1 mode should be implemented for slightly better performance.  Bit
> 0 mode is not well-suited to virtualization for the reasons I
> mentioned in the previous email.
> 
> The spec describes Bit 1 mode as "The completion of a read to the
> PMRSTS register shall ensure that all prior writes to the Persistent
> Memory Region have completed and are persistent".
> 
> Stefan

Makes sense -- I will incorporate that feedback in the second version of the patch.



