All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shivaprasad G Bhat <sbhat@linux.ibm.com>
To: david@gibson.dropbear.id.au, groug@kaod.org, qemu-ppc@nongnu.org,
	ehabkost@redhat.com, marcel.apfelbaum@gmail.com, mst@redhat.com,
	imammedo@redhat.com, xiaoguangrong.eric@gmail.com,
	peter.maydell@linaro.org, eblake@redhat.com, qemu-arm@nongnu.org,
	richard.henderson@linaro.org, pbonzini@redhat.com,
	marcel.apfelbaum@gmail.com, stefanha@redhat.com,
	haozhong.zhang@intel.com, shameerali.kolothum.thodi@huawei.com,
	kwangwoo.lee@sk.com, armbru@redhat.com
Cc: qemu-devel@nongnu.org, aneesh.kumar@linux.ibm.com,
	linux-nvdimm@lists.01.org, kvm-ppc@vger.kernel.org,
	shivaprasadbhat@gmail.com, bharata@linux.vnet.ibm.com
Subject: [PATCH v4 3/3] nvdimm: Enable sync-dax device property for nvdimm
Date: Wed, 28 Apr 2021 23:49:03 -0400	[thread overview]
Message-ID: <161966813983.652.5749368609701495826.stgit@17be908f7c1c> (raw)
In-Reply-To: <161966810162.652.13723419108625443430.stgit@17be908f7c1c>

The patch adds the 'sync-dax' property to the nvdimm device.

When the sync-dax is 'direct' indicates the backend is synchronous DAX
capable and no explicit flush requests are required. When the mode is
set to 'writeback' it indicates the backend is not synhronous DAX
capable and explicit flushes to Hypervisor are required.

On PPC where the flush requests from guest can be honoured by the qemu,
the 'writeback' mode is supported and set as the default. The device
tree property "hcall-flush-required" is added to the nvdimm node which
makes the guest to issue H_SCM_FLUSH hcalls to request for flushes
explicitly. This would be the default behaviour without sync-dax
property set for the nvdimm device. For old pSeries machine, the
default is 'unsafe'.

For non-PPC platforms, the mode is set to 'unsafe' as the default.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
 hw/arm/virt.c           |   28 +++++++++++++++++++++++--
 hw/i386/pc.c            |   28 +++++++++++++++++++++++--
 hw/mem/nvdimm.c         |   52 +++++++++++++++++++++++++++++++++++++++++++----
 hw/ppc/spapr.c          |   10 +++++++++
 hw/ppc/spapr_nvdimm.c   |   39 +++++++++++++++++++++++++++++++++++
 include/hw/mem/nvdimm.h |   11 ++++++++++
 include/hw/ppc/spapr.h  |    1 +
 qapi/common.json        |   20 ++++++++++++++++++
 8 files changed, 179 insertions(+), 10 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9f01d9041b..f32e3e4010 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2358,6 +2358,27 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
     return ms->possible_cpus;
 }
 
+static bool virt_nvdimm_validate(const MachineState *ms, NVDIMMDevice *nvdimm,
+                                 Error **errp)
+{
+    NvdimmSyncModes sync;
+
+    if (!ms->nvdimms_state->is_enabled) {
+        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
+        return false;
+    }
+
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK) {
+        error_setg(errp, "NVDIMM device " NVDIMM_SYNC_DAX_PROP
+                         "=%s mode unsupported", NvdimmSyncModes_str(sync));
+        return false;
+    }
+
+    return true;
+}
+
 static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                  Error **errp)
 {
@@ -2376,9 +2397,10 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         return;
     }
 
-    if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
-        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
-        return;
+    if (is_nvdimm) {
+        if (!virt_nvdimm_validate(ms, NVDIMM(dev), errp)) {
+            return;
+        }
     }
 
     pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8a84b25a03..2d5151462c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1211,6 +1211,27 @@ void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
     g_free(i8259);
 }
 
+static bool pc_nvdimm_validate(const MachineState *ms, NVDIMMDevice *nvdimm,
+                               Error **errp)
+{
+    NvdimmSyncModes sync;
+
+    if (!ms->nvdimms_state->is_enabled) {
+        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
+        return false;
+    }
+
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK) {
+        error_setg(errp, "NVDIMM device " NVDIMM_SYNC_DAX_PROP
+                   "=%s mode unsupported", NvdimmSyncModes_str(sync));
+        return false;
+    }
+
+    return true;
+}
+
 static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                Error **errp)
 {
@@ -1233,9 +1254,10 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         return;
     }
 
-    if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
-        error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
-        return;
+    if (is_nvdimm) {
+        if (!pc_nvdimm_validate(ms, NVDIMM(dev), errp)) {
+            return;
+        }
     }
 
     hotplug_handler_pre_plug(x86ms->acpi_dev, dev, &local_err);
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 7397b67156..56b4527362 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -96,6 +96,19 @@ static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name,
     g_free(value);
 }
 
+static int nvdimm_get_sync_mode(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(obj);
+
+    return nvdimm->sync_dax;
+}
+
+static void nvdimm_set_sync_mode(Object *obj, int mode, Error **errp)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(obj);
+
+    nvdimm->sync_dax = mode;
+}
 
 static void nvdimm_init(Object *obj)
 {
@@ -105,6 +118,13 @@ static void nvdimm_init(Object *obj)
 
     object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid,
                         nvdimm_set_uuid, NULL, NULL);
+
+    object_property_add_enum(obj, NVDIMM_SYNC_DAX_PROP, "NvdimmSyncModes",
+                             &NvdimmSyncModes_lookup, nvdimm_get_sync_mode,
+                             nvdimm_set_sync_mode);
+    object_property_set_description(obj, NVDIMM_SYNC_DAX_PROP,
+                                    "Set the Synchronus DAX mode");
+
 }
 
 static void nvdimm_finalize(Object *obj)
@@ -119,6 +139,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     PCDIMMDevice *dimm = PC_DIMM(nvdimm);
     uint64_t align, pmem_size, size;
     MemoryRegion *mr;
+    HostMemoryBackend *hostmem;
+    bool is_file_backed;
+    bool __attribute__((unused)) is_pmem = false;
 
     g_assert(!nvdimm->nvdimm_mr);
 
@@ -135,9 +158,8 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     nvdimm->label_data = memory_region_get_ram_ptr(mr) + pmem_size;
     pmem_size = QEMU_ALIGN_DOWN(pmem_size, align);
 
+    hostmem = dimm->hostmem;
     if (size <= nvdimm->label_size || !pmem_size) {
-        HostMemoryBackend *hostmem = dimm->hostmem;
-
         error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
                    "small to contain nvdimm label (0x%" PRIx64 ") and "
                    "aligned PMEM (0x%" PRIx64 ")",
@@ -147,14 +169,36 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     }
 
     if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
-        HostMemoryBackend *hostmem = dimm->hostmem;
-
         error_setg(errp, "'unarmed' property must be off since memdev %s "
                    "is read-only",
                    object_get_canonical_path_component(OBJECT(hostmem)));
         return;
     }
 
+    is_file_backed = (memory_region_get_fd(mr) > 0);
+    if (nvdimm->sync_dax == NVDIMM_SYNC_MODES_WRITEBACK && !is_file_backed) {
+        error_setg(errp, NVDIMM_SYNC_DAX_PROP"='%s' mode requires the "
+                   "memdev %s to be file backed",
+                   NvdimmSyncModes_str(nvdimm->sync_dax),
+                   object_get_canonical_path_component(OBJECT(hostmem)));
+        return;
+    }
+
+#ifdef CONFIG_LIBPMEM
+    if (is_file_backed) {
+        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem",
+                                           &error_abort);
+    }
+
+    if (nvdimm->sync_dax == NVDIMM_SYNC_MODES_DIRECT && !is_pmem) {
+        error_setg(errp, "NVDIMM device "NVDIMM_SYNC_DAX_PROP"=%s mode requires"
+                   " the memory backend device to be synchronous DAX capable. "
+                   "Indicate it so with pmem=yes for the corresponding "
+                   "memory-backend-file.",
+                   NvdimmSyncModes_str(nvdimm->sync_dax));
+    }
+#endif
+
     nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
                              "nvdimm-memory", mr, 0, pmem_size);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 80957f9188..d0058bc13b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4616,6 +4616,11 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
 static void spapr_machine_6_0_class_options(MachineClass *mc)
 {
     /* Defaults for the latest behaviour inherited from the base class */
+    static GlobalProperty compat[] = {
+        { "nvdimm", "sync-dax", "writeback" },
+    };
+
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
@@ -4625,8 +4630,13 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
  */
 static void spapr_machine_5_2_class_options(MachineClass *mc)
 {
+    static GlobalProperty compat[] = {
+        { "nvdimm", "sync-dax", "unsafe" },
+    };
+
     spapr_machine_6_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 77eb7e1293..615439391c 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -50,6 +50,10 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
 {
     const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
     const MachineState *ms = MACHINE(hotplug_dev);
+    PCDIMMDevice __attribute__((unused)) *dimm = PC_DIMM(nvdimm);
+    MemoryRegion __attribute__((unused)) *mr;
+    bool __attribute__((unused)) is_pmem = false;
+    NvdimmSyncModes __attribute__((unused)) sync;
     g_autofree char *uuidstr = NULL;
     QemuUUID uuid;
     int ret;
@@ -77,6 +81,24 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
         return false;
     }
 
+#ifdef CONFIG_LIBPMEM
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+
+    mr = host_memory_backend_get_memory(dimm->hostmem);
+    if (memory_region_get_fd(mr) > 0) { /* memor-backend-file */
+        HostMemoryBackend *backend = MEMORY_BACKEND(dimm->hostmem);
+        is_pmem = object_property_get_bool(OBJECT(backend), "pmem",
+                                           &error_abort);
+    }
+
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK && is_pmem) {
+        warn_report("The NVDIMM backing device being Synchronous DAX capable, "
+                    NVDIMM_SYNC_DAX_PROP"='%s' is unnecessary as the backend "
+                    "ensures the safety already.", NvdimmSyncModes_str(sync));
+    }
+#endif
+
     uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
                                       &error_abort);
     ret = qemu_uuid_parse(uuidstr, &uuid);
@@ -124,6 +146,9 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
     uint64_t lsize = nvdimm->label_size;
     uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                             NULL);
+    NvdimmSyncModes sync_dax = object_property_get_enum(OBJECT(nvdimm),
+                                         NVDIMM_SYNC_DAX_PROP,
+                                         "NvdimmSyncModes", &error_abort);
 
     drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
     g_assert(drc);
@@ -158,6 +183,11 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                              "operating-system")));
     _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
 
+    if (sync_dax == NVDIMM_SYNC_MODES_WRITEBACK) {
+        _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+                         NULL, 0));
+    }
+
     return child_offset;
 }
 
@@ -566,6 +596,8 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
     uint64_t continue_token = args[1];
     SpaprDrc *drc = spapr_drc_by_index(drc_index);
     PCDIMMDevice *dimm;
+    NVDIMMDevice *nvdimm;
+    NvdimmSyncModes sync_dax;
     HostMemoryBackend *backend = NULL;
     SpaprNVDIMMDeviceFlushState *state;
     ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
@@ -575,6 +607,13 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
         return H_PARAMETER;
     }
 
+    nvdimm = NVDIMM(drc->dev);
+    sync_dax = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync_dax != NVDIMM_SYNC_MODES_WRITEBACK) {
+        return H_UNSUPPORTED;
+    }
+
     if (continue_token != 0) {
         goto get_status;
     }
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index bcf62f825c..ef30bdeca4 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -28,6 +28,7 @@
 #include "qemu/uuid.h"
 #include "hw/acpi/aml-build.h"
 #include "qom/object.h"
+#include "qapi/qapi-types-machine.h"
 
 #define NVDIMM_DEBUG 0
 #define nvdimm_debug(fmt, ...)                                \
@@ -51,6 +52,7 @@ OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
 #define NVDIMM_LABEL_SIZE_PROP "label-size"
 #define NVDIMM_UUID_PROP       "uuid"
 #define NVDIMM_UNARMED_PROP    "unarmed"
+#define NVDIMM_SYNC_DAX_PROP   "sync-dax"
 
 struct NVDIMMDevice {
     /* private */
@@ -85,6 +87,15 @@ struct NVDIMMDevice {
      */
     bool unarmed;
 
+    /*
+     * The 'writeback' value would indicate the guest to make explicit
+     * flush requests to hypervisor. When 'direct', the device is
+     * assumed to be synchronous DAX capable and no explicit flush
+     * is required. 'unsafe' indicates flush semantics unimplemented
+     * and the data persistence not guaranteed in power failure scenarios.
+     */
+    NvdimmSyncModes sync_dax;
+
     /*
      * The PPC64 - spapr requires each nvdimm device have a uuid.
      */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 478c031396..ddde87e2b6 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -332,6 +332,7 @@ struct SpaprMachineState {
 #define H_P7              -60
 #define H_P8              -61
 #define H_P9              -62
+#define H_UNSUPPORTED     -67
 #define H_OVERLAP         -68
 #define H_UNSUPPORTED_FLAG -256
 #define H_MULTI_THREADS_ACTIVE -9005
diff --git a/qapi/common.json b/qapi/common.json
index 7c976296f0..bec1b45b09 100644
--- a/qapi/common.json
+++ b/qapi/common.json
@@ -197,3 +197,23 @@
 { 'enum': 'GrabToggleKeys',
   'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock',
             'ctrl-scrolllock' ] }
+
+##
+# @NvdimmSyncModes:
+#
+# Indicates the mode of flush to be used to ensure persistence in case
+# of power failures.
+#
+# @unsafe: This is to indicate, the data on the backend device not be
+#          consistent in power failure scenarios.
+# @direct: This is to indicate the backend device supports synchronous DAX
+#          and no explicit flush requests from the guest is required.
+# @writeback: To be used when the backend device doesn't support synchronous
+#             DAX. The hypervisor issues flushes to the disk when requested
+#             by the guest.
+# Since: 6.0
+#
+##
+{ 'enum': 'NvdimmSyncModes',
+  'data': [ 'unsafe', 'writeback',
+            { 'name': 'direct', 'if': 'defined(CONFIG_LIBPMEM)' } ] }

_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

WARNING: multiple messages have this Message-ID
From: Shivaprasad G Bhat <sbhat@linux.ibm.com>
To: david@gibson.dropbear.id.au, groug@kaod.org, qemu-ppc@nongnu.org,
	ehabkost@redhat.com, marcel.apfelbaum@gmail.com, mst@redhat.com,
	imammedo@redhat.com, xiaoguangrong.eric@gmail.com,
	peter.maydell@linaro.org, eblake@redhat.com, qemu-arm@nongnu.org,
	richard.henderson@linaro.org, pbonzini@redhat.com,
	marcel.apfelbaum@gmail.com, stefanha@redhat.com,
	haozhong.zhang@intel.com, shameerali.kolothum.thodi@huawei.com,
	kwangwoo.lee@sk.com, armbru@redhat.com
Cc: linux-nvdimm@lists.01.org, aneesh.kumar@linux.ibm.com,
	qemu-devel@nongnu.org, kvm-ppc@vger.kernel.org,
	shivaprasadbhat@gmail.com, bharata@linux.vnet.ibm.com
Subject: [PATCH v4 3/3] nvdimm: Enable sync-dax device property for nvdimm
Date: Wed, 28 Apr 2021 23:49:03 -0400	[thread overview]
Message-ID: <161966813983.652.5749368609701495826.stgit@17be908f7c1c> (raw)
In-Reply-To: <161966810162.652.13723419108625443430.stgit@17be908f7c1c>

The patch adds the 'sync-dax' property to the nvdimm device.

When the sync-dax is 'direct' indicates the backend is synchronous DAX
capable and no explicit flush requests are required. When the mode is
set to 'writeback' it indicates the backend is not synhronous DAX
capable and explicit flushes to Hypervisor are required.

On PPC where the flush requests from guest can be honoured by the qemu,
the 'writeback' mode is supported and set as the default. The device
tree property "hcall-flush-required" is added to the nvdimm node which
makes the guest to issue H_SCM_FLUSH hcalls to request for flushes
explicitly. This would be the default behaviour without sync-dax
property set for the nvdimm device. For old pSeries machine, the
default is 'unsafe'.

For non-PPC platforms, the mode is set to 'unsafe' as the default.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
 hw/arm/virt.c           |   28 +++++++++++++++++++++++--
 hw/i386/pc.c            |   28 +++++++++++++++++++++++--
 hw/mem/nvdimm.c         |   52 +++++++++++++++++++++++++++++++++++++++++++----
 hw/ppc/spapr.c          |   10 +++++++++
 hw/ppc/spapr_nvdimm.c   |   39 +++++++++++++++++++++++++++++++++++
 include/hw/mem/nvdimm.h |   11 ++++++++++
 include/hw/ppc/spapr.h  |    1 +
 qapi/common.json        |   20 ++++++++++++++++++
 8 files changed, 179 insertions(+), 10 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9f01d9041b..f32e3e4010 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2358,6 +2358,27 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
     return ms->possible_cpus;
 }
 
+static bool virt_nvdimm_validate(const MachineState *ms, NVDIMMDevice *nvdimm,
+                                 Error **errp)
+{
+    NvdimmSyncModes sync;
+
+    if (!ms->nvdimms_state->is_enabled) {
+        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
+        return false;
+    }
+
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK) {
+        error_setg(errp, "NVDIMM device " NVDIMM_SYNC_DAX_PROP
+                         "=%s mode unsupported", NvdimmSyncModes_str(sync));
+        return false;
+    }
+
+    return true;
+}
+
 static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                  Error **errp)
 {
@@ -2376,9 +2397,10 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         return;
     }
 
-    if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
-        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
-        return;
+    if (is_nvdimm) {
+        if (!virt_nvdimm_validate(ms, NVDIMM(dev), errp)) {
+            return;
+        }
     }
 
     pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 8a84b25a03..2d5151462c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1211,6 +1211,27 @@ void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
     g_free(i8259);
 }
 
+static bool pc_nvdimm_validate(const MachineState *ms, NVDIMMDevice *nvdimm,
+                               Error **errp)
+{
+    NvdimmSyncModes sync;
+
+    if (!ms->nvdimms_state->is_enabled) {
+        error_setg(errp, "nvdimm is not enabled: add 'nvdimm=on' to '-M'");
+        return false;
+    }
+
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK) {
+        error_setg(errp, "NVDIMM device " NVDIMM_SYNC_DAX_PROP
+                   "=%s mode unsupported", NvdimmSyncModes_str(sync));
+        return false;
+    }
+
+    return true;
+}
+
 static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                Error **errp)
 {
@@ -1233,9 +1254,10 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         return;
     }
 
-    if (is_nvdimm && !ms->nvdimms_state->is_enabled) {
-        error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
-        return;
+    if (is_nvdimm) {
+        if (!pc_nvdimm_validate(ms, NVDIMM(dev), errp)) {
+            return;
+        }
     }
 
     hotplug_handler_pre_plug(x86ms->acpi_dev, dev, &local_err);
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 7397b67156..56b4527362 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -96,6 +96,19 @@ static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name,
     g_free(value);
 }
 
+static int nvdimm_get_sync_mode(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(obj);
+
+    return nvdimm->sync_dax;
+}
+
+static void nvdimm_set_sync_mode(Object *obj, int mode, Error **errp)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(obj);
+
+    nvdimm->sync_dax = mode;
+}
 
 static void nvdimm_init(Object *obj)
 {
@@ -105,6 +118,13 @@ static void nvdimm_init(Object *obj)
 
     object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid,
                         nvdimm_set_uuid, NULL, NULL);
+
+    object_property_add_enum(obj, NVDIMM_SYNC_DAX_PROP, "NvdimmSyncModes",
+                             &NvdimmSyncModes_lookup, nvdimm_get_sync_mode,
+                             nvdimm_set_sync_mode);
+    object_property_set_description(obj, NVDIMM_SYNC_DAX_PROP,
+                                    "Set the Synchronus DAX mode");
+
 }
 
 static void nvdimm_finalize(Object *obj)
@@ -119,6 +139,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     PCDIMMDevice *dimm = PC_DIMM(nvdimm);
     uint64_t align, pmem_size, size;
     MemoryRegion *mr;
+    HostMemoryBackend *hostmem;
+    bool is_file_backed;
+    bool __attribute__((unused)) is_pmem = false;
 
     g_assert(!nvdimm->nvdimm_mr);
 
@@ -135,9 +158,8 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     nvdimm->label_data = memory_region_get_ram_ptr(mr) + pmem_size;
     pmem_size = QEMU_ALIGN_DOWN(pmem_size, align);
 
+    hostmem = dimm->hostmem;
     if (size <= nvdimm->label_size || !pmem_size) {
-        HostMemoryBackend *hostmem = dimm->hostmem;
-
         error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
                    "small to contain nvdimm label (0x%" PRIx64 ") and "
                    "aligned PMEM (0x%" PRIx64 ")",
@@ -147,14 +169,36 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
     }
 
     if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
-        HostMemoryBackend *hostmem = dimm->hostmem;
-
         error_setg(errp, "'unarmed' property must be off since memdev %s "
                    "is read-only",
                    object_get_canonical_path_component(OBJECT(hostmem)));
         return;
     }
 
+    is_file_backed = (memory_region_get_fd(mr) > 0);
+    if (nvdimm->sync_dax == NVDIMM_SYNC_MODES_WRITEBACK && !is_file_backed) {
+        error_setg(errp, NVDIMM_SYNC_DAX_PROP"='%s' mode requires the "
+                   "memdev %s to be file backed",
+                   NvdimmSyncModes_str(nvdimm->sync_dax),
+                   object_get_canonical_path_component(OBJECT(hostmem)));
+        return;
+    }
+
+#ifdef CONFIG_LIBPMEM
+    if (is_file_backed) {
+        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem",
+                                           &error_abort);
+    }
+
+    if (nvdimm->sync_dax == NVDIMM_SYNC_MODES_DIRECT && !is_pmem) {
+        error_setg(errp, "NVDIMM device "NVDIMM_SYNC_DAX_PROP"=%s mode requires"
+                   " the memory backend device to be synchronous DAX capable. "
+                   "Indicate it so with pmem=yes for the corresponding "
+                   "memory-backend-file.",
+                   NvdimmSyncModes_str(nvdimm->sync_dax));
+    }
+#endif
+
     nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
     memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
                              "nvdimm-memory", mr, 0, pmem_size);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 80957f9188..d0058bc13b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4616,6 +4616,11 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
 static void spapr_machine_6_0_class_options(MachineClass *mc)
 {
     /* Defaults for the latest behaviour inherited from the base class */
+    static GlobalProperty compat[] = {
+        { "nvdimm", "sync-dax", "writeback" },
+    };
+
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
@@ -4625,8 +4630,13 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
  */
 static void spapr_machine_5_2_class_options(MachineClass *mc)
 {
+    static GlobalProperty compat[] = {
+        { "nvdimm", "sync-dax", "unsafe" },
+    };
+
     spapr_machine_6_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 77eb7e1293..615439391c 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -50,6 +50,10 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
 {
     const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
     const MachineState *ms = MACHINE(hotplug_dev);
+    PCDIMMDevice __attribute__((unused)) *dimm = PC_DIMM(nvdimm);
+    MemoryRegion __attribute__((unused)) *mr;
+    bool __attribute__((unused)) is_pmem = false;
+    NvdimmSyncModes __attribute__((unused)) sync;
     g_autofree char *uuidstr = NULL;
     QemuUUID uuid;
     int ret;
@@ -77,6 +81,24 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
         return false;
     }
 
+#ifdef CONFIG_LIBPMEM
+    sync = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+
+    mr = host_memory_backend_get_memory(dimm->hostmem);
+    if (memory_region_get_fd(mr) > 0) { /* memor-backend-file */
+        HostMemoryBackend *backend = MEMORY_BACKEND(dimm->hostmem);
+        is_pmem = object_property_get_bool(OBJECT(backend), "pmem",
+                                           &error_abort);
+    }
+
+    if (sync == NVDIMM_SYNC_MODES_WRITEBACK && is_pmem) {
+        warn_report("The NVDIMM backing device being Synchronous DAX capable, "
+                    NVDIMM_SYNC_DAX_PROP"='%s' is unnecessary as the backend "
+                    "ensures the safety already.", NvdimmSyncModes_str(sync));
+    }
+#endif
+
     uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP,
                                       &error_abort);
     ret = qemu_uuid_parse(uuidstr, &uuid);
@@ -124,6 +146,9 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
     uint64_t lsize = nvdimm->label_size;
     uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
                                             NULL);
+    NvdimmSyncModes sync_dax = object_property_get_enum(OBJECT(nvdimm),
+                                         NVDIMM_SYNC_DAX_PROP,
+                                         "NvdimmSyncModes", &error_abort);
 
     drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
     g_assert(drc);
@@ -158,6 +183,11 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                              "operating-system")));
     _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
 
+    if (sync_dax == NVDIMM_SYNC_MODES_WRITEBACK) {
+        _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+                         NULL, 0));
+    }
+
     return child_offset;
 }
 
@@ -566,6 +596,8 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
     uint64_t continue_token = args[1];
     SpaprDrc *drc = spapr_drc_by_index(drc_index);
     PCDIMMDevice *dimm;
+    NVDIMMDevice *nvdimm;
+    NvdimmSyncModes sync_dax;
     HostMemoryBackend *backend = NULL;
     SpaprNVDIMMDeviceFlushState *state;
     ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
@@ -575,6 +607,13 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
         return H_PARAMETER;
     }
 
+    nvdimm = NVDIMM(drc->dev);
+    sync_dax = object_property_get_enum(OBJECT(nvdimm), NVDIMM_SYNC_DAX_PROP,
+                                    "NvdimmSyncModes", &error_abort);
+    if (sync_dax != NVDIMM_SYNC_MODES_WRITEBACK) {
+        return H_UNSUPPORTED;
+    }
+
     if (continue_token != 0) {
         goto get_status;
     }
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index bcf62f825c..ef30bdeca4 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -28,6 +28,7 @@
 #include "qemu/uuid.h"
 #include "hw/acpi/aml-build.h"
 #include "qom/object.h"
+#include "qapi/qapi-types-machine.h"
 
 #define NVDIMM_DEBUG 0
 #define nvdimm_debug(fmt, ...)                                \
@@ -51,6 +52,7 @@ OBJECT_DECLARE_TYPE(NVDIMMDevice, NVDIMMClass, NVDIMM)
 #define NVDIMM_LABEL_SIZE_PROP "label-size"
 #define NVDIMM_UUID_PROP       "uuid"
 #define NVDIMM_UNARMED_PROP    "unarmed"
+#define NVDIMM_SYNC_DAX_PROP   "sync-dax"
 
 struct NVDIMMDevice {
     /* private */
@@ -85,6 +87,15 @@ struct NVDIMMDevice {
      */
     bool unarmed;
 
+    /*
+     * The 'writeback' value would indicate the guest to make explicit
+     * flush requests to hypervisor. When 'direct', the device is
+     * assumed to be synchronous DAX capable and no explicit flush
+     * is required. 'unsafe' indicates flush semantics unimplemented
+     * and the data persistence not guaranteed in power failure scenarios.
+     */
+    NvdimmSyncModes sync_dax;
+
     /*
      * The PPC64 - spapr requires each nvdimm device have a uuid.
      */
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 478c031396..ddde87e2b6 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -332,6 +332,7 @@ struct SpaprMachineState {
 #define H_P7              -60
 #define H_P8              -61
 #define H_P9              -62
+#define H_UNSUPPORTED     -67
 #define H_OVERLAP         -68
 #define H_UNSUPPORTED_FLAG -256
 #define H_MULTI_THREADS_ACTIVE -9005
diff --git a/qapi/common.json b/qapi/common.json
index 7c976296f0..bec1b45b09 100644
--- a/qapi/common.json
+++ b/qapi/common.json
@@ -197,3 +197,23 @@
 { 'enum': 'GrabToggleKeys',
   'data': [ 'ctrl-ctrl', 'alt-alt', 'shift-shift','meta-meta', 'scrolllock',
             'ctrl-scrolllock' ] }
+
+##
+# @NvdimmSyncModes:
+#
+# Indicates the mode of flush to be used to ensure persistence in case
+# of power failures.
+#
+# @unsafe: This is to indicate, the data on the backend device not be
+#          consistent in power failure scenarios.
+# @direct: This is to indicate the backend device supports synchronous DAX
+#          and no explicit flush requests from the guest is required.
+# @writeback: To be used when the backend device doesn't support synchronous
+#             DAX. The hypervisor issues flushes to the disk when requested
+#             by the guest.
+# Since: 6.0
+#
+##
+{ 'enum': 'NvdimmSyncModes',
+  'data': [ 'unsafe', 'writeback',
+            { 'name': 'direct', 'if': 'defined(CONFIG_LIBPMEM)' } ] }




  parent reply	other threads:[~2021-04-29  3:49 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-29  3:48 [PATCH v4 0/3] nvdimm: Enable sync-dax " Shivaprasad G Bhat
2021-04-29  3:48 ` Shivaprasad G Bhat
2021-04-29  3:48 ` [PATCH v4 1/3] spapr: nvdimm: Forward declare and move the definitions Shivaprasad G Bhat
2021-04-29  3:48   ` Shivaprasad G Bhat
2021-05-03 18:23   ` Eric Blake
2021-05-03 18:23     ` Eric Blake
2021-05-04  1:21     ` David Gibson
2021-05-04  1:21       ` David Gibson
2021-04-29  3:48 ` [PATCH v4 2/3] spapr: nvdimm: Implement H_SCM_FLUSH hcall Shivaprasad G Bhat
2021-04-29  3:48   ` Shivaprasad G Bhat
2021-04-29  3:49 ` Shivaprasad G Bhat [this message]
2021-04-29  3:49   ` [PATCH v4 3/3] nvdimm: Enable sync-dax device property for nvdimm Shivaprasad G Bhat
2021-05-03 18:27   ` Eric Blake
2021-05-03 18:27     ` Eric Blake
2021-04-29 15:55 ` [PATCH v4 0/3] nvdimm: Enable sync-dax " Stefan Hajnoczi
2021-04-29 15:55   ` Stefan Hajnoczi
2021-04-29 16:32   ` Aneesh Kumar K.V
2021-04-29 16:32     ` Aneesh Kumar K.V
2021-04-30  4:27     ` David Gibson
2021-04-30  4:27       ` David Gibson
2021-04-30 15:08       ` Stefan Hajnoczi
2021-04-30 15:08         ` Stefan Hajnoczi
2021-04-30 19:14 ` Dan Williams
2021-04-30 19:14   ` Dan Williams
2021-05-01 13:55   ` Aneesh Kumar K.V
2021-05-01 13:55     ` Aneesh Kumar K.V
2021-05-03 14:05   ` Shivaprasad G Bhat
2021-05-03 14:05     ` Shivaprasad G Bhat
2021-05-03 19:41     ` Dan Williams
2021-05-03 19:41       ` Dan Williams
2021-05-04  4:59       ` Aneesh Kumar K.V
2021-05-04  4:59         ` Aneesh Kumar K.V
2021-05-04  5:43         ` Pankaj Gupta
2021-05-04  5:43           ` Pankaj Gupta
2021-05-04  9:02           ` Aneesh Kumar K.V
2021-05-04  9:02             ` Aneesh Kumar K.V
2021-05-05  0:12             ` Dan Williams
2021-05-05  0:12               ` Dan Williams

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=161966813983.652.5749368609701495826.stgit@17be908f7c1c \
    --to=sbhat@linux.ibm.com \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=armbru@redhat.com \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=eblake@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=groug@kaod.org \
    --cc=haozhong.zhang@intel.com \
    --cc=imammedo@redhat.com \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kwangwoo.lee@sk.com \
    --cc=linux-nvdimm@lists.01.org \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=shameerali.kolothum.thodi@huawei.com \
    --cc=shivaprasadbhat@gmail.com \
    --cc=stefanha@redhat.com \
    --cc=xiaoguangrong.eric@gmail.com \
    --subject='Re: [PATCH v4 3/3] nvdimm: Enable sync-dax device property for nvdimm' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.