linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support
@ 2020-06-02  7:49 Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 2/5] powerpc/pmem: Disable synchronous fault by default Aneesh Kumar K.V
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V

With POWER10, architecture is adding new pmem flush and sync instructions.
The kernel should prevent the usage of MAP_SYNC if applications are not using
the new instructions on newer hardware

This patch adds a dax attribute (/sys/bus/nd/devices/region0/pfn0.1/block/pmem0/dax/sync_fault)
which can be used to control this flag. If the device supports synchronous flush
then userspace can update this attribute to enable/disable the synchronous
fault. The attribute is only visible if there is write cache enabled on the device.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/dax/super.c | 73 ++++++++++++++++++++++++++++++++++++++++++++-
 mm/Kconfig          |  3 ++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 8e32345be0f7..980f7be7e56d 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -198,6 +198,12 @@ enum dax_device_flags {
 	DAXDEV_WRITE_CACHE,
 	/* flag to check if device supports synchronous flush */
 	DAXDEV_SYNC,
+	/*
+	 * flag to indicate whether synchronous flush is enabled.
+	 * Some platform may want to disable synchronous flush support
+	 * even though device supports the same.
+	 */
+	DAXDEV_SYNC_ENABLED,
 };
 
 /**
@@ -254,6 +260,60 @@ static ssize_t write_cache_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(write_cache);
 
+static bool dax_synchronous_enabled(struct dax_device *dax_dev)
+{
+	return test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+}
+
+static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable)
+{
+	if (!test_bit(DAXDEV_SYNC, &dax_dev->flags))
+		return;
+
+	if (enable)
+		set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+	else
+		clear_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+}
+
+
+static ssize_t sync_fault_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+	ssize_t rc;
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return -ENXIO;
+
+	rc = sprintf(buf, "%d\n", !!__dax_synchronous(dax_dev));
+	put_dax(dax_dev);
+	return rc;
+}
+
+static ssize_t sync_fault_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	bool enable_sync;
+	int rc = strtobool(buf, &enable_sync);
+	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
+
+	WARN_ON_ONCE(!dax_dev);
+	if (!dax_dev)
+		return -ENXIO;
+
+	if (rc)
+		len = rc;
+	else
+		set_dax_synchronous_enable(dax_dev, enable_sync);
+
+	put_dax(dax_dev);
+	return len;
+}
+
+static DEVICE_ATTR_RW(sync_fault);
+
 static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
@@ -267,11 +327,18 @@ static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
 	if (a == &dev_attr_write_cache.attr)
 		return 0;
 #endif
+	if (a == &dev_attr_sync_fault.attr) {
+		if (dax_write_cache_enabled(dax_dev))
+			return a->mode;
+		return 0;
+	}
+
 	return a->mode;
 }
 
 static struct attribute *dax_attributes[] = {
 	&dev_attr_write_cache.attr,
+	&dev_attr_sync_fault.attr,
 	NULL,
 };
 
@@ -394,13 +461,17 @@ EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
 
 bool __dax_synchronous(struct dax_device *dax_dev)
 {
-	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
+	return test_bit(DAXDEV_SYNC, &dax_dev->flags) &&
+		test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
 }
 EXPORT_SYMBOL_GPL(__dax_synchronous);
 
 void __set_dax_synchronous(struct dax_device *dax_dev)
 {
 	set_bit(DAXDEV_SYNC, &dax_dev->flags);
+#ifndef CONFIG_ARCH_MAP_SYNC_DISABLE
+	set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+#endif
 }
 EXPORT_SYMBOL_GPL(__set_dax_synchronous);
 
diff --git a/mm/Kconfig b/mm/Kconfig
index c1acc34c1c35..38fd7cfbfca8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -867,4 +867,7 @@ config ARCH_HAS_HUGEPD
 config MAPPING_DIRTY_HELPERS
         bool
 
+config ARCH_MAP_SYNC_DISABLE
+	bool
+
 endmenu
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC PATCH v2 2/5] powerpc/pmem: Disable synchronous fault by default
  2020-06-02  7:49 [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support Aneesh Kumar K.V
@ 2020-06-02  7:49 ` Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 3/5] libnvdimm/dax: Make DAXDEV_SYNC_ENABLED flag region-specific Aneesh Kumar K.V
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V

This adds a kernel config option that controls whether MAP_SYNC is enabled by
default. With POWER10, architecture is adding new pmem flush and sync
instructions. The kernel should prevent the usage of MAP_SYNC if applications
are not using the new instructions on newer hardware.

This config allows user to control whether MAP_SYNC should be enabled by
default or not.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/Kconfig.cputype | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 27a81c291be8..f8694838ad4e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -383,6 +383,15 @@ config PPC_KUEP
 
 	  If you're unsure, say Y.
 
+config ARCH_MAP_SYNC_DISABLE
+	bool "Disable synchronous fault support (MAP_SYNC)"
+	default y
+	help
+	  Disable support for synchronous fault with nvdimm namespaces.
+
+	  If you're unsure, say Y.
+
+
 config PPC_HAVE_KUAP
 	bool
 
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC PATCH v2 3/5] libnvdimm/dax: Make DAXDEV_SYNC_ENABLED flag region-specific
  2020-06-02  7:49 [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 2/5] powerpc/pmem: Disable synchronous fault by default Aneesh Kumar K.V
@ 2020-06-02  7:49 ` Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 4/5] powerpc/papr_scm: disable MAP_SYNC for newer hardware Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 5/5] libnvdimm: Add prctl control for disabling synchronous fault support Aneesh Kumar K.V
  3 siblings, 0 replies; 5+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V

This patch makes sync fault enable/disable feature more fine-grained
by allowing region-wise control of the same.

In a followup patch on ppc64 only device with compat string "ibm,pmemory-v2"
will disable the sync fault feature.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/dax/bus.c            |  2 +-
 drivers/dax/super.c          | 16 +++++++++-------
 drivers/nvdimm/pmem.c        |  4 ++++
 drivers/nvdimm/region_devs.c | 16 ++++++++++++++++
 include/linux/dax.h          | 16 ++++++++++++++++
 include/linux/libnvdimm.h    |  4 ++++
 6 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index df238c8b6ef2..8a825ecff49b 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -420,7 +420,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
 	 * No 'host' or dax_operations since there is no access to this
 	 * device outside of mmap of the resulting character device.
 	 */
-	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC);
+	dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC | DAXDEV_F_SYNC_ENABLED);
 	if (IS_ERR(dax_dev)) {
 		rc = PTR_ERR(dax_dev);
 		goto err;
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 980f7be7e56d..f93e6649d452 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -260,10 +260,11 @@ static ssize_t write_cache_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(write_cache);
 
-static bool dax_synchronous_enabled(struct dax_device *dax_dev)
+bool __dax_synchronous_enabled(struct dax_device *dax_dev)
 {
 	return test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
 }
+EXPORT_SYMBOL_GPL(__dax_synchronous_enabled);
 
 static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable)
 {
@@ -280,6 +281,7 @@ static void set_dax_synchronous_enable(struct dax_device *dax_dev, bool enable)
 static ssize_t sync_fault_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
+	int enabled;
 	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
 	ssize_t rc;
 
@@ -287,7 +289,8 @@ static ssize_t sync_fault_show(struct device *dev,
 	if (!dax_dev)
 		return -ENXIO;
 
-	rc = sprintf(buf, "%d\n", !!__dax_synchronous(dax_dev));
+	enabled = (dax_synchronous(dax_dev) && dax_synchronous_enabled(dax_dev));
+	rc = sprintf(buf, "%d\n", enabled);
 	put_dax(dax_dev);
 	return rc;
 }
@@ -461,17 +464,13 @@ EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
 
 bool __dax_synchronous(struct dax_device *dax_dev)
 {
-	return test_bit(DAXDEV_SYNC, &dax_dev->flags) &&
-		test_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
+	return test_bit(DAXDEV_SYNC, &dax_dev->flags);
 }
 EXPORT_SYMBOL_GPL(__dax_synchronous);
 
 void __set_dax_synchronous(struct dax_device *dax_dev)
 {
 	set_bit(DAXDEV_SYNC, &dax_dev->flags);
-#ifndef CONFIG_ARCH_MAP_SYNC_DISABLE
-	set_bit(DAXDEV_SYNC_ENABLED, &dax_dev->flags);
-#endif
 }
 EXPORT_SYMBOL_GPL(__set_dax_synchronous);
 
@@ -665,6 +664,9 @@ struct dax_device *alloc_dax(void *private, const char *__host,
 	if (flags & DAXDEV_F_SYNC)
 		set_dax_synchronous(dax_dev);
 
+	if (flags & DAXDEV_F_SYNC_ENABLED)
+		set_dax_synchronous_enable(dax_dev, true);
+
 	return dax_dev;
 
  err_dev:
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2df6994acf83..dc9c269eb50d 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -485,6 +485,10 @@ static int pmem_attach_disk(struct device *dev,
 
 	if (is_nvdimm_sync(nd_region))
 		flags = DAXDEV_F_SYNC;
+
+	if (is_nvdimm_sync_enabled(nd_region))
+		flags |= DAXDEV_F_SYNC_ENABLED;
+
 	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
 	if (IS_ERR(dax_dev)) {
 		put_disk(disk);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index ccbb5b43b8b2..2181560cf655 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1283,6 +1283,22 @@ bool is_nvdimm_sync(struct nd_region *nd_region)
 }
 EXPORT_SYMBOL_GPL(is_nvdimm_sync);
 
+bool is_nvdimm_sync_enabled(struct nd_region *nd_region)
+{
+#ifdef CONFIG_ARCH_MAP_SYNC_DISABLE
+	if (is_nd_volatile(&nd_region->dev))
+		return true;
+
+	return is_nd_pmem(&nd_region->dev) &&
+		test_bit(ND_REGION_SYNC_ENABLED, &nd_region->flags);
+#else
+	return true;
+#endif
+
+}
+EXPORT_SYMBOL_GPL(is_nvdimm_sync_enabled);
+
+
 struct conflict_context {
 	struct nd_region *nd_region;
 	resource_size_t start, size;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d7af5d243f24..c4a3551557de 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -10,6 +10,9 @@
 /* Flag for synchronous flush */
 #define DAXDEV_F_SYNC (1UL << 0)
 
+/* flag for platform forcing synchronous flush disable */
+#define DAXDEV_F_SYNC_ENABLED (1UL << 1)
+
 typedef unsigned long dax_entry_t;
 
 struct iomap_ops;
@@ -59,6 +62,13 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
 	__set_dax_synchronous(dax_dev);
 }
+
+bool __dax_synchronous_enabled(struct dax_device *dax_dev);
+static inline bool dax_synchronous_enabled(struct dax_device *dax_dev)
+{
+	return  __dax_synchronous_enabled(dax_dev);
+}
+
 /*
  * Check if given mapping is supported by the file / underlying device.
  */
@@ -69,6 +79,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 		return true;
 	if (!IS_DAX(file_inode(vma->vm_file)))
 		return false;
+	/*
+	 * check MAP_SYNC is disabled by platform for this device.
+	 */
+	if (!dax_synchronous_enabled(dax_dev))
+		return false;
+
 	return dax_synchronous(dax_dev);
 }
 #else
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 18da4059be09..cc3962c4978f 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -63,6 +63,9 @@ enum {
 	/* Platform provides asynchronous flush mechanism */
 	ND_REGION_ASYNC = 3,
 
+	/* Platform wants to disable synchronous flush mechanism */
+	ND_REGION_SYNC_ENABLED= 4,
+
 	/* mark newly adjusted resources as requiring a label update */
 	DPA_RESOURCE_ADJUSTED = 1 << 0,
 };
@@ -262,6 +265,7 @@ int nvdimm_has_flush(struct nd_region *nd_region);
 int nvdimm_has_cache(struct nd_region *nd_region);
 int nvdimm_in_overwrite(struct nvdimm *nvdimm);
 bool is_nvdimm_sync(struct nd_region *nd_region);
+bool is_nvdimm_sync_enabled(struct nd_region *nd_region);
 
 static inline int nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC PATCH v2 4/5] powerpc/papr_scm: disable MAP_SYNC for newer hardware
  2020-06-02  7:49 [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 2/5] powerpc/pmem: Disable synchronous fault by default Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 3/5] libnvdimm/dax: Make DAXDEV_SYNC_ENABLED flag region-specific Aneesh Kumar K.V
@ 2020-06-02  7:49 ` Aneesh Kumar K.V
  2020-06-02  7:49 ` [RFC PATCH v2 5/5] libnvdimm: Add prctl control for disabling synchronous fault support Aneesh Kumar K.V
  3 siblings, 0 replies; 5+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 17 ++++++++++++++++-
 drivers/nvdimm/of_pmem.c                  |  7 +++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index f35592423380..30474d4cd109 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -30,6 +30,7 @@ struct papr_scm_priv {
 	uint64_t block_size;
 	int metadata_size;
 	bool is_volatile;
+	bool disable_map_sync;
 
 	uint64_t bound_addr;
 
@@ -340,11 +341,18 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	ndr_desc.mapping = &mapping;
 	ndr_desc.num_mappings = 1;
 	ndr_desc.nd_set = &p->nd_set;
+	set_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 
 	if (p->is_volatile)
 		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
 	else {
 		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
+		/*
+		 * for a persistent region, check if the platform needs to
+		 * force MAP_SYNC disable.
+		 */
+		if (p->disable_map_sync)
+			clear_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 		p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
 	}
 	if (!p->region) {
@@ -365,7 +373,7 @@ err:	nvdimm_bus_unregister(p->bus);
 
 static int papr_scm_probe(struct platform_device *pdev)
 {
-	struct device_node *dn = pdev->dev.of_node;
+	struct device_node *dn;
 	u32 drc_index, metadata_size;
 	u64 blocks, block_size;
 	struct papr_scm_priv *p;
@@ -373,6 +381,10 @@ static int papr_scm_probe(struct platform_device *pdev)
 	u64 uuid[2];
 	int rc;
 
+	dn = dev_of_node(&pdev->dev);
+	if (!dn)
+		return -ENXIO;
+
 	/* check we have all the required DT properties */
 	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
 		dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
@@ -402,6 +414,9 @@ static int papr_scm_probe(struct platform_device *pdev)
 	/* optional DT properties */
 	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
 
+	if (of_device_is_compatible(dn, "ibm,pmemory-v2"))
+		p->disable_map_sync = true;
+
 	p->dn = dn;
 	p->drc_index = drc_index;
 	p->block_size = block_size;
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 6826a274a1f1..a6cc3488e552 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -59,12 +59,19 @@ static int of_pmem_region_probe(struct platform_device *pdev)
 		ndr_desc.res = &pdev->resource[i];
 		ndr_desc.of_node = np;
 		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+		set_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 
 		if (is_volatile)
 			region = nvdimm_volatile_region_create(bus, &ndr_desc);
 		else {
 			set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
+			/*
+			 * for a persistent region, check for newer device
+			 */
+			if (of_device_is_compatible(np, "pmem-region-v2"))
+				clear_bit(ND_REGION_SYNC_ENABLED, &ndr_desc.flags);
 			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+
 		}
 
 		if (!region)
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC PATCH v2 5/5] libnvdimm: Add prctl control for disabling synchronous fault support
  2020-06-02  7:49 [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support Aneesh Kumar K.V
                   ` (2 preceding siblings ...)
  2020-06-02  7:49 ` [RFC PATCH v2 4/5] powerpc/papr_scm: disable MAP_SYNC for newer hardware Aneesh Kumar K.V
@ 2020-06-02  7:49 ` Aneesh Kumar K.V
  3 siblings, 0 replies; 5+ messages in thread
From: Aneesh Kumar K.V @ 2020-06-02  7:49 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V

With POWER10, architecture is adding new pmem flush and sync instructions.
The kernel should prevent the usage of MAP_SYNC if applications are not using
the new instructions on newer hardware.

This patch adds a prctl option MAP_SYNC_ENABLE that can be used to enable
the usage of MAP_SYNC. This is in addition to the namespace specific control
already added (/sys/bus/nd/devices/region0/pfn0.1/block/pmem0/dax/sync_fault)

With this patch, if the device supports synchronous fault, then an application
can enable the synchronous fault support using the prctl() interface even if
the platform disabled it for the namespace.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/linux/dax.h            |  5 +++--
 include/linux/sched/coredump.h | 13 ++++++++++---
 include/uapi/linux/prctl.h     |  3 +++
 kernel/fork.c                  |  8 +++++++-
 kernel/sys.c                   | 18 ++++++++++++++++++
 5 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/linux/dax.h b/include/linux/dax.h
index c4a3551557de..0733aae23828 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -80,9 +80,10 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 	if (!IS_DAX(file_inode(vma->vm_file)))
 		return false;
 	/*
-	 * check MAP_SYNC is disabled by platform for this device.
+	 * MAP_SYNC is disabled by platform for this device.
+	 * check for prctl.
 	 */
-	if (!dax_synchronous_enabled(dax_dev))
+	if (!dax_synchronous_enabled(dax_dev) && !map_sync_enabled(vma->vm_mm))
 		return false;
 
 	return dax_synchronous(dax_dev);
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ecdc6542070f..35698adc3d13 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -72,9 +72,16 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_DISABLE_THP		24	/* disable THP for all VMAs */
 #define MMF_OOM_VICTIM		25	/* mm is the oom victim */
 #define MMF_OOM_REAP_QUEUED	26	/* mm was queued for oom_reaper */
-#define MMF_DISABLE_THP_MASK	(1 << MMF_DISABLE_THP)
+#define MMF_ENABLE_MAP_SYNC	27	/* disable THP for all VMAs */
+#define MMF_DISABLE_THP_MASK		(1 << MMF_DISABLE_THP)
+#define MMF_ENABLE_MAP_SYNC_MASK	(1 << MMF_ENABLE_MAP_SYNC)
 
-#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
-				 MMF_DISABLE_THP_MASK)
+#define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK | \
+			MMF_DISABLE_THP_MASK | MMF_ENABLE_MAP_SYNC_MASK)
+
+static inline bool map_sync_enabled(struct mm_struct *mm)
+{
+	return !!(mm->flags & MMF_ENABLE_MAP_SYNC_MASK);
+}
 
 #endif /* _LINUX_SCHED_COREDUMP_H */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..ee4cde32d5cf 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,4 +238,7 @@ struct prctl_mm_map {
 #define PR_SET_IO_FLUSHER		57
 #define PR_GET_IO_FLUSHER		58
 
+#define PR_SET_MAP_SYNC_ENABLE		59
+#define PR_GET_MAP_SYNC_ENABLE		60
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..d50cac15ef41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -963,6 +963,12 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
 
 static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
 
+#ifndef CONFIG_ARCH_MAP_SYNC_DISABLE
+unsigned long default_map_sync_mask = MMF_ENABLE_MAP_SYNC_MASK;
+#else
+unsigned long default_map_sync_mask = 0;
+#endif
+
 static int __init coredump_filter_setup(char *s)
 {
 	default_dump_filter =
@@ -1039,7 +1045,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 		mm->flags = current->mm->flags & MMF_INIT_MASK;
 		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
 	} else {
-		mm->flags = default_dump_filter;
+		mm->flags = default_dump_filter | default_map_sync_mask;
 		mm->def_flags = 0;
 	}
 
diff --git a/kernel/sys.c b/kernel/sys.c
index d325f3ab624a..5011912831b0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2450,6 +2450,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			clear_bit(MMF_DISABLE_THP, &me->mm->flags);
 		up_write(&me->mm->mmap_sem);
 		break;
+
+	case PR_GET_MAP_SYNC_ENABLE:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = !!test_bit(MMF_ENABLE_MAP_SYNC, &me->mm->flags);
+		break;
+	case PR_SET_MAP_SYNC_ENABLE:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		if (down_write_killable(&me->mm->mmap_sem))
+			return -EINTR;
+		if (arg2)
+			set_bit(MMF_ENABLE_MAP_SYNC, &me->mm->flags);
+		else
+			clear_bit(MMF_ENABLE_MAP_SYNC, &me->mm->flags);
+		up_write(&me->mm->mmap_sem);
+		break;
+
 	case PR_MPX_ENABLE_MANAGEMENT:
 	case PR_MPX_DISABLE_MANAGEMENT:
 		/* No longer implemented: */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-06-02  7:59 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-02  7:49 [RFC PATCH v2 1/5] libnvdimm/dax: Add a dax flag to control synchronous fault support Aneesh Kumar K.V
2020-06-02  7:49 ` [RFC PATCH v2 2/5] powerpc/pmem: Disable synchronous fault by default Aneesh Kumar K.V
2020-06-02  7:49 ` [RFC PATCH v2 3/5] libnvdimm/dax: Make DAXDEV_SYNC_ENABLED flag region-specific Aneesh Kumar K.V
2020-06-02  7:49 ` [RFC PATCH v2 4/5] powerpc/papr_scm: disable MAP_SYNC for newer hardware Aneesh Kumar K.V
2020-06-02  7:49 ` [RFC PATCH v2 5/5] libnvdimm: Add prctl control for disabling synchronous fault support Aneesh Kumar K.V

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).