nvdimm.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] libnvdimm: use dax_write_cache* helpers
@ 2018-06-05 20:58 Ross Zwisler
  2018-06-05 20:58 ` [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches Ross Zwisler
  0 siblings, 1 reply; 5+ messages in thread
From: Ross Zwisler @ 2018-06-05 20:58 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-nvdimm

Use dax_write_cache() and dax_write_cache_enabled() instead of open coding
the bit operations.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 drivers/dax/super.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 2b2332b605e4..c2c46f96b18c 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -182,8 +182,7 @@ static ssize_t write_cache_show(struct device *dev,
 	if (!dax_dev)
 		return -ENXIO;
 
-	rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
-				&dax_dev->flags));
+	rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
 	put_dax(dax_dev);
 	return rc;
 }
@@ -201,10 +200,8 @@ static ssize_t write_cache_store(struct device *dev,
 
 	if (rc)
 		len = rc;
-	else if (write_cache)
-		set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
 	else
-		clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+		dax_write_cache(dax_dev, write_cache);
 
 	put_dax(dax_dev);
 	return len;
@@ -286,7 +283,7 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
 void arch_wb_cache_pmem(void *addr, size_t size);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
 {
-	if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)))
+	if (unlikely(!dax_write_cache_enabled(dax_dev)))
 		return;
 
 	arch_wb_cache_pmem(addr, size);
-- 
2.14.4

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches
  2018-06-05 20:58 [PATCH 1/2] libnvdimm: use dax_write_cache* helpers Ross Zwisler
@ 2018-06-05 20:58 ` Ross Zwisler
  2018-06-05 21:20   ` Dan Williams
  0 siblings, 1 reply; 5+ messages in thread
From: Ross Zwisler @ 2018-06-05 20:58 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-nvdimm

This commit:

5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")

intended to make sure that deep flush was always available even on
platforms which support a power-fail protected CPU cache.  An unintended
side effect of this change was that we also lost the ability to skip
flushing CPU caches on those power-fail protected CPU cache.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Fixes: 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
---
 drivers/dax/super.c   | 20 +++++++++++++++++++-
 drivers/nvdimm/pmem.c |  2 ++
 include/linux/dax.h   |  9 +++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index c2c46f96b18c..457e0bb6c936 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -152,6 +152,8 @@ enum dax_device_flags {
 	DAXDEV_ALIVE,
 	/* gate whether dax_flush() calls the low level flush routine */
 	DAXDEV_WRITE_CACHE,
+	/* only flush the CPU caches if they are not power fail protected */
+	DAXDEV_FLUSH_ON_SYNC,
 };
 
 /**
@@ -283,7 +285,8 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
 void arch_wb_cache_pmem(void *addr, size_t size);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
 {
-	if (unlikely(!dax_write_cache_enabled(dax_dev)))
+	if (unlikely(!dax_write_cache_enabled(dax_dev)) ||
+			!dax_flush_on_sync_enabled(dax_dev))
 		return;
 
 	arch_wb_cache_pmem(addr, size);
@@ -310,6 +313,21 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
 
+void dax_flush_on_sync(struct dax_device *dax_dev, bool flush)
+{
+	if (flush)
+		set_bit(DAXDEV_FLUSH_ON_SYNC, &dax_dev->flags);
+	else
+		clear_bit(DAXDEV_FLUSH_ON_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(dax_flush_on_sync);
+
+bool dax_flush_on_sync_enabled(struct dax_device *dax_dev)
+{
+	return test_bit(DAXDEV_FLUSH_ON_SYNC, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(dax_flush_on_sync_enabled);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 9d714926ecf5..faeb2deae7f0 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -414,6 +414,8 @@ static int pmem_attach_disk(struct device *dev,
 		return -ENOMEM;
 	}
 	dax_write_cache(dax_dev, wbc);
+	dax_flush_on_sync(dax_dev,
+			!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags));
 	pmem->dax_dev = dax_dev;
 
 	gendev = disk_to_dev(disk);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index f9eb22ad341e..1e6086405a1a 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -32,6 +32,8 @@ void put_dax(struct dax_device *dax_dev);
 void kill_dax(struct dax_device *dax_dev);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
 bool dax_write_cache_enabled(struct dax_device *dax_dev);
+void dax_flush_on_sync(struct dax_device *dax_dev, bool flush);
+bool dax_flush_on_sync_enabled(struct dax_device *dax_dev);
 #else
 static inline struct dax_device *dax_get_by_host(const char *host)
 {
@@ -59,6 +61,13 @@ static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
 {
 	return false;
 }
+static inline void dax_flush_on_sync(struct dax_device *dax_dev, bool flush)
+{
+}
+static inline bool dax_flush_on_sync_enabled(struct dax_device *dax_dev)
+{
+	return false;
+}
 #endif
 
 struct writeback_control;
-- 
2.14.4

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches
  2018-06-05 20:58 ` [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches Ross Zwisler
@ 2018-06-05 21:20   ` Dan Williams
  2018-06-05 21:59     ` Ross Zwisler
  0 siblings, 1 reply; 5+ messages in thread
From: Dan Williams @ 2018-06-05 21:20 UTC (permalink / raw)
  To: Ross Zwisler; +Cc: Linux Kernel Mailing List, linux-nvdimm

On Tue, Jun 5, 2018 at 1:58 PM, Ross Zwisler
<ross.zwisler@linux.intel.com> wrote:
> This commit:
>
> 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
>
> intended to make sure that deep flush was always available even on
> platforms which support a power-fail protected CPU cache.  An unintended
> side effect of this change was that we also lost the ability to skip
> flushing CPU caches on those power-fail protected CPU cache.
>
> Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> Fixes: 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
> ---
>  drivers/dax/super.c   | 20 +++++++++++++++++++-
>  drivers/nvdimm/pmem.c |  2 ++
>  include/linux/dax.h   |  9 +++++++++
>  3 files changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index c2c46f96b18c..457e0bb6c936 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -152,6 +152,8 @@ enum dax_device_flags {
>         DAXDEV_ALIVE,
>         /* gate whether dax_flush() calls the low level flush routine */
>         DAXDEV_WRITE_CACHE,
> +       /* only flush the CPU caches if they are not power fail protected */
> +       DAXDEV_FLUSH_ON_SYNC,
>  };
>
>  /**
> @@ -283,7 +285,8 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
>  void arch_wb_cache_pmem(void *addr, size_t size);
>  void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
>  {
> -       if (unlikely(!dax_write_cache_enabled(dax_dev)))
> +       if (unlikely(!dax_write_cache_enabled(dax_dev)) ||
> +                       !dax_flush_on_sync_enabled(dax_dev))

This seems backwards. I think we should teach the pmem driver to still
issue deep flush even when dax_write_cache_enabled() is false.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches
  2018-06-05 21:20   ` Dan Williams
@ 2018-06-05 21:59     ` Ross Zwisler
  2018-06-05 22:12       ` Dan Williams
  0 siblings, 1 reply; 5+ messages in thread
From: Ross Zwisler @ 2018-06-05 21:59 UTC (permalink / raw)
  To: Dan Williams; +Cc: Linux Kernel Mailing List, linux-nvdimm

On Tue, Jun 05, 2018 at 02:20:38PM -0700, Dan Williams wrote:
> On Tue, Jun 5, 2018 at 1:58 PM, Ross Zwisler
> <ross.zwisler@linux.intel.com> wrote:
> > This commit:
> >
> > 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
> >
> > intended to make sure that deep flush was always available even on
> > platforms which support a power-fail protected CPU cache.  An unintended
> > side effect of this change was that we also lost the ability to skip
> > flushing CPU caches on those power-fail protected CPU cache.
> >
> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > Fixes: 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
> > ---
> >  drivers/dax/super.c   | 20 +++++++++++++++++++-
> >  drivers/nvdimm/pmem.c |  2 ++
> >  include/linux/dax.h   |  9 +++++++++
> >  3 files changed, 30 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> > index c2c46f96b18c..457e0bb6c936 100644
> > --- a/drivers/dax/super.c
> > +++ b/drivers/dax/super.c
> > @@ -152,6 +152,8 @@ enum dax_device_flags {
> >         DAXDEV_ALIVE,
> >         /* gate whether dax_flush() calls the low level flush routine */
> >         DAXDEV_WRITE_CACHE,
> > +       /* only flush the CPU caches if they are not power fail protected */
> > +       DAXDEV_FLUSH_ON_SYNC,
> >  };
> >
> >  /**
> > @@ -283,7 +285,8 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
> >  void arch_wb_cache_pmem(void *addr, size_t size);
> >  void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
> >  {
> > -       if (unlikely(!dax_write_cache_enabled(dax_dev)))
> > +       if (unlikely(!dax_write_cache_enabled(dax_dev)) ||
> > +                       !dax_flush_on_sync_enabled(dax_dev))
> 
> This seems backwards. I think we should teach the pmem driver to still
> issue deep flush even when dax_write_cache_enabled() is false.

That does still happen.  Deep flush is essentially controlled by the 'wbc'
variable in pmem_attach_disk(), which we use to set blk_queue_write_cache().
My understanding is that this causes the block layer to send down
REQ_FUA/REQ_PREFLUSH BIOs, and it's in response to these that we do a deep
flush via nvdimm_flush().  Whether this happens is totally up to the device's
write cache setting, and doesn't look at whether the platform has
flush-on-fail CPU caches.

This does bring up another wrinkle, though: we export a write_cache sysfs
entry that you can use to change the write cache setting of a namespace:

i.e.:
	/sys/bus/nd/devices/pfn0.1/block/pmem0/dax/write_cache

This changes whether or not the DAXDEV_WRITE_CACHE flag is set, but does *not*
change whether the block queue says it supports a write cache
(blk_queue_write_cache()).  So, the sysfs entry ends up controlling whether or
not we do CPU cache flushing via DAX, but does not do anything with the deep
flush code.

I'm guessing this should be fixed?  I'll go take a look...
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches
  2018-06-05 21:59     ` Ross Zwisler
@ 2018-06-05 22:12       ` Dan Williams
  0 siblings, 0 replies; 5+ messages in thread
From: Dan Williams @ 2018-06-05 22:12 UTC (permalink / raw)
  To: Ross Zwisler; +Cc: Linux Kernel Mailing List, linux-nvdimm

On Tue, Jun 5, 2018 at 2:59 PM, Ross Zwisler
<ross.zwisler@linux.intel.com> wrote:
> On Tue, Jun 05, 2018 at 02:20:38PM -0700, Dan Williams wrote:
>> On Tue, Jun 5, 2018 at 1:58 PM, Ross Zwisler
>> <ross.zwisler@linux.intel.com> wrote:
>> > This commit:
>> >
>> > 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
>> >
>> > intended to make sure that deep flush was always available even on
>> > platforms which support a power-fail protected CPU cache.  An unintended
>> > side effect of this change was that we also lost the ability to skip
>> > flushing CPU caches on those power-fail protected CPU cache.
>> >
>> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
>> > Fixes: 5fdf8e5ba566 ("libnvdimm: re-enable deep flush for pmem devices via fsync()")
>> > ---
>> >  drivers/dax/super.c   | 20 +++++++++++++++++++-
>> >  drivers/nvdimm/pmem.c |  2 ++
>> >  include/linux/dax.h   |  9 +++++++++
>> >  3 files changed, 30 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/drivers/dax/super.c b/drivers/dax/super.c
>> > index c2c46f96b18c..457e0bb6c936 100644
>> > --- a/drivers/dax/super.c
>> > +++ b/drivers/dax/super.c
>> > @@ -152,6 +152,8 @@ enum dax_device_flags {
>> >         DAXDEV_ALIVE,
>> >         /* gate whether dax_flush() calls the low level flush routine */
>> >         DAXDEV_WRITE_CACHE,
>> > +       /* only flush the CPU caches if they are not power fail protected */
>> > +       DAXDEV_FLUSH_ON_SYNC,
>> >  };
>> >
>> >  /**
>> > @@ -283,7 +285,8 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter);
>> >  void arch_wb_cache_pmem(void *addr, size_t size);
>> >  void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
>> >  {
>> > -       if (unlikely(!dax_write_cache_enabled(dax_dev)))
>> > +       if (unlikely(!dax_write_cache_enabled(dax_dev)) ||
>> > +                       !dax_flush_on_sync_enabled(dax_dev))
>>
>> This seems backwards. I think we should teach the pmem driver to still
>> issue deep flush even when dax_write_cache_enabled() is false.
>
> That does still happen.  Deep flush is essentially controlled by the 'wbc'
> variable in pmem_attach_disk(), which we use to set blk_queue_write_cache().

Right, what I'm trying to kill is the need to add
dax_flush_on_sync_enabled() I think we can handle this local to the
pmem driver and not extend the 'dax' api.

> My understanding is that this causes the block layer to send down
> REQ_FUA/REQ_PREFLUSH BIOs, and it's in response to these that we do a deep
> flush via nvdimm_flush().  Whether this happens is totally up to the device's
> write cache setting, and doesn't look at whether the platform has
> flush-on-fail CPU caches.
>
> This does bring up another wrinkle, though: we export a write_cache sysfs
> entry that you can use to change the write cache setting of a namespace:
>
> i.e.:
>         /sys/bus/nd/devices/pfn0.1/block/pmem0/dax/write_cache
>
> This changes whether or not the DAXDEV_WRITE_CACHE flag is set, but does *not*
> change whether the block queue says it supports a write cache
> (blk_queue_write_cache()).  So, the sysfs entry ends up controlling whether or
> not we do CPU cache flushing via DAX, but does not do anything with the deep
> flush code.
>
> I'm guessing this should be fixed?  I'll go take a look...

I think we need to disconnect DAXDEV_WRITE_CACHE from the indication
of the filesystem triggerring nvdimm_flush() via REQ_{FUA,FLUSH}.
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2018-06-05 22:12 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-05 20:58 [PATCH 1/2] libnvdimm: use dax_write_cache* helpers Ross Zwisler
2018-06-05 20:58 ` [PATCH 2/2] libnvdimm: don't flush power-fail protected CPU caches Ross Zwisler
2018-06-05 21:20   ` Dan Williams
2018-06-05 21:59     ` Ross Zwisler
2018-06-05 22:12       ` Dan Williams

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).