* [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-03 20:00 [RFC PATCH 0/5][V2] dax,pmem: Provide a dax operation to zero range of memory Vivek Goyal
@ 2020-02-03 20:00 ` Vivek Goyal
2020-02-05 18:30 ` Christoph Hellwig
2020-02-03 20:00 ` [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver Vivek Goyal
` (3 subsequent siblings)
4 siblings, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-03 20:00 UTC (permalink / raw)
To: linux-fsdevel, linux-nvdimm, dan.j.williams, hch; +Cc: dm-devel
Add a dax operation zero_page_range, to zero a range of memory. This will
also clear any poison in the range being zeroed.
As of now, zeroing of up to one page is allowed in a single call. There
are no callers which are trying to zero more than a page in a single call.
Once we grow the callers which zero more than a page in a single call, we
can add that support. The primary reason for not doing that yet is that this
will add a little complexity to the dm implementation, where a range might
span multiple underlying targets and one will have to split the range
into multiple sub-ranges and call zero_page_range() on individual targets.
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
drivers/dax/super.c | 20 +++++++++++++++++
drivers/nvdimm/pmem.c | 50 +++++++++++++++++++++++++++++++++++++++++++
fs/dax.c | 15 +++++++++++++
include/linux/dax.h | 6 ++++++
4 files changed, 91 insertions(+)
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 26a654dbc69a..371744256fe5 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -344,6 +344,26 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned offset, size_t len)
+{
+ if (!dax_alive(dax_dev))
+ return -ENXIO;
+
+ if (!dax_dev->ops->zero_page_range)
+ return -EOPNOTSUPP;
+
+ /*
+ * There are no users as of now. Once users are there, fix dm code
+ * to be able to split a long range across targets.
+ */
+ if (offset + len > PAGE_SIZE)
+ return -EIO;
+
+ return dax_dev->ops->zero_page_range(dax_dev, pgoff, offset, len);
+}
+EXPORT_SYMBOL_GPL(dax_zero_page_range);
+
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ad8e4df1282b..8739244a72a4 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -268,6 +268,55 @@ static const struct block_device_operations pmem_fops = {
.revalidate_disk = nvdimm_revalidate_disk,
};
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned int offset, size_t len)
+{
+ int rc = 0;
+ phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
+ struct pmem_device *pmem = dax_get_private(dax_dev);
+ struct page *page = ZERO_PAGE(0);
+ unsigned bytes, nr_sectors = 0;
+ sector_t sector_start, sector_end;
+ bool bad_pmem = false;
+ phys_addr_t pmem_off = phys_pos + pmem->data_offset;
+ void *pmem_addr = pmem->virt_addr + pmem_off;
+
+ bytes = min_t(size_t, PAGE_SIZE - offset_in_page(phys_pos),
+ len);
+ /*
+ * As of now zeroing only with-in a page is supported. This can be
+ * changed once there are users of zeroing across multiple pages
+ */
+ if (WARN_ON(len > bytes))
+ return -EIO;
+
+ sector_start = ALIGN(phys_pos, 512)/512;
+ sector_end = ALIGN_DOWN(phys_pos + bytes, 512)/512;
+ if (sector_end > sector_start)
+ nr_sectors = sector_end - sector_start;
+
+ if (nr_sectors &&
+ unlikely(is_bad_pmem(&pmem->bb, sector_start,
+ nr_sectors * 512)))
+ bad_pmem = true;
+
+ write_pmem(pmem_addr, page, 0, bytes);
+ if (unlikely(bad_pmem)) {
+ /*
+ * Pass block aligned offset and length. That seems
+ * to work as of now. Other finer grained alignment
+ * cases can be addressed later if need be.
+ */
+ rc = pmem_clear_poison(pmem, ALIGN(pmem_off, 512),
+ nr_sectors * 512);
+ write_pmem(pmem_addr, page, 0, bytes);
+ }
+ if (rc > 0)
+ return -EIO;
+
+ return 0;
+}
+
static long pmem_dax_direct_access(struct dax_device *dax_dev,
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
{
@@ -299,6 +348,7 @@ static const struct dax_operations pmem_dax_ops = {
.dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
+ .zero_page_range = pmem_dax_zero_page_range,
};
static const struct attribute_group *pmem_attribute_groups[] = {
diff --git a/fs/dax.c b/fs/dax.c
index 1f1f0201cad1..35631a4d0295 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1057,6 +1057,21 @@ static bool dax_range_is_aligned(struct block_device *bdev,
return true;
}
+int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned int offset, size_t len)
+{
+ long rc;
+ void *kaddr;
+
+ rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+ if (rc < 0)
+ return rc;
+ memset(kaddr + offset, 0, len);
+ dax_flush(dax_dev, kaddr + offset, len);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(generic_dax_zero_page_range);
+
int __dax_zero_page_range(struct block_device *bdev,
struct dax_device *dax_dev, sector_t sector,
unsigned int offset, unsigned int size)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9bd8528bd305..3356b874c55d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -34,6 +34,8 @@ struct dax_operations {
/* copy_to_iter: required operation for fs-dax direct-i/o */
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
+ /* zero_page_range: required operation for fs-dax direct-i/o */
+ int (*zero_page_range)(struct dax_device *, pgoff_t, unsigned, size_t);
};
extern struct attribute_group dax_attribute_group;
@@ -209,6 +211,10 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned offset, size_t len);
+int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned int offset, size_t len);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
--
2.18.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-03 20:00 ` [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range Vivek Goyal
@ 2020-02-05 18:30 ` Christoph Hellwig
2020-02-05 20:02 ` Vivek Goyal
0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-02-05 18:30 UTC (permalink / raw)
To: Vivek Goyal; +Cc: linux-fsdevel, linux-nvdimm, hch, dm-devel
> + /*
> + * There are no users as of now. Once users are there, fix dm code
> + * to be able to split a long range across targets.
> + */
This comment confused me. I think this wants to say something like:
/*
* There are no callers that want to zero across a page boundary as of
* now. Once there are users this check can be removed after the
* device mapper code has been updated to split ranges across targets.
*/
> +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> + unsigned int offset, size_t len)
> +{
> + int rc = 0;
> + phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
Any reason not to pass a phys_addr_t in the calling convention for the
method and maybe also for dax_zero_page_range itself?
> + sector_start = ALIGN(phys_pos, 512)/512;
> + sector_end = ALIGN_DOWN(phys_pos + bytes, 512)/512;
Missing whitespaces. Also this could use DIV_ROUND_UP and
DIV_ROUND_DOWN.
> + if (sector_end > sector_start)
> + nr_sectors = sector_end - sector_start;
> +
> + if (nr_sectors &&
> + unlikely(is_bad_pmem(&pmem->bb, sector_start,
> + nr_sectors * 512)))
> + bad_pmem = true;
How could nr_sectors be zero?
> + write_pmem(pmem_addr, page, 0, bytes);
> + if (unlikely(bad_pmem)) {
> + /*
> + * Pass block aligned offset and length. That seems
> + * to work as of now. Other finer grained alignment
> + * cases can be addressed later if need be.
> + */
> + rc = pmem_clear_poison(pmem, ALIGN(pmem_off, 512),
> + nr_sectors * 512);
> + write_pmem(pmem_addr, page, 0, bytes);
> + }
This code largely duplicates the write side of pmem_do_bvec. I
think it might make sense to split pmem_do_bvec into a read and a write
side as a prep patch, and then reuse the write side here.
> +int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> + unsigned int offset, size_t len);
This should probably go into a separate area of the header and have a
comment about being a section for generic helpers for drivers.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-05 18:30 ` Christoph Hellwig
@ 2020-02-05 20:02 ` Vivek Goyal
2020-02-06 0:40 ` Dan Williams
0 siblings, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-05 20:02 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, dm-devel
On Wed, Feb 05, 2020 at 10:30:50AM -0800, Christoph Hellwig wrote:
> > + /*
> > + * There are no users as of now. Once users are there, fix dm code
> > + * to be able to split a long range across targets.
> > + */
>
> This comment confused me. I think this wants to say something like:
>
> /*
> * There are now callers that want to zero across a page boundary as of
> * now. Once there are users this check can be removed after the
> * device mapper code has been updated to split ranges across targets.
> */
Yes, that's what I wanted to say but I missed one line. Thanks. Will fix
it.
>
> > +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> > + unsigned int offset, size_t len)
> > +{
> > + int rc = 0;
> > + phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
>
> Any reason not to pass a phys_addr_t in the calling convention for the
> method and maybe also for dax_zero_page_range itself?
I don't have any reason not to pass phys_addr_t. If that sounds better,
will make changes.
>
> > + sector_start = ALIGN(phys_pos, 512)/512;
> > + sector_end = ALIGN_DOWN(phys_pos + bytes, 512)/512;
>
> Missing whitespaces. Also this could use DIV_ROUND_UP and
> DIV_ROUND_DOWN.
Will do.
>
> > + if (sector_end > sector_start)
> > + nr_sectors = sector_end - sector_start;
> > +
> > + if (nr_sectors &&
> > + unlikely(is_bad_pmem(&pmem->bb, sector_start,
> > + nr_sectors * 512)))
> > + bad_pmem = true;
>
> How could nr_sectors be zero?
If somebody specified a range across two sectors but none of the sectors is
completely written, then nr_sectors will be zero. In fact this check
should probably be nr_sectors > 0, as writes within a sector will lead
to nr_sectors being -1.
Am I missing something?
>
> > + write_pmem(pmem_addr, page, 0, bytes);
> > + if (unlikely(bad_pmem)) {
> > + /*
> > + * Pass block aligned offset and length. That seems
> > + * to work as of now. Other finer grained alignment
> > + * cases can be addressed later if need be.
> > + */
> > + rc = pmem_clear_poison(pmem, ALIGN(pmem_off, 512),
> > + nr_sectors * 512);
> > + write_pmem(pmem_addr, page, 0, bytes);
> > + }
>
> This code largerly duplicates the write side of pmem_do_bvec. I
> think it might make sense to split pmem_do_bvec into a read and a write
> side as a prep patch, and then reuse the write side here.
Ok, I will look into it. How about just add a helper function for write
side and use that function both here and in pmem_do_bvec().
>
> > +int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> > + unsigned int offset, size_t len);
>
> This should probably go into a separare are of the header and have
> comment about being a section for generic helpers for drivers.
ok, will do.
Thanks
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-05 20:02 ` Vivek Goyal
@ 2020-02-06 0:40 ` Dan Williams
2020-02-06 7:41 ` Christoph Hellwig
2020-02-06 14:34 ` Vivek Goyal
0 siblings, 2 replies; 25+ messages in thread
From: Dan Williams @ 2020-02-06 0:40 UTC (permalink / raw)
To: Vivek Goyal
Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm,
device-mapper development
On Wed, Feb 5, 2020 at 12:03 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Wed, Feb 05, 2020 at 10:30:50AM -0800, Christoph Hellwig wrote:
> > > + /*
> > > + * There are no users as of now. Once users are there, fix dm code
> > > + * to be able to split a long range across targets.
> > > + */
> >
> > This comment confused me. I think this wants to say something like:
> >
> > /*
> > * There are now callers that want to zero across a page boundary as of
> > * now. Once there are users this check can be removed after the
> > * device mapper code has been updated to split ranges across targets.
> > */
>
> Yes, that's what I wanted to say but I missed one line. Thanks. Will fix
> it.
>
> >
> > > +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> > > + unsigned int offset, size_t len)
> > > +{
> > > + int rc = 0;
> > > + phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
> >
> > Any reason not to pass a phys_addr_t in the calling convention for the
> > method and maybe also for dax_zero_page_range itself?
>
> I don't have any reason not to pass phys_addr_t. If that sounds better,
> will make changes.
The problem is device-mapper. That wants to use offset to route
through the map to the leaf device. If it weren't for the firmware
communication requirement you could do:
dax_direct_access(...)
generic_dax_zero_page_range(...)
...but as long as the firmware error clearing path is required I think
we need to pass the pgoff through the interface and do the pgoff to
virt / phys translation inside the ops handler.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-06 0:40 ` Dan Williams
@ 2020-02-06 7:41 ` Christoph Hellwig
2020-02-07 16:57 ` Dan Williams
2020-02-06 14:34 ` Vivek Goyal
1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-02-06 7:41 UTC (permalink / raw)
To: Dan Williams
Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm,
device-mapper development
On Wed, Feb 05, 2020 at 04:40:44PM -0800, Dan Williams wrote:
> > I don't have any reason not to pass phys_addr_t. If that sounds better,
> > will make changes.
>
> The problem is device-mapper. That wants to use offset to route
> through the map to the leaf device. If it weren't for the firmware
> communication requirement you could do:
>
> dax_direct_access(...)
> generic_dax_zero_page_range(...)
>
> ...but as long as the firmware error clearing path is required I think
> we need to do pass the pgoff through the interface and do the pgoff to
> virt / phys translation inside the ops handler.
Maybe phys_addr_t was the wrong type - but why do we split the offset
into the block device argument into a pgoff and offset into page instead
of a single 64-bit value?
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-06 7:41 ` Christoph Hellwig
@ 2020-02-07 16:57 ` Dan Williams
2020-02-07 17:01 ` Vivek Goyal
0 siblings, 1 reply; 25+ messages in thread
From: Dan Williams @ 2020-02-07 16:57 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, device-mapper development
On Wed, Feb 5, 2020 at 11:41 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Wed, Feb 05, 2020 at 04:40:44PM -0800, Dan Williams wrote:
> > > I don't have any reason not to pass phys_addr_t. If that sounds better,
> > > will make changes.
> >
> > The problem is device-mapper. That wants to use offset to route
> > through the map to the leaf device. If it weren't for the firmware
> > communication requirement you could do:
> >
> > dax_direct_access(...)
> > generic_dax_zero_page_range(...)
> >
> > ...but as long as the firmware error clearing path is required I think
> > we need to do pass the pgoff through the interface and do the pgoff to
> > virt / phys translation inside the ops handler.
>
> Maybe phys_addr_t was the wrong type - but why do we split the offset
> into the block device argument into a pgoff and offset into page instead
> of a single 64-bit value?
Oh, got it yes, that looks odd for sub-page zeroing. Yes, let's just
have one device relative byte-offset.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-07 16:57 ` Dan Williams
@ 2020-02-07 17:01 ` Vivek Goyal
2020-02-07 17:06 ` Dan Williams
0 siblings, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-07 17:01 UTC (permalink / raw)
To: Dan Williams
Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm,
device-mapper development
On Fri, Feb 07, 2020 at 08:57:39AM -0800, Dan Williams wrote:
> On Wed, Feb 5, 2020 at 11:41 PM Christoph Hellwig <hch@infradead.org> wrote:
> >
> > On Wed, Feb 05, 2020 at 04:40:44PM -0800, Dan Williams wrote:
> > > > I don't have any reason not to pass phys_addr_t. If that sounds better,
> > > > will make changes.
> > >
> > > The problem is device-mapper. That wants to use offset to route
> > > through the map to the leaf device. If it weren't for the firmware
> > > communication requirement you could do:
> > >
> > > dax_direct_access(...)
> > > generic_dax_zero_page_range(...)
> > >
> > > ...but as long as the firmware error clearing path is required I think
> > > we need to do pass the pgoff through the interface and do the pgoff to
> > > virt / phys translation inside the ops handler.
> >
> > Maybe phys_addr_t was the wrong type - but why do we split the offset
> > into the block device argument into a pgoff and offset into page instead
> > of a single 64-bit value?
>
> Oh, got it yes, that looks odd for sub-page zeroing. Yes, let's just
> have one device relative byte-offset.
So what's the best type to represent this offset. "u64" or "phys_addr_t"
or "loff_t" or something else. I like phys_addr_t followed by u64.
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-07 17:01 ` Vivek Goyal
@ 2020-02-07 17:06 ` Dan Williams
0 siblings, 0 replies; 25+ messages in thread
From: Dan Williams @ 2020-02-07 17:06 UTC (permalink / raw)
To: Vivek Goyal
Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm,
device-mapper development
On Fri, Feb 7, 2020 at 9:02 AM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Fri, Feb 07, 2020 at 08:57:39AM -0800, Dan Williams wrote:
> > On Wed, Feb 5, 2020 at 11:41 PM Christoph Hellwig <hch@infradead.org> wrote:
> > >
> > > On Wed, Feb 05, 2020 at 04:40:44PM -0800, Dan Williams wrote:
> > > > > I don't have any reason not to pass phys_addr_t. If that sounds better,
> > > > > will make changes.
> > > >
> > > > The problem is device-mapper. That wants to use offset to route
> > > > through the map to the leaf device. If it weren't for the firmware
> > > > communication requirement you could do:
> > > >
> > > > dax_direct_access(...)
> > > > generic_dax_zero_page_range(...)
> > > >
> > > > ...but as long as the firmware error clearing path is required I think
> > > > we need to do pass the pgoff through the interface and do the pgoff to
> > > > virt / phys translation inside the ops handler.
> > >
> > > Maybe phys_addr_t was the wrong type - but why do we split the offset
> > > into the block device argument into a pgoff and offset into page instead
> > > of a single 64-bit value?
> >
> > Oh, got it yes, that looks odd for sub-page zeroing. Yes, let's just
> > have one device relative byte-offset.
>
> So what's the best type to represent this offset. "u64" or "phys_addr_t"
> or "loff_t" or something else. I like phys_addr_t followed by u64.
Let's make it u64.
phys_addr_t has already led to confusion in this thread because the
first question I ask when I read it is "why call ->direct_access() to
do the translation when you already have the physical address?".
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-06 0:40 ` Dan Williams
2020-02-06 7:41 ` Christoph Hellwig
@ 2020-02-06 14:34 ` Vivek Goyal
2020-02-07 16:58 ` Dan Williams
1 sibling, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-06 14:34 UTC (permalink / raw)
To: Dan Williams
Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm,
device-mapper development
On Wed, Feb 05, 2020 at 04:40:44PM -0800, Dan Williams wrote:
> On Wed, Feb 5, 2020 at 12:03 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> >
> > On Wed, Feb 05, 2020 at 10:30:50AM -0800, Christoph Hellwig wrote:
> > > > + /*
> > > > + * There are no users as of now. Once users are there, fix dm code
> > > > + * to be able to split a long range across targets.
> > > > + */
> > >
> > > This comment confused me. I think this wants to say something like:
> > >
> > > /*
> > > * There are now callers that want to zero across a page boundary as of
> > > * now. Once there are users this check can be removed after the
> > > * device mapper code has been updated to split ranges across targets.
> > > */
> >
> > Yes, that's what I wanted to say but I missed one line. Thanks. Will fix
> > it.
> >
> > >
> > > > +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> > > > + unsigned int offset, size_t len)
> > > > +{
> > > > + int rc = 0;
> > > > + phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
> > >
> > > Any reason not to pass a phys_addr_t in the calling convention for the
> > > method and maybe also for dax_zero_page_range itself?
> >
> > I don't have any reason not to pass phys_addr_t. If that sounds better,
> > will make changes.
>
> The problem is device-mapper. That wants to use offset to route
> through the map to the leaf device. If it weren't for the firmware
> communication requirement you could do:
>
> dax_direct_access(...)
> generic_dax_zero_page_range(...)
>
> ...but as long as the firmware error clearing path is required I think
> we need to do pass the pgoff through the interface and do the pgoff to
> virt / phys translation inside the ops handler.
Hi Dan,
Drivers can easily convert offset into dax device (say phys_addr_t) to
pgoff and offset into page, isn't it?
pgoff_t = phys_addr_t/PAGE_SIZE;
offset = phys_addr_t & (PAGE_SIZE - 1);
And pgoff can easily be converted into sectors which dm/md can use for
mapping and come up with pgoff in target device.
Anyway, I am fine with anything.
Thanks
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
2020-02-06 14:34 ` Vivek Goyal
@ 2020-02-07 16:58 ` Dan Williams
0 siblings, 0 replies; 25+ messages in thread
From: Dan Williams @ 2020-02-07 16:58 UTC (permalink / raw)
To: Vivek Goyal
Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm,
device-mapper development
On Thu, Feb 6, 2020 at 6:35 AM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Wed, Feb 05, 2020 at 04:40:44PM -0800, Dan Williams wrote:
> > On Wed, Feb 5, 2020 at 12:03 PM Vivek Goyal <vgoyal@redhat.com> wrote:
> > >
> > > On Wed, Feb 05, 2020 at 10:30:50AM -0800, Christoph Hellwig wrote:
> > > > > + /*
> > > > > + * There are no users as of now. Once users are there, fix dm code
> > > > > + * to be able to split a long range across targets.
> > > > > + */
> > > >
> > > > This comment confused me. I think this wants to say something like:
> > > >
> > > > /*
> > > > * There are now callers that want to zero across a page boundary as of
> > > > * now. Once there are users this check can be removed after the
> > > > * device mapper code has been updated to split ranges across targets.
> > > > */
> > >
> > > Yes, that's what I wanted to say but I missed one line. Thanks. Will fix
> > > it.
> > >
> > > >
> > > > > +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> > > > > + unsigned int offset, size_t len)
> > > > > +{
> > > > > + int rc = 0;
> > > > > + phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
> > > >
> > > > Any reason not to pass a phys_addr_t in the calling convention for the
> > > > method and maybe also for dax_zero_page_range itself?
> > >
> > > I don't have any reason not to pass phys_addr_t. If that sounds better,
> > > will make changes.
> >
> > The problem is device-mapper. That wants to use offset to route
> > through the map to the leaf device. If it weren't for the firmware
> > communication requirement you could do:
> >
> > dax_direct_access(...)
> > generic_dax_zero_page_range(...)
> >
> > ...but as long as the firmware error clearing path is required I think
> > we need to do pass the pgoff through the interface and do the pgoff to
> > virt / phys translation inside the ops handler.
>
> Hi Dan,
>
> Drivers can easily convert offset into dax device (say phys_addr_t) to
> pgoff and offset into page, isn't it?
It's not a phys_addr_t it's a 64-bit device relative offset.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver
2020-02-03 20:00 [RFC PATCH 0/5][V2] dax,pmem: Provide a dax operation to zero range of memory Vivek Goyal
2020-02-03 20:00 ` [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range Vivek Goyal
@ 2020-02-03 20:00 ` Vivek Goyal
2020-02-05 18:32 ` Christoph Hellwig
2020-02-03 20:00 ` [PATCH 3/5] dm,dax: Add dax zero_page_range operation Vivek Goyal
` (2 subsequent siblings)
4 siblings, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-03 20:00 UTC (permalink / raw)
To: linux-fsdevel, linux-nvdimm, dan.j.williams, hch; +Cc: dm-devel
Add dax operation zero_page_range. This just calls generic helper
generic_dax_zero_page_range().
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
drivers/s390/block/dcssblk.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 63502ca537eb..f6709200bcd0 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -62,6 +62,7 @@ static const struct dax_operations dcssblk_dax_ops = {
.dax_supported = generic_fsdax_supported,
.copy_from_iter = dcssblk_dax_copy_from_iter,
.copy_to_iter = dcssblk_dax_copy_to_iter,
+ .zero_page_range = dcssblk_dax_zero_page_range,
};
struct dcssblk_dev_info {
@@ -941,6 +942,12 @@ dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return __dcssblk_direct_access(dev_info, pgoff, nr_pages, kaddr, pfn);
}
+static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,pgoff_t pgoff,
+ unsigned offset, size_t len)
+{
+ return generic_dax_zero_page_range(dax_dev, pgoff, offset, len);
+}
+
static void
dcssblk_check_params(void)
{
--
2.18.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver
2020-02-03 20:00 ` [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver Vivek Goyal
@ 2020-02-05 18:32 ` Christoph Hellwig
2020-02-05 20:04 ` Vivek Goyal
0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-02-05 18:32 UTC (permalink / raw)
To: Vivek Goyal; +Cc: linux-fsdevel, linux-nvdimm, hch, dm-devel
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index 63502ca537eb..f6709200bcd0 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -62,6 +62,7 @@ static const struct dax_operations dcssblk_dax_ops = {
> .dax_supported = generic_fsdax_supported,
> .copy_from_iter = dcssblk_dax_copy_from_iter,
> .copy_to_iter = dcssblk_dax_copy_to_iter,
> + .zero_page_range = dcssblk_dax_zero_page_range,
> };
>
> struct dcssblk_dev_info {
> @@ -941,6 +942,12 @@ dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
> return __dcssblk_direct_access(dev_info, pgoff, nr_pages, kaddr, pfn);
> }
>
> +static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,pgoff_t pgoff,
> + unsigned offset, size_t len)
> +{
> + return generic_dax_zero_page_range(dax_dev, pgoff, offset, len);
> +}
Wouldn't this need a forward declaration? Then again given that dcssblk
is the only caller of generic_dax_zero_page_range we might as well merge
the two. If you want to keep the generic one it could be wired up to
dcssblk_dax_ops directly, though.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver
2020-02-05 18:32 ` Christoph Hellwig
@ 2020-02-05 20:04 ` Vivek Goyal
0 siblings, 0 replies; 25+ messages in thread
From: Vivek Goyal @ 2020-02-05 20:04 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, dm-devel
On Wed, Feb 05, 2020 at 10:32:05AM -0800, Christoph Hellwig wrote:
> > diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> > index 63502ca537eb..f6709200bcd0 100644
> > --- a/drivers/s390/block/dcssblk.c
> > +++ b/drivers/s390/block/dcssblk.c
> > @@ -62,6 +62,7 @@ static const struct dax_operations dcssblk_dax_ops = {
> > .dax_supported = generic_fsdax_supported,
> > .copy_from_iter = dcssblk_dax_copy_from_iter,
> > .copy_to_iter = dcssblk_dax_copy_to_iter,
> > + .zero_page_range = dcssblk_dax_zero_page_range,
> > };
> >
> > struct dcssblk_dev_info {
> > @@ -941,6 +942,12 @@ dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
> > return __dcssblk_direct_access(dev_info, pgoff, nr_pages, kaddr, pfn);
> > }
> >
> > +static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,pgoff_t pgoff,
> > + unsigned offset, size_t len)
> > +{
> > + return generic_dax_zero_page_range(dax_dev, pgoff, offset, len);
> > +}
>
> Wouldn't this need a forward declaration? Then again given that dcssblk
> is the only caller of generic_dax_zero_page_range we might as well merge
> the two. If you want to keep the generic one it could be wired up to
> dcssblk_dax_ops directly, though.
Given dcssblk is the only user, I am inclined to get rid of the generic
version. We can add one later if another user shows up.
Thanks
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 3/5] dm,dax: Add dax zero_page_range operation
2020-02-03 20:00 [RFC PATCH 0/5][V2] dax,pmem: Provide a dax operation to zero range of memory Vivek Goyal
2020-02-03 20:00 ` [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range Vivek Goyal
2020-02-03 20:00 ` [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver Vivek Goyal
@ 2020-02-03 20:00 ` Vivek Goyal
2020-02-05 18:33 ` Christoph Hellwig
2020-02-03 20:00 ` [PATCH 4/5] dax,iomap: Start using dax native zero_page_range() Vivek Goyal
2020-02-03 20:00 ` [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range Vivek Goyal
4 siblings, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-03 20:00 UTC (permalink / raw)
To: linux-fsdevel, linux-nvdimm, dan.j.williams, hch; +Cc: dm-devel
This patch adds support for dax zero_page_range operation to dm targets.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
drivers/md/dm-linear.c | 18 ++++++++++++++++++
drivers/md/dm-log-writes.c | 17 +++++++++++++++++
drivers/md/dm-stripe.c | 23 +++++++++++++++++++++++
drivers/md/dm.c | 30 ++++++++++++++++++++++++++++++
include/linux/device-mapper.h | 3 +++
5 files changed, 91 insertions(+)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 8d07fdf63a47..a6db998f0264 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -201,10 +201,27 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
+static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+ unsigned offset, size_t len)
+{
+ int ret;
+ struct linear_c *lc = ti->private;
+ struct block_device *bdev = lc->dev->bdev;
+ struct dax_device *dax_dev = lc->dev->dax_dev;
+ sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+
+ dev_sector = linear_map_sector(ti, sector);
+ ret = bdev_dax_pgoff(bdev, dev_sector, ALIGN(len, PAGE_SIZE), &pgoff);
+ if (ret)
+ return ret;
+ return dax_zero_page_range(dax_dev, pgoff, offset, len);
+}
+
#else
#define linear_dax_direct_access NULL
#define linear_dax_copy_from_iter NULL
#define linear_dax_copy_to_iter NULL
+#define linear_dax_zero_page_range NULL
#endif
static struct target_type linear_target = {
@@ -226,6 +243,7 @@ static struct target_type linear_target = {
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_copy_to_iter = linear_dax_copy_to_iter,
+ .dax_zero_page_range = linear_dax_zero_page_range,
};
int __init dm_linear_init(void)
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 99721c76225d..be20605f7544 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -994,10 +994,26 @@ static size_t log_writes_dax_copy_to_iter(struct dm_target *ti,
return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
}
+static int log_writes_dax_zero_page_range(struct dm_target *ti,
+ pgoff_t pgoff, unsigned offset,
+ size_t len)
+{
+ int ret;
+ struct log_writes_c *lc = ti->private;
+ sector_t sector = pgoff * PAGE_SECTORS;
+
+ ret = bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(len, PAGE_SIZE),
+ &pgoff);
+ if (ret)
+ return ret;
+ return dax_zero_page_range(lc->dev->dax_dev, pgoff, offset, len);
+}
+
#else
#define log_writes_dax_direct_access NULL
#define log_writes_dax_copy_from_iter NULL
#define log_writes_dax_copy_to_iter NULL
+#define log_writes_dax_zero_page_range NULL
#endif
static struct target_type log_writes_target = {
@@ -1016,6 +1032,7 @@ static struct target_type log_writes_target = {
.direct_access = log_writes_dax_direct_access,
.dax_copy_from_iter = log_writes_dax_copy_from_iter,
.dax_copy_to_iter = log_writes_dax_copy_to_iter,
+ .dax_zero_page_range = log_writes_dax_zero_page_range,
};
static int __init dm_log_writes_init(void)
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 63bbcc20f49a..8ad3c956efbf 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -360,10 +360,32 @@ static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
}
+static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
+ unsigned offset, size_t len)
+{
+ int ret;
+ sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
+ struct stripe_c *sc = ti->private;
+ struct dax_device *dax_dev;
+ struct block_device *bdev;
+ uint32_t stripe;
+
+ stripe_map_sector(sc, sector, &stripe, &dev_sector);
+ dev_sector += sc->stripe[stripe].physical_start;
+ dax_dev = sc->stripe[stripe].dev->dax_dev;
+ bdev = sc->stripe[stripe].dev->bdev;
+
+ ret = bdev_dax_pgoff(bdev, dev_sector, ALIGN(len, PAGE_SIZE), &pgoff);
+ if (ret)
+ return ret;
+ return dax_zero_page_range(dax_dev, pgoff, offset, len);
+}
+
#else
#define stripe_dax_direct_access NULL
#define stripe_dax_copy_from_iter NULL
#define stripe_dax_copy_to_iter NULL
+#define stripe_dax_zero_page_range NULL
#endif
/*
@@ -486,6 +508,7 @@ static struct target_type stripe_target = {
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_copy_to_iter = stripe_dax_copy_to_iter,
+ .dax_zero_page_range = stripe_dax_zero_page_range,
};
int __init dm_stripe_init(void)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index e8f9661a10a1..4605d30dad60 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1198,6 +1198,35 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
return ret;
}
+static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ unsigned offset, size_t len)
+{
+ struct mapped_device *md = dax_get_private(dax_dev);
+ sector_t sector = pgoff * PAGE_SECTORS;
+ struct dm_target *ti;
+ int ret = -EIO;
+ int srcu_idx;
+
+ ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+ if (!ti)
+ goto out;
+ if (WARN_ON(!ti->type->dax_zero_page_range)) {
+ /*
+ * ->zero_page_range() is mandatory dax operation. If we are
+ * here, something is wrong.
+ */
+ dm_put_live_table(md, srcu_idx);
+ goto out;
+ }
+ ret = ti->type->dax_zero_page_range(ti, pgoff, offset, len);
+
+ out:
+ dm_put_live_table(md, srcu_idx);
+
+ return ret;
+}
+
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
@@ -3194,6 +3223,7 @@ static const struct dax_operations dm_dax_ops = {
.dax_supported = dm_dax_supported,
.copy_from_iter = dm_dax_copy_from_iter,
.copy_to_iter = dm_dax_copy_to_iter,
+ .zero_page_range = dm_dax_zero_page_range,
};
/*
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 475668c69dbc..04009accf819 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -141,6 +141,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff,
long nr_pages, void **kaddr, pfn_t *pfn);
typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i);
+typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff,
+ unsigned, size_t len);
#define PAGE_SECTORS (PAGE_SIZE / 512)
void dm_error(const char *message);
@@ -195,6 +197,7 @@ struct target_type {
dm_dax_direct_access_fn direct_access;
dm_dax_copy_iter_fn dax_copy_from_iter;
dm_dax_copy_iter_fn dax_copy_to_iter;
+ dm_dax_zero_page_range_fn dax_zero_page_range;
/* For internal device-mapper use. */
struct list_head list;
--
2.18.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 3/5] dm,dax: Add dax zero_page_range operation
2020-02-03 20:00 ` [PATCH 3/5] dm,dax: Add dax zero_page_range operation Vivek Goyal
@ 2020-02-05 18:33 ` Christoph Hellwig
2020-02-07 16:34 ` Vivek Goyal
0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-02-05 18:33 UTC (permalink / raw)
To: Vivek Goyal; +Cc: linux-fsdevel, linux-nvdimm, hch, dm-devel
On Mon, Feb 03, 2020 at 03:00:27PM -0500, Vivek Goyal wrote:
> This patch adds support for dax zero_page_range operation to dm targets.
Any way to share the code with the dax copy iter here?
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 3/5] dm,dax: Add dax zero_page_range operation
2020-02-05 18:33 ` Christoph Hellwig
@ 2020-02-07 16:34 ` Vivek Goyal
0 siblings, 0 replies; 25+ messages in thread
From: Vivek Goyal @ 2020-02-07 16:34 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, dm-devel
On Wed, Feb 05, 2020 at 10:33:04AM -0800, Christoph Hellwig wrote:
> On Mon, Feb 03, 2020 at 03:00:27PM -0500, Vivek Goyal wrote:
> > This patch adds support for dax zero_page_range operation to dm targets.
>
> Any way to share the code with the dax copy iter here?
Had a look at it and can't think of a good way of sharing the code. If
you have something specific in mind, I am happy to make changes.
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 4/5] dax,iomap: Start using dax native zero_page_range()
2020-02-03 20:00 [RFC PATCH 0/5][V2] dax,pmem: Provide a dax operation to zero range of memory Vivek Goyal
` (2 preceding siblings ...)
2020-02-03 20:00 ` [PATCH 3/5] dm,dax: Add dax zero_page_range operation Vivek Goyal
@ 2020-02-03 20:00 ` Vivek Goyal
2020-02-05 18:33 ` Christoph Hellwig
2020-02-03 20:00 ` [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range Vivek Goyal
4 siblings, 1 reply; 25+ messages in thread
From: Vivek Goyal @ 2020-02-03 20:00 UTC (permalink / raw)
To: linux-fsdevel, linux-nvdimm, dan.j.williams, hch; +Cc: dm-devel
Get rid of calling block device interface for zeroing in iomap dax
zeroing path and use dax native zeroing interface instead.
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
fs/dax.c | 45 +++++++++------------------------------------
1 file changed, 9 insertions(+), 36 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 35631a4d0295..1b9ba6b59cdb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1044,19 +1044,6 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
return ret;
}
-static bool dax_range_is_aligned(struct block_device *bdev,
- unsigned int offset, unsigned int length)
-{
- unsigned short sector_size = bdev_logical_block_size(bdev);
-
- if (!IS_ALIGNED(offset, sector_size))
- return false;
- if (!IS_ALIGNED(length, sector_size))
- return false;
-
- return true;
-}
-
int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
unsigned int offset, size_t len)
{
@@ -1076,31 +1063,17 @@ int __dax_zero_page_range(struct block_device *bdev,
struct dax_device *dax_dev, sector_t sector,
unsigned int offset, unsigned int size)
{
- if (dax_range_is_aligned(bdev, offset, size)) {
- sector_t start_sector = sector + (offset >> 9);
-
- return blkdev_issue_zeroout(bdev, start_sector,
- size >> 9, GFP_NOFS, 0);
- } else {
- pgoff_t pgoff;
- long rc, id;
- void *kaddr;
+ pgoff_t pgoff;
+ long rc, id;
- rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
- if (rc)
- return rc;
+ rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
+ if (rc)
+ return rc;
- id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
- if (rc < 0) {
- dax_read_unlock(id);
- return rc;
- }
- memset(kaddr + offset, 0, size);
- dax_flush(dax_dev, kaddr + offset, size);
- dax_read_unlock(id);
- }
- return 0;
+ id = dax_read_lock();
+ rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
+ dax_read_unlock(id);
+ return rc;
}
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
--
2.18.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 4/5] dax,iomap: Start using dax native zero_page_range()
2020-02-03 20:00 ` [PATCH 4/5] dax,iomap: Start using dax native zero_page_range() Vivek Goyal
@ 2020-02-05 18:33 ` Christoph Hellwig
2020-02-05 20:10 ` Vivek Goyal
2020-02-07 15:31 ` Vivek Goyal
0 siblings, 2 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-02-05 18:33 UTC (permalink / raw)
To: Vivek Goyal; +Cc: linux-fsdevel, linux-nvdimm, hch, dm-devel
On Mon, Feb 03, 2020 at 03:00:28PM -0500, Vivek Goyal wrote:
> + id = dax_read_lock();
> + rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
> + dax_read_unlock(id);
> + return rc;
Is there a good reason not to move the locking into dax_zero_page_range?
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 4/5] dax,iomap: Start using dax native zero_page_range()
2020-02-05 18:33 ` Christoph Hellwig
@ 2020-02-05 20:10 ` Vivek Goyal
2020-02-07 15:31 ` Vivek Goyal
1 sibling, 0 replies; 25+ messages in thread
From: Vivek Goyal @ 2020-02-05 20:10 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, dm-devel
On Wed, Feb 05, 2020 at 10:33:56AM -0800, Christoph Hellwig wrote:
> On Mon, Feb 03, 2020 at 03:00:28PM -0500, Vivek Goyal wrote:
> > + id = dax_read_lock();
> > + rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
> > + dax_read_unlock(id);
> > + return rc;
>
> Is there a good reason not to move the locking into dax_zero_page_range?
No reason. I can move locking inside dax_zero_page_range(). Will do.
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 4/5] dax,iomap: Start using dax native zero_page_range()
2020-02-05 18:33 ` Christoph Hellwig
2020-02-05 20:10 ` Vivek Goyal
@ 2020-02-07 15:31 ` Vivek Goyal
1 sibling, 0 replies; 25+ messages in thread
From: Vivek Goyal @ 2020-02-07 15:31 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, dm-devel
On Wed, Feb 05, 2020 at 10:33:56AM -0800, Christoph Hellwig wrote:
> On Mon, Feb 03, 2020 at 03:00:28PM -0500, Vivek Goyal wrote:
> > + id = dax_read_lock();
> > + rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
> > + dax_read_unlock(id);
> > + return rc;
>
> Is there a good reason not to move the locking into dax_zero_page_range?
Thinking more about it. If we keep locking outside, then we don't have
to take lock again when we recurse into dax_zero_page_range() in device
mapper path. IIUC, just taking lock once at top level is enough. If that's
the case then it probably is better to keep locking outside of
dax_zero_page_range().
Thanks
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range
2020-02-03 20:00 [RFC PATCH 0/5][V2] dax,pmem: Provide a dax operation to zero range of memory Vivek Goyal
` (3 preceding siblings ...)
2020-02-03 20:00 ` [PATCH 4/5] dax,iomap: Start using dax native zero_page_range() Vivek Goyal
@ 2020-02-03 20:00 ` Vivek Goyal
2020-02-04 5:17 ` kbuild test robot
2020-02-05 18:36 ` Christoph Hellwig
4 siblings, 2 replies; 25+ messages in thread
From: Vivek Goyal @ 2020-02-03 20:00 UTC (permalink / raw)
To: linux-fsdevel, linux-nvdimm, dan.j.williams, hch; +Cc: dm-devel
Add a helper dax_iomap_zero() to zero a range. This patch basically
merges __dax_zero_page_range() and iomap_dax_zero().
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
fs/dax.c | 12 ++++++------
fs/iomap/buffered-io.c | 9 +--------
include/linux/dax.h | 11 +++++------
3 files changed, 12 insertions(+), 20 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 1b9ba6b59cdb..63303e274221 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1059,23 +1059,23 @@ int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
}
EXPORT_SYMBOL_GPL(generic_dax_zero_page_range);
-int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int size)
+int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
+ struct iomap *iomap)
{
pgoff_t pgoff;
long rc, id;
+ sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
- rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
+ rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
if (rc)
return rc;
id = dax_read_lock();
- rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
+ rc = dax_zero_page_range(iomap->dax_dev, pgoff, offset, size);
dax_read_unlock(id);
return rc;
}
-EXPORT_SYMBOL_GPL(__dax_zero_page_range);
+EXPORT_SYMBOL_GPL(dax_iomap_zero);
static loff_t
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 828444e14d09..5a5d784a110e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -974,13 +974,6 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
}
-static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
- struct iomap *iomap)
-{
- return __dax_zero_page_range(iomap->bdev, iomap->dax_dev,
- iomap_sector(iomap, pos & PAGE_MASK), offset, bytes);
-}
-
static loff_t
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
void *data, struct iomap *iomap, struct iomap *srcmap)
@@ -1000,7 +993,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
bytes = min_t(loff_t, PAGE_SIZE - offset, count);
if (IS_DAX(inode))
- status = iomap_dax_zero(pos, offset, bytes, iomap);
+ status = dax_iomap_zero(pos, offset, bytes, iomap);
else
status = iomap_zero(inode, pos, offset, bytes, iomap,
srcmap);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 3356b874c55d..ffaaa12f8ca9 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -13,6 +13,7 @@
typedef unsigned long dax_entry_t;
struct iomap_ops;
+struct iomap;
struct dax_device;
struct dax_operations {
/*
@@ -228,13 +229,11 @@ int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index);
#ifdef CONFIG_FS_DAX
-int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int length);
+int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
+ struct iomap *iomap);
#else
-static inline int __dax_zero_page_range(struct block_device *bdev,
- struct dax_device *dax_dev, sector_t sector,
- unsigned int offset, unsigned int length)
+static inline int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
+ struct iomap *iomap)
{
return -ENXIO;
}
--
2.18.1
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range
2020-02-03 20:00 ` [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range Vivek Goyal
@ 2020-02-04 5:17 ` kbuild test robot
2020-02-05 18:36 ` Christoph Hellwig
1 sibling, 0 replies; 25+ messages in thread
From: kbuild test robot @ 2020-02-04 5:17 UTC (permalink / raw)
To: Vivek Goyal; +Cc: kbuild-all, linux-fsdevel, linux-nvdimm, hch, dm-devel
Hi Vivek,
I love your patch! Yet something to improve:
[auto build test ERROR on dm/for-next]
[also build test ERROR on s390/features xfs-linux/for-next linus/master linux-nvdimm/libnvdimm-for-next v5.5 next-20200203]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]
url: https://github.com/0day-ci/linux/commits/Vivek-Goyal/dax-pmem-Provide-a-dax-operation-to-zero-range-of-memory/20200204-082750
base: https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next
config: s390-alldefconfig (attached as .config)
compiler: s390-linux-gcc (GCC) 7.5.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.5.0 make.cross ARCH=s390
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
>> drivers/s390/block/dcssblk.c:65:21: error: 'dcssblk_dax_zero_page_range' undeclared here (not in a function); did you mean 'generic_dax_zero_page_range'?
.zero_page_range = dcssblk_dax_zero_page_range,
^~~~~~~~~~~~~~~~~~~~~~~~~~~
generic_dax_zero_page_range
drivers/s390/block/dcssblk.c:945:12: warning: 'dcssblk_dax_zero_page_range' defined but not used [-Wunused-function]
static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,pgoff_t pgoff,
^~~~~~~~~~~~~~~~~~~~~~~~~~~
vim +65 drivers/s390/block/dcssblk.c
b3a9a0c36e1f7b Dan Williams 2018-05-02 59
7a2765f6e82063 Dan Williams 2017-01-26 60 static const struct dax_operations dcssblk_dax_ops = {
7a2765f6e82063 Dan Williams 2017-01-26 61 .direct_access = dcssblk_dax_direct_access,
7bf7eac8d64805 Dan Williams 2019-05-16 62 .dax_supported = generic_fsdax_supported,
5d61e43b3975c0 Dan Williams 2017-06-27 63 .copy_from_iter = dcssblk_dax_copy_from_iter,
b3a9a0c36e1f7b Dan Williams 2018-05-02 64 .copy_to_iter = dcssblk_dax_copy_to_iter,
c5cb636194a0d8 Vivek Goyal 2020-02-03 @65 .zero_page_range = dcssblk_dax_zero_page_range,
^1da177e4c3f41 Linus Torvalds 2005-04-16 66 };
^1da177e4c3f41 Linus Torvalds 2005-04-16 67
:::::: The code at line 65 was first introduced by commit
:::::: c5cb636194a0d8d33d549903c92189385db48406 s390,dax: Add dax zero_page_range operation to dcssblk driver
:::::: TO: Vivek Goyal <vgoyal@redhat.com>
:::::: CC: 0day robot <lkp@intel.com>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range
2020-02-03 20:00 ` [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range Vivek Goyal
2020-02-04 5:17 ` kbuild test robot
@ 2020-02-05 18:36 ` Christoph Hellwig
2020-02-05 20:15 ` Vivek Goyal
1 sibling, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-02-05 18:36 UTC (permalink / raw)
To: Vivek Goyal; +Cc: linux-fsdevel, linux-nvdimm, hch, dm-devel
> +int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
> + struct iomap *iomap)
> {
> pgoff_t pgoff;
> long rc, id;
> + sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
>
> - rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
> + rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
> if (rc)
> return rc;
>
> id = dax_read_lock();
> - rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
> + rc = dax_zero_page_range(iomap->dax_dev, pgoff, offset, size);
> dax_read_unlock(id);
> return rc;
> }
> -EXPORT_SYMBOL_GPL(__dax_zero_page_range);
> +EXPORT_SYMBOL_GPL(dax_iomap_zero);
This function is only used by fs/iomap/buffered-io.c, so no need to
export it.
> #ifdef CONFIG_FS_DAX
> -int __dax_zero_page_range(struct block_device *bdev,
> - struct dax_device *dax_dev, sector_t sector,
> - unsigned int offset, unsigned int length);
> +int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
> + struct iomap *iomap);
> #else
> -static inline int __dax_zero_page_range(struct block_device *bdev,
> - struct dax_device *dax_dev, sector_t sector,
> - unsigned int offset, unsigned int length)
> +static inline int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
> + struct iomap *iomap)
> {
> return -ENXIO;
> }
Given that the only caller is under an IS_DAX() check you could just
declare the function unconditionally and let the compiler optimize
away the guaranteed dead call for the !CONFIG_FS_DAX case, like we
do with various other functions.
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range
2020-02-05 18:36 ` Christoph Hellwig
@ 2020-02-05 20:15 ` Vivek Goyal
0 siblings, 0 replies; 25+ messages in thread
From: Vivek Goyal @ 2020-02-05 20:15 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-fsdevel, linux-nvdimm, dm-devel
On Wed, Feb 05, 2020 at 10:36:09AM -0800, Christoph Hellwig wrote:
> > +int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
> > + struct iomap *iomap)
> > {
> > pgoff_t pgoff;
> > long rc, id;
> > + sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
> >
> > - rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
> > + rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
> > if (rc)
> > return rc;
> >
> > id = dax_read_lock();
> > - rc = dax_zero_page_range(dax_dev, pgoff, offset, size);
> > + rc = dax_zero_page_range(iomap->dax_dev, pgoff, offset, size);
> > dax_read_unlock(id);
> > return rc;
> > }
> > -EXPORT_SYMBOL_GPL(__dax_zero_page_range);
> > +EXPORT_SYMBOL_GPL(dax_iomap_zero);
>
> This function is only used by fs/iomap/buffered-io.c, so no need to
> export it.
Will do.
>
> > #ifdef CONFIG_FS_DAX
> > -int __dax_zero_page_range(struct block_device *bdev,
> > - struct dax_device *dax_dev, sector_t sector,
> > - unsigned int offset, unsigned int length);
> > +int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
> > + struct iomap *iomap);
> > #else
> > -static inline int __dax_zero_page_range(struct block_device *bdev,
> > - struct dax_device *dax_dev, sector_t sector,
> > - unsigned int offset, unsigned int length)
> > +static inline int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
> > + struct iomap *iomap)
> > {
> > return -ENXIO;
> > }
>
> Given that the only caller is under an IS_DAX() check you could just
> declare the function unconditionally and let the compiler optimize
> away the guaranteed dead call for the !CONFIG_FS_DAX case, like we
> do with various other functions.
Sure, will do.
Vivek
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org
^ permalink raw reply [flat|nested] 25+ messages in thread