From: Vivek Goyal <vgoyal@redhat.com>
To: linux-fsdevel@vger.kernel.org, linux-nvdimm@lists.01.org,
	dan.j.williams@intel.com, hch@infradead.org
Cc: vgoyal@redhat.com, vishal.l.verma@intel.com, dm-devel@redhat.com
Subject: [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range
Date: Mon,  3 Feb 2020 15:00:25 -0500
Message-ID: <20200203200029.4592-2-vgoyal@redhat.com>
In-Reply-To: <20200203200029.4592-1-vgoyal@redhat.com>

Add a dax operation zero_page_range, to zero a range of memory. This will
also clear any poison in the range being zeroed.

For now, a single call may zero at most one page; there are no callers
that need to zero more than a page at a time.

Support for multi-page ranges can be added once such callers appear. The
primary reason for not doing so yet is that it would add some complexity
to the dm implementation, where a range might span multiple underlying
targets and would have to be split into sub-ranges, with
zero_page_range() called on each target individually.
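
To illustrate the intended calling convention, here is a minimal sketch
of a filesystem-side caller. The fs_zero_range() wrapper is hypothetical
and exists only for illustration; bdev_dax_pgoff() is the existing
helper that maps a sector to a page offset into the dax device:

/*
 * Hypothetical caller: zero a sub-page range of a dax device. The
 * range must not cross a page boundary (offset + len <= PAGE_SIZE),
 * matching the restriction enforced by dax_zero_page_range().
 */
static int fs_zero_range(struct block_device *bdev,
			 struct dax_device *dax_dev, sector_t sector,
			 unsigned int offset, unsigned int len)
{
	pgoff_t pgoff;
	int rc;

	rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
	if (rc)
		return rc;

	return dax_zero_page_range(dax_dev, pgoff, offset, len);
}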

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 drivers/dax/super.c   | 20 +++++++++++++++++
 drivers/nvdimm/pmem.c | 50 +++++++++++++++++++++++++++++++++++++++++++
 fs/dax.c              | 15 +++++++++++++
 include/linux/dax.h   |  6 ++++++
 4 files changed, 91 insertions(+)
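
A note on the sector arithmetic in pmem_dax_zero_page_range() below:
is_bad_pmem() and pmem_clear_poison() operate on 512-byte sectors, so
the implementation only considers sectors that are fully covered by the
zeroed range. The following standalone snippet (not part of the patch;
ALIGN_UP/ALIGN_DOWN re-implement the kernel macros for userspace) works
through the math for an example range:

#include <stdio.h>

#define SECTOR_SIZE	512
#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define ALIGN_DOWN(x, a)	((x) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long phys_pos = 300, bytes = 1000;	/* range [300, 1300) */
	unsigned long sector_start, sector_end, nr_sectors = 0;

	sector_start = ALIGN_UP(phys_pos, SECTOR_SIZE) / SECTOR_SIZE;
	sector_end = ALIGN_DOWN(phys_pos + bytes, SECTOR_SIZE) / SECTOR_SIZE;
	if (sector_end > sector_start)
		nr_sectors = sector_end - sector_start;

	/*
	 * Prints "sectors [1, 2) -> 1 full sector(s)": only sector 1
	 * ([512, 1024)) lies entirely inside [300, 1300), so only that
	 * sector is checked for (and cleared of) poison; the partial
	 * head and tail are simply rewritten with zeroes.
	 */
	printf("sectors [%lu, %lu) -> %lu full sector(s)\n",
	       sector_start, sector_end, nr_sectors);
	return 0;
}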

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 26a654dbc69a..371744256fe5 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -344,6 +344,26 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 }
 EXPORT_SYMBOL_GPL(dax_copy_to_iter);
 
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+			unsigned offset, size_t len)
+{
+	if (!dax_alive(dax_dev))
+		return -ENXIO;
+
+	if (!dax_dev->ops->zero_page_range)
+		return -EOPNOTSUPP;
+
+	/*
+	 * There are no multi-page users as of now. Once they appear, fix
+	 * the dm code to be able to split a long range across targets.
+	 */
+	if (offset + len > PAGE_SIZE)
+		return -EIO;
+
+	return dax_dev->ops->zero_page_range(dax_dev, pgoff, offset, len);
+}
+EXPORT_SYMBOL_GPL(dax_zero_page_range);
+
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 void arch_wb_cache_pmem(void *addr, size_t size);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ad8e4df1282b..8739244a72a4 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -268,6 +268,55 @@ static const struct block_device_operations pmem_fops = {
 	.revalidate_disk =	nvdimm_revalidate_disk,
 };
 
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+				    unsigned int offset, size_t len)
+{
+	int rc = 0;
+	phys_addr_t phys_pos = pgoff * PAGE_SIZE + offset;
+	struct pmem_device *pmem = dax_get_private(dax_dev);
+	struct page *page = ZERO_PAGE(0);
+	unsigned bytes, nr_sectors = 0;
+	sector_t sector_start, sector_end;
+	bool bad_pmem = false;
+	phys_addr_t pmem_off = phys_pos + pmem->data_offset;
+	void *pmem_addr = pmem->virt_addr + pmem_off;
+
+	bytes = min_t(size_t, PAGE_SIZE - offset_in_page(phys_pos),
+		      len);
+	/*
+	 * As of now, zeroing only within a page is supported. This can be
+	 * changed once there are users zeroing across multiple pages.
+	 */
+	if (WARN_ON(len > bytes))
+		return -EIO;
+
+	sector_start = ALIGN(phys_pos, 512) / 512;
+	sector_end = ALIGN_DOWN(phys_pos + bytes, 512) / 512;
+	if (sector_end > sector_start)
+		nr_sectors = sector_end - sector_start;
+
+	if (nr_sectors &&
+	    unlikely(is_bad_pmem(&pmem->bb, sector_start,
+				 nr_sectors * 512)))
+		bad_pmem = true;
+
+	write_pmem(pmem_addr, page, 0, bytes);
+	if (unlikely(bad_pmem)) {
+		/*
+		 * Pass block aligned offset and length. That seems
+		 * to work as of now. Other finer grained alignment
+		 * cases can be addressed later if need be.
+		 */
+		rc = pmem_clear_poison(pmem, ALIGN(pmem_off, 512),
+				       nr_sectors * 512);
+		write_pmem(pmem_addr, page, 0, bytes);
+	}
+	if (rc > 0)
+		return -EIO;
+
+	return 0;
+}
+
 static long pmem_dax_direct_access(struct dax_device *dax_dev,
 		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -299,6 +348,7 @@ static const struct dax_operations pmem_dax_ops = {
 	.dax_supported = generic_fsdax_supported,
 	.copy_from_iter = pmem_copy_from_iter,
 	.copy_to_iter = pmem_copy_to_iter,
+	.zero_page_range = pmem_dax_zero_page_range,
 };
 
 static const struct attribute_group *pmem_attribute_groups[] = {
diff --git a/fs/dax.c b/fs/dax.c
index 1f1f0201cad1..35631a4d0295 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1057,6 +1057,21 @@ static bool dax_range_is_aligned(struct block_device *bdev,
 	return true;
 }
 
+int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+				unsigned int offset, size_t len)
+{
+	long rc;
+	void *kaddr;
+
+	rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+	if (rc < 0)
+		return rc;
+	memset(kaddr + offset, 0, len);
+	dax_flush(dax_dev, kaddr + offset, len);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(generic_dax_zero_page_range);
+
 int __dax_zero_page_range(struct block_device *bdev,
 		struct dax_device *dax_dev, sector_t sector,
 		unsigned int offset, unsigned int size)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9bd8528bd305..3356b874c55d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -34,6 +34,8 @@ struct dax_operations {
 	/* copy_to_iter: required operation for fs-dax direct-i/o */
 	size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
 			struct iov_iter *);
+	/* zero_page_range: required operation for fs-dax direct-i/o */
+	int (*zero_page_range)(struct dax_device *, pgoff_t, unsigned, size_t);
 };
 
 extern struct attribute_group dax_attribute_group;
@@ -209,6 +211,10 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+			unsigned offset, size_t len);
+int generic_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+				 unsigned int offset, size_t len);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
-- 
2.18.1

Thread overview: 52+ messages
2020-02-03 20:00 [RFC PATCH 0/5][V2] dax,pmem: Provide a dax operation to zero range of memory Vivek Goyal
2020-02-03 20:00 ` [PATCH 1/5] dax, pmem: Add a dax operation zero_page_range Vivek Goyal [this message]
2020-02-05 18:30   ` Christoph Hellwig
2020-02-05 20:02     ` Vivek Goyal
2020-02-06  0:40       ` Dan Williams
2020-02-06  7:41         ` Christoph Hellwig
2020-02-07 16:57           ` Dan Williams
2020-02-07 17:01             ` Vivek Goyal
2020-02-07 17:06               ` Dan Williams
2020-02-06 14:34         ` Vivek Goyal
2020-02-07 16:58           ` Dan Williams
2020-02-03 20:00 ` [PATCH 2/5] s390,dax: Add dax zero_page_range operation to dcssblk driver Vivek Goyal
2020-02-05 18:32   ` Christoph Hellwig
2020-02-05 20:04     ` Vivek Goyal
2020-02-03 20:00 ` [PATCH 3/5] dm,dax: Add dax zero_page_range operation Vivek Goyal
2020-02-05 18:33   ` Christoph Hellwig
2020-02-07 16:34     ` Vivek Goyal
2020-02-03 20:00 ` [PATCH 4/5] dax,iomap: Start using dax native zero_page_range() Vivek Goyal
2020-02-05 18:33   ` Christoph Hellwig
2020-02-05 20:10     ` Vivek Goyal
2020-02-07 15:31     ` Vivek Goyal
2020-02-03 20:00 ` [PATCH 5/5] dax,iomap: Add helper dax_iomap_zero() to zero a range Vivek Goyal
2020-02-04  5:17   ` kbuild test robot
2020-02-05 18:36   ` Christoph Hellwig
2020-02-05 20:15     ` Vivek Goyal