All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
To: <linux-kernel@vger.kernel.org>, <linux-xfs@vger.kernel.org>,
	<nvdimm@lists.linux.dev>, <linux-mm@kvack.org>,
	<linux-fsdevel@vger.kernel.org>
Cc: <djwong@kernel.org>, <dan.j.williams@intel.com>,
	<david@fromorbit.com>, <hch@infradead.org>,
	<akpm@linux-foundation.org>, <jane.chu@oracle.com>,
	<rgoldwyn@suse.de>, <viro@zeniv.linux.org.uk>,
	<willy@infradead.org>, <naoya.horiguchi@nec.com>,
	<linmiaohe@huawei.com>, Christoph Hellwig <hch@lst.de>
Subject: [PATCH v2 09/14] fsdax: Introduce dax_iomap_cow_copy()
Date: Fri, 3 Jun 2022 13:37:33 +0800	[thread overview]
Message-ID: <20220603053738.1218681-10-ruansy.fnst@fujitsu.com> (raw)
In-Reply-To: <20220603053738.1218681-1-ruansy.fnst@fujitsu.com>

When the iomap is for a write operation and iomap is not equal to srcmap
after iomap_begin, we consider it to be a CoW operation.

In this case, the destination (iomap->addr) points to a newly allocated
extent, and the data needs to be copied from srcmap to that extent.  In
theory, it is better to copy only the head and tail ranges, which lie
outside of the non-aligned area, instead of copying the whole aligned
range. But in a dax page fault, the range is always aligned, so copy the
whole range in this case.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/dax.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 83 insertions(+), 5 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index ab659c9f142a..3fe8e3714327 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1062,6 +1062,60 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
 	return rc;
 }
 
+/**
+ * dax_iomap_cow_copy - Copy the data from source to destination before write
+ * @pos:	address to do copy from.
+ * @length:	size of copy operation.
+ * @align_size:	aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE)
+ * @srcmap:	iomap srcmap
+ * @daddr:	destination address to copy to.
+ *
+ * This can be called from two places. Either during DAX write fault (page
+ * aligned), to copy the length size data to daddr. Or, while doing normal DAX
+ * write operation, dax_iomap_iter() might call this to do the copy of either
+ * start or end unaligned address. In the latter case the rest of the copy of
+ * aligned ranges is taken care by dax_iomap_iter() itself.
+ */
+static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
+		const struct iomap *srcmap, void *daddr)
+{
+	loff_t head_off = pos & (align_size - 1);
+	size_t size = ALIGN(head_off + length, align_size);
+	loff_t end = pos + length;
+	loff_t pg_end = round_up(end, align_size);
+	bool copy_all = head_off == 0 && end == pg_end;
+	void *saddr = 0;
+	int ret = 0;
+
+	ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
+	if (ret)
+		return ret;
+
+	if (copy_all) {
+		ret = copy_mc_to_kernel(daddr, saddr, length);
+		return ret ? -EIO : 0;
+	}
+
+	/* Copy the head part of the range */
+	if (head_off) {
+		ret = copy_mc_to_kernel(daddr, saddr, head_off);
+		if (ret)
+			return -EIO;
+	}
+
+	/* Copy the tail part of the range */
+	if (end < pg_end) {
+		loff_t tail_off = head_off + length;
+		loff_t tail_len = pg_end - end;
+
+		ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off,
+					tail_len);
+		if (ret)
+			return -EIO;
+	}
+	return 0;
+}
+
 /*
  * The user has performed a load from a hole in the file.  Allocating a new
  * page in the file would cause excessive storage usage for workloads with
@@ -1232,15 +1286,17 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		struct iov_iter *iter)
 {
 	const struct iomap *iomap = &iomi->iomap;
+	const struct iomap *srcmap = &iomi->srcmap;
 	loff_t length = iomap_length(iomi);
 	loff_t pos = iomi->pos;
 	struct dax_device *dax_dev = iomap->dax_dev;
 	loff_t end = pos + length, done = 0;
+	bool write = iov_iter_rw(iter) == WRITE;
 	ssize_t ret = 0;
 	size_t xfer;
 	int id;
 
-	if (iov_iter_rw(iter) == READ) {
+	if (!write) {
 		end = min(end, i_size_read(iomi->inode));
 		if (pos >= end)
 			return 0;
@@ -1249,7 +1305,12 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 			return iov_iter_zero(min(length, end - pos), iter);
 	}
 
-	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+	/*
+	 * In DAX mode, enforce either pure overwrites of written extents, or
+	 * writes to unwritten extents as part of a copy-on-write operation.
+	 */
+	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED &&
+			!(iomap->flags & IOMAP_F_SHARED)))
 		return -EIO;
 
 	/*
@@ -1291,6 +1352,14 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 			break;
 		}
 
+		if (write &&
+		    srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
+			ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap,
+						 kaddr);
+			if (ret)
+				break;
+		}
+
 		map_len = PFN_PHYS(map_len);
 		kaddr += offset;
 		map_len -= offset;
@@ -1300,7 +1369,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		if (recovery)
 			xfer = dax_recovery_write(dax_dev, pgoff, kaddr,
 					map_len, iter);
-		else if (iov_iter_rw(iter) == WRITE)
+		else if (write)
 			xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
 					map_len, iter);
 		else
@@ -1440,6 +1509,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	const struct iomap *iomap = &iter->iomap;
+	const struct iomap *srcmap = &iter->srcmap;
 	size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
 	loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -1447,6 +1517,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 	unsigned long entry_flags = pmd ? DAX_PMD : 0;
 	int err = 0;
 	pfn_t pfn;
+	void *kaddr;
 
 	if (!pmd && vmf->cow_page)
 		return dax_fault_cow_page(vmf, iter);
@@ -1459,18 +1530,25 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 		return dax_pmd_load_hole(xas, vmf, iomap, entry);
 	}
 
-	if (iomap->type != IOMAP_MAPPED) {
+	if (iomap->type != IOMAP_MAPPED && !(iomap->flags & IOMAP_F_SHARED)) {
 		WARN_ON_ONCE(1);
 		return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS;
 	}
 
-	err = dax_iomap_direct_access(&iter->iomap, pos, size, NULL, &pfn);
+	err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn);
 	if (err)
 		return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
 
 	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn, entry_flags,
 				  write && !sync);
 
+	if (write &&
+	    srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
+		err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr);
+		if (err)
+			return dax_fault_return(err);
+	}
+
 	if (sync)
 		return dax_fault_synchronous_pfnp(pfnp, pfn);
 
-- 
2.36.1




  parent reply	other threads:[~2022-06-03  5:37 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-03  5:37 [PATCHSETS v2] v14 fsdax-rmap + v11 fsdax-reflink Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 01/14] dax: Introduce holder for dax_device Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 02/14] mm: factor helpers for memory_failure_dev_pagemap Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 03/14] pagemap,pmem: Introduce ->memory_failure() Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 04/14] fsdax: Introduce dax_lock_mapping_entry() Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 05/14] mm: Introduce mf_dax_kill_procs() for fsdax case Shiyang Ruan
2022-08-24 21:52   ` Dan Williams
2022-08-24 23:42     ` HORIGUCHI NAOYA(堀口 直也)
2022-08-25  4:33       ` Dan Williams
2022-08-25  5:05         ` Dan Williams
2022-08-25 19:28           ` Dan Williams
2022-06-03  5:37 ` [PATCH v2 06/14] xfs: Implement ->notify_failure() for XFS Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 07/14] fsdax: set a CoW flag when associate reflink mappings Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 08/14] fsdax: Output address in dax_iomap_pfn() and rename it Shiyang Ruan
2022-06-07 14:38   ` [PATCH v2.1 " Shiyang Ruan
2022-06-03  5:37 ` Shiyang Ruan [this message]
2022-06-03  5:37 ` [PATCH v2 10/14] fsdax: Replace mmap entry in case of CoW Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 11/14] fsdax: Add dax_iomap_cow_copy() for dax zero Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 12/14] fsdax: Dedup file range to use a compare function Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 13/14] xfs: support CoW in fsdax mode Shiyang Ruan
2022-06-03  5:37 ` [PATCH v2 14/14] xfs: Add dax dedupe support Shiyang Ruan
2022-06-17  2:31 ` [PATCHSETS v2] v14 fsdax-rmap + v11 fsdax-reflink Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220603053738.1218681-10-ruansy.fnst@fujitsu.com \
    --to=ruansy.fnst@fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=hch@lst.de \
    --cc=jane.chu@oracle.com \
    --cc=linmiaohe@huawei.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=naoya.horiguchi@nec.com \
    --cc=nvdimm@lists.linux.dev \
    --cc=rgoldwyn@suse.de \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.