All of lore.kernel.org
 help / color / mirror / Atom feed
From: "ruansy.fnst@fujitsu.com" <ruansy.fnst@fujitsu.com>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-xfs@vger.kernel.org" <linux-xfs@vger.kernel.org>,
	"linux-nvdimm@lists.01.org" <linux-nvdimm@lists.01.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"darrick.wong@oracle.com" <darrick.wong@oracle.com>,
	"dan.j.williams@intel.com" <dan.j.williams@intel.com>,
	"willy@infradead.org" <willy@infradead.org>,
	"viro@zeniv.linux.org.uk" <viro@zeniv.linux.org.uk>,
	"david@fromorbit.com" <david@fromorbit.com>,
	"hch@lst.de" <hch@lst.de>, "rgoldwyn@suse.de" <rgoldwyn@suse.de>
Subject: RE: [PATCH v5 7/7] fs/xfs: Add dax dedupe support
Date: Wed, 12 May 2021 01:26:30 +0000	[thread overview]
Message-ID: <OSBPR01MB292025D1E3CA65493B714E63F4529@OSBPR01MB2920.jpnprd01.prod.outlook.com> (raw)
In-Reply-To: <20210512010428.GQ8582@magnolia>

> -----Original Message-----
> From: Darrick J. Wong <djwong@kernel.org>
> Subject: Re: [PATCH v5 7/7] fs/xfs: Add dax dedupe support
> 
> On Tue, May 11, 2021 at 11:09:33AM +0800, Shiyang Ruan wrote:
> > Introduce xfs_mmaplock_two_inodes_and_break_dax_layout() for DAX files
> > that are going to be deduped.  After that, call the compare range
> > function only when both files are DAX or both are non-DAX.
> >
> > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> > ---
> >  fs/xfs/xfs_file.c    |  2 +-
> >  fs/xfs/xfs_inode.c   | 66 +++++++++++++++++++++++++++++++++++++++++++-
> >  fs/xfs/xfs_inode.h   |  1 +
> >  fs/xfs/xfs_reflink.c |  4 +--
> >  4 files changed, 69 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index 38d8eca05aee..bd5002d38df4 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -823,7 +823,7 @@ xfs_wait_dax_page(
> >  	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
> >  }
> >
> > -static int
> > +int
> >  xfs_break_dax_layouts(
> >  	struct inode		*inode,
> >  	bool			*retry)
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 0369eb22c1bb..0774b6e2b940 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3711,6 +3711,64 @@ xfs_iolock_two_inodes_and_break_layout(
> >  	return 0;
> >  }
> >
> > +static int
> > +xfs_mmaplock_two_inodes_and_break_dax_layout(
> > +	struct inode		*src,
> > +	struct inode		*dest)
> 
> MMAPLOCK is an xfs_inode lock, so please pass those in here.
> 
> > +{
> > +	int			error, attempts = 0;
> > +	bool			retry;
> > +	struct xfs_inode	*ip0, *ip1;
> > +	struct page		*page;
> > +	struct xfs_log_item	*lp;
> > +
> > +	if (src > dest)
> > +		swap(src, dest);
> 
> The MMAPLOCK (and ILOCK) locking order is increasing inode number, not the
> address of the incore object.  This is different from (and not consistent
> with) i_rwsem/XFS_IOLOCK, but those are the rules.

Yes, I misunderstood here.

> 
> > +	ip0 = XFS_I(src);
> > +	ip1 = XFS_I(dest);
> > +
> > +again:
> > +	retry = false;
> > +	/* Lock the first inode */
> > +	xfs_ilock(ip0, XFS_MMAPLOCK_EXCL);
> > +	error = xfs_break_dax_layouts(src, &retry);
> > +	if (error || retry) {
> > +		xfs_iunlock(ip0, XFS_MMAPLOCK_EXCL);
> > +		goto again;
> > +	}
> > +
> > +	if (src == dest)
> > +		return 0;
> > +
> > +	/* Nested lock the second inode */
> > +	lp = &ip0->i_itemp->ili_item;
> > +	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
> > +		if (!xfs_ilock_nowait(ip1,
> > +		    xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1))) {
> > +			xfs_iunlock(ip0, XFS_MMAPLOCK_EXCL);
> > +			if ((++attempts % 5) == 0)
> > +				delay(1); /* Don't just spin the CPU */
> > +			goto again;
> > +		}
> > +	} else
> > +		xfs_ilock(ip1, xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1));
> > +	/*
> > +	 * We cannot use xfs_break_dax_layouts() directly here because it may
> > +	 * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
> > +	 * for this nested lock case.
> > +	 */
> > +	page = dax_layout_busy_page(dest->i_mapping);
> > +	if (page) {
> > +		if (page_ref_count(page) != 1) {
> 
> This could be flattened to:
> 
> 	if (page && page_ref_count(page) != 1) {
> 		...
> 	}

OK.


--
Thanks,
Ruan Shiyang.
> 
> --D
> 
> > +			xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
> > +			xfs_iunlock(ip0, XFS_MMAPLOCK_EXCL);
> > +			goto again;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Lock two inodes so that userspace cannot initiate I/O via file syscalls or
> >   * mmap activity.
> > @@ -3721,10 +3779,16 @@ xfs_ilock2_io_mmap(
> >  	struct xfs_inode	*ip2)
> >  {
> >  	int			ret;
> > +	struct inode		*ino1 = VFS_I(ip1);
> > +	struct inode		*ino2 = VFS_I(ip2);
> >
> > -	ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
> > +	ret = xfs_iolock_two_inodes_and_break_layout(ino1, ino2);
> >  	if (ret)
> >  		return ret;
> > +
> > +	if (IS_DAX(ino1) && IS_DAX(ino2))
> > +		return xfs_mmaplock_two_inodes_and_break_dax_layout(ino1, ino2);
> > +
> >  	if (ip1 == ip2)
> >  		xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
> >  	else
> > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> > index ca826cfba91c..2d0b344fb100 100644
> > --- a/fs/xfs/xfs_inode.h
> > +++ b/fs/xfs/xfs_inode.h
> > @@ -457,6 +457,7 @@ enum xfs_prealloc_flags {
> >
> >  int	xfs_update_prealloc_flags(struct xfs_inode *ip,
> >  				  enum xfs_prealloc_flags flags);
> > +int	xfs_break_dax_layouts(struct inode *inode, bool *retry);
> >  int	xfs_break_layouts(struct inode *inode, uint *iolock,
> >  		enum layout_break_reason reason);
> >
> > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > index 9a780948dbd0..ff308304c5cd 100644
> > --- a/fs/xfs/xfs_reflink.c
> > +++ b/fs/xfs/xfs_reflink.c
> > @@ -1324,8 +1324,8 @@ xfs_reflink_remap_prep(
> >  	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
> >  		goto out_unlock;
> >
> > -	/* Don't share DAX file data for now. */
> > -	if (IS_DAX(inode_in) || IS_DAX(inode_out))
> > +	/* Don't share DAX file data with non-DAX file. */
> > +	if (IS_DAX(inode_in) != IS_DAX(inode_out))
> >  		goto out_unlock;
> >
> >  	if (!IS_DAX(inode_in))
> > --
> > 2.31.1
> >
> >
> >

WARNING: multiple messages have this Message-ID (diff)
From: "ruansy.fnst@fujitsu.com" <ruansy.fnst@fujitsu.com>
To: "Darrick J. Wong" <djwong@kernel.org>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-xfs@vger.kernel.org" <linux-xfs@vger.kernel.org>,
	"linux-nvdimm@lists.01.org" <linux-nvdimm@lists.01.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"darrick.wong@oracle.com" <darrick.wong@oracle.com>,
	"willy@infradead.org" <willy@infradead.org>,
	"viro@zeniv.linux.org.uk" <viro@zeniv.linux.org.uk>,
	"david@fromorbit.com" <david@fromorbit.com>,
	"hch@lst.de" <hch@lst.de>, "rgoldwyn@suse.de" <rgoldwyn@suse.de>
Subject: RE: [PATCH v5 7/7] fs/xfs: Add dax dedupe support
Date: Wed, 12 May 2021 01:26:30 +0000	[thread overview]
Message-ID: <OSBPR01MB292025D1E3CA65493B714E63F4529@OSBPR01MB2920.jpnprd01.prod.outlook.com> (raw)
In-Reply-To: <20210512010428.GQ8582@magnolia>

> -----Original Message-----
> From: Darrick J. Wong <djwong@kernel.org>
> Subject: Re: [PATCH v5 7/7] fs/xfs: Add dax dedupe support
> 
> On Tue, May 11, 2021 at 11:09:33AM +0800, Shiyang Ruan wrote:
> > Introduce xfs_mmaplock_two_inodes_and_break_dax_layout() for DAX files
> > that are going to be deduped.  After that, call the compare range
> > function only when both files are DAX or both are non-DAX.
> >
> > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> > ---
> >  fs/xfs/xfs_file.c    |  2 +-
> >  fs/xfs/xfs_inode.c   | 66 +++++++++++++++++++++++++++++++++++++++++++-
> >  fs/xfs/xfs_inode.h   |  1 +
> >  fs/xfs/xfs_reflink.c |  4 +--
> >  4 files changed, 69 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index 38d8eca05aee..bd5002d38df4 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -823,7 +823,7 @@ xfs_wait_dax_page(
> >  	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
> >  }
> >
> > -static int
> > +int
> >  xfs_break_dax_layouts(
> >  	struct inode		*inode,
> >  	bool			*retry)
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index 0369eb22c1bb..0774b6e2b940 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3711,6 +3711,64 @@ xfs_iolock_two_inodes_and_break_layout(
> >  	return 0;
> >  }
> >
> > +static int
> > +xfs_mmaplock_two_inodes_and_break_dax_layout(
> > +	struct inode		*src,
> > +	struct inode		*dest)
> 
> MMAPLOCK is an xfs_inode lock, so please pass those in here.
> 
> > +{
> > +	int			error, attempts = 0;
> > +	bool			retry;
> > +	struct xfs_inode	*ip0, *ip1;
> > +	struct page		*page;
> > +	struct xfs_log_item	*lp;
> > +
> > +	if (src > dest)
> > +		swap(src, dest);
> 
> The MMAPLOCK (and ILOCK) locking order is increasing inode number, not the
> address of the incore object.  This is different from (and not consistent
> with) i_rwsem/XFS_IOLOCK, but those are the rules.

Yes, I misunderstood here.

> 
> > +	ip0 = XFS_I(src);
> > +	ip1 = XFS_I(dest);
> > +
> > +again:
> > +	retry = false;
> > +	/* Lock the first inode */
> > +	xfs_ilock(ip0, XFS_MMAPLOCK_EXCL);
> > +	error = xfs_break_dax_layouts(src, &retry);
> > +	if (error || retry) {
> > +		xfs_iunlock(ip0, XFS_MMAPLOCK_EXCL);
> > +		goto again;
> > +	}
> > +
> > +	if (src == dest)
> > +		return 0;
> > +
> > +	/* Nested lock the second inode */
> > +	lp = &ip0->i_itemp->ili_item;
> > +	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
> > +		if (!xfs_ilock_nowait(ip1,
> > +		    xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1))) {
> > +			xfs_iunlock(ip0, XFS_MMAPLOCK_EXCL);
> > +			if ((++attempts % 5) == 0)
> > +				delay(1); /* Don't just spin the CPU */
> > +			goto again;
> > +		}
> > +	} else
> > +		xfs_ilock(ip1, xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1));
> > +	/*
> > +	 * We cannot use xfs_break_dax_layouts() directly here because it may
> > +	 * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
> > +	 * for this nested lock case.
> > +	 */
> > +	page = dax_layout_busy_page(dest->i_mapping);
> > +	if (page) {
> > +		if (page_ref_count(page) != 1) {
> 
> This could be flattened to:
> 
> 	if (page && page_ref_count(page) != 1) {
> 		...
> 	}

OK.


--
Thanks,
Ruan Shiyang.
> 
> --D
> 
> > +			xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
> > +			xfs_iunlock(ip0, XFS_MMAPLOCK_EXCL);
> > +			goto again;
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * Lock two inodes so that userspace cannot initiate I/O via file syscalls or
> >   * mmap activity.
> > @@ -3721,10 +3779,16 @@ xfs_ilock2_io_mmap(
> >  	struct xfs_inode	*ip2)
> >  {
> >  	int			ret;
> > +	struct inode		*ino1 = VFS_I(ip1);
> > +	struct inode		*ino2 = VFS_I(ip2);
> >
> > -	ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
> > +	ret = xfs_iolock_two_inodes_and_break_layout(ino1, ino2);
> >  	if (ret)
> >  		return ret;
> > +
> > +	if (IS_DAX(ino1) && IS_DAX(ino2))
> > +		return xfs_mmaplock_two_inodes_and_break_dax_layout(ino1, ino2);
> > +
> >  	if (ip1 == ip2)
> >  		xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
> >  	else
> > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> > index ca826cfba91c..2d0b344fb100 100644
> > --- a/fs/xfs/xfs_inode.h
> > +++ b/fs/xfs/xfs_inode.h
> > @@ -457,6 +457,7 @@ enum xfs_prealloc_flags {
> >
> >  int	xfs_update_prealloc_flags(struct xfs_inode *ip,
> >  				  enum xfs_prealloc_flags flags);
> > +int	xfs_break_dax_layouts(struct inode *inode, bool *retry);
> >  int	xfs_break_layouts(struct inode *inode, uint *iolock,
> >  		enum layout_break_reason reason);
> >
> > diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> > index 9a780948dbd0..ff308304c5cd 100644
> > --- a/fs/xfs/xfs_reflink.c
> > +++ b/fs/xfs/xfs_reflink.c
> > @@ -1324,8 +1324,8 @@ xfs_reflink_remap_prep(
> >  	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
> >  		goto out_unlock;
> >
> > -	/* Don't share DAX file data for now. */
> > -	if (IS_DAX(inode_in) || IS_DAX(inode_out))
> > +	/* Don't share DAX file data with non-DAX file. */
> > +	if (IS_DAX(inode_in) != IS_DAX(inode_out))
> >  		goto out_unlock;
> >
> >  	if (!IS_DAX(inode_in))
> > --
> > 2.31.1
> >
> >
> >
_______________________________________________
Linux-nvdimm mailing list -- linux-nvdimm@lists.01.org
To unsubscribe send an email to linux-nvdimm-leave@lists.01.org

  reply	other threads:[~2021-05-12  1:27 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-11  3:09 [PATCH v5 0/7] fsdax,xfs: Add reflink&dedupe support for fsdax Shiyang Ruan
2021-05-11  3:09 ` Shiyang Ruan
2021-05-11  3:09 ` [PATCH v5 1/7] fsdax: Introduce dax_iomap_cow_copy() Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-12  1:08   ` Darrick J. Wong
2021-05-12  1:08     ` Darrick J. Wong
2021-05-13  7:57     ` ruansy.fnst
2021-05-13  7:57       ` ruansy.fnst
2021-05-13 15:02       ` Darrick J. Wong
2021-05-13 15:02         ` Darrick J. Wong
2021-05-11  3:09 ` [PATCH v5 2/7] fsdax: Replace mmap entry in case of CoW Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-12  1:09   ` Darrick J. Wong
2021-05-12  1:09     ` Darrick J. Wong
2021-05-11  3:09 ` [PATCH v5 3/7] fsdax: Add dax_iomap_cow_copy() for dax_iomap_zero Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-12  1:17   ` Darrick J. Wong
2021-05-12  1:17     ` Darrick J. Wong
2021-05-12  1:37     ` ruansy.fnst
2021-05-12  1:37       ` ruansy.fnst
2021-05-25 22:17       ` Darrick J. Wong
2021-05-12  2:27   ` Mika Penttilä
2021-05-12  2:27     ` Mika Penttilä
2021-05-13  7:48     ` ruansy.fnst
2021-05-13  7:48       ` ruansy.fnst
2021-05-11  3:09 ` [PATCH v5 4/7] iomap: Introduce iomap_apply2() for operations on two files Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-11  3:09 ` [PATCH v5 5/7] fsdax: Dedup file range to use a compare function Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-12  1:23   ` Darrick J. Wong
2021-05-12  1:23     ` Darrick J. Wong
2021-05-14  8:35     ` ruansy.fnst
2021-05-14  8:35       ` ruansy.fnst
2021-05-14 16:03       ` Darrick J. Wong
2021-05-14 16:03         ` Darrick J. Wong
2021-05-11  3:09 ` [PATCH v5 6/7] fs/xfs: Handle CoW for fsdax write() path Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-12  1:30   ` Darrick J. Wong
2021-05-12  1:30     ` Darrick J. Wong
2021-05-11  3:09 ` [PATCH v5 7/7] fs/xfs: Add dax dedupe support Shiyang Ruan
2021-05-11  3:09   ` Shiyang Ruan
2021-05-12  1:04   ` Darrick J. Wong
2021-05-12  1:04     ` Darrick J. Wong
2021-05-12  1:26     ` ruansy.fnst [this message]
2021-05-12  1:26       ` ruansy.fnst
2021-05-11  3:57 ` [PATCH v5 0/7] fsdax,xfs: Add reflink&dedupe support for fsdax Darrick J. Wong
2021-05-11  3:57   ` Darrick J. Wong
2021-05-11  5:53   ` ruansy.fnst
2021-05-11  5:53     ` ruansy.fnst

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=OSBPR01MB292025D1E3CA65493B714E63F4529@OSBPR01MB2920.jpnprd01.prod.outlook.com \
    --to=ruansy.fnst@fujitsu.com \
    --cc=dan.j.williams@intel.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=rgoldwyn@suse.de \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.