linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: ira.weiny@intel.com
Cc: linux-kernel@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>,
	Christoph Hellwig <hch@lst.de>,
	"Theodore Y. Ts'o" <tytso@mit.edu>, Jan Kara <jack@suse.cz>,
	linux-ext4@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: Re: [RFC PATCH V2 08/12] fs/xfs: Add lock/unlock mode to xfs
Date: Mon, 13 Jan 2020 14:19:57 -0800	[thread overview]
Message-ID: <20200113221957.GN8247@magnolia> (raw)
In-Reply-To: <20200110192942.25021-9-ira.weiny@intel.com>

On Fri, Jan 10, 2020 at 11:29:38AM -0800, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> XFS requires regular files to be locked while changing to/from DAX mode.
> 
> Define a new DAX lock type and implement the [un]lock_mode() inode
> operation callbacks.
> 
> We define a new XFS_DAX_* lock type to carry the lock through the
> transaction because we don't want to use IOLOCK as that would cause
> performance issues with locking of the inode itself.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> ---
>  fs/xfs/xfs_icache.c |  2 ++
>  fs/xfs/xfs_inode.c  | 37 +++++++++++++++++++++++++++++++++++--
>  fs/xfs/xfs_inode.h  | 12 ++++++++++--
>  fs/xfs/xfs_iops.c   | 24 +++++++++++++++++++++++-
>  4 files changed, 70 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 8dc2e5414276..0288672e8902 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -74,6 +74,8 @@ xfs_inode_alloc(
>  	INIT_LIST_HEAD(&ip->i_ioend_list);
>  	spin_lock_init(&ip->i_ioend_lock);
>  
> +	percpu_init_rwsem(&ip->i_dax_sem);
> +
>  	return ip;
>  }
>  
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 401da197f012..e8fd95b75e5b 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -142,12 +142,12 @@ xfs_ilock_attr_map_shared(
>   *
>   * Basic locking order:
>   *
> - * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
> + * i_rwsem -> i_dax_sem -> i_mmap_lock -> page_lock -> i_ilock

Mmmmmm, more locks.  Can we skip the extra lock if CONFIG_FSDAX=n or if
the filesystem devices don't support DAX at all?

Also, I don't think we're actually following the i_rwsem -> i_dax_sem
order in fallocate, and possibly elsewhere too?

Does the vfs have to take the i_dax_sem to do remapping things like
reflink?  (Pretend that reflink and dax are compatible for the moment)

>   * mmap_sem locking order:
>   *
>   * i_rwsem -> page lock -> mmap_sem
> - * mmap_sem -> i_mmap_lock -> page_lock
> + * mmap_sem -> i_dax_sem -> i_mmap_lock -> page_lock
>   *
>   * The difference in mmap_sem locking order mean that we cannot hold the
>   * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
> @@ -181,6 +181,13 @@ xfs_ilock(
>  	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
>  	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
> +	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) !=
> +	       (XFS_DAX_SHARED | XFS_DAX_EXCL));
> +
> +	if (lock_flags & XFS_DAX_EXCL)
> +		percpu_down_write(&ip->i_dax_sem);
> +	else if (lock_flags & XFS_DAX_SHARED)
> +		percpu_down_read(&ip->i_dax_sem);
>  
>  	if (lock_flags & XFS_IOLOCK_EXCL) {
>  		down_write_nested(&VFS_I(ip)->i_rwsem,
> @@ -224,6 +231,8 @@ xfs_ilock_nowait(
>  	 * You can't set both SHARED and EXCL for the same lock,
>  	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
>  	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
> +	 *
> +	 * XFS_DAX_* is not allowed
>  	 */
>  	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
>  	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
> @@ -232,6 +241,7 @@ xfs_ilock_nowait(
>  	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
>  	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
> +	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
>  
>  	if (lock_flags & XFS_IOLOCK_EXCL) {
>  		if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
> @@ -302,6 +312,8 @@ xfs_iunlock(
>  	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
>  	ASSERT(lock_flags != 0);
> +	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) !=
> +	       (XFS_DAX_SHARED | XFS_DAX_EXCL));
>  
>  	if (lock_flags & XFS_IOLOCK_EXCL)
>  		up_write(&VFS_I(ip)->i_rwsem);
> @@ -318,6 +330,11 @@ xfs_iunlock(
>  	else if (lock_flags & XFS_ILOCK_SHARED)
>  		mrunlock_shared(&ip->i_lock);
>  
> +	if (lock_flags & XFS_DAX_EXCL)
> +		percpu_up_write(&ip->i_dax_sem);
> +	else if (lock_flags & XFS_DAX_SHARED)
> +		percpu_up_read(&ip->i_dax_sem);
> +
>  	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
>  }
>  
> @@ -333,6 +350,8 @@ xfs_ilock_demote(
>  	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
>  	ASSERT((lock_flags &
>  		~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
> +	/* XFS_DAX_* is not allowed */
> +	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
>  
>  	if (lock_flags & XFS_ILOCK_EXCL)
>  		mrdemote(&ip->i_lock);
> @@ -369,6 +388,13 @@ xfs_isilocked(
>  		return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
>  	}
>  
> +	if (lock_flags & (XFS_DAX_EXCL|XFS_DAX_SHARED)) {
> +		if (!(lock_flags & XFS_DAX_SHARED))
> +			return !debug_locks ||
> +				percpu_rwsem_is_held(&ip->i_dax_sem, 0);
> +		return rwsem_is_locked(&ip->i_dax_sem);
> +	}
> +
>  	ASSERT(0);
>  	return 0;
>  }
> @@ -465,6 +491,9 @@ xfs_lock_inodes(
>  	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
>  		inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
>  
> +	/* XFS_DAX_* is not allowed */
> +	ASSERT((lock_mode & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
> +
>  	if (lock_mode & XFS_IOLOCK_EXCL) {
>  		ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
>  	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
> @@ -566,6 +595,10 @@ xfs_lock_two_inodes(
>  	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
>  	       !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
>  
> +	/* XFS_DAX_* is not allowed */
> +	ASSERT((ip0_mode & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
> +	ASSERT((ip1_mode & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
> +
>  	ASSERT(ip0->i_ino != ip1->i_ino);
>  
>  	if (ip0->i_ino > ip1->i_ino) {
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 492e53992fa9..693ca66bd89b 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -67,6 +67,9 @@ typedef struct xfs_inode {
>  	spinlock_t		i_ioend_lock;
>  	struct work_struct	i_ioend_work;
>  	struct list_head	i_ioend_list;
> +
> +	/* protect changing the mode to/from DAX */
> +	struct percpu_rw_semaphore i_dax_sem;
>  } xfs_inode_t;
>  
>  /* Convert from vfs inode to xfs inode */
> @@ -278,10 +281,13 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
>  #define	XFS_ILOCK_SHARED	(1<<3)
>  #define	XFS_MMAPLOCK_EXCL	(1<<4)
>  #define	XFS_MMAPLOCK_SHARED	(1<<5)
> +#define	XFS_DAX_EXCL		(1<<6)
> +#define	XFS_DAX_SHARED		(1<<7)
>  
>  #define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
>  				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
> -				| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
> +				| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED \
> +				| XFS_DAX_EXCL | XFS_DAX_SHARED)
>  
>  #define XFS_LOCK_FLAGS \
>  	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
> @@ -289,7 +295,9 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
>  	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
>  	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }, \
>  	{ XFS_MMAPLOCK_EXCL,	"MMAPLOCK_EXCL" }, \
> -	{ XFS_MMAPLOCK_SHARED,	"MMAPLOCK_SHARED" }
> +	{ XFS_MMAPLOCK_SHARED,	"MMAPLOCK_SHARED" }, \
> +	{ XFS_DAX_EXCL,   	"DAX_EXCL" }, \

Whitespace damage between the comma & string (stray spaces before the tab).

> +	{ XFS_DAX_SHARED,	"DAX_SHARED" }
>  
>  
>  /*
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index d6843cdb51d0..a2f2604c3187 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -1158,6 +1158,16 @@ xfs_vn_tmpfile(
>  	return xfs_generic_create(dir, dentry, mode, 0, true);
>  }
>  
> +static void xfs_lock_mode(struct inode *inode)
> +{
> +	xfs_ilock(XFS_I(inode), XFS_DAX_SHARED);
> +}
> +
> +static void xfs_unlock_mode(struct inode *inode)
> +{
> +	xfs_iunlock(XFS_I(inode), XFS_DAX_SHARED);
> +}
> +
>  static const struct inode_operations xfs_inode_operations = {
>  	.get_acl		= xfs_get_acl,
>  	.set_acl		= xfs_set_acl,
> @@ -1168,6 +1178,18 @@ static const struct inode_operations xfs_inode_operations = {
>  	.update_time		= xfs_vn_update_time,
>  };
>  
> +static const struct inode_operations xfs_reg_inode_operations = {
> +	.get_acl		= xfs_get_acl,
> +	.set_acl		= xfs_set_acl,
> +	.getattr		= xfs_vn_getattr,
> +	.setattr		= xfs_vn_setattr,
> +	.listxattr		= xfs_vn_listxattr,
> +	.fiemap			= xfs_vn_fiemap,
> +	.update_time		= xfs_vn_update_time,
> +	.lock_mode              = xfs_lock_mode,
> +	.unlock_mode            = xfs_unlock_mode,
> +};
> +
>  static const struct inode_operations xfs_dir_inode_operations = {
>  	.create			= xfs_vn_create,
>  	.lookup			= xfs_vn_lookup,
> @@ -1372,7 +1394,7 @@ xfs_setup_iops(
>  
>  	switch (inode->i_mode & S_IFMT) {
>  	case S_IFREG:
> -		inode->i_op = &xfs_inode_operations;
> +		inode->i_op = &xfs_reg_inode_operations;

xfs_file_inode_operations?

--D

>  		inode->i_fop = &xfs_file_operations;
>  		if (IS_DAX(inode))
>  			inode->i_mapping->a_ops = &xfs_dax_aops;
> -- 
> 2.21.0
> 

  reply	other threads:[~2020-01-13 22:20 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-10 19:29 [RFC PATCH V2 00/12] Enable per-file/directory DAX operations V2 ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 01/12] fs/stat: Define DAX statx attribute ira.weiny
2020-01-15 11:37   ` Jan Kara
2020-01-15 17:38     ` Darrick J. Wong
2020-01-15 19:45       ` Ira Weiny
2020-01-15 20:10         ` Dan Williams
2020-01-15 22:38           ` Ira Weiny
2020-01-16  5:39             ` Darrick J. Wong
2020-01-16  6:05               ` Dan Williams
2020-01-16  6:18                 ` Darrick J. Wong
2020-01-16  6:25                   ` Dan Williams
2020-01-18  9:11                 ` Dave Chinner
2020-01-16 17:55               ` Ira Weiny
2020-01-16 18:04                 ` Darrick J. Wong
2020-01-16 18:52                   ` Ira Weiny
2020-01-16 22:19                     ` Darrick J. Wong
2020-01-17 11:58                     ` Jan Kara
2020-01-10 19:29 ` [RFC PATCH V2 02/12] fs/xfs: Isolate the physical DAX flag from effective ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 03/12] fs/xfs: Separate functionality of xfs_inode_supports_dax() ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 04/12] fs/xfs: Clean up DAX support check ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 05/12] fs: remove unneeded IS_DAX() check ira.weiny
2020-01-16  9:38   ` Jan Kara
2020-01-16 18:47     ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 06/12] fs/xfs: Check if the inode supports DAX under lock ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 07/12] fs: Add locking for a dynamic inode 'mode' ira.weiny
2020-01-13 22:12   ` Darrick J. Wong
2020-01-14  0:20     ` Ira Weiny
2020-01-14  1:03       ` Darrick J. Wong
2020-01-15 19:08         ` Ira Weiny
2020-01-16  5:40           ` Darrick J. Wong
2020-01-16 18:54             ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 08/12] fs/xfs: Add lock/unlock mode to xfs ira.weiny
2020-01-13 22:19   ` Darrick J. Wong [this message]
2020-01-14  0:35     ` Ira Weiny
2020-01-15  0:57       ` Ira Weiny
2020-01-15 23:52     ` Ira Weiny
2020-01-16  9:24   ` Jan Kara
2020-01-16 19:12     ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 09/12] fs: Prevent mode change if file is mmap'ed ira.weiny
2020-01-13 22:22   ` Darrick J. Wong
2020-01-14  0:46     ` Ira Weiny
2020-01-14  1:30       ` Darrick J. Wong
2020-01-14 17:53         ` Ira Weiny
2020-01-15 11:34           ` Jan Kara
2020-01-15 18:24             ` Ira Weiny
2020-01-15 10:21   ` David Laight
2020-01-15 17:53     ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 10/12] fs/xfs: Fix truncate up ira.weiny
2020-01-13 22:27   ` Darrick J. Wong
2020-01-14  0:40     ` Ira Weiny
2020-01-14  1:14       ` Darrick J. Wong
2020-01-14 19:00         ` Ira Weiny
2020-01-14 19:39           ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 11/12] fs/xfs: Clean up locking in dax invalidate ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 12/12] fs/xfs: Allow toggle of effective DAX flag ira.weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200113221957.GN8247@magnolia \
    --to=darrick.wong@oracle.com \
    --cc=dan.j.williams@intel.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=ira.weiny@intel.com \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).