All of lore.kernel.org
 help / color / mirror / Atom feed
From: ira.weiny@intel.com
To: linux-kernel@vger.kernel.org
Cc: Ira Weiny <ira.weiny@intel.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	"Darrick J. Wong" <darrick.wong@oracle.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>,
	Christoph Hellwig <hch@lst.de>,
	"Theodore Y. Ts'o" <tytso@mit.edu>, Jan Kara <jack@suse.cz>,
	linux-ext4@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: [RFC PATCH V2 08/12] fs/xfs: Add lock/unlock mode to xfs
Date: Fri, 10 Jan 2020 11:29:38 -0800	[thread overview]
Message-ID: <20200110192942.25021-9-ira.weiny@intel.com> (raw)
In-Reply-To: <20200110192942.25021-1-ira.weiny@intel.com>

From: Ira Weiny <ira.weiny@intel.com>

XFS requires regular files to be locked while changing to/from DAX mode.

Define a new DAX lock type and implement the [un]lock_mode() inode
operation callbacks.

We define a new XFS_DAX_* lock type to carry the lock through the
transaction because we don't want to use IOLOCK as that would cause
performance issues with locking of the inode itself.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 fs/xfs/xfs_icache.c |  2 ++
 fs/xfs/xfs_inode.c  | 37 +++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_inode.h  | 12 ++++++++++--
 fs/xfs/xfs_iops.c   | 24 +++++++++++++++++++++++-
 4 files changed, 70 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 8dc2e5414276..0288672e8902 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -74,6 +74,8 @@ xfs_inode_alloc(
 	INIT_LIST_HEAD(&ip->i_ioend_list);
 	spin_lock_init(&ip->i_ioend_lock);
 
+	percpu_init_rwsem(&ip->i_dax_sem);
+
 	return ip;
 }
 
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 401da197f012..e8fd95b75e5b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -142,12 +142,12 @@ xfs_ilock_attr_map_shared(
  *
  * Basic locking order:
  *
- * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
+ * i_rwsem -> i_dax_sem -> i_mmap_lock -> page_lock -> i_ilock
  *
  * mmap_sem locking order:
  *
  * i_rwsem -> page lock -> mmap_sem
- * mmap_sem -> i_mmap_lock -> page_lock
+ * mmap_sem -> i_dax_sem -> i_mmap_lock -> page_lock
  *
  * The difference in mmap_sem locking order mean that we cannot hold the
  * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
@@ -181,6 +181,13 @@ xfs_ilock(
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) !=
+	       (XFS_DAX_SHARED | XFS_DAX_EXCL));
+
+	if (lock_flags & XFS_DAX_EXCL)
+		percpu_down_write(&ip->i_dax_sem);
+	else if (lock_flags & XFS_DAX_SHARED)
+		percpu_down_read(&ip->i_dax_sem);
 
 	if (lock_flags & XFS_IOLOCK_EXCL) {
 		down_write_nested(&VFS_I(ip)->i_rwsem,
@@ -224,6 +231,8 @@ xfs_ilock_nowait(
 	 * You can't set both SHARED and EXCL for the same lock,
 	 * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
 	 * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
+	 *
+	 * XFS_DAX_* is not allowed
 	 */
 	ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
 	       (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
@@ -232,6 +241,7 @@ xfs_ilock_nowait(
 	ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
 
 	if (lock_flags & XFS_IOLOCK_EXCL) {
 		if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
@@ -302,6 +312,8 @@ xfs_iunlock(
 	       (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
 	ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 	ASSERT(lock_flags != 0);
+	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) !=
+	       (XFS_DAX_SHARED | XFS_DAX_EXCL));
 
 	if (lock_flags & XFS_IOLOCK_EXCL)
 		up_write(&VFS_I(ip)->i_rwsem);
@@ -318,6 +330,11 @@ xfs_iunlock(
 	else if (lock_flags & XFS_ILOCK_SHARED)
 		mrunlock_shared(&ip->i_lock);
 
+	if (lock_flags & XFS_DAX_EXCL)
+		percpu_up_write(&ip->i_dax_sem);
+	else if (lock_flags & XFS_DAX_SHARED)
+		percpu_up_read(&ip->i_dax_sem);
+
 	trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
 }
 
@@ -333,6 +350,8 @@ xfs_ilock_demote(
 	ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
 	ASSERT((lock_flags &
 		~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+	/* XFS_DAX_* is not allowed */
+	ASSERT((lock_flags & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
 
 	if (lock_flags & XFS_ILOCK_EXCL)
 		mrdemote(&ip->i_lock);
@@ -369,6 +388,13 @@ xfs_isilocked(
 		return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
 	}
 
+	if (lock_flags & (XFS_DAX_EXCL|XFS_DAX_SHARED)) {
+		if (!(lock_flags & XFS_DAX_SHARED))
+			return !debug_locks ||
+				percpu_rwsem_is_held(&ip->i_dax_sem, 0);
+		return rwsem_is_locked(&ip->i_dax_sem);
+	}
+
 	ASSERT(0);
 	return 0;
 }
@@ -465,6 +491,9 @@ xfs_lock_inodes(
 	ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
 		inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
 
+	/* XFS_DAX_* is not allowed */
+	ASSERT((lock_mode & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
+
 	if (lock_mode & XFS_IOLOCK_EXCL) {
 		ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
 	} else if (lock_mode & XFS_MMAPLOCK_EXCL)
@@ -566,6 +595,10 @@ xfs_lock_two_inodes(
 	ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) ||
 	       !(ip1_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
 
+	/* XFS_DAX_* is not allowed */
+	ASSERT((ip0_mode & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
+	ASSERT((ip1_mode & (XFS_DAX_SHARED | XFS_DAX_EXCL)) == 0);
+
 	ASSERT(ip0->i_ino != ip1->i_ino);
 
 	if (ip0->i_ino > ip1->i_ino) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 492e53992fa9..693ca66bd89b 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -67,6 +67,9 @@ typedef struct xfs_inode {
 	spinlock_t		i_ioend_lock;
 	struct work_struct	i_ioend_work;
 	struct list_head	i_ioend_list;
+
+	/* protect changing the mode to/from DAX */
+	struct percpu_rw_semaphore i_dax_sem;
 } xfs_inode_t;
 
 /* Convert from vfs inode to xfs inode */
@@ -278,10 +281,13 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
 #define	XFS_ILOCK_SHARED	(1<<3)
 #define	XFS_MMAPLOCK_EXCL	(1<<4)
 #define	XFS_MMAPLOCK_SHARED	(1<<5)
+#define	XFS_DAX_EXCL		(1<<6)
+#define	XFS_DAX_SHARED		(1<<7)
 
 #define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
 				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
-				| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
+				| XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED \
+				| XFS_DAX_EXCL | XFS_DAX_SHARED)
 
 #define XFS_LOCK_FLAGS \
 	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
@@ -289,7 +295,9 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
 	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
 	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }, \
 	{ XFS_MMAPLOCK_EXCL,	"MMAPLOCK_EXCL" }, \
-	{ XFS_MMAPLOCK_SHARED,	"MMAPLOCK_SHARED" }
+	{ XFS_MMAPLOCK_SHARED,	"MMAPLOCK_SHARED" }, \
+	{ XFS_DAX_EXCL,   	"DAX_EXCL" }, \
+	{ XFS_DAX_SHARED,	"DAX_SHARED" }
 
 
 /*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d6843cdb51d0..a2f2604c3187 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1158,6 +1158,16 @@ xfs_vn_tmpfile(
 	return xfs_generic_create(dir, dentry, mode, 0, true);
 }
 
+static void xfs_lock_mode(struct inode *inode)
+{
+	xfs_ilock(XFS_I(inode), XFS_DAX_SHARED);
+}
+
+static void xfs_unlock_mode(struct inode *inode)
+{
+	xfs_iunlock(XFS_I(inode), XFS_DAX_SHARED);
+}
+
 static const struct inode_operations xfs_inode_operations = {
 	.get_acl		= xfs_get_acl,
 	.set_acl		= xfs_set_acl,
@@ -1168,6 +1178,18 @@ static const struct inode_operations xfs_inode_operations = {
 	.update_time		= xfs_vn_update_time,
 };
 
+static const struct inode_operations xfs_reg_inode_operations = {
+	.get_acl		= xfs_get_acl,
+	.set_acl		= xfs_set_acl,
+	.getattr		= xfs_vn_getattr,
+	.setattr		= xfs_vn_setattr,
+	.listxattr		= xfs_vn_listxattr,
+	.fiemap			= xfs_vn_fiemap,
+	.update_time		= xfs_vn_update_time,
+	.lock_mode              = xfs_lock_mode,
+	.unlock_mode            = xfs_unlock_mode,
+};
+
 static const struct inode_operations xfs_dir_inode_operations = {
 	.create			= xfs_vn_create,
 	.lookup			= xfs_vn_lookup,
@@ -1372,7 +1394,7 @@ xfs_setup_iops(
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
-		inode->i_op = &xfs_inode_operations;
+		inode->i_op = &xfs_reg_inode_operations;
 		inode->i_fop = &xfs_file_operations;
 		if (IS_DAX(inode))
 			inode->i_mapping->a_ops = &xfs_dax_aops;
-- 
2.21.0


  parent reply	other threads:[~2020-01-10 19:30 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-10 19:29 [RFC PATCH V2 00/12] Enable per-file/directory DAX operations V2 ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 01/12] fs/stat: Define DAX statx attribute ira.weiny
2020-01-15 11:37   ` Jan Kara
2020-01-15 17:38     ` Darrick J. Wong
2020-01-15 19:45       ` Ira Weiny
2020-01-15 20:10         ` Dan Williams
2020-01-15 22:38           ` Ira Weiny
2020-01-16  5:39             ` Darrick J. Wong
2020-01-16  6:05               ` Dan Williams
2020-01-16  6:18                 ` Darrick J. Wong
2020-01-16  6:25                   ` Dan Williams
2020-01-18  9:11                 ` Dave Chinner
2020-01-16 17:55               ` Ira Weiny
2020-01-16 18:04                 ` Darrick J. Wong
2020-01-16 18:52                   ` Ira Weiny
2020-01-16 22:19                     ` Darrick J. Wong
2020-01-17 11:58                     ` Jan Kara
2020-01-10 19:29 ` [RFC PATCH V2 02/12] fs/xfs: Isolate the physical DAX flag from effective ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 03/12] fs/xfs: Separate functionality of xfs_inode_supports_dax() ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 04/12] fs/xfs: Clean up DAX support check ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 05/12] fs: remove unneeded IS_DAX() check ira.weiny
2020-01-16  9:38   ` Jan Kara
2020-01-16 18:47     ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 06/12] fs/xfs: Check if the inode supports DAX under lock ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 07/12] fs: Add locking for a dynamic inode 'mode' ira.weiny
2020-01-13 22:12   ` Darrick J. Wong
2020-01-14  0:20     ` Ira Weiny
2020-01-14  1:03       ` Darrick J. Wong
2020-01-15 19:08         ` Ira Weiny
2020-01-16  5:40           ` Darrick J. Wong
2020-01-16 18:54             ` Ira Weiny
2020-01-10 19:29 ` ira.weiny [this message]
2020-01-11 14:49   ` [RFC PATCH V2 08/12] fs/xfs: Add lock/unlock mode to xfs kbuild test robot
2020-01-13 22:19   ` Darrick J. Wong
2020-01-14  0:35     ` Ira Weiny
2020-01-15  0:57       ` Ira Weiny
2020-01-15 23:52     ` Ira Weiny
2020-01-16  9:24   ` Jan Kara
2020-01-16 19:12     ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 09/12] fs: Prevent mode change if file is mmap'ed ira.weiny
2020-01-13 22:22   ` Darrick J. Wong
2020-01-14  0:46     ` Ira Weiny
2020-01-14  1:30       ` Darrick J. Wong
2020-01-14 17:53         ` Ira Weiny
2020-01-15 11:34           ` Jan Kara
2020-01-15 18:24             ` Ira Weiny
2020-01-15 10:21   ` David Laight
2020-01-15 17:53     ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 10/12] fs/xfs: Fix truncate up ira.weiny
2020-01-13 22:27   ` Darrick J. Wong
2020-01-14  0:40     ` Ira Weiny
2020-01-14  1:14       ` Darrick J. Wong
2020-01-14 19:00         ` Ira Weiny
2020-01-14 19:39           ` Ira Weiny
2020-01-10 19:29 ` [RFC PATCH V2 11/12] fs/xfs: Clean up locking in dax invalidate ira.weiny
2020-01-10 19:29 ` [RFC PATCH V2 12/12] fs/xfs: Allow toggle of effective DAX flag ira.weiny

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200110192942.25021-9-ira.weiny@intel.com \
    --to=ira.weiny@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=darrick.wong@oracle.com \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.