All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
To: miklos@szeredi.hu
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	xieyongji@bytedance.com,
	Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
Subject: [PATCH v2] fuse: fix deadlock between atomic O_TRUNC open() and page invalidations
Date: Wed, 29 Dec 2021 12:02:39 +0800	[thread overview]
Message-ID: <20211229040239.66075-1-zhangjiachen.jaycee@bytedance.com> (raw)

fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
deadlock related to the case above, which will cause the xfstests
generic/464 testcase hung in our virtio-fs test environment.

For example, consider two processes concurrently open one same file, one
with O_TRUNC and another without O_TRUNC. The deadlock case is described
below, if open(O_TRUNC) is already set_nowrite(acquired A), and is trying
to lock a page (acquiring B), open() could have held the page lock
(acquired B), and waiting on the page writeback (acquiring A). This would
lead to deadlocks.

open(O_TRUNC)
----------------------------------------------------------------
fuse_open_common
  inode_lock            [C acquire]
  fuse_set_nowrite      [A acquire]

  fuse_finish_open
    truncate_pagecache
      lock_page         [B acquire]
      truncate_inode_page
      unlock_page       [B release]

  fuse_release_nowrite  [A release]
  inode_unlock          [C release]
----------------------------------------------------------------

open()
----------------------------------------------------------------
fuse_open_common
  fuse_finish_open
    invalidate_inode_pages2
      lock_page         [B acquire]
	fuse_launder_page
	  fuse_wait_on_page_writeback [A acquire & release]
      unlock_page       [B release]
----------------------------------------------------------------

Besides this case, all calls of invalidate_inode_pages2() and
invalidate_inode_pages2_range() in fuse code also can deadlock with
open(O_TRUNC). This commit tries to fix it by adding a new lock,
atomic_o_trunc, to protect the areas with the A-B-B-A deadlock risk.

Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
---
 fs/fuse/dax.c    |  4 ++--
 fs/fuse/dir.c    |  2 +-
 fs/fuse/file.c   | 28 ++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h |  7 +++++++
 fs/fuse/inode.c  |  7 ++++---
 5 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 182b24a14804..e5203d61698c 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -878,11 +878,11 @@ static int dmap_writeback_invalidate(struct inode *inode,
 		return ret;
 	}
 
-	ret = invalidate_inode_pages2_range(inode->i_mapping,
+	ret = fuse_invalidate_inode_pages_range(inode,
 					    start_pos >> PAGE_SHIFT,
 					    end_pos >> PAGE_SHIFT);
 	if (ret)
-		pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n",
+		pr_debug("fuse: fuse_invalidate_inode_pages_range() failed err=%d\n",
 			 ret);
 
 	return ret;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 656e921f3506..d6d5dcd3cf1e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1778,7 +1778,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 	if ((is_truncate || !is_wb) &&
 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
 		truncate_pagecache(inode, outarg.attr.size);
-		invalidate_inode_pages2(mapping);
+		fuse_invalidate_inode_pages(inode);
 	}
 
 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 829094451774..1dde21bad53c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -124,6 +124,28 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 	}
 }
 
+int fuse_invalidate_inode_pages_range(struct inode *inode, pgoff_t start,
+					pgoff_t end)
+{
+	int ret;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	bool may_truncate = fc->atomic_o_trunc &&
+			    (fc->writeback_cache || FUSE_IS_DAX(inode));
+
+	if (may_truncate)
+		mutex_lock(&get_fuse_inode(inode)->atomic_trunc_mutex);
+	ret = invalidate_inode_pages2_range(inode->i_mapping, start, end);
+	if (may_truncate)
+		mutex_unlock(&get_fuse_inode(inode)->atomic_trunc_mutex);
+
+	return ret;
+}
+
+int fuse_invalidate_inode_pages(struct inode *inode)
+{
+	return fuse_invalidate_inode_pages_range(inode, 0, -1);
+}
+
 struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 				 unsigned int open_flags, bool isdir)
 {
@@ -214,7 +236,7 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 		file_update_time(file);
 		fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
 	} else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
-		invalidate_inode_pages2(inode->i_mapping);
+		fuse_invalidate_inode_pages(inode);
 	}
 
 	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
@@ -241,6 +263,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 
 	if (is_wb_truncate || dax_truncate) {
 		inode_lock(inode);
+		mutex_lock(&get_fuse_inode(inode)->atomic_trunc_mutex);
 		fuse_set_nowrite(inode);
 	}
 
@@ -261,6 +284,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 
 	if (is_wb_truncate | dax_truncate) {
 		fuse_release_nowrite(inode);
+		mutex_unlock(&get_fuse_inode(inode)->atomic_trunc_mutex);
 		inode_unlock(inode);
 	}
 
@@ -2408,7 +2432,7 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 		if (vma->vm_flags & VM_MAYSHARE)
 			return -ENODEV;
 
-		invalidate_inode_pages2(file->f_mapping);
+		fuse_invalidate_inode_pages(file_inode(file));
 
 		return generic_file_mmap(file, vma);
 	}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e59fbdefeb..ea293d0347a0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -149,6 +149,9 @@ struct fuse_inode {
 	/** Lock to protect write related fields */
 	spinlock_t lock;
 
+	/** Lock for serializing page invalidation and atomic_o_trunc open */
+	struct mutex atomic_trunc_mutex;
+
 #ifdef CONFIG_FUSE_DAX
 	/*
 	 * Dax specific inode data
@@ -1315,4 +1318,8 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 void fuse_file_release(struct inode *inode, struct fuse_file *ff,
 		       unsigned int open_flags, fl_owner_t id, bool isdir);
 
+int fuse_invalidate_inode_pages(struct inode *inode);
+int fuse_invalidate_inode_pages_range(struct inode *inode,
+				      pgoff_t start, pgoff_t end);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ee846ce371d8..997c620f25df 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -86,6 +86,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	fi->state = 0;
 	mutex_init(&fi->mutex);
 	spin_lock_init(&fi->lock);
+	mutex_init(&fi->atomic_trunc_mutex);
 	fi->forget = fuse_alloc_forget();
 	if (!fi->forget)
 		goto out_free;
@@ -107,6 +108,7 @@ static void fuse_free_inode(struct inode *inode)
 	struct fuse_inode *fi = get_fuse_inode(inode);
 
 	mutex_destroy(&fi->mutex);
+	mutex_destroy(&fi->atomic_trunc_mutex);
 	kfree(fi->forget);
 #ifdef CONFIG_FUSE_DAX
 	kfree(fi->dax);
@@ -299,7 +301,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 		}
 
 		if (inval)
-			invalidate_inode_pages2(inode->i_mapping);
+			fuse_invalidate_inode_pages(inode);
 	}
 
 	if (IS_ENABLED(CONFIG_FUSE_DAX))
@@ -448,8 +450,7 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
 			pg_end = -1;
 		else
 			pg_end = (offset + len - 1) >> PAGE_SHIFT;
-		invalidate_inode_pages2_range(inode->i_mapping,
-					      pg_start, pg_end);
+		fuse_invalidate_inode_pages_range(inode, pg_start, pg_end);
 	}
 	iput(inode);
 	return 0;
-- 
2.20.1


             reply	other threads:[~2021-12-29  4:03 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-29  4:02 Jiachen Zhang [this message]
2022-02-23  8:50 ` [PATCH v2] fuse: fix deadlock between atomic O_TRUNC open() and page invalidations Miklos Szeredi
2022-03-04  6:23   ` [External] " Jiachen Zhang
2022-03-04 15:30     ` Miklos Szeredi
2022-03-07  7:55       ` Jiachen Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211229040239.66075-1-zhangjiachen.jaycee@bytedance.com \
    --to=zhangjiachen.jaycee@bytedance.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=xieyongji@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.