linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [RFC PATCH 03/62] new inode method: ->free_inode()
Date: Tue, 16 Apr 2019 18:52:41 +0100	[thread overview]
Message-ID: <20190416175340.21068-3-viro@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20190416175340.21068-1-viro@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

A lot of ->destroy_inode() instances end with call_rcu() of a callback
that does the RCU-delayed part of freeing.  Introduce a new method for
doing just that, with a saner signature.

Rules:
->destroy_inode		->free_inode
	f			g		immediate call of f(),
						RCU-delayed call of g()
	f			NULL		immediate call of f(),
						no RCU-delayed calls
	NULL			g		RCU-delayed call of g()
	NULL			NULL		RCU-delayed default freeing

IOW, NULL ->free_inode gives the same behaviour as now.

Note that (NULL, NULL) is equivalent to (NULL, free_inode_nonrcu); we could
mandate the latter form, but that would have very little benefit beyond
making the rules a bit more symmetric.  It would break backwards
compatibility and require extra boilerplate, and the expected semantics for
the (NULL, NULL) pair would have no use whatsoever...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 ++
 Documentation/filesystems/porting | 17 ++++++++++++
 fs/inode.c                        | 54 +++++++++++++++++++++++----------------
 include/linux/fs.h                |  1 +
 4 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index efea228ccd8a..7b20c385cc02 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -118,6 +118,7 @@ set:		exclusive
 --------------------------- super_operations ---------------------------
 prototypes:
 	struct inode *(*alloc_inode)(struct super_block *sb);
+	void (*free_inode)(struct inode *);
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *, int flags);
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
@@ -139,6 +140,7 @@ locking rules:
 	All may block [not true, see below]
 			s_umount
 alloc_inode:
+free_inode:				called from RCU callback
 destroy_inode:
 dirty_inode:
 write_inode:
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index cf43bc4dbf31..9d80f9e0855e 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -638,3 +638,20 @@ in your dentry operations instead.
 	inode to d_splice_alias() will also do the right thing (equivalent of
 	d_add(dentry, NULL); return NULL;), so that kind of special cases
 	also doesn't need a separate treatment.
+--
+[strongly recommended]
+	take the RCU-delayed parts of ->destroy_inode() into a new method -
+	->free_inode().  If ->destroy_inode() becomes empty - all the better,
+	just get rid of it.  Synchronous work (e.g. the stuff that can't
+	be done from an RCU callback, or any WARN_ON() where we want the
+	stack trace) *might* be movable to ->evict_inode(); however,
+	that goes only for the things that are not needed to balance something
+	done by ->alloc_inode().  IOW, if it's cleaning up the stuff that
+	might have accumulated over the life of in-core inode, ->evict_inode()
+	might be a fit.
+
+	Rules for inode destruction:
+		* if ->destroy_inode() is non-NULL, it gets called
+		* if ->free_inode() is non-NULL, it gets scheduled by call_rcu()
+		* combination of NULL ->destroy_inode and NULL ->free_inode is
+		  treated as NULL/free_inode_nonrcu, to preserve the compatibility.
diff --git a/fs/inode.c b/fs/inode.c
index e9d97add2b36..fb45590d284e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -202,12 +202,28 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 }
 EXPORT_SYMBOL(inode_init_always);
 
+void free_inode_nonrcu(struct inode *inode)
+{
+	kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	if (inode->i_sb->s_op->free_inode)
+		inode->i_sb->s_op->free_inode(inode);
+	else
+		free_inode_nonrcu(inode);
+}
+
 static struct inode *alloc_inode(struct super_block *sb)
 {
+	const struct super_operations *ops = sb->s_op;
 	struct inode *inode;
 
-	if (sb->s_op->alloc_inode)
-		inode = sb->s_op->alloc_inode(sb);
+	if (ops->alloc_inode)
+		inode = ops->alloc_inode(sb);
 	else
 		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
@@ -215,22 +231,18 @@ static struct inode *alloc_inode(struct super_block *sb)
 		return NULL;
 
 	if (unlikely(inode_init_always(sb, inode))) {
-		if (inode->i_sb->s_op->destroy_inode)
-			inode->i_sb->s_op->destroy_inode(inode);
-		else
-			kmem_cache_free(inode_cachep, inode);
+		if (ops->destroy_inode) {
+			ops->destroy_inode(inode);
+			if (!ops->free_inode)
+				return NULL;
+		}
+		i_callback(&inode->i_rcu);
 		return NULL;
 	}
 
 	return inode;
 }
 
-void free_inode_nonrcu(struct inode *inode)
-{
-	kmem_cache_free(inode_cachep, inode);
-}
-EXPORT_SYMBOL(free_inode_nonrcu);
-
 void __destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
@@ -253,20 +265,18 @@ void __destroy_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(__destroy_inode);
 
-static void i_callback(struct rcu_head *head)
-{
-	struct inode *inode = container_of(head, struct inode, i_rcu);
-	kmem_cache_free(inode_cachep, inode);
-}
-
 static void destroy_inode(struct inode *inode)
 {
+	const struct super_operations *ops = inode->i_sb->s_op;
+
 	BUG_ON(!list_empty(&inode->i_lru));
 	__destroy_inode(inode);
-	if (inode->i_sb->s_op->destroy_inode)
-		inode->i_sb->s_op->destroy_inode(inode);
-	else
-		call_rcu(&inode->i_rcu, i_callback);
+	if (ops->destroy_inode) {
+		ops->destroy_inode(inode);
+		if (!ops->free_inode)
+			return;
+	}
+	call_rcu(&inode->i_rcu, i_callback);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..2e9b9f87caca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1903,6 +1903,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
+	void (*free_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *, int flags);
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
-- 
2.11.0


  parent reply	other threads:[~2019-04-16 18:00 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-16 17:49 [RFC][PATCHSET] sorting out RCU-delayed stuff in ->destroy_inode() Al Viro
2019-04-16 17:52 ` [RFC PATCH 01/62] securityfs: fix use-after-free on symlink traversal Al Viro
2019-04-16 17:52   ` [RFC PATCH 02/62] apparmorfs: " Al Viro
2019-04-16 17:52   ` Al Viro [this message]
2019-04-16 17:52   ` [RFC PATCH 04/62] spufs: switch to ->free_inode() Al Viro
2019-04-16 17:52   ` [RFC PATCH 05/62] erofs: " Al Viro
2019-04-18 14:01     ` Gao Xiang
2019-04-16 17:52   ` [RFC PATCH 06/62] 9p: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 07/62] adfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 08/62] affs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 09/62] befs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 10/62] bfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 11/62] bdev: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 12/62] cifs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 13/62] debugfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 14/62] efs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 15/62] ext2: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 16/62] f2fs: " Al Viro
2019-04-20  2:52     ` Chao Yu
2019-04-16 17:52   ` [RFC PATCH 17/62] fat: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 18/62] freevxfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 19/62] gfs2: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 20/62] hfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 21/62] hfsplus: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 22/62] hostfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 23/62] hpfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 24/62] isofs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 25/62] jffs2: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 26/62] minix: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 27/62] nfs{,4}: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 28/62] nilfs2: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 29/62] dlmfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 30/62] ocfs2: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 31/62] openpromfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 32/62] procfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 33/62] qnx4: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 34/62] qnx6: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 35/62] reiserfs: convert " Al Viro
2019-04-16 17:53   ` [RFC PATCH 36/62] romfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 37/62] squashfs: switch " Al Viro
2019-04-16 17:53   ` [RFC PATCH 38/62] ubifs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 39/62] udf: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 40/62] sysv: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 41/62] coda: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 42/62] ufs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 43/62] mqueue: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 44/62] bpf: " Al Viro
2019-04-16 18:07     ` Alexei Starovoitov
2019-04-16 21:34       ` Song Liu
2019-04-16 17:53   ` [RFC PATCH 45/62] rpcpipe: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 46/62] apparmor: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 47/62] securityfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 48/62] ntfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 49/62] dax: make use of ->free_inode() Al Viro
2019-04-18 12:16     ` Jan Kara
2019-04-18 16:58       ` Dan Williams
2019-04-16 17:53   ` [RFC PATCH 50/62] afs: switch to " Al Viro
2019-04-16 17:53   ` [RFC PATCH 51/62] btrfs: use ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 52/62] ceph: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 53/62] ecryptfs: make use of ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 54/62] ext4: " Al Viro
2019-04-18 12:10     ` Jan Kara
2019-04-16 17:53   ` [RFC PATCH 55/62] fuse: switch to ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 56/62] jfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 57/62] overlayfs: make use of ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 58/62] hugetlb: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 59/62] shmem: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 60/62] orangefs: " Al Viro
2019-04-22 21:14     ` Mike Marshall
2019-04-22 21:56       ` Linus Torvalds
2019-04-22 23:10         ` Al Viro
2019-04-22 23:17           ` Mike Marshall
2019-04-16 17:53   ` [RFC PATCH 61/62] sockfs: switch to ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 62/62] coallocate socket->wq with socket itself Al Viro
2019-04-16 18:01 ` [RFC][PATCHSET] sorting out RCU-delayed stuff in ->destroy_inode() Linus Torvalds
2019-04-30  3:09   ` Al Viro
     [not found]     ` <CAHk-=wiMvCR0iENUVorfU-3EMC7G8RNSeHSQrz9tndP1uSg2BQ@mail.gmail.com>
2019-04-30  4:00       ` Al Viro
2019-05-01  1:59         ` Al Viro
2019-04-30  4:18     ` Andreas Dilger
2019-04-30  4:26       ` Al Viro
2019-04-30  5:26         ` Andreas Dilger
2019-04-17 15:55 ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190416175340.21068-3-viro@ZenIV.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).