All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [RFC PATCH 03/62] new inode method: ->free_inode()
Date: Tue, 16 Apr 2019 18:52:41 +0100	[thread overview]
Message-ID: <20190416175340.21068-3-viro@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20190416175340.21068-1-viro@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

A lot of ->destroy_inode() instances end with call_rcu() of a callback
that does RCU-delayed part of freeing.  Introduce a new method for
doing just that, with saner signature.

Rules:
->destroy_inode		->free_inode
	f			g		immediate call of f(),
						RCU-delayed call of g()
	f			NULL		immediate call of f(),
						no RCU-delayed calls
	NULL			g		RCU-delayed call of g()
	NULL			NULL		RCU-delayed default freeing

IOW, NULL ->free_inode gives the same behaviour as now.

Note that NULL, NULL is equivalent to NULL, free_inode_nonrcu; we could
mandate the latter form, but that would have very little benefit beyond
making rules a bit more symmetric.  It would break backwards compatibility,
require extra boilerplate and expected semantics for (NULL, NULL) pair
would have no use whatsoever...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 ++
 Documentation/filesystems/porting | 17 ++++++++++++
 fs/inode.c                        | 54 +++++++++++++++++++++++----------------
 include/linux/fs.h                |  1 +
 4 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index efea228ccd8a..7b20c385cc02 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -118,6 +118,7 @@ set:		exclusive
 --------------------------- super_operations ---------------------------
 prototypes:
 	struct inode *(*alloc_inode)(struct super_block *sb);
+	void (*free_inode)(struct inode *);
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *, int flags);
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
@@ -139,6 +140,7 @@ locking rules:
 	All may block [not true, see below]
 			s_umount
 alloc_inode:
+free_inode:				called from RCU callback
 destroy_inode:
 dirty_inode:
 write_inode:
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index cf43bc4dbf31..9d80f9e0855e 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -638,3 +638,20 @@ in your dentry operations instead.
 	inode to d_splice_alias() will also do the right thing (equivalent of
 	d_add(dentry, NULL); return NULL;), so that kind of special cases
 	also doesn't need a separate treatment.
+--
+[strongly recommended]
+	take the RCU-delayed parts of ->destroy_inode() into a new method -
+	->free_inode().  If ->destroy_inode() becomes empty - all the better,
+	just get rid of it.  Synchronous work (e.g. the stuff that can't
+	be done from an RCU callback, or any WARN_ON() where we want the
+	stack trace) *might* be movable to ->evict_inode(); however,
+	that goes only for the things that are not needed to balance something
+	done by ->alloc_inode().  IOW, if it's cleaning up the stuff that
+	might have accumulated over the life of in-core inode, ->evict_inode()
+	might be a fit.
+
+	Rules for inode destruction:
+		* if ->destroy_inode() is non-NULL, it gets called
+		* if ->free_inode() is non-NULL, it gets scheduled by call_rcu()
+		* combination of NULL ->destroy_inode and NULL ->free_inode is
+		  treated as NULL/free_inode_nonrcu, to preserve the compatibility.
diff --git a/fs/inode.c b/fs/inode.c
index e9d97add2b36..fb45590d284e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -202,12 +202,28 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 }
 EXPORT_SYMBOL(inode_init_always);
 
+void free_inode_nonrcu(struct inode *inode)
+{
+	kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
+static void i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	if (inode->i_sb->s_op->free_inode)
+		inode->i_sb->s_op->free_inode(inode);
+	else
+		free_inode_nonrcu(inode);
+}
+
 static struct inode *alloc_inode(struct super_block *sb)
 {
+	const struct super_operations *ops = sb->s_op;
 	struct inode *inode;
 
-	if (sb->s_op->alloc_inode)
-		inode = sb->s_op->alloc_inode(sb);
+	if (ops->alloc_inode)
+		inode = ops->alloc_inode(sb);
 	else
 		inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
 
@@ -215,22 +231,18 @@ static struct inode *alloc_inode(struct super_block *sb)
 		return NULL;
 
 	if (unlikely(inode_init_always(sb, inode))) {
-		if (inode->i_sb->s_op->destroy_inode)
-			inode->i_sb->s_op->destroy_inode(inode);
-		else
-			kmem_cache_free(inode_cachep, inode);
+		if (ops->destroy_inode) {
+			ops->destroy_inode(inode);
+			if (!ops->free_inode)
+				return NULL;
+		}
+		i_callback(&inode->i_rcu);
 		return NULL;
 	}
 
 	return inode;
 }
 
-void free_inode_nonrcu(struct inode *inode)
-{
-	kmem_cache_free(inode_cachep, inode);
-}
-EXPORT_SYMBOL(free_inode_nonrcu);
-
 void __destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
@@ -253,20 +265,18 @@ void __destroy_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(__destroy_inode);
 
-static void i_callback(struct rcu_head *head)
-{
-	struct inode *inode = container_of(head, struct inode, i_rcu);
-	kmem_cache_free(inode_cachep, inode);
-}
-
 static void destroy_inode(struct inode *inode)
 {
+	const struct super_operations *ops = inode->i_sb->s_op;
+
 	BUG_ON(!list_empty(&inode->i_lru));
 	__destroy_inode(inode);
-	if (inode->i_sb->s_op->destroy_inode)
-		inode->i_sb->s_op->destroy_inode(inode);
-	else
-		call_rcu(&inode->i_rcu, i_callback);
+	if (ops->destroy_inode) {
+		ops->destroy_inode(inode);
+		if (!ops->free_inode)
+			return;
+	}
+	call_rcu(&inode->i_rcu, i_callback);
 }
 
 /**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..2e9b9f87caca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1903,6 +1903,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
+	void (*free_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *, int flags);
 	int (*write_inode) (struct inode *, struct writeback_control *wbc);
-- 
2.11.0


  parent reply	other threads:[~2019-04-16 18:00 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-16 17:49 [RFC][PATCHSET] sorting out RCU-delayed stuff in ->destroy_inode() Al Viro
2019-04-16 17:52 ` [RFC PATCH 01/62] securityfs: fix use-after-free on symlink traversal Al Viro
2019-04-16 17:52   ` [RFC PATCH 02/62] apparmorfs: " Al Viro
2019-04-16 17:52   ` Al Viro [this message]
2019-04-16 17:52   ` [RFC PATCH 04/62] spufs: switch to ->free_inode() Al Viro
2019-04-16 17:52   ` [RFC PATCH 05/62] erofs: " Al Viro
2019-04-18 14:01     ` Gao Xiang
2019-04-18 14:01       ` Gao Xiang
2019-04-16 17:52   ` [RFC PATCH 06/62] 9p: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 07/62] adfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 08/62] affs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 09/62] befs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 10/62] bfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 11/62] bdev: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 12/62] cifs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 13/62] debugfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 14/62] efs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 15/62] ext2: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 16/62] f2fs: " Al Viro
2019-04-20  2:52     ` Chao Yu
2019-04-16 17:52   ` [RFC PATCH 17/62] fat: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 18/62] freevxfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 19/62] gfs2: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 20/62] hfs: " Al Viro
2019-04-16 17:52   ` [RFC PATCH 21/62] hfsplus: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 22/62] hostfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 23/62] hpfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 24/62] isofs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 25/62] jffs2: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 26/62] minix: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 27/62] nfs{,4}: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 28/62] nilfs2: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 29/62] dlmfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 30/62] ocfs2: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 31/62] openpromfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 32/62] procfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 33/62] qnx4: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 34/62] qnx6: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 35/62] reiserfs: convert " Al Viro
2019-04-16 17:53   ` [RFC PATCH 36/62] romfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 37/62] squashfs: switch " Al Viro
2019-04-16 17:53   ` [RFC PATCH 38/62] ubifs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 39/62] udf: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 40/62] sysv: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 41/62] coda: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 42/62] ufs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 43/62] mqueue: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 44/62] bpf: " Al Viro
2019-04-16 18:07     ` Alexei Starovoitov
2019-04-16 21:34       ` Song Liu
2019-04-16 17:53   ` [RFC PATCH 45/62] rpcpipe: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 46/62] apparmor: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 47/62] securityfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 48/62] ntfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 49/62] dax: make use of ->free_inode() Al Viro
2019-04-18 12:16     ` Jan Kara
2019-04-18 16:58       ` Dan Williams
2019-04-16 17:53   ` [RFC PATCH 50/62] afs: switch to " Al Viro
2019-04-16 17:53   ` [RFC PATCH 51/62] btrfs: use ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 52/62] ceph: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 53/62] ecryptfs: make use of ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 54/62] ext4: " Al Viro
2019-04-18 12:10     ` Jan Kara
2019-04-16 17:53   ` [RFC PATCH 55/62] fuse: switch to ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 56/62] jfs: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 57/62] overlayfs: make use of ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 58/62] hugetlb: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 59/62] shmem: " Al Viro
2019-04-16 17:53   ` [RFC PATCH 60/62] orangefs: " Al Viro
2019-04-22 21:14     ` Mike Marshall
2019-04-22 21:56       ` Linus Torvalds
2019-04-22 23:10         ` Al Viro
2019-04-22 23:17           ` Mike Marshall
2019-04-16 17:53   ` [RFC PATCH 61/62] sockfs: switch to ->free_inode() Al Viro
2019-04-16 17:53   ` [RFC PATCH 62/62] coallocate socket->wq with socket itself Al Viro
2019-04-16 18:01 ` [RFC][PATCHSET] sorting out RCU-delayed stuff in ->destroy_inode() Linus Torvalds
2019-04-30  3:09   ` Al Viro
     [not found]     ` <CAHk-=wiMvCR0iENUVorfU-3EMC7G8RNSeHSQrz9tndP1uSg2BQ@mail.gmail.com>
2019-04-30  4:00       ` Al Viro
2019-05-01  1:59         ` Al Viro
2019-04-30  4:18     ` Andreas Dilger
2019-04-30  4:26       ` Al Viro
2019-04-30  5:26         ` Andreas Dilger
2019-04-17 15:55 ` David Sterba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190416175340.21068-3-viro@ZenIV.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.