From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [RFC PATCH 03/62] new inode method: ->free_inode()
Date: Tue, 16 Apr 2019 18:52:41 +0100 [thread overview]
Message-ID: <20190416175340.21068-3-viro@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20190416175340.21068-1-viro@ZenIV.linux.org.uk>
From: Al Viro <viro@zeniv.linux.org.uk>
A lot of ->destroy_inode() instances end with call_rcu() of a callback
that does the RCU-delayed part of freeing. Introduce a new method for
doing just that, with a saner signature.
Rules:
->destroy_inode   ->free_inode
      f                g         immediate call of f(),
                                 RCU-delayed call of g()
      f               NULL       immediate call of f(),
                                 no RCU-delayed calls
     NULL              g         RCU-delayed call of g()
     NULL             NULL       RCU-delayed default freeing
IOW, NULL ->free_inode gives the same behaviour as now.
Note that (NULL, NULL) is equivalent to (NULL, free_inode_nonrcu); we could
mandate the latter form, but that would have very little benefit beyond
making the rules a bit more symmetric. It would break backwards
compatibility and require extra boilerplate, and the expected semantics
for the (NULL, NULL) pair would have no use whatsoever...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
Documentation/filesystems/Locking | 2 ++
Documentation/filesystems/porting | 17 ++++++++++++
fs/inode.c | 54 +++++++++++++++++++++++----------------
include/linux/fs.h | 1 +
4 files changed, 52 insertions(+), 22 deletions(-)
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index efea228ccd8a..7b20c385cc02 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -118,6 +118,7 @@ set: exclusive
--------------------------- super_operations ---------------------------
prototypes:
struct inode *(*alloc_inode)(struct super_block *sb);
+ void (*free_inode)(struct inode *);
void (*destroy_inode)(struct inode *);
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
@@ -139,6 +140,7 @@ locking rules:
All may block [not true, see below]
s_umount
alloc_inode:
+free_inode: called from RCU callback
destroy_inode:
dirty_inode:
write_inode:
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index cf43bc4dbf31..9d80f9e0855e 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -638,3 +638,20 @@ in your dentry operations instead.
inode to d_splice_alias() will also do the right thing (equivalent of
d_add(dentry, NULL); return NULL;), so that kind of special cases
also doesn't need a separate treatment.
+--
+[strongly recommended]
+ take the RCU-delayed parts of ->destroy_inode() into a new method -
+ ->free_inode(). If ->destroy_inode() becomes empty - all the better,
+ just get rid of it. Synchronous work (e.g. the stuff that can't
+ be done from an RCU callback, or any WARN_ON() where we want the
+ stack trace) *might* be movable to ->evict_inode(); however,
+ that goes only for the things that are not needed to balance something
+ done by ->alloc_inode(). IOW, if it's cleaning up the stuff that
+ might have accumulated over the life of in-core inode, ->evict_inode()
+ might be a fit.
+
+ Rules for inode destruction:
+ * if ->destroy_inode() is non-NULL, it gets called
+ * if ->free_inode() is non-NULL, it gets scheduled by call_rcu()
+ * combination of NULL ->destroy_inode and NULL ->free_inode is
+ treated as NULL/free_inode_nonrcu, to preserve the compatibility.
diff --git a/fs/inode.c b/fs/inode.c
index e9d97add2b36..fb45590d284e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -202,12 +202,28 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
}
EXPORT_SYMBOL(inode_init_always);
+void free_inode_nonrcu(struct inode *inode)
+{
+ kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ if (inode->i_sb->s_op->free_inode)
+ inode->i_sb->s_op->free_inode(inode);
+ else
+ free_inode_nonrcu(inode);
+}
+
static struct inode *alloc_inode(struct super_block *sb)
{
+ const struct super_operations *ops = sb->s_op;
struct inode *inode;
- if (sb->s_op->alloc_inode)
- inode = sb->s_op->alloc_inode(sb);
+ if (ops->alloc_inode)
+ inode = ops->alloc_inode(sb);
else
inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
@@ -215,22 +231,18 @@ static struct inode *alloc_inode(struct super_block *sb)
return NULL;
if (unlikely(inode_init_always(sb, inode))) {
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- kmem_cache_free(inode_cachep, inode);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return NULL;
+ }
+ i_callback(&inode->i_rcu);
return NULL;
}
return inode;
}
-void free_inode_nonrcu(struct inode *inode)
-{
- kmem_cache_free(inode_cachep, inode);
-}
-EXPORT_SYMBOL(free_inode_nonrcu);
-
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
@@ -253,20 +265,18 @@ void __destroy_inode(struct inode *inode)
}
EXPORT_SYMBOL(__destroy_inode);
-static void i_callback(struct rcu_head *head)
-{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- kmem_cache_free(inode_cachep, inode);
-}
-
static void destroy_inode(struct inode *inode)
{
+ const struct super_operations *ops = inode->i_sb->s_op;
+
BUG_ON(!list_empty(&inode->i_lru));
__destroy_inode(inode);
- if (inode->i_sb->s_op->destroy_inode)
- inode->i_sb->s_op->destroy_inode(inode);
- else
- call_rcu(&inode->i_rcu, i_callback);
+ if (ops->destroy_inode) {
+ ops->destroy_inode(inode);
+ if (!ops->free_inode)
+ return;
+ }
+ call_rcu(&inode->i_rcu, i_callback);
}
/**
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd28e7679089..2e9b9f87caca 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1903,6 +1903,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
+ void (*free_inode)(struct inode *);
void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, struct writeback_control *wbc);
--
2.11.0
next prev parent reply other threads:[~2019-04-16 18:00 UTC|newest]
Thread overview: 82+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-16 17:49 [RFC][PATCHSET] sorting out RCU-delayed stuff in ->destroy_inode() Al Viro
2019-04-16 17:52 ` [RFC PATCH 01/62] securityfs: fix use-after-free on symlink traversal Al Viro
2019-04-16 17:52 ` [RFC PATCH 02/62] apparmorfs: " Al Viro
2019-04-16 17:52 ` Al Viro [this message]
2019-04-16 17:52 ` [RFC PATCH 04/62] spufs: switch to ->free_inode() Al Viro
2019-04-16 17:52 ` [RFC PATCH 05/62] erofs: " Al Viro
2019-04-18 14:01 ` Gao Xiang
2019-04-16 17:52 ` [RFC PATCH 06/62] 9p: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 07/62] adfs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 08/62] affs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 09/62] befs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 10/62] bfs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 11/62] bdev: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 12/62] cifs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 13/62] debugfs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 14/62] efs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 15/62] ext2: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 16/62] f2fs: " Al Viro
2019-04-20 2:52 ` Chao Yu
2019-04-16 17:52 ` [RFC PATCH 17/62] fat: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 18/62] freevxfs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 19/62] gfs2: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 20/62] hfs: " Al Viro
2019-04-16 17:52 ` [RFC PATCH 21/62] hfsplus: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 22/62] hostfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 23/62] hpfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 24/62] isofs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 25/62] jffs2: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 26/62] minix: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 27/62] nfs{,4}: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 28/62] nilfs2: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 29/62] dlmfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 30/62] ocfs2: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 31/62] openpromfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 32/62] procfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 33/62] qnx4: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 34/62] qnx6: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 35/62] reiserfs: convert " Al Viro
2019-04-16 17:53 ` [RFC PATCH 36/62] romfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 37/62] squashfs: switch " Al Viro
2019-04-16 17:53 ` [RFC PATCH 38/62] ubifs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 39/62] udf: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 40/62] sysv: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 41/62] coda: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 42/62] ufs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 43/62] mqueue: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 44/62] bpf: " Al Viro
2019-04-16 18:07 ` Alexei Starovoitov
2019-04-16 21:34 ` Song Liu
2019-04-16 17:53 ` [RFC PATCH 45/62] rpcpipe: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 46/62] apparmor: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 47/62] securityfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 48/62] ntfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 49/62] dax: make use of ->free_inode() Al Viro
2019-04-18 12:16 ` Jan Kara
2019-04-18 16:58 ` Dan Williams
2019-04-16 17:53 ` [RFC PATCH 50/62] afs: switch to " Al Viro
2019-04-16 17:53 ` [RFC PATCH 51/62] btrfs: use ->free_inode() Al Viro
2019-04-16 17:53 ` [RFC PATCH 52/62] ceph: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 53/62] ecryptfs: make use of ->free_inode() Al Viro
2019-04-16 17:53 ` [RFC PATCH 54/62] ext4: " Al Viro
2019-04-18 12:10 ` Jan Kara
2019-04-16 17:53 ` [RFC PATCH 55/62] fuse: switch to ->free_inode() Al Viro
2019-04-16 17:53 ` [RFC PATCH 56/62] jfs: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 57/62] overlayfs: make use of ->free_inode() Al Viro
2019-04-16 17:53 ` [RFC PATCH 58/62] hugetlb: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 59/62] shmem: " Al Viro
2019-04-16 17:53 ` [RFC PATCH 60/62] orangefs: " Al Viro
2019-04-22 21:14 ` Mike Marshall
2019-04-22 21:56 ` Linus Torvalds
2019-04-22 23:10 ` Al Viro
2019-04-22 23:17 ` Mike Marshall
2019-04-16 17:53 ` [RFC PATCH 61/62] sockfs: switch to ->free_inode() Al Viro
2019-04-16 17:53 ` [RFC PATCH 62/62] coallocate socket->wq with socket itself Al Viro
2019-04-16 18:01 ` [RFC][PATCHSET] sorting out RCU-delayed stuff in ->destroy_inode() Linus Torvalds
2019-04-30 3:09 ` Al Viro
[not found] ` <CAHk-=wiMvCR0iENUVorfU-3EMC7G8RNSeHSQrz9tndP1uSg2BQ@mail.gmail.com>
2019-04-30 4:00 ` Al Viro
2019-05-01 1:59 ` Al Viro
2019-04-30 4:18 ` Andreas Dilger
2019-04-30 4:26 ` Al Viro
2019-04-30 5:26 ` Andreas Dilger
2019-04-17 15:55 ` David Sterba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190416175340.21068-3-viro@ZenIV.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).