From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Matthew Garrett <matthewgarrett@google.com>,
jmorris@namei.org, linux-security-module@vger.kernel.org,
linux-api@vger.kernel.org, Ben Hutchings <ben@decadent.org.uk>,
Al Viro <viro@ZenIV.linux.org.uk>,
Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH] tracefs: Do not allocate and free proxy_ops for lockdown
Date: Fri, 11 Oct 2019 13:54:58 -0400 [thread overview]
Message-ID: <20191011135458.7399da44@gandalf.local.home> (raw)
[-- Attachment #1: Type: text/plain, Size: 7978 bytes --]
[ Attached the reproducers to this email ]
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Running the latest kernel through my "make instances" stress tests, I
triggered the following bug (with KASAN and kmemleak enabled):
mkdir invoked oom-killer:
gfp_mask=0x40cd0(GFP_KERNEL|__GFP_COMP|__GFP_RECLAIMABLE), order=0,
oom_score_adj=0
CPU: 1 PID: 2229 Comm: mkdir Not tainted 5.4.0-rc2-test #325
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
Call Trace:
dump_stack+0x64/0x8c
dump_header+0x43/0x3b7
? trace_hardirqs_on+0x48/0x4a
oom_kill_process+0x68/0x2d5
out_of_memory+0x2aa/0x2d0
__alloc_pages_nodemask+0x96d/0xb67
__alloc_pages_node+0x19/0x1e
alloc_slab_page+0x17/0x45
new_slab+0xd0/0x234
___slab_alloc.constprop.86+0x18f/0x336
? alloc_inode+0x2c/0x74
? irq_trace+0x12/0x1e
? tracer_hardirqs_off+0x1d/0xd7
? __slab_alloc.constprop.85+0x21/0x53
__slab_alloc.constprop.85+0x31/0x53
? __slab_alloc.constprop.85+0x31/0x53
? alloc_inode+0x2c/0x74
kmem_cache_alloc+0x50/0x179
? alloc_inode+0x2c/0x74
alloc_inode+0x2c/0x74
new_inode_pseudo+0xf/0x48
new_inode+0x15/0x25
tracefs_get_inode+0x23/0x7c
? lookup_one_len+0x54/0x6c
tracefs_create_file+0x53/0x11d
trace_create_file+0x15/0x33
event_create_dir+0x2a3/0x34b
__trace_add_new_event+0x1c/0x26
event_trace_add_tracer+0x56/0x86
trace_array_create+0x13e/0x1e1
instance_mkdir+0x8/0x17
tracefs_syscall_mkdir+0x39/0x50
? get_dname+0x31/0x31
vfs_mkdir+0x78/0xa3
do_mkdirat+0x71/0xb0
sys_mkdir+0x19/0x1b
do_fast_syscall_32+0xb0/0xed
I bisected this down to the addition of the proxy_ops into tracefs for
lockdown. It appears that allocating the proxy_ops and then freeing it in
the destroy_inode callback is causing havoc with the memory system.
Reading the documentation about destroy_inode, I'm not sure that this is the
proper way to handle allocating and then freeing the fops of the inode.
Instead of allocating the proxy_ops (and then having to free it), I created
a static proxy_ops. As tracefs only uses a subset of all the file_operations
methods, that subset can be defined in the static proxy_ops, and then the
passed in fops during the creation of the inode is saved in the dentry, and
that is used by the proxy_ops to call the real functions.
Fixes: ccbd54ff54e8 ("tracefs: Restrict tracefs when the kernel is locked down")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
fs/tracefs/inode.c | 153 +++++++++++++++++++++++++++++++++++++++------
1 file changed, 135 insertions(+), 18 deletions(-)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 9fc14e38927f..d0e8e4a16812 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -20,6 +20,7 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include <linux/poll.h>
#include <linux/security.h>
#define TRACEFS_DEFAULT_MODE 0700
@@ -28,7 +29,7 @@ static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
static bool tracefs_registered;
-static int default_open_file(struct inode *inode, struct file *filp)
+static int proxy_open(struct inode *inode, struct file *filp)
{
struct dentry *dentry = filp->f_path.dentry;
struct file_operations *real_fops;
@@ -47,6 +48,138 @@ static int default_open_file(struct inode *inode, struct file *filp)
return real_fops->open(inode, filp);
}
+static ssize_t proxy_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct file_operations *real_fops;
+
+ if (!dentry)
+ return -EINVAL;
+
+ real_fops = dentry->d_fsdata;
+
+ if (real_fops->read)
+ return real_fops->read(file, buf, count, pos);
+ else
+ return -EINVAL;
+}
+
+static ssize_t proxy_write(struct file *file, const char __user *p,
+ size_t count, loff_t *pos)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct file_operations *real_fops;
+
+ if (!dentry)
+ return -EINVAL;
+
+ real_fops = dentry->d_fsdata;
+
+ if (real_fops->write)
+ return real_fops->write(file, p, count, pos);
+ else
+ return -EINVAL;
+}
+
+static loff_t proxy_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct file_operations *real_fops;
+ loff_t (*fn)(struct file *, loff_t, int);
+
+ if (!dentry)
+ return -EINVAL;
+
+ real_fops = dentry->d_fsdata;
+
+ fn = no_llseek;
+ if (file->f_mode & FMODE_LSEEK) {
+ if (real_fops->llseek)
+ fn = real_fops->llseek;
+ }
+ return fn(file, offset, whence);
+}
+
+static int proxy_release(struct inode *inode, struct file *filp)
+{
+ struct dentry *dentry = filp->f_path.dentry;
+ struct file_operations *real_fops;
+
+ if (!dentry)
+ return 0;
+
+ real_fops = dentry->d_fsdata;
+
+ if (real_fops->release)
+ return real_fops->release(inode, filp);
+ return 0;
+}
+
+static __poll_t proxy_poll(struct file *file, struct poll_table_struct *pt)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct file_operations *real_fops;
+
+ if (!dentry)
+ return 0;
+
+ real_fops = dentry->d_fsdata;
+
+ if (unlikely(!real_fops->poll))
+ return DEFAULT_POLLMASK;
+ return real_fops->poll(file, pt);
+}
+
+static ssize_t proxy_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct dentry *dentry = in->f_path.dentry;
+ struct file_operations *real_fops;
+ ssize_t (*splice_read)(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
+
+ if (!dentry)
+ return 0;
+
+ real_fops = dentry->d_fsdata;
+
+ if (real_fops->splice_read)
+ splice_read = real_fops->splice_read;
+ else
+ splice_read = generic_file_splice_read;
+
+ return splice_read(in, ppos, pipe, len, flags);
+}
+
+static int proxy_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct file_operations *real_fops;
+
+ if (!dentry)
+ return 0;
+
+ real_fops = dentry->d_fsdata;
+
+ if (!real_fops->mmap)
+ return -ENODEV;
+
+ return real_fops->mmap(file, vma);
+}
+
+static const struct file_operations proxy_fops = {
+ .open = proxy_open,
+ .read = proxy_read,
+ .write = proxy_write,
+ .llseek = proxy_llseek,
+ .release = proxy_release,
+ .poll = proxy_poll,
+ .splice_read = proxy_splice_read,
+ .mmap = proxy_mmap,
+};
+
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -241,12 +374,6 @@ static int tracefs_apply_options(struct super_block *sb)
return 0;
}
-static void tracefs_destroy_inode(struct inode *inode)
-{
- if (S_ISREG(inode->i_mode))
- kfree(inode->i_fop);
-}
-
static int tracefs_remount(struct super_block *sb, int *flags, char *data)
{
int err;
@@ -283,7 +410,6 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
static const struct super_operations tracefs_super_operations = {
.statfs = simple_statfs,
.remount_fs = tracefs_remount,
- .destroy_inode = tracefs_destroy_inode,
.show_options = tracefs_show_options,
};
@@ -414,7 +540,6 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops)
{
- struct file_operations *proxy_fops;
struct dentry *dentry;
struct inode *inode;
@@ -430,20 +555,12 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
if (unlikely(!inode))
return failed_creating(dentry);
- proxy_fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL);
- if (unlikely(!proxy_fops)) {
- iput(inode);
- return failed_creating(dentry);
- }
-
if (!fops)
fops = &tracefs_file_operations;
dentry->d_fsdata = (void *)fops;
- memcpy(proxy_fops, fops, sizeof(*proxy_fops));
- proxy_fops->open = default_open_file;
inode->i_mode = mode;
- inode->i_fop = proxy_fops;
+ inode->i_fop = &proxy_fops;
inode->i_private = data;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
--
2.20.1
[-- Attachment #2: ftrace-test-mkinstances --]
[-- Type: application/octet-stream, Size: 1000 bytes --]
#!/bin/bash
# Stress test: create and remove tracefs instances from several workers at once.
#
# Setup: find the tracefs mount point, move into its "instances" directory and
# confirm that an instance can be removed again. Every "not available" case
# prints a reason and exits 0 so the test is skipped rather than failed.

# Second field of the first /proc/mounts line that mentions tracefs.
tracefs=$(grep tracefs /proc/mounts | head -1 | cut -d' ' -f2)
if [ -z "$tracefs" ]; then
    echo "tracefs not mounted"
    exit 0
fi
if [ ! -d "$tracefs/instances" ]; then
    echo "No instances directory"
    exit 0
fi

# Everything below uses relative paths, so refuse to continue from the wrong
# directory if the cd fails.
cd "$tracefs/instances" || exit 1

# Probe: an instance that cannot be removed makes the rest of the test pointless.
mkdir x
rmdir x
result=$?
if [ $result -ne 0 ]; then
    echo "instance rmdir not supported, skipping this test"
    exit 0
fi
# Endlessly create the instances x, y and z one after another and then remove
# them in the same order. All error output from the loop is discarded.
# Never returns; the caller is expected to stop it with a signal.
instance_slam() {
    while true; do
        for inst in x y z; do
            mkdir $inst
        done
        for inst in x y z; do
            rmdir $inst
        done
    done 2>/dev/null
}
# Launch five concurrent instance_slam workers, printing each worker's PID.
pids=()
for _ in 1 2 3 4 5; do
    instance_slam &
    pids+=("$!")
    echo "$!"
done

# Let the workers run for about ten seconds, listing the directory every second.
n=0
while [ "$n" -lt 10 ]; do
    ls
    sleep 1
    n=$((n + 1))
done

# Send signal 1 (SIGHUP) to every worker, then reap them all.
for pid in "${pids[@]}"; do
    kill -1 "$pid"
done
echo "Wait for processes to finish"
wait "${pids[@]}"
echo "all processes finished, wait for cleanup"
sleep 2

# Final check: after one more create/list/remove cycle none of the instance
# directories may be left behind.
mkdir x y z
ls x y z
rmdir x y z
for d in x y z; do
    [ -d "$d" ] || continue
    echo "$d still exists"
    exit -1
done
echo SUCCESS
exit 0
[-- Attachment #3: ftrace-test-mkinstances-2 --]
[-- Type: application/octet-stream, Size: 890 bytes --]
#!/bin/bash
# Stress test: create/remove a tracefs instance while another worker writes to
# an event file inside it.
#
# Setup: find the tracefs mount point and move into its "instances" directory.
# Every "not available" case prints a reason and exits 0 so the test is
# skipped rather than failed.

# Second field of the first /proc/mounts line that mentions tracefs.
tracefs=$(grep tracefs /proc/mounts | head -1 | cut -d' ' -f2)
if [ -z "$tracefs" ]; then
    echo "tracefs not mounted"
    exit 0
fi
if [ ! -d "$tracefs/instances" ]; then
    echo "No instances directory"
    exit 0
fi

# Everything below uses relative paths, so refuse to continue from the wrong
# directory if the cd fails.
cd "$tracefs/instances" || exit 1
# Endlessly create and remove the instance "foo", discarding both stdout and
# stderr of each command. Never returns; stop it with a signal.
instance_slam() {
    while true; do
        mkdir foo > /dev/null 2>&1
        rmdir foo > /dev/null 2>&1
    done
}
# Endlessly read the "trace" file of instance "foo", discarding all output and
# errors. Never returns; stop it with a signal.
# NOTE(review): nothing in this script starts this worker - confirm whether
# that is intentional.
instance_read() {
    while true; do
        cat foo/trace > /dev/null 2>&1
    done
}
# Endlessly enable the sched:sched_switch event inside instance "foo".
#
# The write goes to the event's "enable" control file. Redirecting into the
# event directory itself can only fail when the shell opens it, so it would
# never touch the event at all.
# Error output of the whole loop is discarded because the write is expected to
# fail whenever the file is not there. Never returns; stop it with a signal.
instance_set() {
    while :; do
        echo 1 > foo/events/sched/sched_switch/enable
    done 2> /dev/null
}
# Start the create/remove worker and the event-write worker in the background.
# $! holds the PID of the most recently started background job, so there is no
# need to scrape it out of the "jobs -l" listing.
instance_slam &
p1=$!
echo $p1
instance_set &
p2=$!
echo $p2

# Let both workers race for ten seconds, then send signal 1 (SIGHUP) and reap.
sleep 10
kill -1 $p1
kill -1 $p2
echo "Wait for processes to finish"
wait $p1 $p2
echo "all processes finished, wait for cleanup"
sleep 2

# Final check: the instance must still be creatable, listable and removable.
mkdir foo
ls foo
rmdir foo
if [ -d foo ]; then
    echo foo still exists
    exit -1
fi
echo SUCCESS
exit 0
next reply other threads:[~2019-10-11 17:54 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-10-11 17:54 Steven Rostedt [this message]
2019-10-11 18:20 ` [PATCH] tracefs: Do not allocate and free proxy_ops for lockdown Linus Torvalds
2019-10-11 18:36 ` Steven Rostedt
2019-10-11 19:24 ` Linus Torvalds
2019-10-11 19:50 ` Ben Hutchings
2019-10-11 21:46 ` Florian Weimer
2019-10-11 22:27 ` Steven Rostedt
2019-10-11 20:25 ` Steven Rostedt
2019-10-11 20:46 ` Linus Torvalds
2019-10-11 21:08 ` Steven Rostedt
2019-10-11 20:54 ` Steven Rostedt
2019-10-11 21:00 ` Linus Torvalds
2019-10-11 21:11 ` Steven Rostedt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191011135458.7399da44@gandalf.local.home \
--to=rostedt@goodmis.org \
--cc=ben@decadent.org.uk \
--cc=jmorris@namei.org \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-security-module@vger.kernel.org \
--cc=matthewgarrett@google.com \
--cc=torvalds@linux-foundation.org \
--cc=viro@ZenIV.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).