All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Guy Briggs <rgb@redhat.com>
To: Paul Moore <paul@paul-moore.com>
Cc: linux-security-module@vger.kernel.org, selinux@vger.kernel.org,
	linux-audit@redhat.com, io-uring@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	Jens Axboe <axboe@kernel.dk>,
	Alexander Viro <viro@zeniv.linux.org.uk>
Subject: Re: [RFC PATCH 2/9] audit, io_uring, io-wq: add some basic audit support to io_uring
Date: Wed, 2 Jun 2021 13:29:24 -0400	[thread overview]
Message-ID: <20210602172924.GM447005@madcap2.tricolour.ca> (raw)
In-Reply-To: <162163379461.8379.9691291608621179559.stgit@sifl>

On 2021-05-21 17:49, Paul Moore wrote:
> WARNING - This is a work in progress and should not be merged
> anywhere important.  It is almost surely not complete, and while it
> probably compiles it likely hasn't been booted and will do terrible
> things.  You have been warned.
> 
> This patch adds basic auditing to io_uring operations, regardless of
> their context.  This is accomplished by allocating audit_context
> structures for the io-wq worker and io_uring SQPOLL kernel threads
> as well as explicitly auditing the io_uring operations in
> io_issue_sqe().  The io_uring operations are audited using a new
> AUDIT_URINGOP record, an example is shown below:
> 
>   % <TODO - insert AUDIT_URINGOP record example>
> 
> Thanks to Richard Guy Briggs for review and feedback.
> 
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
>  fs/io-wq.c                 |    4 +
>  fs/io_uring.c              |   11 +++
>  include/linux/audit.h      |   17 ++++
>  include/uapi/linux/audit.h |    1 
>  kernel/audit.h             |    2 +
>  kernel/auditsc.c           |  173 ++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 208 insertions(+)
> 
> diff --git a/fs/io-wq.c b/fs/io-wq.c
> index 5361a9b4b47b..8af09a3336e0 100644
> --- a/fs/io-wq.c
> +++ b/fs/io-wq.c
> @@ -16,6 +16,7 @@
>  #include <linux/rculist_nulls.h>
>  #include <linux/cpu.h>
>  #include <linux/tracehook.h>
> +#include <linux/audit.h>
>  
>  #include "io-wq.h"
>  
> @@ -535,6 +536,8 @@ static int io_wqe_worker(void *data)
>  	snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
>  	set_task_comm(current, buf);
>  
> +	audit_alloc_kernel(current);
> +
>  	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
>  		long ret;
>  
> @@ -573,6 +576,7 @@ static int io_wqe_worker(void *data)
>  			raw_spin_unlock_irq(&wqe->lock);
>  	}
>  
> +	audit_free(current);
>  	io_worker_exit(worker);
>  	return 0;
>  }
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index e481ac8a757a..e9941d1ad8fd 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -78,6 +78,7 @@
>  #include <linux/task_work.h>
>  #include <linux/pagemap.h>
>  #include <linux/io_uring.h>
> +#include <linux/audit.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/io_uring.h>
> @@ -6105,6 +6106,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>  	if (req->work.creds && req->work.creds != current_cred())
>  		creds = override_creds(req->work.creds);
>  
> +	if (req->opcode < IORING_OP_LAST)
> +		audit_uring_entry(req->opcode);
> +
>  	switch (req->opcode) {
>  	case IORING_OP_NOP:
>  		ret = io_nop(req, issue_flags);
> @@ -6211,6 +6215,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>  		break;
>  	}
>  
> +	if (req->opcode < IORING_OP_LAST)
> +		audit_uring_exit(!ret, ret);
> +
>  	if (creds)
>  		revert_creds(creds);
>  
> @@ -6827,6 +6834,8 @@ static int io_sq_thread(void *data)
>  		set_cpus_allowed_ptr(current, cpu_online_mask);
>  	current->flags |= PF_NO_SETAFFINITY;
>  
> +	audit_alloc_kernel(current);
> +
>  	mutex_lock(&sqd->lock);
>  	/* a user may had exited before the thread started */
>  	io_run_task_work_head(&sqd->park_task_work);
> @@ -6916,6 +6925,8 @@ static int io_sq_thread(void *data)
>  	io_run_task_work_head(&sqd->park_task_work);
>  	mutex_unlock(&sqd->lock);
>  
> +	audit_free(current);
> +
>  	complete(&sqd->exited);
>  	do_exit(0);
>  }
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index 82b7c1116a85..6a0c013bc7de 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -286,7 +286,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
>  /* These are defined in auditsc.c */
>  				/* Public API */
>  extern int  audit_alloc(struct task_struct *task);
> +extern int  audit_alloc_kernel(struct task_struct *task);
>  extern void __audit_free(struct task_struct *task);
> +extern void __audit_uring_entry(u8 op);
> +extern void __audit_uring_exit(int success, long code);
>  extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
>  				  unsigned long a2, unsigned long a3);
>  extern void __audit_syscall_exit(int ret_success, long ret_value);
> @@ -323,6 +326,16 @@ static inline void audit_free(struct task_struct *task)
>  	if (unlikely(task->audit_context))
>  		__audit_free(task);
>  }
> +static inline void audit_uring_entry(u8 op)
> +{
> +	if (unlikely(audit_context()))
> +		__audit_uring_entry(op);
> +}
> +static inline void audit_uring_exit(int success, long code)
> +{
> +	if (unlikely(audit_context()))
> +		__audit_uring_exit(success, code);
> +}
>  static inline void audit_syscall_entry(int major, unsigned long a0,
>  				       unsigned long a1, unsigned long a2,
>  				       unsigned long a3)
> @@ -554,6 +567,10 @@ static inline int audit_alloc(struct task_struct *task)
>  {
>  	return 0;
>  }
> +static inline int audit_alloc_kernel(struct task_struct *task)
> +{
> +	return 0;
> +}
>  static inline void audit_free(struct task_struct *task)
>  { }
>  static inline void audit_syscall_entry(int major, unsigned long a0,
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index cd2d8279a5e4..b26e0c435e8b 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -118,6 +118,7 @@
>  #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
>  #define AUDIT_BPF		1334	/* BPF subsystem */
>  #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
> +#define AUDIT_URINGOP		1336	/* io_uring operation */
>  
>  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
>  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
> diff --git a/kernel/audit.h b/kernel/audit.h
> index fba180de5912..50de827497ca 100644
> --- a/kernel/audit.h
> +++ b/kernel/audit.h
> @@ -100,10 +100,12 @@ struct audit_context {
>  	enum {
>  		AUDIT_CTX_UNUSED,	/* audit_context is currently unused */
>  		AUDIT_CTX_SYSCALL,	/* in use by syscall */
> +		AUDIT_CTX_URING,	/* in use by io_uring */
>  	} context;
>  	enum audit_state    state, current_state;
>  	unsigned int	    serial;     /* serial number for record */
>  	int		    major;      /* syscall number */
> +	int		    uring_op;   /* uring operation */
>  	struct timespec64   ctime;      /* time of syscall entry */
>  	unsigned long	    argv[4];    /* syscall arguments */
>  	long		    return_code;/* syscall return code */
> diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> index cc89e9f9a753..729849d41631 100644
> --- a/kernel/auditsc.c
> +++ b/kernel/auditsc.c
> @@ -953,6 +953,7 @@ static void audit_reset_context(struct audit_context *ctx)
>  	ctx->current_state = ctx->state;
>  	ctx->serial = 0;
>  	ctx->major = 0;
> +	ctx->uring_op = 0;
>  	ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 };
>  	memset(ctx->argv, 0, sizeof(ctx->argv));
>  	ctx->return_code = 0;
> @@ -1038,6 +1039,31 @@ int audit_alloc(struct task_struct *tsk)
>  	return 0;
>  }
>  
> +/**
> + * audit_alloc_kernel - allocate an audit_context for a kernel task
> + * @tsk: the kernel task
> + *
> + * Similar to the audit_alloc() function, but intended for kernel private
> + * threads.  Returns zero on success, negative values on failure.
> + */
> +int audit_alloc_kernel(struct task_struct *tsk)
> +{
> +	/*
> +	 * At the moment we are just going to call into audit_alloc() to
> +	 * simplify the code, but there two things to keep in mind with this
> +	 * approach:
> +	 *
> +	 * 1. Filtering internal kernel tasks is a bit laughable in almost all
> +	 * cases, but there is at least one case where there is a benefit:
> +	 * the '-a task,never' case allows the admin to effectively disable
> +	 * task auditing at runtime.
> +	 *
> +	 * 2. The {set,clear}_task_syscall_work() ops likely have zero effect
> +	 * on these internal kernel tasks, but they probably don't hurt either.
> +	 */
> +	return audit_alloc(tsk);
> +}
> +
>  static inline void audit_free_context(struct audit_context *context)
>  {
>  	/* resetting is extra work, but it is likely just noise */
> @@ -1536,6 +1562,52 @@ static void audit_log_proctitle(void)
>  	audit_log_end(ab);
>  }
>  
> +/**
> + * audit_log_uring - generate a AUDIT_URINGOP record
> + * @ctx: the audit context
> + */
> +static void audit_log_uring(struct audit_context *ctx)
> +{
> +	struct audit_buffer *ab;
> +	const struct cred *cred;
> +
> +	/*
> +	 * TODO: What do we log here?  I'm tossing in a few things to start the
> +	 *       conversation, but additional thought needs to go into this.
> +	 */
> +
> +	ab = audit_log_start(ctx, GFP_KERNEL, AUDIT_URINGOP);
> +	if (!ab)
> +		return;
> +	cred = current_cred();

This may need to be req->work.creds.  I haven't been following whether the
io_uring thread inherits the user task's creds (and, below, comm and
exe).

> +	audit_log_format(ab, "uring_op=%d", ctx->uring_op);

arch is stored below in __audit_uring_entry() and never used in the
AUDIT_CTX_URING case.  That assignment can either be dropped or printed
before uring_op similar to the SYSCALL record.  There aren't really any
arg[0-3] to print.

io_uring_register() and io_uring_setup() args are better covered by other
records.  io_uring_enter() has 6 args and the last two aren't covered by
SYSCALL anyway.

> +	if (ctx->return_valid != AUDITSC_INVALID)
> +		audit_log_format(ab, " success=%s exit=%ld",
> +				 (ctx->return_valid == AUDITSC_SUCCESS ?
> +				  "yes" : "no"),
> +				 ctx->return_code);
> +	audit_log_format(ab,
> +			 " items=%d"
> +			 " ppid=%d pid=%d auid=%u uid=%u gid=%u"
> +			 " euid=%u suid=%u fsuid=%u"
> +			 " egid=%u sgid=%u fsgid=%u",
> +			 ctx->name_count,
> +			 task_ppid_nr(current),
> +			 task_tgid_nr(current),
> +			 from_kuid(&init_user_ns, audit_get_loginuid(current)),
> +			 from_kuid(&init_user_ns, cred->uid),
> +			 from_kgid(&init_user_ns, cred->gid),
> +			 from_kuid(&init_user_ns, cred->euid),
> +			 from_kuid(&init_user_ns, cred->suid),
> +			 from_kuid(&init_user_ns, cred->fsuid),
> +			 from_kgid(&init_user_ns, cred->egid),
> +			 from_kgid(&init_user_ns, cred->sgid),
> +			 from_kgid(&init_user_ns, cred->fsgid));

The audit session ID is still important, relevant and qualifies auid.
In keeping with the SYSCALL record format, I think we want to keep
ses=audit_get_sessionid(current) in here.

I'm pretty sure we want to keep comm= and exe= too, but may have to
reach into req->task to get them.  There are two possible values for comm:
one from the original task and a second, "iou-sqp-<pid>", set at the top of
io_sq_thread().

I'm reluctant to leave them out now and then have to re-add them in yet
another field order later.

> +	audit_log_task_context(ab);
> +	audit_log_key(ab, ctx->filterkey);
> +	audit_log_end(ab);
> +}
> +
>  static void audit_log_exit(void)
>  {
>  	int i, call_panic = 0;
> @@ -1571,6 +1643,9 @@ static void audit_log_exit(void)
>  		audit_log_key(ab, context->filterkey);
>  		audit_log_end(ab);
>  		break;
> +	case AUDIT_CTX_URING:
> +		audit_log_uring(context);
> +		break;
>  	default:
>  		BUG();
>  		break;
> @@ -1740,6 +1815,104 @@ static void audit_return_fixup(struct audit_context *ctx,
>  	ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE);
>  }
>  
> +/**
> + * __audit_uring_entry - prepare the kernel task's audit context for io_uring
> + * @op: the io_uring opcode
> + *
> + * This is similar to audit_syscall_entry() but is intended for use by io_uring
> + * operations.
> + */
> +void __audit_uring_entry(u8 op)
> +{
> +	struct audit_context *ctx = audit_context();
> +
> +	if (!audit_enabled || !ctx || ctx->state == AUDIT_DISABLED)
> +		return;
> +
> +	/*
> +	 * NOTE: It's possible that we can be called from the process' context
> +	 *       before it returns to userspace, and before audit_syscall_exit()
> +	 *       is called.  In this case there is not much to do, just record
> +	 *       the io_uring details and return.
> +	 */
> +	ctx->uring_op = op;
> +	if (ctx->context == AUDIT_CTX_SYSCALL)
> +		return;
> +
> +	ctx->dummy = !audit_n_rules;
> +	if (!ctx->dummy && ctx->state == AUDIT_BUILD_CONTEXT)
> +		ctx->prio = 0;
> +
> +	ctx->arch = syscall_get_arch(current);
> +	ctx->context = AUDIT_CTX_URING;
> +	ctx->current_state = ctx->state;
> +	ktime_get_coarse_real_ts64(&ctx->ctime);
> +}
> +
> +/**
> + * __audit_uring_exit - wrap up the kernel task's audit context after io_uring
> + * @success: true/false value to indicate if the operation succeeded or not
> + * @code: operation return code
> + *
> + * This is similar to audit_syscall_exit() but is intended for use by io_uring
> + * operations.
> + */
> +void __audit_uring_exit(int success, long code)
> +{
> +	struct audit_context *ctx = audit_context();
> +
> +	/*
> +	 * TODO: At some point we will likely want to filter on io_uring ops
> +	 *       and other things similar to what we do for syscalls, but that
> +	 *       is something for another day; just record what we can here.
> +	 */
> +
> +	if (!ctx || ctx->dummy)
> +		goto out;
> +	if (ctx->context == AUDIT_CTX_SYSCALL) {
> +		/*
> +		 * NOTE: See the note in __audit_uring_entry() about the case
> +		 *       where we may be called from process context before we
> +		 *       return to userspace via audit_syscall_exit().  In this
> +		 *       case we simply emit a URINGOP record and bail, the
> +		 *       normal syscall exit handling will take care of
> +		 *       everything else.
> +		 *       It is also worth mentioning that when we are called,
> +		 *       the current process creds may differ from the creds
> +		 *       used during the normal syscall processing; keep that
> +		 *       in mind if/when we move the record generation code.
> +		 */
> +
> +		/*
> +		 * We need to filter on the syscall info here to decide if we
> +		 * should emit a URINGOP record.  I know it seems odd but this
> +		 * solves the problem where users have a filter to block *all*
> +		 * syscall records in the "exit" filter; we want to preserve
> +		 * the behavior here.
> +		 */
> +		audit_filter_syscall(current, ctx);
> +		audit_filter_inodes(current, ctx);
> +		if (ctx->current_state != AUDIT_RECORD_CONTEXT)
> +			return;
> +
> +		audit_log_uring(ctx);
> +		return;
> +	}
> +
> +	/* this may generate CONFIG_CHANGE records */
> +	if (!list_empty(&ctx->killed_trees))
> +		audit_kill_trees(ctx);
> +
> +	audit_filter_inodes(current, ctx);
> +	if (ctx->current_state != AUDIT_RECORD_CONTEXT)
> +		goto out;
> +	audit_return_fixup(ctx, success, code);
> +	audit_log_exit();
> +
> +out:
> +	audit_reset_context(ctx);
> +}
> +
>  /**
>   * __audit_syscall_entry - fill in an audit record at syscall entry
>   * @major: major syscall type (function)
> 
> --
> Linux-audit mailing list
> Linux-audit@redhat.com
> https://listman.redhat.com/mailman/listinfo/linux-audit

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635


WARNING: multiple messages have this Message-ID (diff)
From: Richard Guy Briggs <rgb@redhat.com>
To: Paul Moore <paul@paul-moore.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	selinux@vger.kernel.org, linux-security-module@vger.kernel.org,
	linux-audit@redhat.com,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>,
	linux-fsdevel@vger.kernel.org, io-uring@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>
Subject: Re: [RFC PATCH 2/9] audit, io_uring, io-wq: add some basic audit support to io_uring
Date: Wed, 2 Jun 2021 13:29:24 -0400	[thread overview]
Message-ID: <20210602172924.GM447005@madcap2.tricolour.ca> (raw)
In-Reply-To: <162163379461.8379.9691291608621179559.stgit@sifl>

On 2021-05-21 17:49, Paul Moore wrote:
> WARNING - This is a work in progress and should not be merged
> anywhere important.  It is almost surely not complete, and while it
> probably compiles it likely hasn't been booted and will do terrible
> things.  You have been warned.
> 
> This patch adds basic auditing to io_uring operations, regardless of
> their context.  This is accomplished by allocating audit_context
> structures for the io-wq worker and io_uring SQPOLL kernel threads
> as well as explicitly auditing the io_uring operations in
> io_issue_sqe().  The io_uring operations are audited using a new
> AUDIT_URINGOP record, an example is shown below:
> 
>   % <TODO - insert AUDIT_URINGOP record example>
> 
> Thanks to Richard Guy Briggs for review and feedback.
> 
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
>  fs/io-wq.c                 |    4 +
>  fs/io_uring.c              |   11 +++
>  include/linux/audit.h      |   17 ++++
>  include/uapi/linux/audit.h |    1 
>  kernel/audit.h             |    2 +
>  kernel/auditsc.c           |  173 ++++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 208 insertions(+)
> 
> diff --git a/fs/io-wq.c b/fs/io-wq.c
> index 5361a9b4b47b..8af09a3336e0 100644
> --- a/fs/io-wq.c
> +++ b/fs/io-wq.c
> @@ -16,6 +16,7 @@
>  #include <linux/rculist_nulls.h>
>  #include <linux/cpu.h>
>  #include <linux/tracehook.h>
> +#include <linux/audit.h>
>  
>  #include "io-wq.h"
>  
> @@ -535,6 +536,8 @@ static int io_wqe_worker(void *data)
>  	snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
>  	set_task_comm(current, buf);
>  
> +	audit_alloc_kernel(current);
> +
>  	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
>  		long ret;
>  
> @@ -573,6 +576,7 @@ static int io_wqe_worker(void *data)
>  			raw_spin_unlock_irq(&wqe->lock);
>  	}
>  
> +	audit_free(current);
>  	io_worker_exit(worker);
>  	return 0;
>  }
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index e481ac8a757a..e9941d1ad8fd 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -78,6 +78,7 @@
>  #include <linux/task_work.h>
>  #include <linux/pagemap.h>
>  #include <linux/io_uring.h>
> +#include <linux/audit.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/io_uring.h>
> @@ -6105,6 +6106,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>  	if (req->work.creds && req->work.creds != current_cred())
>  		creds = override_creds(req->work.creds);
>  
> +	if (req->opcode < IORING_OP_LAST)
> +		audit_uring_entry(req->opcode);
> +
>  	switch (req->opcode) {
>  	case IORING_OP_NOP:
>  		ret = io_nop(req, issue_flags);
> @@ -6211,6 +6215,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
>  		break;
>  	}
>  
> +	if (req->opcode < IORING_OP_LAST)
> +		audit_uring_exit(!ret, ret);
> +
>  	if (creds)
>  		revert_creds(creds);
>  
> @@ -6827,6 +6834,8 @@ static int io_sq_thread(void *data)
>  		set_cpus_allowed_ptr(current, cpu_online_mask);
>  	current->flags |= PF_NO_SETAFFINITY;
>  
> +	audit_alloc_kernel(current);
> +
>  	mutex_lock(&sqd->lock);
>  	/* a user may had exited before the thread started */
>  	io_run_task_work_head(&sqd->park_task_work);
> @@ -6916,6 +6925,8 @@ static int io_sq_thread(void *data)
>  	io_run_task_work_head(&sqd->park_task_work);
>  	mutex_unlock(&sqd->lock);
>  
> +	audit_free(current);
> +
>  	complete(&sqd->exited);
>  	do_exit(0);
>  }
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index 82b7c1116a85..6a0c013bc7de 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -286,7 +286,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
>  /* These are defined in auditsc.c */
>  				/* Public API */
>  extern int  audit_alloc(struct task_struct *task);
> +extern int  audit_alloc_kernel(struct task_struct *task);
>  extern void __audit_free(struct task_struct *task);
> +extern void __audit_uring_entry(u8 op);
> +extern void __audit_uring_exit(int success, long code);
>  extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
>  				  unsigned long a2, unsigned long a3);
>  extern void __audit_syscall_exit(int ret_success, long ret_value);
> @@ -323,6 +326,16 @@ static inline void audit_free(struct task_struct *task)
>  	if (unlikely(task->audit_context))
>  		__audit_free(task);
>  }
> +static inline void audit_uring_entry(u8 op)
> +{
> +	if (unlikely(audit_context()))
> +		__audit_uring_entry(op);
> +}
> +static inline void audit_uring_exit(int success, long code)
> +{
> +	if (unlikely(audit_context()))
> +		__audit_uring_exit(success, code);
> +}
>  static inline void audit_syscall_entry(int major, unsigned long a0,
>  				       unsigned long a1, unsigned long a2,
>  				       unsigned long a3)
> @@ -554,6 +567,10 @@ static inline int audit_alloc(struct task_struct *task)
>  {
>  	return 0;
>  }
> +static inline int audit_alloc_kernel(struct task_struct *task)
> +{
> +	return 0;
> +}
>  static inline void audit_free(struct task_struct *task)
>  { }
>  static inline void audit_syscall_entry(int major, unsigned long a0,
> diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index cd2d8279a5e4..b26e0c435e8b 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -118,6 +118,7 @@
>  #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
>  #define AUDIT_BPF		1334	/* BPF subsystem */
>  #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
> +#define AUDIT_URINGOP		1336	/* io_uring operation */
>  
>  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
>  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
> diff --git a/kernel/audit.h b/kernel/audit.h
> index fba180de5912..50de827497ca 100644
> --- a/kernel/audit.h
> +++ b/kernel/audit.h
> @@ -100,10 +100,12 @@ struct audit_context {
>  	enum {
>  		AUDIT_CTX_UNUSED,	/* audit_context is currently unused */
>  		AUDIT_CTX_SYSCALL,	/* in use by syscall */
> +		AUDIT_CTX_URING,	/* in use by io_uring */
>  	} context;
>  	enum audit_state    state, current_state;
>  	unsigned int	    serial;     /* serial number for record */
>  	int		    major;      /* syscall number */
> +	int		    uring_op;   /* uring operation */
>  	struct timespec64   ctime;      /* time of syscall entry */
>  	unsigned long	    argv[4];    /* syscall arguments */
>  	long		    return_code;/* syscall return code */
> diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> index cc89e9f9a753..729849d41631 100644
> --- a/kernel/auditsc.c
> +++ b/kernel/auditsc.c
> @@ -953,6 +953,7 @@ static void audit_reset_context(struct audit_context *ctx)
>  	ctx->current_state = ctx->state;
>  	ctx->serial = 0;
>  	ctx->major = 0;
> +	ctx->uring_op = 0;
>  	ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 };
>  	memset(ctx->argv, 0, sizeof(ctx->argv));
>  	ctx->return_code = 0;
> @@ -1038,6 +1039,31 @@ int audit_alloc(struct task_struct *tsk)
>  	return 0;
>  }
>  
> +/**
> + * audit_alloc_kernel - allocate an audit_context for a kernel task
> + * @tsk: the kernel task
> + *
> + * Similar to the audit_alloc() function, but intended for kernel private
> + * threads.  Returns zero on success, negative values on failure.
> + */
> +int audit_alloc_kernel(struct task_struct *tsk)
> +{
> +	/*
> +	 * At the moment we are just going to call into audit_alloc() to
> +	 * simplify the code, but there two things to keep in mind with this
> +	 * approach:
> +	 *
> +	 * 1. Filtering internal kernel tasks is a bit laughable in almost all
> +	 * cases, but there is at least one case where there is a benefit:
> +	 * the '-a task,never' case allows the admin to effectively disable
> +	 * task auditing at runtime.
> +	 *
> +	 * 2. The {set,clear}_task_syscall_work() ops likely have zero effect
> +	 * on these internal kernel tasks, but they probably don't hurt either.
> +	 */
> +	return audit_alloc(tsk);
> +}
> +
>  static inline void audit_free_context(struct audit_context *context)
>  {
>  	/* resetting is extra work, but it is likely just noise */
> @@ -1536,6 +1562,52 @@ static void audit_log_proctitle(void)
>  	audit_log_end(ab);
>  }
>  
> +/**
> + * audit_log_uring - generate a AUDIT_URINGOP record
> + * @ctx: the audit context
> + */
> +static void audit_log_uring(struct audit_context *ctx)
> +{
> +	struct audit_buffer *ab;
> +	const struct cred *cred;
> +
> +	/*
> +	 * TODO: What do we log here?  I'm tossing in a few things to start the
> +	 *       conversation, but additional thought needs to go into this.
> +	 */
> +
> +	ab = audit_log_start(ctx, GFP_KERNEL, AUDIT_URINGOP);
> +	if (!ab)
> +		return;
> +	cred = current_cred();

This may need to be req->work.creds.  I haven't been following whether the
io_uring thread inherits the user task's creds (and, below, comm and
exe).

> +	audit_log_format(ab, "uring_op=%d", ctx->uring_op);

arch is stored below in __audit_uring_entry() and never used in the
AUDIT_CTX_URING case.  That assignment can either be dropped or printed
before uring_op similar to the SYSCALL record.  There aren't really any
arg[0-3] to print.

io_uring_register() and io_uring_setup() args are better covered by other
records.  io_uring_enter() has 6 args and the last two aren't covered by
SYSCALL anyway.

> +	if (ctx->return_valid != AUDITSC_INVALID)
> +		audit_log_format(ab, " success=%s exit=%ld",
> +				 (ctx->return_valid == AUDITSC_SUCCESS ?
> +				  "yes" : "no"),
> +				 ctx->return_code);
> +	audit_log_format(ab,
> +			 " items=%d"
> +			 " ppid=%d pid=%d auid=%u uid=%u gid=%u"
> +			 " euid=%u suid=%u fsuid=%u"
> +			 " egid=%u sgid=%u fsgid=%u",
> +			 ctx->name_count,
> +			 task_ppid_nr(current),
> +			 task_tgid_nr(current),
> +			 from_kuid(&init_user_ns, audit_get_loginuid(current)),
> +			 from_kuid(&init_user_ns, cred->uid),
> +			 from_kgid(&init_user_ns, cred->gid),
> +			 from_kuid(&init_user_ns, cred->euid),
> +			 from_kuid(&init_user_ns, cred->suid),
> +			 from_kuid(&init_user_ns, cred->fsuid),
> +			 from_kgid(&init_user_ns, cred->egid),
> +			 from_kgid(&init_user_ns, cred->sgid),
> +			 from_kgid(&init_user_ns, cred->fsgid));

The audit session ID is still important, relevant and qualifies auid.
In keeping with the SYSCALL record format, I think we want to keep
ses=audit_get_sessionid(current) in here.

I'm pretty sure we want to keep comm= and exe= too, but may have to
reach into req->task to get them.  There are two possible values for comm:
one from the original task and a second, "iou-sqp-<pid>", set at the top of
io_sq_thread().

I'm reluctant to leave them out now and then have to re-add them in yet
another field order later.

> +	audit_log_task_context(ab);
> +	audit_log_key(ab, ctx->filterkey);
> +	audit_log_end(ab);
> +}
> +
>  static void audit_log_exit(void)
>  {
>  	int i, call_panic = 0;
> @@ -1571,6 +1643,9 @@ static void audit_log_exit(void)
>  		audit_log_key(ab, context->filterkey);
>  		audit_log_end(ab);
>  		break;
> +	case AUDIT_CTX_URING:
> +		audit_log_uring(context);
> +		break;
>  	default:
>  		BUG();
>  		break;
> @@ -1740,6 +1815,104 @@ static void audit_return_fixup(struct audit_context *ctx,
>  	ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE);
>  }
>  
> +/**
> + * __audit_uring_entry - prepare the kernel task's audit context for io_uring
> + * @op: the io_uring opcode
> + *
> + * This is similar to audit_syscall_entry() but is intended for use by io_uring
> + * operations.
> + */
> +void __audit_uring_entry(u8 op)
> +{
> +	struct audit_context *ctx = audit_context();
> +
> +	if (!audit_enabled || !ctx || ctx->state == AUDIT_DISABLED)
> +		return;
> +
> +	/*
> +	 * NOTE: It's possible that we can be called from the process' context
> +	 *       before it returns to userspace, and before audit_syscall_exit()
> +	 *       is called.  In this case there is not much to do, just record
> +	 *       the io_uring details and return.
> +	 */
> +	ctx->uring_op = op;
> +	if (ctx->context == AUDIT_CTX_SYSCALL)
> +		return;
> +
> +	ctx->dummy = !audit_n_rules;
> +	if (!ctx->dummy && ctx->state == AUDIT_BUILD_CONTEXT)
> +		ctx->prio = 0;
> +
> +	ctx->arch = syscall_get_arch(current);
> +	ctx->context = AUDIT_CTX_URING;
> +	ctx->current_state = ctx->state;
> +	ktime_get_coarse_real_ts64(&ctx->ctime);
> +}
> +
> +/**
> + * __audit_uring_exit - wrap up the kernel task's audit context after io_uring
> + * @success: true/false value to indicate if the operation succeeded or not
> + * @code: operation return code
> + *
> + * This is similar to audit_syscall_exit() but is intended for use by io_uring
> + * operations.
> + */
> +void __audit_uring_exit(int success, long code)
> +{
> +	struct audit_context *ctx = audit_context();
> +
> +	/*
> +	 * TODO: At some point we will likely want to filter on io_uring ops
> +	 *       and other things similar to what we do for syscalls, but that
> +	 *       is something for another day; just record what we can here.
> +	 */
> +
> +	if (!ctx || ctx->dummy)
> +		goto out;
> +	if (ctx->context == AUDIT_CTX_SYSCALL) {
> +		/*
> +		 * NOTE: See the note in __audit_uring_entry() about the case
> +		 *       where we may be called from process context before we
> +		 *       return to userspace via audit_syscall_exit().  In this
> +		 *       case we simply emit a URINGOP record and bail, the
> +		 *       normal syscall exit handling will take care of
> +		 *       everything else.
> +		 *       It is also worth mentioning that when we are called,
> +		 *       the current process creds may differ from the creds
> +		 *       used during the normal syscall processing; keep that
> +		 *       in mind if/when we move the record generation code.
> +		 */
> +
> +		/*
> +		 * We need to filter on the syscall info here to decide if we
> +		 * should emit a URINGOP record.  I know it seems odd but this
> +		 * solves the problem where users have a filter to block *all*
> +		 * syscall records in the "exit" filter; we want to preserve
> +		 * the behavior here.
> +		 */
> +		audit_filter_syscall(current, ctx);
> +		audit_filter_inodes(current, ctx);
> +		if (ctx->current_state != AUDIT_RECORD_CONTEXT)
> +			return;
> +
> +		audit_log_uring(ctx);
> +		return;
> +	}
> +
> +	/* this may generate CONFIG_CHANGE records */
> +	if (!list_empty(&ctx->killed_trees))
> +		audit_kill_trees(ctx);
> +
> +	audit_filter_inodes(current, ctx);
> +	if (ctx->current_state != AUDIT_RECORD_CONTEXT)
> +		goto out;
> +	audit_return_fixup(ctx, success, code);
> +	audit_log_exit();
> +
> +out:
> +	audit_reset_context(ctx);
> +}
> +
>  /**
>   * __audit_syscall_entry - fill in an audit record at syscall entry
>   * @major: major syscall type (function)
> 
> --
> Linux-audit mailing list
> Linux-audit@redhat.com
> https://listman.redhat.com/mailman/listinfo/linux-audit

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

--
Linux-audit mailing list
Linux-audit@redhat.com
https://listman.redhat.com/mailman/listinfo/linux-audit


  parent reply	other threads:[~2021-06-02 17:29 UTC|newest]

Thread overview: 144+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-21 21:49 [RFC PATCH 0/9] Add LSM access controls and auditing to io_uring Paul Moore
2021-05-21 21:49 ` Paul Moore
2021-05-21 21:49 ` [RFC PATCH 1/9] audit: prepare audit_context for use in calling contexts beyond syscalls Paul Moore
2021-05-21 21:49   ` Paul Moore
2021-05-21 21:49 ` [RFC PATCH 2/9] audit,io_uring,io-wq: add some basic audit support to io_uring Paul Moore
2021-05-21 21:49   ` [RFC PATCH 2/9] audit, io_uring, io-wq: " Paul Moore
2021-05-22  0:22   ` [RFC PATCH 2/9] audit,io_uring,io-wq: " Pavel Begunkov
2021-05-22  0:22     ` Pavel Begunkov
2021-05-22  2:36     ` Paul Moore
2021-05-22  2:36       ` Paul Moore
2021-05-23 20:26       ` Pavel Begunkov
2021-05-23 20:26         ` Pavel Begunkov
2021-05-24 19:59         ` Paul Moore
2021-05-24 19:59           ` Paul Moore
2021-05-25  8:27           ` Pavel Begunkov
2021-05-25  8:27             ` Pavel Begunkov
2021-05-25 14:53             ` Paul Moore
2021-05-25 14:53               ` Paul Moore
2021-05-26  1:11           ` Jens Axboe
2021-05-26  1:11             ` Jens Axboe
2021-05-26  2:04             ` Paul Moore
2021-05-26  2:04               ` Paul Moore
2021-05-26 10:19               ` Pavel Begunkov
2021-05-26 10:19                 ` Pavel Begunkov
2021-05-26 14:38                 ` Paul Moore
2021-05-26 14:38                   ` Paul Moore
2021-05-26 15:11                   ` Steve Grubb
2021-05-26 15:11                     ` [RFC PATCH 2/9] audit, io_uring, io-wq: " Steve Grubb
2021-05-26 15:17                   ` [RFC PATCH 2/9] audit,io_uring,io-wq: " Stefan Metzmacher
2021-05-26 15:17                     ` Stefan Metzmacher
2021-05-26 15:49                     ` Richard Guy Briggs
2021-05-26 15:49                       ` Richard Guy Briggs
2021-05-26 17:22                       ` Jens Axboe
2021-05-26 17:22                         ` Jens Axboe
2021-05-27 17:27                         ` Richard Guy Briggs
2021-05-27 17:27                           ` Richard Guy Briggs
2021-05-26 15:49                     ` Victor Stewart
2021-05-26 15:49                       ` Victor Stewart
2021-05-26 16:38                       ` Casey Schaufler
2021-05-26 16:38                         ` Casey Schaufler
2021-05-26 17:15               ` Jens Axboe
2021-05-26 17:15                 ` Jens Axboe
2021-05-26 17:31                 ` Jens Axboe
2021-05-26 17:31                   ` Jens Axboe
2021-05-26 17:54                   ` Jens Axboe
2021-05-26 17:54                     ` Jens Axboe
2021-05-26 18:01                     ` Jens Axboe
2021-05-26 18:01                       ` Jens Axboe
2021-05-26 18:44                       ` Paul Moore
2021-05-26 18:44                         ` Paul Moore
2021-05-26 18:57                         ` Pavel Begunkov
2021-05-26 18:57                           ` Pavel Begunkov
2021-05-26 19:10                           ` Paul Moore
2021-05-26 19:10                             ` Paul Moore
2021-05-26 19:44                         ` Jens Axboe
2021-05-26 19:44                           ` Jens Axboe
2021-05-26 20:19                           ` Paul Moore
2021-05-26 20:19                             ` Paul Moore
2021-05-28 16:02                             ` Paul Moore
2021-05-28 16:02                               ` Paul Moore
2021-06-02  8:26                               ` Pavel Begunkov
2021-06-02  8:26                                 ` Pavel Begunkov
2021-06-02 15:46                                 ` Richard Guy Briggs
2021-06-02 15:46                                   ` Richard Guy Briggs
2021-06-03 10:39                                   ` Pavel Begunkov
2021-06-03 10:39                                     ` Pavel Begunkov
2021-06-02 19:46                                 ` Paul Moore
2021-06-02 19:46                                   ` Paul Moore
2021-06-03 10:51                                   ` Pavel Begunkov
2021-06-03 10:51                                     ` Pavel Begunkov
2021-06-03 15:54                                     ` Casey Schaufler
2021-06-03 15:54                                       ` Casey Schaufler
2021-06-03 15:54                               ` Jens Axboe
2021-06-03 15:54                                 ` Jens Axboe
2021-06-04  5:04                                 ` Paul Moore
2021-06-04  5:04                                   ` Paul Moore
2021-05-26 18:38                     ` Paul Moore
2021-05-26 18:38                       ` Paul Moore
2021-06-02 17:29   ` Richard Guy Briggs [this message]
2021-06-02 17:29     ` [RFC PATCH 2/9] audit, io_uring, io-wq: " Richard Guy Briggs
2021-06-02 20:46     ` Paul Moore
2021-06-02 20:46       ` Paul Moore
2021-08-25  1:21       ` Richard Guy Briggs
2021-08-25  1:21         ` Richard Guy Briggs
2021-08-25 19:41         ` Paul Moore
2021-08-25 19:41           ` Paul Moore
2021-05-21 21:50 ` [RFC PATCH 3/9] audit: dev/test patch to force io_uring auditing Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-21 21:50 ` [RFC PATCH 4/9] audit: add filtering for io_uring records Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-28 22:35   ` Richard Guy Briggs
2021-05-28 22:35     ` Richard Guy Briggs
2021-05-30 15:26     ` Paul Moore
2021-05-30 15:26       ` Paul Moore
2021-05-31 13:44       ` Richard Guy Briggs
2021-05-31 13:44         ` Richard Guy Briggs
2021-06-02  1:40         ` Paul Moore
2021-06-02  1:40           ` Paul Moore
2021-06-02 15:37           ` Richard Guy Briggs
2021-06-02 15:37             ` Richard Guy Briggs
2021-06-02 17:20             ` Paul Moore
2021-06-02 17:20               ` Paul Moore
2021-05-31 13:44       ` [PATCH 1/2] audit: add filtering for io_uring records, addendum Richard Guy Briggs
2021-05-31 13:44         ` Richard Guy Briggs
2021-05-31 16:08         ` kernel test robot
2021-05-31 16:08           ` kernel test robot
2021-05-31 16:08           ` kernel test robot
2021-05-31 17:38         ` kernel test robot
2021-05-31 17:38           ` kernel test robot
2021-05-31 17:38           ` kernel test robot
2021-06-07 23:15         ` Paul Moore
2021-06-07 23:15           ` Paul Moore
2021-06-08 12:55           ` Richard Guy Briggs
2021-06-08 12:55             ` Richard Guy Briggs
2021-06-09  2:45             ` Paul Moore
2021-06-09  2:45               ` Paul Moore
2021-05-31 13:44       ` [PATCH 2/2] audit: block PERM fields being used with io_uring filtering Richard Guy Briggs
2021-05-31 13:44         ` Richard Guy Briggs
2021-05-21 21:50 ` [RFC PATCH 5/9] fs: add anon_inode_getfile_secure() similar to anon_inode_getfd_secure() Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-21 21:50 ` [RFC PATCH 6/9] io_uring: convert io_uring to the secure anon inode interface Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-21 21:50 ` [RFC PATCH 7/9] lsm,io_uring: add LSM hooks to io_uring Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-26 14:48   ` Stefan Metzmacher
2021-05-26 14:48     ` Stefan Metzmacher
2021-05-26 20:45     ` Paul Moore
2021-05-26 20:45       ` Paul Moore
2021-05-21 21:50 ` [RFC PATCH 8/9] selinux: add support for the io_uring access controls Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-21 21:50 ` [RFC PATCH 9/9] Smack: Brutalist io_uring support with debug Paul Moore
2021-05-21 21:50   ` Paul Moore
2021-05-22  0:53 ` [RFC PATCH 0/9] Add LSM access controls and auditing to io_uring Tetsuo Handa
2021-05-22  0:53   ` Tetsuo Handa
2021-05-22  2:06   ` Paul Moore
2021-05-22  2:06     ` Paul Moore
2021-05-26 15:00 ` Jeff Moyer
2021-05-26 15:00   ` Jeff Moyer
2021-05-26 18:49   ` Paul Moore
2021-05-26 18:49     ` Paul Moore
2021-05-26 19:07     ` Jeff Moyer
2021-05-26 19:07       ` Jeff Moyer
2021-05-26 19:10       ` Paul Moore
2021-05-26 19:10         ` Paul Moore

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210602172924.GM447005@madcap2.tricolour.ca \
    --to=rgb@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=linux-audit@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=memxor@gmail.com \
    --cc=paul@paul-moore.com \
    --cc=selinux@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank
  line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.