All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <willy@infradead.org>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
	Benjamin LaHaise <bcrl@kvack.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Kees Cook <keescook@chromium.org>,
	linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Dan Carpenter <dan.carpenter@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Subject: [PATCH] aio: Convert ioctx_table to XArray
Date: Wed, 28 Nov 2018 10:35:31 -0800	[thread overview]
Message-ID: <20181128183531.5139-1-willy@infradead.org> (raw)

This custom resizing array was vulnerable to a Spectre attack (speculating
off the end of an array to a user-controlled offset).  The XArray is
not vulnerable to Spectre as it always masks its lookups to be within
the bounds of the array.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/aio.c                 | 182 ++++++++++++++-------------------------
 include/linux/mm_types.h |   5 +-
 kernel/fork.c            |   3 +-
 mm/debug.c               |   6 --
 4 files changed, 67 insertions(+), 129 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 301e6314183b..51ba7446a24f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -71,12 +71,6 @@ struct aio_ring {
 
 #define AIO_RING_PAGES	8
 
-struct kioctx_table {
-	struct rcu_head		rcu;
-	unsigned		nr;
-	struct kioctx __rcu	*table[];
-};
-
 struct kioctx_cpu {
 	unsigned		reqs_available;
 };
@@ -313,27 +307,22 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
 {
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
-	struct kioctx_table *table;
-	int i, res = -EINVAL;
+	struct kioctx *ctx;
+	unsigned long index;
+	int res = -EINVAL;
 
-	spin_lock(&mm->ioctx_lock);
-	rcu_read_lock();
-	table = rcu_dereference(mm->ioctx_table);
-	for (i = 0; i < table->nr; i++) {
-		struct kioctx *ctx;
-
-		ctx = rcu_dereference(table->table[i]);
-		if (ctx && ctx->aio_ring_file == file) {
-			if (!atomic_read(&ctx->dead)) {
-				ctx->user_id = ctx->mmap_base = vma->vm_start;
-				res = 0;
-			}
-			break;
+	xa_lock(&mm->ioctx);
+	xa_for_each(&mm->ioctx, ctx, index, ULONG_MAX, XA_PRESENT) {
+		if (ctx->aio_ring_file != file)
+			continue;
+		if (!atomic_read(&ctx->dead)) {
+			ctx->user_id = ctx->mmap_base = vma->vm_start;
+			res = 0;
 		}
+		break;
 	}
+	xa_unlock(&mm->ioctx);
 
-	rcu_read_unlock();
-	spin_unlock(&mm->ioctx_lock);
 	return res;
 }
 
@@ -617,57 +606,21 @@ static void free_ioctx_users(struct percpu_ref *ref)
 
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 {
-	unsigned i, new_nr;
-	struct kioctx_table *table, *old;
 	struct aio_ring *ring;
+	int err;
 
-	spin_lock(&mm->ioctx_lock);
-	table = rcu_dereference_raw(mm->ioctx_table);
-
-	while (1) {
-		if (table)
-			for (i = 0; i < table->nr; i++)
-				if (!rcu_access_pointer(table->table[i])) {
-					ctx->id = i;
-					rcu_assign_pointer(table->table[i], ctx);
-					spin_unlock(&mm->ioctx_lock);
-
-					/* While kioctx setup is in progress,
-					 * we are protected from page migration
-					 * changes ring_pages by ->ring_lock.
-					 */
-					ring = kmap_atomic(ctx->ring_pages[0]);
-					ring->id = ctx->id;
-					kunmap_atomic(ring);
-					return 0;
-				}
-
-		new_nr = (table ? table->nr : 1) * 4;
-		spin_unlock(&mm->ioctx_lock);
-
-		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
-				new_nr, GFP_KERNEL);
-		if (!table)
-			return -ENOMEM;
-
-		table->nr = new_nr;
-
-		spin_lock(&mm->ioctx_lock);
-		old = rcu_dereference_raw(mm->ioctx_table);
-
-		if (!old) {
-			rcu_assign_pointer(mm->ioctx_table, table);
-		} else if (table->nr > old->nr) {
-			memcpy(table->table, old->table,
-			       old->nr * sizeof(struct kioctx *));
+	err = xa_alloc(&mm->ioctx, &ctx->id, UINT_MAX, ctx, GFP_KERNEL);
+	if (err)
+		return err;
 
-			rcu_assign_pointer(mm->ioctx_table, table);
-			kfree_rcu(old, rcu);
-		} else {
-			kfree(table);
-			table = old;
-		}
-	}
+	/*
+	 * While kioctx setup is in progress, we are protected from
+	 * page migration changing ring_pages by ->ring_lock.
+	 */
+	ring = kmap_atomic(ctx->ring_pages[0]);
+	ring->id = ctx->id;
+	kunmap_atomic(ring);
+	return 0;
 }
 
 static void aio_nr_sub(unsigned nr)
@@ -793,27 +746,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	return ERR_PTR(err);
 }
 
-/* kill_ioctx
- *	Cancels all outstanding aio requests on an aio context.  Used
- *	when the processes owning a context have all exited to encourage
- *	the rapid destruction of the kioctx.
- */
-static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
-		      struct ctx_rq_wait *wait)
+static void __kill_ioctx(struct kioctx *ctx, struct ctx_rq_wait *wait)
 {
-	struct kioctx_table *table;
-
-	spin_lock(&mm->ioctx_lock);
-	if (atomic_xchg(&ctx->dead, 1)) {
-		spin_unlock(&mm->ioctx_lock);
-		return -EINVAL;
-	}
-
-	table = rcu_dereference_raw(mm->ioctx_table);
-	WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id]));
-	RCU_INIT_POINTER(table->table[ctx->id], NULL);
-	spin_unlock(&mm->ioctx_lock);
-
 	/* free_ioctx_reqs() will do the necessary RCU synchronization */
 	wake_up_all(&ctx->wait);
 
@@ -831,6 +765,30 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 
 	ctx->rq_wait = wait;
 	percpu_ref_kill(&ctx->users);
+}
+
+/* kill_ioctx
+ *	Cancels all outstanding aio requests on an aio context.  Used
+ *	when the processes owning a context have all exited to encourage
+ *	the rapid destruction of the kioctx.
+ */
+static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+		      struct ctx_rq_wait *wait)
+{
+	struct kioctx *old;
+
+	/* I don't understand what this lock is protecting against */
+	xa_lock(&mm->ioctx);
+	if (atomic_xchg(&ctx->dead, 1)) {
+		xa_unlock(&mm->ioctx);
+		return -EINVAL;
+	}
+
+	old = __xa_erase(&mm->ioctx, ctx->id);
+	WARN_ON(old != ctx);
+	xa_unlock(&mm->ioctx);
+
+	__kill_ioctx(ctx, wait);
 	return 0;
 }
 
@@ -844,26 +802,21 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
  */
 void exit_aio(struct mm_struct *mm)
 {
-	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
+	struct kioctx *ctx;
 	struct ctx_rq_wait wait;
-	int i, skipped;
+	unsigned long index;
 
-	if (!table)
+	if (xa_empty(&mm->ioctx))
 		return;
 
-	atomic_set(&wait.count, table->nr);
+	/*
+	 * Prevent count from getting to 0 until we're ready for it
+	 */
+	atomic_set(&wait.count, 1);
 	init_completion(&wait.comp);
 
-	skipped = 0;
-	for (i = 0; i < table->nr; ++i) {
-		struct kioctx *ctx =
-			rcu_dereference_protected(table->table[i], true);
-
-		if (!ctx) {
-			skipped++;
-			continue;
-		}
-
+	xa_lock(&mm->ioctx);
+	xa_for_each(&mm->ioctx, ctx, index, ULONG_MAX, XA_PRESENT) {
 		/*
 		 * We don't need to bother with munmap() here - exit_mmap(mm)
 		 * is coming and it'll unmap everything. And we simply can't,
@@ -872,16 +825,16 @@ void exit_aio(struct mm_struct *mm)
 		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-		kill_ioctx(mm, ctx, &wait);
+		atomic_inc(&wait.count);
+		__xa_erase(&mm->ioctx, ctx->id);
+		__kill_ioctx(ctx, &wait);
 	}
+	xa_unlock(&mm->ioctx);
 
-	if (!atomic_sub_and_test(skipped, &wait.count)) {
+	if (!atomic_sub_and_test(1, &wait.count)) {
 		/* Wait until all IO for the context are done. */
 		wait_for_completion(&wait.comp);
 	}
-
-	RCU_INIT_POINTER(mm->ioctx_table, NULL);
-	kfree(table);
 }
 
 static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -1026,24 +979,17 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	struct aio_ring __user *ring  = (void __user *)ctx_id;
 	struct mm_struct *mm = current->mm;
 	struct kioctx *ctx, *ret = NULL;
-	struct kioctx_table *table;
 	unsigned id;
 
 	if (get_user(id, &ring->id))
 		return NULL;
 
 	rcu_read_lock();
-	table = rcu_dereference(mm->ioctx_table);
-
-	if (!table || id >= table->nr)
-		goto out;
-
-	ctx = rcu_dereference(table->table[id]);
+	ctx = xa_load(&mm->ioctx, id);
 	if (ctx && ctx->user_id == ctx_id) {
 		if (percpu_ref_tryget_live(&ctx->users))
 			ret = ctx;
 	}
-out:
 	rcu_read_unlock();
 	return ret;
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f6292a53..1a95bb27f5a7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -14,6 +14,7 @@
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
+#include <linux/xarray.h>
 
 #include <asm/mmu.h>
 
@@ -336,7 +337,6 @@ struct core_state {
 	struct completion startup;
 };
 
-struct kioctx_table;
 struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
@@ -431,8 +431,7 @@ struct mm_struct {
 		atomic_t membarrier_state;
 #endif
 #ifdef CONFIG_AIO
-		spinlock_t			ioctx_lock;
-		struct kioctx_table __rcu	*ioctx_table;
+		struct xarray ioctx;
 #endif
 #ifdef CONFIG_MEMCG
 		/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 07cddff89c7b..acb775f9592d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -946,8 +946,7 @@ __setup("coredump_filter=", coredump_filter_setup);
 static void mm_init_aio(struct mm_struct *mm)
 {
 #ifdef CONFIG_AIO
-	spin_lock_init(&mm->ioctx_lock);
-	mm->ioctx_table = NULL;
+	xa_init_flags(&mm->ioctx, XA_FLAGS_ALLOC);
 #endif
 }
 
diff --git a/mm/debug.c b/mm/debug.c
index cdacba12e09a..d45ec63aed8c 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -127,9 +127,6 @@ void dump_mm(const struct mm_struct *mm)
 		"start_brk %lx brk %lx start_stack %lx\n"
 		"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
 		"binfmt %px flags %lx core_state %px\n"
-#ifdef CONFIG_AIO
-		"ioctx_table %px\n"
-#endif
 #ifdef CONFIG_MEMCG
 		"owner %px "
 #endif
@@ -158,9 +155,6 @@ void dump_mm(const struct mm_struct *mm)
 		mm->start_brk, mm->brk, mm->start_stack,
 		mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
 		mm->binfmt, mm->flags, mm->core_state,
-#ifdef CONFIG_AIO
-		mm->ioctx_table,
-#endif
 #ifdef CONFIG_MEMCG
 		mm->owner,
 #endif
-- 
2.19.1


             reply	other threads:[~2018-11-28 18:35 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-28 18:35 Matthew Wilcox [this message]
2018-12-06 21:08 ` [PATCH] aio: Convert ioctx_table to XArray Matthew Wilcox
2018-12-06 22:21 ` Jeff Moyer
2018-12-06 22:21   ` Jeff Moyer
2018-12-06 22:26   ` Jeff Moyer
2018-12-06 22:26     ` Jeff Moyer
2018-12-11 17:21     ` Jeff Moyer
2018-12-11 17:21       ` Jeff Moyer
2018-12-11 17:51       ` Matthew Wilcox
2018-12-11 18:02         ` Jeff Moyer
2018-12-11 18:02           ` Jeff Moyer
2018-12-11 18:05           ` Jens Axboe
2018-12-11 18:09             ` Jeff Moyer
2018-12-11 18:09               ` Jeff Moyer
2018-12-11 18:37               ` Jens Axboe
2018-12-11 18:32             ` Jens Axboe
2018-12-11 18:36               ` Jens Axboe
2018-12-11 18:51               ` Matthew Wilcox
2018-12-11 18:52                 ` Jens Axboe
2018-12-11 18:41 ` Jens Axboe
2018-12-11 18:41   ` Jens Axboe
2018-12-11 18:45   ` Matthew Wilcox
2018-12-11 18:46     ` Jens Axboe
2018-12-11 18:53       ` Matthew Wilcox
2018-12-11 18:54         ` Jens Axboe
2018-12-11 18:53       ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181128183531.5139-1-willy@infradead.org \
    --to=willy@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=bcrl@kvack.org \
    --cc=dan.carpenter@oracle.com \
    --cc=keescook@chromium.org \
    --cc=linux-aio@kvack.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.