All of lore.kernel.org
 help / color / mirror / Atom feed
From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>,
	David Miller <davem@davemloft.net>,
	Jason Baron <jbaron@akamai.com>,
	kgraul@linux.ibm.com, ktkhai@virtuozzo.com,
	kyeongdon.kim@lge.com,
	Linux List Kernel Mailing <linux-kernel@vger.kernel.org>,
	Netdev <netdev@vger.kernel.org>,
	pabeni@redhat.com, syzkaller-bugs@googlegroups.com,
	xiyou.wangcong@gmail.com, Christoph Hellwig <hch@lst.de>,
	zhengbin <zhengbin13@huawei.com>,
	bcrl@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-aio@kvack.org, houtao1@huawei.com, yi.zhang@huawei.com
Subject: [PATCH 4/8] aio_poll(): get rid of weird refcounting
Date: Thu,  7 Mar 2019 00:03:12 +0000	[thread overview]
Message-ID: <20190307000316.31133-4-viro@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20190307000316.31133-1-viro@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

The only reason for taking the extra ref to iocb is that we want
to access it after vfs_poll() and an early wakeup could have it
already completed by the time vfs_poll() returns.

That's very easy to avoid, though - we need to know which lock
to grab and, having grabbed it, we need to check if an early
wakeup has already happened.  So let's just copy the reference
to the waitqueue into aio_poll_table and, instead of having the
"woken" flag in the iocb, move it to the aio_poll() stack frame and
put its address into the iocb (and make sure it's cleared, so
aio_poll_wake() won't step onto it after aio_poll() exits).

That's enough to get rid of the refcount.  In the async case, freeing
is done by aio_poll_complete() (and aio_poll() will only touch the
iocb past the vfs_poll() if it's guaranteed that aio_poll_complete()
has not happened yet); in all other cases we make sure that wakeups
haven't happened and won't happen, and deal with disposal of the iocb
ourselves.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 55 +++++++++++++++++++++++++++----------------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 22b288997441..ee062253e303 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -180,8 +180,8 @@ struct fsync_iocb {
 struct poll_iocb {
 	struct file		*file;
 	struct wait_queue_head	*head;
+	bool			*taken;
 	__poll_t		events;
-	bool			woken;
 	bool			cancelled;
 	struct wait_queue_entry	wait;
 	struct work_struct	work;
@@ -209,8 +209,6 @@ struct aio_kiocb {
 
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
-	refcount_t		ki_refcnt;
-
 	/*
 	 * If the aio_resfd field of the userspace iocb is not zero,
 	 * this is the underlying eventfd context to deliver events to.
@@ -1034,7 +1032,6 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 	percpu_ref_get(&ctx->reqs);
 	req->ki_ctx = ctx;
 	INIT_LIST_HEAD(&req->ki_list);
-	refcount_set(&req->ki_refcnt, 0);
 	req->ki_eventfd = NULL;
 	return req;
 }
@@ -1069,13 +1066,10 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 
 static inline void iocb_put(struct aio_kiocb *iocb)
 {
-	if (refcount_read(&iocb->ki_refcnt) == 0 ||
-	    refcount_dec_and_test(&iocb->ki_refcnt)) {
-		if (iocb->ki_filp)
-			fput(iocb->ki_filp);
-		percpu_ref_put(&iocb->ki_ctx->reqs);
-		kmem_cache_free(kiocb_cachep, iocb);
-	}
+	if (iocb->ki_filp)
+		fput(iocb->ki_filp);
+	percpu_ref_put(&iocb->ki_ctx->reqs);
+	kmem_cache_free(kiocb_cachep, iocb);
 }
 
 static void aio_fill_event(struct io_event *ev, struct aio_kiocb *iocb,
@@ -1672,8 +1666,10 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (mask && !(mask & req->events))
 		return 0;
 
-	req->woken = true;
-
+	if (unlikely(req->taken)) {
+		*req->taken = true;
+		req->taken = NULL;
+	}
 	if (mask) {
 		/*
 		 * Try to complete the iocb inline if we can. Use
@@ -1698,6 +1694,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 
 struct aio_poll_table {
 	struct poll_table_struct	pt;
+	struct wait_queue_head		*head;
 	struct aio_kiocb		*iocb;
 	int				error;
 };
@@ -1715,7 +1712,7 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 	}
 
 	pt->error = 0;
-	pt->iocb->poll.head = head;
+	pt->head = pt->iocb->poll.head = head;
 	add_wait_queue(head, &pt->iocb->poll.wait);
 }
 
@@ -1738,7 +1735,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
 
 	req->head = NULL;
-	req->woken = false;
+	req->taken = &async;
 	req->cancelled = false;
 
 	apt.pt._qproc = aio_poll_queue_proc;
@@ -1750,36 +1747,38 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	INIT_LIST_HEAD(&req->wait.entry);
 	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
 
-	/* one for removal from waitqueue, one for this function */
-	refcount_set(&aiocb->ki_refcnt, 2);
-
-	mask = vfs_poll(req->file, &apt.pt) & req->events;
-	if (unlikely(!req->head)) {
+	mask = req->events;
+	mask &= vfs_poll(req->file, &apt.pt);
+	/*
+	 * Careful: we might've been put into waitqueue *and* already
+	 * woken up before vfs_poll() returns.  The caller is holding
+	 * a reference to file, so it couldn't have been killed under
+	 * us, no matter what.  However, in case of early wakeup, @req
+	 * might be already gone by now.
+	 */
+	if (unlikely(!apt.head)) {
 		/* we did not manage to set up a waitqueue, done */
 		goto out;
 	}
-
 	spin_lock_irq(&ctx->ctx_lock);
-	spin_lock(&req->head->lock);
-	if (req->woken) { /* already taken up by aio_poll_wake() */
-		async = true;
+	spin_lock(&apt.head->lock);
+	if (async) { /* already taken up by aio_poll_wake() */
 		apt.error = 0;
 	} else if (!mask && !apt.error) { /* actually waiting for an event */
 		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
 		aiocb->ki_cancel = aio_poll_cancel;
+		req->taken = NULL;
 		async = true;
 	} else { /* if we get an error or a mask we are done */
 		WARN_ON_ONCE(list_empty(&req->wait.entry));
 		list_del_init(&req->wait.entry);
 		/* no wakeup in the future either; aiocb is ours to dispose of */
 	}
-	spin_unlock(&req->head->lock);
+	spin_unlock(&apt.head->lock);
 	spin_unlock_irq(&ctx->ctx_lock);
-
 out:
-	if (async && !apt.error)
+	if (!async && !apt.error)
 		aio_poll_complete(aiocb, mask);
-	iocb_put(aiocb);
 	return apt.error;
 }
 
-- 
2.11.0


  parent reply	other threads:[~2019-03-07  0:03 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-03 10:22 KASAN: use-after-free Read in unix_dgram_poll syzbot
2019-03-03 13:55 ` Al Viro
2019-03-03 15:18   ` [PATCH] aio: prevent the final fput() in the middle of vfs_poll() (Re: KASAN: use-after-free Read in unix_dgram_poll) Al Viro
2019-03-03 18:37     ` Eric Dumazet
2019-03-03 19:44     ` Linus Torvalds
2019-03-03 20:13       ` Linus Torvalds
2019-03-03 20:30       ` Al Viro
2019-03-03 22:23         ` Linus Torvalds
2019-03-04  2:36           ` Al Viro
2019-03-04 21:22             ` Linus Torvalds
2019-03-07  0:03               ` [PATCH 1/8] aio: make sure file is pinned Al Viro
2019-03-07  0:03                 ` [PATCH 2/8] aio_poll_wake(): don't set ->woken if we ignore the wakeup Al Viro
2019-03-07  2:18                   ` Al Viro
2019-03-08 11:16                     ` zhengbin (A)
2019-03-07  0:03                 ` [PATCH 3/8] aio_poll(): sanitize the logics after vfs_poll(), get rid of leak on error Al Viro
2019-03-07  2:11                   ` zhengbin (A)
2019-03-07  0:03                 ` Al Viro [this message]
2019-03-07  0:03                 ` [PATCH 5/8] make aio_read()/aio_write() return int Al Viro
2019-03-07  0:03                 ` [PATCH 6/8] move dropping ->ki_eventfd into iocb_put() Al Viro
2019-03-07  0:03                 ` [PATCH 7/8] deal with get_reqs_available() in aio_get_req() itself Al Viro
2019-03-07  0:03                 ` [PATCH 8/8] aio: move sanity checks and request allocation to io_submit_one() Al Viro
2019-03-07  0:23                 ` [PATCH 1/8] aio: make sure file is pinned Linus Torvalds
2019-03-07  0:41                   ` Al Viro
2019-03-07  0:48                     ` Al Viro
2019-03-07  1:20                       ` Al Viro
2019-03-07  1:30                         ` Linus Torvalds
2019-03-08  3:36                           ` Al Viro
2019-03-08 15:50                             ` Christoph Hellwig
2019-03-10  7:06                             ` Al Viro
2019-03-10  7:08                               ` [PATCH 1/8] pin iocb through aio Al Viro
2019-03-10  7:08                                 ` [PATCH 2/8] keep io_event in aio_kiocb Al Viro
2019-03-11 19:43                                   ` Christoph Hellwig
2019-03-11 21:17                                     ` Al Viro
2019-03-10  7:08                                 ` [PATCH 3/8] aio: store event at final iocb_put() Al Viro
2019-03-11 19:44                                   ` Christoph Hellwig
2019-03-11 21:13                                     ` Al Viro
2019-03-11 22:52                                       ` Al Viro
2019-03-10  7:08                                 ` [PATCH 4/8] Fix aio_poll() races Al Viro
2019-03-11 19:58                                   ` Christoph Hellwig
2019-03-11 21:06                                     ` Al Viro
2019-03-12 19:18                                       ` Christoph Hellwig
2019-03-10  7:08                                 ` [PATCH 5/8] make aio_read()/aio_write() return int Al Viro
2019-03-11 19:44                                   ` Christoph Hellwig
2019-03-10  7:08                                 ` [PATCH 6/8] move dropping ->ki_eventfd into iocb_destroy() Al Viro
2019-03-11 19:46                                   ` Christoph Hellwig
2019-03-10  7:08                                 ` [PATCH 7/8] deal with get_reqs_available() in aio_get_req() itself Al Viro
2019-03-11 19:46                                   ` Christoph Hellwig
2019-03-10  7:08                                 ` [PATCH 8/8] aio: move sanity checks and request allocation to io_submit_one() Al Viro
2019-03-11 19:48                                   ` Christoph Hellwig
2019-03-11 21:12                                     ` Al Viro
2019-03-11 19:41                                 ` [PATCH 1/8] pin iocb through aio Christoph Hellwig
2019-03-11 19:41                               ` [PATCH 1/8] aio: make sure file is pinned Christoph Hellwig
2019-03-04  7:53     ` [PATCH] aio: prevent the final fput() in the middle of vfs_poll() (Re: KASAN: use-after-free Read in unix_dgram_poll) Dmitry Vyukov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190307000316.31133-4-viro@ZenIV.linux.org.uk \
    --to=viro@zeniv.linux.org.uk \
    --cc=bcrl@kvack.org \
    --cc=davem@davemloft.net \
    --cc=eric.dumazet@gmail.com \
    --cc=hch@lst.de \
    --cc=houtao1@huawei.com \
    --cc=jbaron@akamai.com \
    --cc=kgraul@linux.ibm.com \
    --cc=ktkhai@virtuozzo.com \
    --cc=kyeongdon.kim@lge.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=syzkaller-bugs@googlegroups.com \
    --cc=torvalds@linux-foundation.org \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yi.zhang@huawei.com \
    --cc=zhengbin13@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.