All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>, Avi Kivity <avi@scylladb.com>,
	linux-aio@kvack.org, linux-fsdevel@vger.kernel.org,
	netdev@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 06/28] aio: implement IOCB_CMD_POLL
Date: Thu, 22 Mar 2018 18:24:10 +0100	[thread overview]
Message-ID: <20180322172410.GC5542@lst.de> (raw)
In-Reply-To: <20180322165255.GI30522@ZenIV.linux.org.uk>

On Thu, Mar 22, 2018 at 04:52:55PM +0000, Al Viro wrote:
> On Wed, Mar 21, 2018 at 08:40:10AM +0100, Christoph Hellwig wrote:
> > Simple one-shot poll through the io_submit() interface.  To poll for
> > a file descriptor the application should submit an iocb of type
> > IOCB_CMD_POLL.  It will poll the fd for the events specified in the
> > first 32 bits of the aio_buf field of the iocb.
> > 
> > Unlike poll or epoll without EPOLLONESHOT this interface always works
> > in one shot mode, that is once the iocb is completed, it will have to be
> > resubmitted.
> 
> AFAICS, your wakeup can race with io_cancel(), leading to double fput().
> You are checking the "somebody had committed itself to cancelling that
> thing" bit outside of ->ctx_lock on the wakeup side, and I don't see
> anything to prevent both getting to __aio_poll_complete() on the same
> iocb, with obvious results.

True.  It probably wants something like this as a fix, although this
is entirely untested:

diff --git a/fs/aio.c b/fs/aio.c
index 38b408129697..66d5cc272617 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -187,8 +187,9 @@ struct aio_kiocb {
 						 * for cancellation */
 
 	unsigned int		flags;		/* protected by ctx->ctx_lock */
-#define AIO_IOCB_DELAYED_CANCEL	(1 << 0)
-#define AIO_IOCB_CANCELLED	(1 << 1)
+#define AIO_IOCB_CAN_CANCEL	(1 << 0)
+#define AIO_IOCB_DELAYED_CANCEL	(1 << 1)
+#define AIO_IOCB_CANCELLED	(1 << 2)
 
 	/*
 	 * If the aio_resfd field of the userspace iocb is not zero,
@@ -568,7 +569,7 @@ static void __kiocb_set_cancel_fn(struct aio_kiocb *req,
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	list_add_tail(&req->ki_list, &ctx->active_reqs);
 	req->ki_cancel = cancel;
-	req->flags |= iocb_flags;
+	req->flags |= (AIO_IOCB_CAN_CANCEL | iocb_flags);
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
 
@@ -1086,22 +1087,30 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	return ret;
 }
 
+#define AIO_COMPLETE_CANCEL	(1 << 0)
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
+static bool aio_complete(struct aio_kiocb *iocb, long res, long res2,
+		unsigned complete_flags)
 {
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
 	unsigned tail, pos, head;
-	unsigned long	flags;
-
-	if (!list_empty_careful(iocb->ki_list.next)) {
-		unsigned long flags;
+	unsigned long flags;
 
+	if (iocb->flags & AIO_IOCB_CAN_CANCEL) {
 		spin_lock_irqsave(&ctx->ctx_lock, flags);
-		list_del(&iocb->ki_list);
+		if (!(complete_flags & AIO_COMPLETE_CANCEL) &&
+		    (iocb->flags & AIO_IOCB_CANCELLED)) {
+			spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+			return false;
+		}
+
+		if (!list_empty(&iocb->ki_list))
+			list_del(&iocb->ki_list);
 		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	}
 
@@ -1177,6 +1186,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 		wake_up(&ctx->wait);
 
 	percpu_ref_put(&ctx->reqs);
+	return true;
 }
 
 /* aio_read_events_ring
@@ -1425,6 +1435,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+	struct file *file = kiocb->ki_filp;
 
 	WARN_ON_ONCE(is_sync_kiocb(kiocb));
 
@@ -1440,8 +1451,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 		file_end_write(kiocb->ki_filp);
 	}
 
-	fput(kiocb->ki_filp);
-	aio_complete(iocb, res, res2);
+	if (aio_complete(iocb, res, res2, 0))
+		fput(file);
 }
 
 static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
@@ -1584,11 +1595,13 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 static void aio_fsync_work(struct work_struct *work)
 {
 	struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, fsync);
+	struct file *file = req->file;
 	int ret;
 
 	ret = vfs_fsync(req->file, req->datasync);
-	fput(req->file);
-	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+	if (aio_complete(iocb, ret, 0, 0))
+		fput(file);
 }
 
 static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
@@ -1617,27 +1630,23 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
 	return ret;
 }
 
-static void __aio_complete_poll(struct poll_iocb *req, __poll_t mask)
-{
-	fput(req->file);
-	aio_complete(container_of(req, struct aio_kiocb, poll),
-			mangle_poll(mask), 0);
-}
-
 static void aio_complete_poll(struct poll_iocb *req, __poll_t mask)
 {
 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct file *file = req->file;
 
-	if (!(iocb->flags & AIO_IOCB_CANCELLED))
-		__aio_complete_poll(req, mask);
+	if (aio_complete(iocb, mangle_poll(mask), 0, 0))
+		fput(file);
 }
 
 static int aio_poll_cancel(struct kiocb *rw)
 {
 	struct aio_kiocb *iocb = container_of(rw, struct aio_kiocb, rw);
+	struct file *file = iocb->poll.file;
 
 	remove_wait_queue(iocb->poll.head, &iocb->poll.wait);
-	__aio_complete_poll(&iocb->poll, 0); /* no events to report */
+	if (aio_complete(iocb, 0, 0, AIO_COMPLETE_CANCEL))
+		fput(file);
 	return 0;
 }
 

WARNING: multiple messages have this Message-ID (diff)
From: Christoph Hellwig <hch@lst.de>
To: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>, Avi Kivity <avi@scylladb.com>,
	linux-aio@kvack.org, linux-fsdevel@vger.kernel.org,
	netdev@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 06/28] aio: implement IOCB_CMD_POLL
Date: Thu, 22 Mar 2018 18:24:10 +0100	[thread overview]
Message-ID: <20180322172410.GC5542@lst.de> (raw)
In-Reply-To: <20180322165255.GI30522@ZenIV.linux.org.uk>

On Thu, Mar 22, 2018 at 04:52:55PM +0000, Al Viro wrote:
> On Wed, Mar 21, 2018 at 08:40:10AM +0100, Christoph Hellwig wrote:
> > Simple one-shot poll through the io_submit() interface.  To poll for
> > a file descriptor the application should submit an iocb of type
> > IOCB_CMD_POLL.  It will poll the fd for the events specified in the
> > first 32 bits of the aio_buf field of the iocb.
> > 
> > Unlike poll or epoll without EPOLLONESHOT this interface always works
> > in one shot mode, that is once the iocb is completed, it will have to be
> > resubmitted.
> 
> AFAICS, your wakeup can race with io_cancel(), leading to double fput().
> You are checking the "somebody had committed itself to cancelling that
> thing" bit outside of ->ctx_lock on the wakeup side, and I don't see
> anything to prevent both getting to __aio_poll_complete() on the same
> iocb, with obvious results.

True.  It probably wants something like this as a fix, although this
is entirely untested:

diff --git a/fs/aio.c b/fs/aio.c
index 38b408129697..66d5cc272617 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -187,8 +187,9 @@ struct aio_kiocb {
 						 * for cancellation */
 
 	unsigned int		flags;		/* protected by ctx->ctx_lock */
-#define AIO_IOCB_DELAYED_CANCEL	(1 << 0)
-#define AIO_IOCB_CANCELLED	(1 << 1)
+#define AIO_IOCB_CAN_CANCEL	(1 << 0)
+#define AIO_IOCB_DELAYED_CANCEL	(1 << 1)
+#define AIO_IOCB_CANCELLED	(1 << 2)
 
 	/*
 	 * If the aio_resfd field of the userspace iocb is not zero,
@@ -568,7 +569,7 @@ static void __kiocb_set_cancel_fn(struct aio_kiocb *req,
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	list_add_tail(&req->ki_list, &ctx->active_reqs);
 	req->ki_cancel = cancel;
-	req->flags |= iocb_flags;
+	req->flags |= (AIO_IOCB_CAN_CANCEL | iocb_flags);
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
 
@@ -1086,22 +1087,30 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 	return ret;
 }
 
+#define AIO_COMPLETE_CANCEL	(1 << 0)
+
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
+static bool aio_complete(struct aio_kiocb *iocb, long res, long res2,
+		unsigned complete_flags)
 {
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
 	unsigned tail, pos, head;
-	unsigned long	flags;
-
-	if (!list_empty_careful(iocb->ki_list.next)) {
-		unsigned long flags;
+	unsigned long flags;
 
+	if (iocb->flags & AIO_IOCB_CAN_CANCEL) {
 		spin_lock_irqsave(&ctx->ctx_lock, flags);
-		list_del(&iocb->ki_list);
+		if (!(complete_flags & AIO_COMPLETE_CANCEL) &&
+		    (iocb->flags & AIO_IOCB_CANCELLED)) {
+			spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+			return false;
+		}
+
+		if (!list_empty(&iocb->ki_list))
+			list_del(&iocb->ki_list);
 		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	}
 
@@ -1177,6 +1186,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 		wake_up(&ctx->wait);
 
 	percpu_ref_put(&ctx->reqs);
+	return true;
 }
 
 /* aio_read_events_ring
@@ -1425,6 +1435,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+	struct file *file = kiocb->ki_filp;
 
 	WARN_ON_ONCE(is_sync_kiocb(kiocb));
 
@@ -1440,8 +1451,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 		file_end_write(kiocb->ki_filp);
 	}
 
-	fput(kiocb->ki_filp);
-	aio_complete(iocb, res, res2);
+	if (aio_complete(iocb, res, res2, 0))
+		fput(file);
 }
 
 static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
@@ -1584,11 +1595,13 @@ static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 static void aio_fsync_work(struct work_struct *work)
 {
 	struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, fsync);
+	struct file *file = req->file;
 	int ret;
 
 	ret = vfs_fsync(req->file, req->datasync);
-	fput(req->file);
-	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+	if (aio_complete(iocb, ret, 0, 0))
+		fput(file);
 }
 
 static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
@@ -1617,27 +1630,23 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
 	return ret;
 }
 
-static void __aio_complete_poll(struct poll_iocb *req, __poll_t mask)
-{
-	fput(req->file);
-	aio_complete(container_of(req, struct aio_kiocb, poll),
-			mangle_poll(mask), 0);
-}
-
 static void aio_complete_poll(struct poll_iocb *req, __poll_t mask)
 {
 	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct file *file = req->file;
 
-	if (!(iocb->flags & AIO_IOCB_CANCELLED))
-		__aio_complete_poll(req, mask);
+	if (aio_complete(iocb, mangle_poll(mask), 0, 0))
+		fput(file);
 }
 
 static int aio_poll_cancel(struct kiocb *rw)
 {
 	struct aio_kiocb *iocb = container_of(rw, struct aio_kiocb, rw);
+	struct file *file = iocb->poll.file;
 
 	remove_wait_queue(iocb->poll.head, &iocb->poll.wait);
-	__aio_complete_poll(&iocb->poll, 0); /* no events to report */
+	if (aio_complete(iocb, 0, 0, AIO_COMPLETE_CANCEL))
+		fput(file);
 	return 0;
 }
 

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

  reply	other threads:[~2018-03-22 17:24 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-21  7:40 aio poll and a new in-kernel poll API V6 Christoph Hellwig
2018-03-21  7:40 ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 01/28] fs: unexport poll_schedule_timeout Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:05   ` Greg KH
2018-03-21  9:05     ` Greg KH
2018-03-21  7:40 ` [PATCH 02/28] fs: cleanup do_pollfd Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:06   ` Greg KH
2018-03-21  9:06     ` Greg KH
2018-03-21  7:40 ` [PATCH 03/28] fs: update documentation to mention __poll_t Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:06   ` Greg KH
2018-03-21  9:06     ` Greg KH
2018-03-21 16:28   ` Darrick J. Wong
2018-03-21 16:28     ` Darrick J. Wong
2018-03-21  7:40 ` [PATCH 04/28] fs: add new vfs_poll and file_can_poll helpers Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:07   ` Greg KH
2018-03-21  9:07     ` Greg KH
2018-03-21  7:40 ` [PATCH 05/28] fs: introduce new ->get_poll_head and ->poll_mask methods Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:08   ` Greg KH
2018-03-21  9:08     ` Greg KH
2018-03-21 16:29   ` Darrick J. Wong
2018-03-21 16:29     ` Darrick J. Wong
2018-03-21  7:40 ` [PATCH 06/28] aio: implement IOCB_CMD_POLL Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:09   ` Greg KH
2018-03-21  9:09     ` Greg KH
2018-03-21 16:31   ` Darrick J. Wong
2018-03-21 16:31     ` Darrick J. Wong
2018-03-22 16:52   ` Al Viro
2018-03-22 16:52     ` Al Viro
2018-03-22 17:24     ` Christoph Hellwig [this message]
2018-03-22 17:24       ` Christoph Hellwig
2018-03-22 18:16       ` Al Viro
2018-03-22 18:16         ` Al Viro
2018-03-23 18:05         ` Christoph Hellwig
2018-03-23 18:05           ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 07/28] net: refactor socket_poll Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 08/28] net: add support for ->poll_mask in proto_ops Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 09/28] net: remove sock_no_poll Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 10/28] net/tcp: convert to ->poll_mask Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 11/28] net/unix: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 12/28] net: convert datagram_poll users to ->poll_mask Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:11   ` Greg KH
2018-03-21  9:11     ` Greg KH
2018-03-21  7:40 ` [PATCH 13/28] net/dccp: convert to ->poll_mask Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 14/28] net/atm: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 15/28] net/vmw_vsock: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 16/28] net/tipc: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 17/28] net/sctp: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 18/28] net/bluetooth: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 19/28] net/caif: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 20/28] net/nfc: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 21/28] net/phonet: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 22/28] net/iucv: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 23/28] net/rxrpc: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 24/28] crypto: af_alg: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 25/28] pipe: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 26/28] eventfd: switch " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 27/28] timerfd: convert " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  7:40 ` [PATCH 28/28] random: " Christoph Hellwig
2018-03-21  7:40   ` Christoph Hellwig
2018-03-21  9:10   ` Greg KH
2018-03-21  9:10     ` Greg KH
2018-03-22 13:49   ` Theodore Y. Ts'o
2018-03-22 13:49     ` Theodore Y. Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180322172410.GC5542@lst.de \
    --to=hch@lst.de \
    --cc=avi@scylladb.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.