linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] eventfd: convert to f_op->read_iter()
@ 2020-05-01 19:11 Jens Axboe
  2020-05-01 23:12 ` Al Viro
  2020-05-04 12:39 ` Christoph Hellwig
  0 siblings, 2 replies; 7+ messages in thread
From: Jens Axboe @ 2020-05-01 19:11 UTC (permalink / raw)
  To: Alexander Viro; +Cc: linux-fsdevel, linux-kernel

eventfd is using ->read() as it's file_operations read handler, but
this prevents passing in information about whether a given IO operation
is blocking or not. We can only use the file flags for that. To support
async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter()
support. Convert eventfd to using ->read_iter().

With ->read_iter(), we can support IOCB_NOWAIT. Ensure the fd setup
is done such that we set file->f_mode with FMODE_NOWAIT.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Since v3:

- Ensure we fiddle ->f_mode before doing fd_install() on the fd

Since v2:

- Cleanup eventfd_read() as per Al's suggestions

Since v1:

- Add FMODE_NOWAIT to the eventfd file

diff --git a/fs/eventfd.c b/fs/eventfd.c
index 78e41c7c3d05..20f0fd4d56e1 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -216,32 +216,32 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
 
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
 {
+	struct file *file = iocb->ki_filp;
 	struct eventfd_ctx *ctx = file->private_data;
-	ssize_t res;
 	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
-	if (count < sizeof(ucnt))
+	if (iov_iter_count(to) < sizeof(ucnt))
 		return -EINVAL;
-
 	spin_lock_irq(&ctx->wqh.lock);
-	res = -EAGAIN;
-	if (ctx->count > 0)
-		res = sizeof(ucnt);
-	else if (!(file->f_flags & O_NONBLOCK)) {
+	if (!ctx->count) {
+		if ((file->f_flags & O_NONBLOCK) ||
+		    (iocb->ki_flags & IOCB_NOWAIT)) {
+			spin_unlock_irq(&ctx->wqh.lock);
+			return -EAGAIN;
+		}
 		__add_wait_queue(&ctx->wqh, &wait);
 		for (;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (ctx->count > 0) {
-				res = sizeof(ucnt);
+			if (ctx->count)
 				break;
-			}
 			if (signal_pending(current)) {
-				res = -ERESTARTSYS;
-				break;
+				__remove_wait_queue(&ctx->wqh, &wait);
+				__set_current_state(TASK_RUNNING);
+				spin_unlock_irq(&ctx->wqh.lock);
+				return -ERESTARTSYS;
 			}
 			spin_unlock_irq(&ctx->wqh.lock);
 			schedule();
@@ -250,17 +250,14 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (likely(res > 0)) {
-		eventfd_ctx_do_read(ctx, &ucnt);
-		if (waitqueue_active(&ctx->wqh))
-			wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
-	}
+	eventfd_ctx_do_read(ctx, &ucnt);
+	if (waitqueue_active(&ctx->wqh))
+		wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
 	spin_unlock_irq(&ctx->wqh.lock);
-
-	if (res > 0 && put_user(ucnt, (__u64 __user *)buf))
+	if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
 		return -EFAULT;
 
-	return res;
+	return sizeof(ucnt);
 }
 
 static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
@@ -329,7 +326,7 @@ static const struct file_operations eventfd_fops = {
 #endif
 	.release	= eventfd_release,
 	.poll		= eventfd_poll,
-	.read		= eventfd_read,
+	.read_iter	= eventfd_read,
 	.write		= eventfd_write,
 	.llseek		= noop_llseek,
 };
@@ -406,6 +403,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
 static int do_eventfd(unsigned int count, int flags)
 {
 	struct eventfd_ctx *ctx;
+	struct file *file;
 	int fd;
 
 	/* Check the EFD_* constants for consistency.  */
@@ -425,11 +423,24 @@ static int do_eventfd(unsigned int count, int flags)
 	ctx->flags = flags;
 	ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
 
-	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
-			      O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
+	flags &= EFD_SHARED_FCNTL_FLAGS;
+	flags |= O_RDWR;
+	fd = get_unused_fd_flags(flags);
 	if (fd < 0)
-		eventfd_free_ctx(ctx);
+		goto err;
+
+	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		fd = PTR_ERR(file);
+		goto err;
+	}
 
+	file->f_mode |= FMODE_NOWAIT;
+	fd_install(fd, file);
+	return fd;
+err:
+	eventfd_free_ctx(ctx);
 	return fd;
 }
 
-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] eventfd: convert to f_op->read_iter()
  2020-05-01 19:11 [PATCH v4] eventfd: convert to f_op->read_iter() Jens Axboe
@ 2020-05-01 23:12 ` Al Viro
  2020-05-01 23:54   ` Jens Axboe
  2020-05-04 12:39 ` Christoph Hellwig
  1 sibling, 1 reply; 7+ messages in thread
From: Al Viro @ 2020-05-01 23:12 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-fsdevel, linux-kernel

On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote:
> +	flags &= EFD_SHARED_FCNTL_FLAGS;
> +	flags |= O_RDWR;
> +	fd = get_unused_fd_flags(flags);
>  	if (fd < 0)
> -		eventfd_free_ctx(ctx);
> +		goto err;
> +
> +	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
> +	if (IS_ERR(file)) {
> +		put_unused_fd(fd);
> +		fd = PTR_ERR(file);
> +		goto err;
> +	}
>  
> +	file->f_mode |= FMODE_NOWAIT;
> +	fd_install(fd, file);
> +	return fd;
> +err:
> +	eventfd_free_ctx(ctx);
>  	return fd;
>  }

Looks sane...  I can take it via vfs.git, or leave it for you if you
have other stuff in the same area...

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] eventfd: convert to f_op->read_iter()
  2020-05-01 23:12 ` Al Viro
@ 2020-05-01 23:54   ` Jens Axboe
  2020-05-03 13:46     ` Al Viro
  0 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2020-05-01 23:54 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, linux-kernel

On 5/1/20 5:12 PM, Al Viro wrote:
> On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote:
>> +	flags &= EFD_SHARED_FCNTL_FLAGS;
>> +	flags |= O_RDWR;
>> +	fd = get_unused_fd_flags(flags);
>>  	if (fd < 0)
>> -		eventfd_free_ctx(ctx);
>> +		goto err;
>> +
>> +	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
>> +	if (IS_ERR(file)) {
>> +		put_unused_fd(fd);
>> +		fd = PTR_ERR(file);
>> +		goto err;
>> +	}
>>  
>> +	file->f_mode |= FMODE_NOWAIT;
>> +	fd_install(fd, file);
>> +	return fd;
>> +err:
>> +	eventfd_free_ctx(ctx);
>>  	return fd;
>>  }
> 
> Looks sane...  I can take it via vfs.git, or leave it for you if you
> have other stuff in the same area...

Would be great if you can queue it up in vfs.git, thanks! Don't have
anything else that'd conflict with this.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] eventfd: convert to f_op->read_iter()
  2020-05-01 23:54   ` Jens Axboe
@ 2020-05-03 13:46     ` Al Viro
  2020-05-03 14:42       ` Jens Axboe
  0 siblings, 1 reply; 7+ messages in thread
From: Al Viro @ 2020-05-03 13:46 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-fsdevel, linux-kernel

On Fri, May 01, 2020 at 05:54:09PM -0600, Jens Axboe wrote:
> On 5/1/20 5:12 PM, Al Viro wrote:
> > On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote:
> >> +	flags &= EFD_SHARED_FCNTL_FLAGS;
> >> +	flags |= O_RDWR;
> >> +	fd = get_unused_fd_flags(flags);
> >>  	if (fd < 0)
> >> -		eventfd_free_ctx(ctx);
> >> +		goto err;
> >> +
> >> +	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
> >> +	if (IS_ERR(file)) {
> >> +		put_unused_fd(fd);
> >> +		fd = PTR_ERR(file);
> >> +		goto err;
> >> +	}
> >>  
> >> +	file->f_mode |= FMODE_NOWAIT;
> >> +	fd_install(fd, file);
> >> +	return fd;
> >> +err:
> >> +	eventfd_free_ctx(ctx);
> >>  	return fd;
> >>  }
> > 
> > Looks sane...  I can take it via vfs.git, or leave it for you if you
> > have other stuff in the same area...
> 
> Would be great if you can queue it up in vfs.git, thanks! Don't have
> anything else that'd conflict with this.

Applied; BTW, what happens if
        ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
fails?  Quitely succeed with BS value (-ENOSPC/-ENOMEM) shown by
eventfd_show_fdinfo()?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] eventfd: convert to f_op->read_iter()
  2020-05-03 13:46     ` Al Viro
@ 2020-05-03 14:42       ` Jens Axboe
  2020-05-03 16:50         ` Jens Axboe
  0 siblings, 1 reply; 7+ messages in thread
From: Jens Axboe @ 2020-05-03 14:42 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, linux-kernel

On 5/3/20 7:46 AM, Al Viro wrote:
> On Fri, May 01, 2020 at 05:54:09PM -0600, Jens Axboe wrote:
>> On 5/1/20 5:12 PM, Al Viro wrote:
>>> On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote:
>>>> +	flags &= EFD_SHARED_FCNTL_FLAGS;
>>>> +	flags |= O_RDWR;
>>>> +	fd = get_unused_fd_flags(flags);
>>>>  	if (fd < 0)
>>>> -		eventfd_free_ctx(ctx);
>>>> +		goto err;
>>>> +
>>>> +	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
>>>> +	if (IS_ERR(file)) {
>>>> +		put_unused_fd(fd);
>>>> +		fd = PTR_ERR(file);
>>>> +		goto err;
>>>> +	}
>>>>  
>>>> +	file->f_mode |= FMODE_NOWAIT;
>>>> +	fd_install(fd, file);
>>>> +	return fd;
>>>> +err:
>>>> +	eventfd_free_ctx(ctx);
>>>>  	return fd;
>>>>  }
>>>
>>> Looks sane...  I can take it via vfs.git, or leave it for you if you
>>> have other stuff in the same area...
>>
>> Would be great if you can queue it up in vfs.git, thanks! Don't have
>> anything else that'd conflict with this.
> 
> Applied; BTW, what happens if
>         ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
> fails?  Quitely succeed with BS value (-ENOSPC/-ENOMEM) shown by
> eventfd_show_fdinfo()?

Huh yeah that's odd, not sure how I missed that when touching code
near it. Want a followup patch to fix that issue?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] eventfd: convert to f_op->read_iter()
  2020-05-03 14:42       ` Jens Axboe
@ 2020-05-03 16:50         ` Jens Axboe
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2020-05-03 16:50 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, linux-kernel

On 5/3/20 8:42 AM, Jens Axboe wrote:
> On 5/3/20 7:46 AM, Al Viro wrote:
>> On Fri, May 01, 2020 at 05:54:09PM -0600, Jens Axboe wrote:
>>> On 5/1/20 5:12 PM, Al Viro wrote:
>>>> On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote:
>>>>> +	flags &= EFD_SHARED_FCNTL_FLAGS;
>>>>> +	flags |= O_RDWR;
>>>>> +	fd = get_unused_fd_flags(flags);
>>>>>  	if (fd < 0)
>>>>> -		eventfd_free_ctx(ctx);
>>>>> +		goto err;
>>>>> +
>>>>> +	file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
>>>>> +	if (IS_ERR(file)) {
>>>>> +		put_unused_fd(fd);
>>>>> +		fd = PTR_ERR(file);
>>>>> +		goto err;
>>>>> +	}
>>>>>  
>>>>> +	file->f_mode |= FMODE_NOWAIT;
>>>>> +	fd_install(fd, file);
>>>>> +	return fd;
>>>>> +err:
>>>>> +	eventfd_free_ctx(ctx);
>>>>>  	return fd;
>>>>>  }
>>>>
>>>> Looks sane...  I can take it via vfs.git, or leave it for you if you
>>>> have other stuff in the same area...
>>>
>>> Would be great if you can queue it up in vfs.git, thanks! Don't have
>>> anything else that'd conflict with this.
>>
>> Applied; BTW, what happens if
>>         ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
>> fails?  Quitely succeed with BS value (-ENOSPC/-ENOMEM) shown by
>> eventfd_show_fdinfo()?
> 
> Huh yeah that's odd, not sure how I missed that when touching code
> near it. Want a followup patch to fix that issue?

This should do the trick. Ideally we'd change the order of these, and
shove this fix into 5.7, but not sure it's super important since this
bug is pretty old. Would make stable backports easier, though...
Let me know how you want to handle it, as it'll impact the one you
have already queued up.


diff --git a/fs/eventfd.c b/fs/eventfd.c
index 20f0fd4d56e1..96081efdd0c9 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -422,6 +422,10 @@ static int do_eventfd(unsigned int count, int flags)
 	ctx->count = count;
 	ctx->flags = flags;
 	ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
+	if (ctx->id < 0) {
+		fd = ctx->id;
+		goto err;
+	}
 
 	flags &= EFD_SHARED_FCNTL_FLAGS;
 	flags |= O_RDWR;

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v4] eventfd: convert to f_op->read_iter()
  2020-05-01 19:11 [PATCH v4] eventfd: convert to f_op->read_iter() Jens Axboe
  2020-05-01 23:12 ` Al Viro
@ 2020-05-04 12:39 ` Christoph Hellwig
  1 sibling, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2020-05-04 12:39 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

On Fri, May 01, 2020 at 01:11:09PM -0600, Jens Axboe wrote:
> eventfd is using ->read() as it's file_operations read handler, but
> this prevents passing in information about whether a given IO operation
> is blocking or not. We can only use the file flags for that. To support
> async (-EAGAIN/poll based) retries for io_uring, we need ->read_iter()
> support. Convert eventfd to using ->read_iter().
> 
> With ->read_iter(), we can support IOCB_NOWAIT. Ensure the fd setup
> is done such that we set file->f_mode with FMODE_NOWAIT.

Can you add a anon_inode_getfd_mode that passes extra flags for f_mode
instead of opencoding it?  Especially as I expect more users that might
want to handle IOCB_NOWAIT.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-05-04 12:39 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-01 19:11 [PATCH v4] eventfd: convert to f_op->read_iter() Jens Axboe
2020-05-01 23:12 ` Al Viro
2020-05-01 23:54   ` Jens Axboe
2020-05-03 13:46     ` Al Viro
2020-05-03 14:42       ` Jens Axboe
2020-05-03 16:50         ` Jens Axboe
2020-05-04 12:39 ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).