linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
To: David Howells <dhowells@redhat.com>, torvalds@linux-foundation.org
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	nicolas.dichtel@6wind.com, raven@themaw.net,
	Christian Brauner <christian@brauner.io>,
	keyrings@vger.kernel.org, linux-usb@vger.kernel.org,
	linux-block@vger.kernel.org,
	linux-security-module@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH 11/10] pipe: Add fsync() support [ver #2]
Date: Sun, 27 Oct 2019 19:04:35 +0300	[thread overview]
Message-ID: <c6e044cc-5596-90b7-4418-6ad7009d6d79@yandex-team.ru> (raw)
In-Reply-To: <30394.1571936252@warthog.procyon.org.uk>

On 24/10/2019 19.57, David Howells wrote:
> pipe: Add fsync() support
> 
> The keyrings testsuite needs the ability to wait for all the outstanding
> notifications in the queue to have been processed so that it can then go
> through them to find out whether the notifications it expected have been
> emitted.

Similar synchronization is required for reusing memory after vmsplice()?
I don't see other way how sender could safely change these pages.

> 
> Implement fsync() support for pipes to provide this.  The tailmost buffer
> at the point of calling is marked and fsync adds itself to the list of
> waiters, noting the tail position to be waited for and marking the buffer
> as no longer mergeable.  Then when the buffer is consumed, if the flag is
> set, any matching waiters are woken up.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
>   fs/fuse/dev.c             |    1
>   fs/pipe.c                 |   61 ++++++++++++++++++++++++++++++++++++++++++++++
>   fs/splice.c               |    3 ++
>   include/linux/pipe_fs_i.h |   22 ++++++++++++++++
>   lib/iov_iter.c            |    2 -
>   5 files changed, 88 insertions(+), 1 deletion(-)
> 
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 5ef57a322cb8..9617a35579cb 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -1983,6 +1983,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
>   		if (rem >= ibuf->len) {
>   			*obuf = *ibuf;
>   			ibuf->ops = NULL;
> +			pipe_wake_fsync(pipe, ibuf, tail);
>   			tail++;
>   			pipe_commit_read(pipe, tail);
>   		} else {
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 6a982a88f658..8e5fd7314be1 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -30,6 +30,12 @@
>   
>   #include "internal.h"
>   
> +struct pipe_fsync {
> +	struct list_head	link;		/* Link in pipe->fsync */
> +	struct completion	done;
> +	unsigned int		tail;		/* The buffer being waited for */
> +};
> +
>   /*
>    * The max size that a non-root user is allowed to grow the pipe. Can
>    * be set by root in /proc/sys/fs/pipe-max-size
> @@ -269,6 +275,58 @@ static bool pipe_buf_can_merge(struct pipe_buffer *buf)
>   	return buf->ops == &anon_pipe_buf_ops;
>   }
>   
> +/*
> + * Wait for all the data currently in the pipe to be consumed.
> + */
> +static int pipe_fsync(struct file *file, loff_t a, loff_t b, int datasync)
> +{
> +	struct pipe_inode_info *pipe = file->private_data;
> +	struct pipe_buffer *buf;
> +	struct pipe_fsync fsync;
> +	unsigned int head, tail, mask;
> +
> +	pipe_lock(pipe);
> +
> +	head = pipe->head;
> +	tail = pipe->tail;
> +	mask = pipe->ring_size - 1;
> +
> +	if (pipe_empty(head, tail)) {
> +		pipe_unlock(pipe);
> +		return 0;
> +	}
> +
> +	init_completion(&fsync.done);
> +	fsync.tail = tail;
> +	buf = &pipe->bufs[tail & mask];
> +	buf->flags |= PIPE_BUF_FLAG_FSYNC;
> +	pipe_buf_mark_unmergeable(buf);
> +	list_add_tail(&fsync.link, &pipe->fsync);
> +	pipe_unlock(pipe);
> +
> +	if (wait_for_completion_interruptible(&fsync.done) < 0) {
> +		pipe_lock(pipe);
> +		list_del(&fsync.link);
> +		pipe_unlock(pipe);
> +		return -EINTR;
> +	}
> +
> +	return 0;
> +}
> +
> +void __pipe_wake_fsync(struct pipe_inode_info *pipe, unsigned int tail)
> +{
> +	struct pipe_fsync *fsync, *p;
> +
> +	list_for_each_entry_safe(fsync, p, &pipe->fsync, link) {
> +		if (fsync->tail == tail) {
> +			list_del_init(&fsync->link);
> +			complete(&fsync->done);
> +		}
> +	}
> +}
> +EXPORT_SYMBOL(__pipe_wake_fsync);
> +
>   static ssize_t
>   pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   {
> @@ -325,6 +383,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>   			if (!buf->len) {
>   				pipe_buf_release(pipe, buf);
>   				spin_lock_irq(&pipe->wait.lock);
> +				pipe_wake_fsync(pipe, buf, tail);
>   				tail++;
>   				pipe_commit_read(pipe, tail);
>   				do_wakeup = 1;
> @@ -717,6 +776,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
>   		pipe->ring_size = pipe_bufs;
>   		pipe->user = user;
>   		mutex_init(&pipe->mutex);
> +		INIT_LIST_HEAD(&pipe->fsync);
>   		return pipe;
>   	}
>   
> @@ -1060,6 +1120,7 @@ const struct file_operations pipefifo_fops = {
>   	.llseek		= no_llseek,
>   	.read_iter	= pipe_read,
>   	.write_iter	= pipe_write,
> +	.fsync		= pipe_fsync,
>   	.poll		= pipe_poll,
>   	.unlocked_ioctl	= pipe_ioctl,
>   	.release	= pipe_release,
> diff --git a/fs/splice.c b/fs/splice.c
> index 3f72bc31b6ec..e106367e1be6 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -523,6 +523,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
>   
>   		if (!buf->len) {
>   			pipe_buf_release(pipe, buf);
> +			pipe_wake_fsync(pipe, buf, tail);
>   			tail++;
>   			pipe_commit_read(pipe, tail);
>   			if (pipe->files)
> @@ -771,6 +772,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
>   				ret -= buf->len;
>   				buf->len = 0;
>   				pipe_buf_release(pipe, buf);
> +				pipe_wake_fsync(pipe, buf, tail);
>   				tail++;
>   				pipe_commit_read(pipe, tail);
>   				if (pipe->files)
> @@ -1613,6 +1615,7 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
>   			 */
>   			*obuf = *ibuf;
>   			ibuf->ops = NULL;
> +			pipe_wake_fsync(ipipe, ibuf, i_tail);
>   			i_tail++;
>   			pipe_commit_read(ipipe, i_tail);
>   			input_wakeup = true;
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index 90055ff16550..1a3027089558 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -8,6 +8,7 @@
>   #define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
>   #define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
>   #define PIPE_BUF_FLAG_PACKET	0x08	/* read() as a packet */
> +#define PIPE_BUF_FLAG_FSYNC	0x10	/* fsync() is waiting for this buffer to die */
>   
>   /**
>    *	struct pipe_buffer - a linux kernel pipe buffer
> @@ -43,6 +44,7 @@ struct pipe_buffer {
>    *	@w_counter: writer counter
>    *	@fasync_readers: reader side fasync
>    *	@fasync_writers: writer side fasync
> + *	@fsync: Waiting fsyncs
>    *	@bufs: the circular array of pipe buffers
>    *	@user: the user who created this pipe
>    **/
> @@ -62,6 +64,7 @@ struct pipe_inode_info {
>   	struct page *tmp_page;
>   	struct fasync_struct *fasync_readers;
>   	struct fasync_struct *fasync_writers;
> +	struct list_head fsync;
>   	struct pipe_buffer *bufs;
>   	struct user_struct *user;
>   };
> @@ -268,6 +271,25 @@ extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
>   long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
>   struct pipe_inode_info *get_pipe_info(struct file *file);
>   
> +void __pipe_wake_fsync(struct pipe_inode_info *pipe, unsigned int tail);
> +
> +/**
> + * pipe_wake_fsync - Wake up anyone waiting with fsync for this point
> + * @pipe: The pipe that owns the buffer
> + * @buf: The pipe buffer in question
> + * @tail: The index in the ring of the buffer
> + *
> + * Check to see if anyone is waiting for the pipe ring to clear up to and
> + * including this buffer, and, if they are, wake them up.
> + */
> +static inline void pipe_wake_fsync(struct pipe_inode_info *pipe,
> +				   struct pipe_buffer *buf,
> +				   unsigned int tail)
> +{
> +	if (unlikely(buf->flags & PIPE_BUF_FLAG_FSYNC))
> +		__pipe_wake_fsync(pipe, tail);
> +}
> +
>   int create_pipe_files(struct file **, int);
>   unsigned int round_pipe_size(unsigned long size);
>   
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index e22f4e283f6d..38d52524cd21 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -404,7 +404,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
>   	buf->offset = offset;
>   	buf->len = bytes;
>   
> -	pipe_commit_read(pipe, i_head);
> +	pipe_commit_write(pipe, i_head);
>   	i->iov_offset = offset + bytes;
>   	i->head = i_head;
>   out:
> 
> 


  parent reply	other threads:[~2019-10-27 16:04 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-23 20:17 [RFC PATCH 00/10] pipe: Notification queue preparation [ver #2] David Howells
2019-10-23 20:17 ` [RFC PATCH 01/10] pipe: Reduce #inclusion of pipe_fs_i.h " David Howells
2019-10-23 20:17 ` [RFC PATCH 02/10] Remove the nr_exclusive argument from __wake_up_sync_key() " David Howells
2019-10-23 20:17 ` [RFC PATCH 03/10] Add wake_up_interruptible_sync_poll_locked() " David Howells
2019-10-23 20:17 ` [RFC PATCH 04/10] pipe: Use head and tail pointers for the ring, not cursor and length " David Howells
2019-10-26 14:58   ` [pipe] 6567a02d20: BUG:kernel_NULL_pointer_dereference,address kernel test robot
2019-10-27 14:03   ` [RFC PATCH 04/10] pipe: Use head and tail pointers for the ring, not cursor and length [ver #2] Linus Torvalds
2019-10-30 16:19   ` Ilya Dryomov
2019-10-30 20:35     ` Rasmus Villemoes
2019-10-30 22:16       ` Ilya Dryomov
2019-10-30 22:38         ` Rasmus Villemoes
2019-10-31 15:11     ` David Howells
2019-10-31 15:57       ` Ilya Dryomov
2019-11-01 14:53       ` David Howells
2019-10-31 14:57   ` David Howells
2019-11-03 11:17     ` Matthew Wilcox
2019-12-06 21:47   ` Johannes Hirte
2019-12-06 22:14     ` Linus Torvalds
2019-12-07  0:00       ` Johannes Hirte
2019-12-07  1:03         ` Linus Torvalds
2019-12-08 17:56           ` Johannes Hirte
2019-12-08 18:10             ` Linus Torvalds
2019-12-07  6:47         ` Linus Torvalds
2019-12-06 22:15   ` David Howells
2019-10-23 20:17 ` [RFC PATCH 05/10] pipe: Allow pipes to have kernel-reserved slots " David Howells
2019-10-23 20:18 ` [RFC PATCH 06/10] pipe: Advance tail pointer inside of wait spinlock in pipe_read() " David Howells
2019-10-23 20:18 ` [RFC PATCH 07/10] pipe: Conditionalise wakeup " David Howells
2019-10-27 15:57   ` Konstantin Khlebnikov
2019-10-31 15:21   ` David Howells
2019-10-31 16:38   ` David Howells
2019-11-03 11:04     ` Konstantin Khlebnikov
2019-10-23 20:18 ` [RFC PATCH 08/10] pipe: Rearrange sequence in pipe_write() to preallocate slot " David Howells
2019-10-23 20:18 ` [RFC PATCH 09/10] pipe: Remove redundant wakeup from pipe_write() " David Howells
2019-10-23 20:18 ` [RFC PATCH 10/10] pipe: Check for ring full inside of the spinlock in " David Howells
2019-10-24 10:32 ` [RFC PATCH 04/10] pipe: Use head and tail pointers for the ring, not cursor and length " David Howells
2019-10-24 13:14 ` [RFC PATCH 00/10] pipe: Notification queue preparation " Peter Zijlstra
2019-10-24 16:57 ` [RFC PATCH 11/10] pipe: Add fsync() support " David Howells
2019-10-24 21:29   ` Linus Torvalds
2019-10-25  8:34   ` David Howells
2019-10-27 15:22   ` Christoph Hellwig
2019-10-27 16:04   ` Konstantin Khlebnikov [this message]
2019-10-31 15:13   ` David Howells
2019-10-31 15:15   ` David Howells
2019-11-02 18:53     ` Linus Torvalds
2019-11-02 19:34     ` David Howells
2019-11-02 20:31       ` Andy Lutomirski
2019-11-02 22:03         ` Linus Torvalds
2019-11-02 22:09           ` Linus Torvalds
2019-11-02 22:30           ` Andy Lutomirski
2019-11-02 23:02             ` Linus Torvalds
2019-11-02 23:09               ` Linus Torvalds
2019-11-02 23:14                 ` Andy Lutomirski
2019-11-03 12:02                   ` Konstantin Khlebnikov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c6e044cc-5596-90b7-4418-6ad7009d6d79@yandex-team.ru \
    --to=khlebnikov@yandex-team.ru \
    --cc=christian@brauner.io \
    --cc=dhowells@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=keyrings@vger.kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=linux-usb@vger.kernel.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=nicolas.dichtel@6wind.com \
    --cc=peterz@infradead.org \
    --cc=raven@themaw.net \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).