All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/11] splice cleanups
@ 2016-09-14  8:37 Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 01/11] pipe: add pipe_buf_get() helper Miklos Szeredi
                   ` (13 more replies)
  0 siblings, 14 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

This contains assorted cleanups in the splice area:

 - add helpers for pipe buf ops instead of directly calling them

 - page cache buf doesn't seem to need confirming (since ages)

 - generic_file_splice_read() and generic_file_read() have lots of
   duplication

Git tree is here:

 git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git#splice

Thanks,
Miklos

---
Miklos Szeredi (11):
  pipe: add pipe_buf_get() helper
  pipe: add pipe_buf_release() helper
  pipe: add pipe_buf_confirm() helper
  pipe: add pipe_buf_steal() helper
  pipe: fix comment in pipe_buf_operations
  pipe: no need to confirm page cache buf
  pipe: remove generic_pipe_buf_confirm()
  filemap: add get_page_for_read() helper
  splice: use get_page_for_read()
  splice: don't check i_size in generic_file_splice_read()
  splice: fold __generic_file_splice_read() into caller

 drivers/char/virtio_console.c |   2 +-
 fs/fuse/dev.c                 |  15 +-
 fs/pipe.c                     |  31 +---
 fs/splice.c                   | 301 ++++++-------------------------------
 include/linux/pagemap.h       |   3 +
 include/linux/pipe_fs_i.h     |  73 ++++++---
 kernel/relay.c                |   1 -
 kernel/trace/trace.c          |   2 -
 mm/filemap.c                  | 339 ++++++++++++++++++++++--------------------
 9 files changed, 281 insertions(+), 486 deletions(-)

-- 
2.5.5

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 01/11] pipe: add pipe_buf_get() helper
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 02/11] pipe: add pipe_buf_release() helper Miklos Szeredi
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c             |  2 +-
 fs/splice.c               |  4 ++--
 include/linux/pipe_fs_i.h | 11 +++++++++++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a94d2ed81ab4..5cb20600bc9e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1993,7 +1993,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
 		} else {
-			ibuf->ops->get(pipe, ibuf);
+			pipe_buf_get(pipe, ibuf);
 			*obuf = *ibuf;
 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 			obuf->len = rem;
diff --git a/fs/splice.c b/fs/splice.c
index dd9bf7e410d2..36457a192c13 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1876,7 +1876,7 @@ retry:
 			 * Get a reference to this pipe buffer,
 			 * so we can copy the contents over.
 			 */
-			ibuf->ops->get(ipipe, ibuf);
+			pipe_buf_get(ipipe, ibuf);
 			*obuf = *ibuf;
 
 			/*
@@ -1948,7 +1948,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		 * Get a reference to this pipe buffer,
 		 * so we can copy the contents over.
 		 */
-		ibuf->ops->get(ipipe, ibuf);
+		pipe_buf_get(ipipe, ibuf);
 
 		obuf = opipe->bufs + nbuf;
 		*obuf = *ibuf;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 24f5470d3944..10876f3cb3da 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -115,6 +115,17 @@ struct pipe_buf_operations {
 	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
+/**
+ * pipe_buf_get - get a reference to a pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to get a reference to
+ */
+static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+				struct pipe_buffer *buf)
+{
+	buf->ops->get(pipe, buf);
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE		PAGE_SIZE
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 02/11] pipe: add pipe_buf_release() helper
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 01/11] pipe: add pipe_buf_get() helper Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 03/11] pipe: add pipe_buf_confirm() helper Miklos Szeredi
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c             |  7 +++----
 fs/pipe.c                 |  5 ++---
 fs/splice.c               | 14 ++++----------
 include/linux/pipe_fs_i.h | 14 ++++++++++++++
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5cb20600bc9e..13be2fddcace 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2015,10 +2015,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 
 	ret = fuse_dev_do_write(fud, &cs, len);
 
-	for (idx = 0; idx < nbuf; idx++) {
-		struct pipe_buffer *buf = &bufs[idx];
-		buf->ops->release(pipe, buf);
-	}
+	for (idx = 0; idx < nbuf; idx++)
+		pipe_buf_release(pipe, &bufs[idx]);
+
 out:
 	kfree(bufs);
 	return ret;
diff --git a/fs/pipe.c b/fs/pipe.c
index 4ebe6b2e5217..67b5f1923835 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -299,8 +299,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			}
 
 			if (!buf->len) {
-				buf->ops = NULL;
-				ops->release(pipe, buf);
+				pipe_buf_release(pipe, buf);
 				curbuf = (curbuf + 1) & (pipe->buffers - 1);
 				pipe->curbuf = curbuf;
 				pipe->nrbufs = --bufs;
@@ -664,7 +663,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
 	for (i = 0; i < pipe->buffers; i++) {
 		struct pipe_buffer *buf = pipe->bufs + i;
 		if (buf->ops)
-			buf->ops->release(pipe, buf);
+			pipe_buf_release(pipe, buf);
 	}
 	if (pipe->tmp_page)
 		__free_page(pipe->tmp_page);
diff --git a/fs/splice.c b/fs/splice.c
index 36457a192c13..e4dc45926494 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -757,7 +757,6 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
 
 	while (pipe->nrbufs) {
 		struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
-		const struct pipe_buf_operations *ops = buf->ops;
 
 		sd->len = buf->len;
 		if (sd->len > sd->total_len)
@@ -783,8 +782,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
 		sd->total_len -= ret;
 
 		if (!buf->len) {
-			buf->ops = NULL;
-			ops->release(pipe, buf);
+			pipe_buf_release(pipe, buf);
 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
 			if (pipe->files)
@@ -1030,11 +1028,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		while (ret) {
 			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
 			if (ret >= buf->len) {
-				const struct pipe_buf_operations *ops = buf->ops;
 				ret -= buf->len;
 				buf->len = 0;
-				buf->ops = NULL;
-				ops->release(pipe, buf);
+				pipe_buf_release(pipe, buf);
 				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 				pipe->nrbufs--;
 				if (pipe->files)
@@ -1273,10 +1269,8 @@ out_release:
 	for (i = 0; i < pipe->buffers; i++) {
 		struct pipe_buffer *buf = pipe->bufs + i;
 
-		if (buf->ops) {
-			buf->ops->release(pipe, buf);
-			buf->ops = NULL;
-		}
+		if (buf->ops)
+			pipe_buf_release(pipe, buf);
 	}
 
 	if (!bytes)
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 10876f3cb3da..d24fa6da6ae3 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -126,6 +126,20 @@ static inline void pipe_buf_get(struct pipe_inode_info *pipe,
 	buf->ops->get(pipe, buf);
 }
 
+/**
+ * pipe_buf_release - put a reference to a pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to put a reference to
+ */
+static inline void pipe_buf_release(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	const struct pipe_buf_operations *ops = buf->ops;
+
+	buf->ops = NULL;
+	ops->release(pipe, buf);
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE		PAGE_SIZE
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 03/11] pipe: add pipe_buf_confirm() helper
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 01/11] pipe: add pipe_buf_get() helper Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 02/11] pipe: add pipe_buf_release() helper Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 04/11] pipe: add pipe_buf_steal() helper Miklos Szeredi
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c             |  4 ++--
 fs/pipe.c                 |  8 +++-----
 fs/splice.c               |  4 ++--
 include/linux/pipe_fs_i.h | 12 +++++++++++-
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 13be2fddcace..9aa757f283e5 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
 		struct pipe_buffer *buf = cs->pipebufs;
 
 		if (!cs->write) {
-			err = buf->ops->confirm(cs->pipe, buf);
+			err = pipe_buf_confirm(cs->pipe, buf);
 			if (err)
 				return err;
 
@@ -828,7 +828,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 
 	fuse_copy_finish(cs);
 
-	err = buf->ops->confirm(cs->pipe, buf);
+	err = pipe_buf_confirm(cs->pipe, buf);
 	if (err)
 		return err;
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 67b5f1923835..4fc422f0dea8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		if (bufs) {
 			int curbuf = pipe->curbuf;
 			struct pipe_buffer *buf = pipe->bufs + curbuf;
-			const struct pipe_buf_operations *ops = buf->ops;
 			size_t chars = buf->len;
 			size_t written;
 			int error;
@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			if (chars > total_len)
 				chars = total_len;
 
-			error = ops->confirm(pipe, buf);
+			error = pipe_buf_confirm(pipe, buf);
 			if (error) {
 				if (!ret)
 					ret = error;
@@ -382,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
 							(pipe->buffers - 1);
 		struct pipe_buffer *buf = pipe->bufs + lastbuf;
-		const struct pipe_buf_operations *ops = buf->ops;
 		int offset = buf->offset + buf->len;
 
-		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-			ret = ops->confirm(pipe, buf);
+		if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
+			ret = pipe_buf_confirm(pipe, buf);
 			if (ret)
 				goto out;
 
diff --git a/fs/splice.c b/fs/splice.c
index e4dc45926494..ba7a2240d58c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -762,7 +762,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
 		if (sd->len > sd->total_len)
 			sd->len = sd->total_len;
 
-		ret = buf->ops->confirm(pipe, buf);
+		ret = pipe_buf_confirm(pipe, buf);
 		if (unlikely(ret)) {
 			if (ret == -ENODATA)
 				ret = 0;
@@ -1001,7 +1001,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			if (idx == pipe->buffers - 1)
 				idx = -1;
 
-			ret = buf->ops->confirm(pipe, buf);
+			ret = pipe_buf_confirm(pipe, buf);
 			if (unlikely(ret)) {
 				if (ret == -ENODATA)
 					ret = 0;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index d24fa6da6ae3..654413334537 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -140,6 +140,17 @@ static inline void pipe_buf_release(struct pipe_inode_info *pipe,
 	ops->release(pipe, buf);
 }
 
+/**
+ * pipe_buf_confirm - verify contents of the pipe buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to confirm
+ */
+static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
+				   struct pipe_buffer *buf)
+{
+	return buf->ops->confirm(pipe, buf);
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE		PAGE_SIZE
@@ -154,7 +165,6 @@ extern unsigned long pipe_user_pages_hard;
 extern unsigned long pipe_user_pages_soft;
 int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 
-
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
 
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 04/11] pipe: add pipe_buf_steal() helper
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (2 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 03/11] pipe: add pipe_buf_confirm() helper Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 05/11] pipe: fix comment in pipe_buf_operations Miklos Szeredi
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 drivers/char/virtio_console.c |  2 +-
 fs/fuse/dev.c                 |  2 +-
 include/linux/pipe_fs_i.h     | 11 +++++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 5da47e26a012..8114744bf30c 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		return 0;
 
 	/* Try lock this page */
-	if (buf->ops->steal(pipe, buf) == 0) {
+	if (pipe_buf_steal(pipe, buf) == 0) {
 		/* Get reference and unlock page for moving */
 		get_page(buf->page);
 		unlock_page(buf->page);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9aa757f283e5..6830b3ca1bbb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -841,7 +841,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 	if (cs->len != PAGE_SIZE)
 		goto out_fallback;
 
-	if (buf->ops->steal(cs->pipe, buf) != 0)
+	if (pipe_buf_steal(cs->pipe, buf) != 0)
 		goto out_fallback;
 
 	newpage = buf->page;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 654413334537..bddccf0159bb 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -151,6 +151,17 @@ static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
 	return buf->ops->confirm(pipe, buf);
 }
 
+/**
+ * pipe_buf_steal - attempt to take ownership of a pipe_buffer
+ * @pipe:	the pipe that the buffer belongs to
+ * @buf:	the buffer to attempt to steal
+ */
+static inline int pipe_buf_steal(struct pipe_inode_info *pipe,
+				 struct pipe_buffer *buf)
+{
+	return buf->ops->steal(pipe, buf);
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE		PAGE_SIZE
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 05/11] pipe: fix comment in pipe_buf_operations
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (3 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 04/11] pipe: add pipe_buf_steal() helper Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 06/11] pipe: no need to confirm page cache buf Miklos Szeredi
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Map and unmap ops no longer exist.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/pipe_fs_i.h | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index bddccf0159bb..e7497c9dde7f 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -66,15 +66,10 @@ struct pipe_inode_info {
  *
  * ->confirm()
  *	->steal()
- *	...
- *	->map()
- *	...
- *	->unmap()
  *
- * That is, ->map() must be called on a confirmed buffer,
- * same goes for ->steal(). See below for the meaning of each
- * operation. Also see kerneldoc in fs/pipe.c for the pipe
- * and generic variants of these hooks.
+ * That is, ->steal() must be called on a confirmed buffer.
+ * See below for the meaning of each operation. Also see kerneldoc
+ * in fs/pipe.c for the pipe and generic variants of these hooks.
  */
 struct pipe_buf_operations {
 	/*
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 06/11] pipe: no need to confirm page cache buf
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (4 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 05/11] pipe: fix comment in pipe_buf_operations Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-27  3:40   ` Al Viro
  2016-09-14  8:37 ` [PATCH 07/11] pipe: remove generic_pipe_buf_confirm() Miklos Szeredi
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

page_cache_pipe_buf_confirm() is used only in page_cache_pipe_buf_ops.

page_cache_pipe_buf_ops is used in two places:

1) __generic_file_splice_read()

This iterates all the pages, if not uptodate locks page, and if still not
uptodate does ->readpage() which reads the page synchronously.

2) shmem_file_splice_read()

This calls shmem_getpage() on individual pages.  shmem_getpage() swaps in
the page synchronously if not in memory, so it also seems to return an
uptodate page.

So all pages put into the buffer will be uptodate.

Things could happen to that page that make it not uptodate while sitting in
the pipe, but it's questionable whether we should care about that.
Checking for being uptodate in the face of such page state change is always
going to be racy.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/splice.c | 43 +------------------------------------------
 1 file changed, 1 insertion(+), 42 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index ba7a2240d58c..0ecbe3011796 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -92,51 +92,10 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-/*
- * Check whether the contents of buf is OK to access. Since the content
- * is a page cache page, IO may be in flight.
- */
-static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
-				       struct pipe_buffer *buf)
-{
-	struct page *page = buf->page;
-	int err;
-
-	if (!PageUptodate(page)) {
-		lock_page(page);
-
-		/*
-		 * Page got truncated/unhashed. This will cause a 0-byte
-		 * splice, if this is the first page.
-		 */
-		if (!page->mapping) {
-			err = -ENODATA;
-			goto error;
-		}
-
-		/*
-		 * Uh oh, read-error from disk.
-		 */
-		if (!PageUptodate(page)) {
-			err = -EIO;
-			goto error;
-		}
-
-		/*
-		 * Page is ok afterall, we are done.
-		 */
-		unlock_page(page);
-	}
-
-	return 0;
-error:
-	unlock_page(page);
-	return err;
-}
 
 const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = page_cache_pipe_buf_confirm,
+	.confirm = generic_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 07/11] pipe: remove generic_pipe_buf_confirm()
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (5 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 06/11] pipe: no need to confirm page cache buf Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-16 11:23   ` Christoph Hellwig
  2016-09-14  8:37 ` [PATCH 08/11] filemap: add get_page_for_read() helper Miklos Szeredi
                   ` (6 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Now all instances of .confirm are set to generic_pipe_buf_confirm().  The
method and the helper can be removed.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/pipe.c                 | 18 ------------------
 fs/splice.c               |  4 ----
 include/linux/pipe_fs_i.h | 18 +-----------------
 kernel/relay.c            |  1 -
 kernel/trace/trace.c      |  2 --
 5 files changed, 1 insertion(+), 42 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 4fc422f0dea8..47b41568b3fc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -200,22 +200,6 @@ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 EXPORT_SYMBOL(generic_pipe_buf_get);
 
 /**
- * generic_pipe_buf_confirm - verify contents of the pipe buffer
- * @info:	the pipe that the buffer belongs to
- * @buf:	the buffer to confirm
- *
- * Description:
- *	This function does nothing, because the generic pipe code uses
- *	pages that are always good when inserted into the pipe.
- */
-int generic_pipe_buf_confirm(struct pipe_inode_info *info,
-			     struct pipe_buffer *buf)
-{
-	return 0;
-}
-EXPORT_SYMBOL(generic_pipe_buf_confirm);
-
-/**
  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
  * @pipe:	the pipe that the buffer belongs to
  * @buf:	the buffer to put a reference to
@@ -232,7 +216,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release);
 
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
-	.confirm = generic_pipe_buf_confirm,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
@@ -240,7 +223,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 
 static const struct pipe_buf_operations packet_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = generic_pipe_buf_confirm,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
diff --git a/fs/splice.c b/fs/splice.c
index 0ecbe3011796..dc4648ba6e8d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -95,7 +95,6 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 
 const struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = generic_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
@@ -113,7 +112,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 
 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = generic_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = user_page_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
@@ -507,7 +505,6 @@ EXPORT_SYMBOL(generic_file_splice_read);
 
 static const struct pipe_buf_operations default_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = generic_pipe_buf_confirm,
 	.release = generic_pipe_buf_release,
 	.steal = generic_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
@@ -522,7 +519,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
 /* Pipe buffer operations for a socket and similar. */
 const struct pipe_buf_operations nosteal_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = generic_pipe_buf_confirm,
 	.release = generic_pipe_buf_release,
 	.steal = generic_pipe_buf_nosteal,
 	.get = generic_pipe_buf_get,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index e7497c9dde7f..ae9d87ea95cb 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -62,12 +62,6 @@ struct pipe_inode_info {
 };
 
 /*
- * Note on the nesting of these functions:
- *
- * ->confirm()
- *	->steal()
- *
- * That is, ->steal() must be called on a confirmed buffer.
  * See below for the meaning of each operation. Also see kerneldoc
  * in fs/pipe.c for the pipe and generic variants of these hooks.
  */
@@ -80,15 +74,6 @@ struct pipe_buf_operations {
 	int can_merge;
 
 	/*
-	 * ->confirm() verifies that the data in the pipe buffer is there
-	 * and that the contents are good. If the pages in the pipe belong
-	 * to a file system, we may need to wait for IO completion in this
-	 * hook. Returns 0 for good, or a negative error value in case of
-	 * error.
-	 */
-	int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
-
-	/*
 	 * When the contents of this pipe buffer has been completely
 	 * consumed by a reader, ->release() is called.
 	 */
@@ -143,7 +128,7 @@ static inline void pipe_buf_release(struct pipe_inode_info *pipe,
 static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
 				   struct pipe_buffer *buf)
 {
-	return buf->ops->confirm(pipe, buf);
+	return 0;
 }
 
 /**
@@ -179,7 +164,6 @@ void free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
-int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 
diff --git a/kernel/relay.c b/kernel/relay.c
index d797502140b9..da954ce520b2 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1223,7 +1223,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
 
 static const struct pipe_buf_operations relay_pipe_buf_ops = {
 	.can_merge = 0,
-	.confirm = generic_pipe_buf_confirm,
 	.release = relay_pipe_buf_release,
 	.steal = generic_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dade4c9559cc..bdf891c78708 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5219,7 +5219,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
 
 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
 	.can_merge		= 0,
-	.confirm		= generic_pipe_buf_confirm,
 	.release		= generic_pipe_buf_release,
 	.steal			= generic_pipe_buf_steal,
 	.get			= generic_pipe_buf_get,
@@ -6114,7 +6113,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 /* Pipe buffer operations for a buffer. */
 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
 	.can_merge		= 0,
-	.confirm		= generic_pipe_buf_confirm,
 	.release		= buffer_pipe_buf_release,
 	.steal			= generic_pipe_buf_steal,
 	.get			= buffer_pipe_buf_get,
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 08/11] filemap: add get_page_for_read() helper
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (6 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 07/11] pipe: remove generic_pipe_buf_confirm() Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-27  3:43   ` Al Viro
  2016-09-14  8:37 ` [PATCH 09/11] splice: use get_page_for_read() Miklos Szeredi
                   ` (5 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

Getting the page for reading is pretty complicated.  This functionality is
also duplicated between generic_file_read() generic_file_splice_read().

So first extract the common code from do_generic_file_read() into a
separate helper function that takes a filp, the page index, the offset into
the page and the byte count and returns the page ready for reading (or an
error).

This makes do_generic_file_read() much easier to read as well.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/pagemap.h |   3 +
 mm/filemap.c            | 339 +++++++++++++++++++++++++-----------------------
 2 files changed, 177 insertions(+), 165 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 66a1260b33de..f0369ded606c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -653,4 +653,7 @@ static inline unsigned long dir_pages(struct inode *inode)
 			       PAGE_SHIFT;
 }
 
+int get_page_for_read(struct file *filp, unsigned long offset, size_t count,
+		      pgoff_t index, struct page **pagep);
+
 #endif /* _LINUX_PAGEMAP_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287dfc5372..288e0ee803b8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1648,6 +1648,170 @@ static void shrink_readahead_size_eio(struct file *filp,
 	ra->ra_pages /= 4;
 }
 
+int get_page_for_read(struct file *filp, unsigned long offset, size_t count,
+		      pgoff_t index, struct page **pagep)
+{
+	struct address_space *mapping = filp->f_mapping;
+	struct inode *inode = mapping->host;
+	struct file_ra_state *ra = &filp->f_ra;
+	int error = 0;
+	struct page *page;
+	pgoff_t end_index;
+	loff_t isize;
+	unsigned long nr;
+	pgoff_t pg_count = (offset + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+find_page:
+	page = find_get_page(mapping, index);
+	if (!page) {
+		page_cache_sync_readahead(mapping, ra, filp, index, pg_count);
+		page = find_get_page(mapping, index);
+		if (unlikely(page == NULL))
+			goto no_cached_page;
+	}
+	if (PageReadahead(page)) {
+		page_cache_async_readahead(mapping, ra, filp, page,
+					   index, pg_count);
+	}
+	if (!PageUptodate(page)) {
+		/*
+		 * See comment in do_read_cache_page on why
+		 * wait_on_page_locked is used to avoid unnecessarily
+		 * serialisations and why it's safe.
+		 */
+		wait_on_page_locked_killable(page);
+		if (PageUptodate(page))
+			goto page_ok;
+
+		if (inode->i_blkbits == PAGE_SHIFT ||
+		    !mapping->a_ops->is_partially_uptodate)
+			goto page_not_up_to_date;
+		if (!trylock_page(page))
+			goto page_not_up_to_date;
+		/* Did it get truncated before we got the lock? */
+		if (!page->mapping)
+			goto page_not_up_to_date_locked;
+		if (!mapping->a_ops->is_partially_uptodate(page, offset, count))
+			goto page_not_up_to_date_locked;
+		unlock_page(page);
+	}
+page_ok:
+	/*
+	 * i_size must be checked after we know the page is Uptodate.
+	 *
+	 * Checking i_size after the check allows us to calculate
+	 * the correct value for "nr", which means the zero-filled
+	 * part of the page is not copied back to userspace (unless
+	 * another truncate extends the file - this is desired though).
+	 */
+
+	isize = i_size_read(inode);
+	end_index = (isize - 1) >> PAGE_SHIFT;
+	if (unlikely(!isize || index > end_index))
+		goto out_put_page;
+
+	/* nr is the maximum number of bytes to copy from this page */
+	nr = PAGE_SIZE;
+	if (index == end_index) {
+		nr = ((isize - 1) & ~PAGE_MASK) + 1;
+		if (nr <= offset)
+			goto out_put_page;
+	}
+	nr = nr - offset;
+
+	*pagep = page;
+	return nr;
+
+page_not_up_to_date:
+	/* Get exclusive access to the page ... */
+	error = lock_page_killable(page);
+	if (unlikely(error))
+		goto out_put_page;
+
+page_not_up_to_date_locked:
+	/* Did it get truncated before we got the lock? */
+	if (!page->mapping) {
+		unlock_page(page);
+		put_page(page);
+		cond_resched();
+		goto find_page;
+	}
+
+	/* Did somebody else fill it already? */
+	if (PageUptodate(page)) {
+		unlock_page(page);
+		goto page_ok;
+	}
+
+readpage:
+	/*
+	 * A previous I/O error may have been due to temporary
+	 * failures, eg. multipath errors.
+	 * PG_error will be set again if readpage fails.
+	 */
+	ClearPageError(page);
+	/* Start the actual read. The read will unlock the page. */
+	error = mapping->a_ops->readpage(filp, page);
+
+	if (unlikely(error)) {
+		if (error == AOP_TRUNCATED_PAGE) {
+			put_page(page);
+			error = 0;
+			goto find_page;
+		}
+		goto out_put_page;
+	}
+
+	if (!PageUptodate(page)) {
+		error = lock_page_killable(page);
+		if (unlikely(error))
+			goto out_put_page;
+		if (!PageUptodate(page)) {
+			if (page->mapping == NULL) {
+				/*
+				 * invalidate_mapping_pages got it
+				 */
+				unlock_page(page);
+				put_page(page);
+				goto find_page;
+			}
+			unlock_page(page);
+			shrink_readahead_size_eio(filp, ra);
+			error = -EIO;
+			goto out_put_page;
+		}
+		unlock_page(page);
+	}
+	goto page_ok;
+
+no_cached_page:
+	/*
+	 * Ok, it wasn't cached, so we need to create a new
+	 * page..
+	 */
+	page = page_cache_alloc_cold(mapping);
+	if (!page) {
+		error = -ENOMEM;
+		goto out;
+	}
+	error = add_to_page_cache_lru(page, mapping, index,
+				      mapping_gfp_constraint(mapping, GFP_KERNEL));
+	if (!error)
+		goto readpage;
+
+	if (error == -EEXIST) {
+		put_page(page);
+		error = 0;
+		goto find_page;
+	}
+
+out_put_page:
+	put_page(page);
+out:
+	return error;
+
+}
+
 /**
  * do_generic_file_read - generic file read routine
  * @filp:	the file to read
@@ -1664,11 +1828,8 @@ static void shrink_readahead_size_eio(struct file *filp,
 static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 		struct iov_iter *iter, ssize_t written)
 {
-	struct address_space *mapping = filp->f_mapping;
-	struct inode *inode = mapping->host;
 	struct file_ra_state *ra = &filp->f_ra;
 	pgoff_t index;
-	pgoff_t last_index;
 	pgoff_t prev_index;
 	unsigned long offset;      /* offset into pagecache page */
 	unsigned int prev_offset;
@@ -1677,87 +1838,26 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 	index = *ppos >> PAGE_SHIFT;
 	prev_index = ra->prev_pos >> PAGE_SHIFT;
 	prev_offset = ra->prev_pos & (PAGE_SIZE-1);
-	last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
 	offset = *ppos & ~PAGE_MASK;
 
 	for (;;) {
 		struct page *page;
-		pgoff_t end_index;
-		loff_t isize;
-		unsigned long nr, ret;
+		long nr, ret;
 
 		cond_resched();
-find_page:
-		page = find_get_page(mapping, index);
-		if (!page) {
-			page_cache_sync_readahead(mapping,
-					ra, filp,
-					index, last_index - index);
-			page = find_get_page(mapping, index);
-			if (unlikely(page == NULL))
-				goto no_cached_page;
-		}
-		if (PageReadahead(page)) {
-			page_cache_async_readahead(mapping,
-					ra, filp, page,
-					index, last_index - index);
-		}
-		if (!PageUptodate(page)) {
-			/*
-			 * See comment in do_read_cache_page on why
-			 * wait_on_page_locked is used to avoid unnecessarily
-			 * serialisations and why it's safe.
-			 */
-			wait_on_page_locked_killable(page);
-			if (PageUptodate(page))
-				goto page_ok;
-
-			if (inode->i_blkbits == PAGE_SHIFT ||
-					!mapping->a_ops->is_partially_uptodate)
-				goto page_not_up_to_date;
-			if (!trylock_page(page))
-				goto page_not_up_to_date;
-			/* Did it get truncated before we got the lock? */
-			if (!page->mapping)
-				goto page_not_up_to_date_locked;
-			if (!mapping->a_ops->is_partially_uptodate(page,
-							offset, iter->count))
-				goto page_not_up_to_date_locked;
-			unlock_page(page);
-		}
-page_ok:
-		/*
-		 * i_size must be checked after we know the page is Uptodate.
-		 *
-		 * Checking i_size after the check allows us to calculate
-		 * the correct value for "nr", which means the zero-filled
-		 * part of the page is not copied back to userspace (unless
-		 * another truncate extends the file - this is desired though).
-		 */
-
-		isize = i_size_read(inode);
-		end_index = (isize - 1) >> PAGE_SHIFT;
-		if (unlikely(!isize || index > end_index)) {
-			put_page(page);
-			goto out;
-		}
-
-		/* nr is the maximum number of bytes to copy from this page */
-		nr = PAGE_SIZE;
-		if (index == end_index) {
-			nr = ((isize - 1) & ~PAGE_MASK) + 1;
-			if (nr <= offset) {
-				put_page(page);
-				goto out;
-			}
+		ret = get_page_for_read(filp, offset, iter->count, index,
+					&page);
+		if (ret <= 0) {
+			error = ret;
+			break;
 		}
-		nr = nr - offset;
+		nr = ret;
 
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
 		 * before reading the page on the kernel side.
 		 */
-		if (mapping_writably_mapped(mapping))
+		if (mapping_writably_mapped(filp->f_mapping))
 			flush_dcache_page(page);
 
 		/*
@@ -1782,104 +1882,13 @@ page_ok:
 		put_page(page);
 		written += ret;
 		if (!iov_iter_count(iter))
-			goto out;
+			break;
 		if (ret < nr) {
 			error = -EFAULT;
-			goto out;
-		}
-		continue;
-
-page_not_up_to_date:
-		/* Get exclusive access to the page ... */
-		error = lock_page_killable(page);
-		if (unlikely(error))
-			goto readpage_error;
-
-page_not_up_to_date_locked:
-		/* Did it get truncated before we got the lock? */
-		if (!page->mapping) {
-			unlock_page(page);
-			put_page(page);
-			continue;
-		}
-
-		/* Did somebody else fill it already? */
-		if (PageUptodate(page)) {
-			unlock_page(page);
-			goto page_ok;
-		}
-
-readpage:
-		/*
-		 * A previous I/O error may have been due to temporary
-		 * failures, eg. multipath errors.
-		 * PG_error will be set again if readpage fails.
-		 */
-		ClearPageError(page);
-		/* Start the actual read. The read will unlock the page. */
-		error = mapping->a_ops->readpage(filp, page);
-
-		if (unlikely(error)) {
-			if (error == AOP_TRUNCATED_PAGE) {
-				put_page(page);
-				error = 0;
-				goto find_page;
-			}
-			goto readpage_error;
-		}
-
-		if (!PageUptodate(page)) {
-			error = lock_page_killable(page);
-			if (unlikely(error))
-				goto readpage_error;
-			if (!PageUptodate(page)) {
-				if (page->mapping == NULL) {
-					/*
-					 * invalidate_mapping_pages got it
-					 */
-					unlock_page(page);
-					put_page(page);
-					goto find_page;
-				}
-				unlock_page(page);
-				shrink_readahead_size_eio(filp, ra);
-				error = -EIO;
-				goto readpage_error;
-			}
-			unlock_page(page);
-		}
-
-		goto page_ok;
-
-readpage_error:
-		/* UHHUH! A synchronous read error occurred. Report it */
-		put_page(page);
-		goto out;
-
-no_cached_page:
-		/*
-		 * Ok, it wasn't cached, so we need to create a new
-		 * page..
-		 */
-		page = page_cache_alloc_cold(mapping);
-		if (!page) {
-			error = -ENOMEM;
-			goto out;
-		}
-		error = add_to_page_cache_lru(page, mapping, index,
-				mapping_gfp_constraint(mapping, GFP_KERNEL));
-		if (error) {
-			put_page(page);
-			if (error == -EEXIST) {
-				error = 0;
-				goto find_page;
-			}
-			goto out;
+			break;
 		}
-		goto readpage;
 	}
 
-out:
 	ra->prev_pos = prev_index;
 	ra->prev_pos <<= PAGE_SHIFT;
 	ra->prev_pos |= prev_offset;
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 09/11] splice: use get_page_for_read()
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (7 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 08/11] filemap: add get_page_for_read() helper Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-27  3:45   ` Al Viro
  2016-09-14  8:37 ` [PATCH 10/11] splice: don't check i_size in generic_file_splice_read() Miklos Szeredi
                   ` (4 subsequent siblings)
  13 siblings, 1 reply; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

What __generic_file_splice_read() does is get a series of uptodate pages
and put them into the pipe buffer.

The get_page_for_read() helper can now be used to get the pages,
simplifying the code and making sure the splice(2) stays in sync with
read(2).

For example get_page_for_read() can handle partially uptodate pages and now
splice can take advantage of these as well.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/splice.c | 169 +++++-------------------------------------------------------
 1 file changed, 12 insertions(+), 157 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index dc4648ba6e8d..e7757b363b6c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -265,14 +265,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 			   struct pipe_inode_info *pipe, size_t len,
 			   unsigned int flags)
 {
-	struct address_space *mapping = in->f_mapping;
 	unsigned int loff, nr_pages, req_pages;
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct page *page;
-	pgoff_t index, end_index;
-	loff_t isize;
-	int error, page_nr;
+	pgoff_t index;
+	int error;
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -290,168 +288,25 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	nr_pages = min(req_pages, spd.nr_pages_max);
 
-	/*
-	 * Lookup the (hopefully) full range of pages we need.
-	 */
-	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
-	index += spd.nr_pages;
-
-	/*
-	 * If find_get_pages_contig() returned fewer pages than we needed,
-	 * readahead/allocate the rest and fill in the holes.
-	 */
-	if (spd.nr_pages < nr_pages)
-		page_cache_sync_readahead(mapping, &in->f_ra, in,
-				index, req_pages - spd.nr_pages);
-
 	error = 0;
-	while (spd.nr_pages < nr_pages) {
-		/*
-		 * Page could be there, find_get_pages_contig() breaks on
-		 * the first hole.
-		 */
-		page = find_get_page(mapping, index);
-		if (!page) {
-			/*
-			 * page didn't exist, allocate one.
-			 */
-			page = page_cache_alloc_cold(mapping);
-			if (!page)
-				break;
+	while (spd.nr_pages < nr_pages && len) {
+		long ret;
 
-			error = add_to_page_cache_lru(page, mapping, index,
-				   mapping_gfp_constraint(mapping, GFP_KERNEL));
-			if (unlikely(error)) {
-				put_page(page);
-				if (error == -EEXIST)
-					continue;
-				break;
-			}
-			/*
-			 * add_to_page_cache() locks the page, unlock it
-			 * to avoid convoluting the logic below even more.
-			 */
-			unlock_page(page);
-		}
-
-		spd.pages[spd.nr_pages++] = page;
-		index++;
-	}
-
-	/*
-	 * Now loop over the map and see if we need to start IO on any
-	 * pages, fill in the partial map, etc.
-	 */
-	index = *ppos >> PAGE_SHIFT;
-	nr_pages = spd.nr_pages;
-	spd.nr_pages = 0;
-	for (page_nr = 0; page_nr < nr_pages; page_nr++) {
-		unsigned int this_len;
-
-		if (!len)
+		ret = get_page_for_read(in, loff, len, index, &page);
+		if (ret <= 0) {
+			error = ret;
 			break;
-
-		/*
-		 * this_len is the max we'll use from this page
-		 */
-		this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
-		page = spd.pages[page_nr];
-
-		if (PageReadahead(page))
-			page_cache_async_readahead(mapping, &in->f_ra, in,
-					page, index, req_pages - page_nr);
-
-		/*
-		 * If the page isn't uptodate, we may need to start io on it
-		 */
-		if (!PageUptodate(page)) {
-			lock_page(page);
-
-			/*
-			 * Page was truncated, or invalidated by the
-			 * filesystem.  Redo the find/create, but this time the
-			 * page is kept locked, so there's no chance of another
-			 * race with truncate/invalidate.
-			 */
-			if (!page->mapping) {
-				unlock_page(page);
-retry_lookup:
-				page = find_or_create_page(mapping, index,
-						mapping_gfp_mask(mapping));
-
-				if (!page) {
-					error = -ENOMEM;
-					break;
-				}
-				put_page(spd.pages[page_nr]);
-				spd.pages[page_nr] = page;
-			}
-			/*
-			 * page was already under io and is now done, great
-			 */
-			if (PageUptodate(page)) {
-				unlock_page(page);
-				goto fill_it;
-			}
-
-			/*
-			 * need to read in the page
-			 */
-			error = mapping->a_ops->readpage(in, page);
-			if (unlikely(error)) {
-				/*
-				 * Re-lookup the page
-				 */
-				if (error == AOP_TRUNCATED_PAGE)
-					goto retry_lookup;
-
-				break;
-			}
-		}
-fill_it:
-		/*
-		 * i_size must be checked after PageUptodate.
-		 */
-		isize = i_size_read(mapping->host);
-		end_index = (isize - 1) >> PAGE_SHIFT;
-		if (unlikely(!isize || index > end_index))
-			break;
-
-		/*
-		 * if this is the last page, see if we need to shrink
-		 * the length and stop
-		 */
-		if (end_index == index) {
-			unsigned int plen;
-
-			/*
-			 * max good bytes in this page
-			 */
-			plen = ((isize - 1) & ~PAGE_MASK) + 1;
-			if (plen <= loff)
-				break;
-
-			/*
-			 * force quit after adding this page
-			 */
-			this_len = min(this_len, plen - loff);
-			len = this_len;
 		}
 
-		spd.partial[page_nr].offset = loff;
-		spd.partial[page_nr].len = this_len;
-		len -= this_len;
-		loff = 0;
+		spd.pages[spd.nr_pages] = page;
+		spd.partial[spd.nr_pages].offset = loff;
+		spd.partial[spd.nr_pages].len = ret;
 		spd.nr_pages++;
 		index++;
+		len -= ret;
+		loff = 0;
 	}
 
-	/*
-	 * Release any pages at the end, if we quit early. 'page_nr' is how far
-	 * we got, 'nr_pages' is how many pages are in the map.
-	 */
-	while (page_nr < nr_pages)
-		put_page(spd.pages[page_nr++]);
 	in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
 
 	if (spd.nr_pages)
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 10/11] splice: don't check i_size in generic_file_splice_read()
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (8 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 09/11] splice: use get_page_for_read() Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:37 ` [PATCH 11/11] splice: fold __generic_file_splice_read() into caller Miklos Szeredi
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

This is handled in get_page_for_read() already.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/splice.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index e7757b363b6c..bee282803ccf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -334,20 +334,11 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 				 struct pipe_inode_info *pipe, size_t len,
 				 unsigned int flags)
 {
-	loff_t isize, left;
 	int ret;
 
 	if (IS_DAX(in->f_mapping->host))
 		return default_file_splice_read(in, ppos, pipe, len, flags);
 
-	isize = i_size_read(in->f_mapping->host);
-	if (unlikely(*ppos >= isize))
-		return 0;
-
-	left = isize - *ppos;
-	if (unlikely(left < len))
-		len = left;
-
 	ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
 	if (ret > 0) {
 		*ppos += ret;
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 11/11] splice: fold __generic_file_splice_read() into caller
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (9 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 10/11] splice: don't check i_size in generic_file_splice_read() Miklos Szeredi
@ 2016-09-14  8:37 ` Miklos Szeredi
  2016-09-14  8:55 ` [PATCH 00/11] splice cleanups Cedric Blancher
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  8:37 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, Al Viro

generic_file_splice_read() does so little that it makes no sense to keep
__generic_file_splice_read() a separate function.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/splice.c | 70 +++++++++++++++++++++++++------------------------------------
 1 file changed, 29 insertions(+), 41 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index bee282803ccf..395cbb6b4926 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -260,17 +260,30 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
 	kfree(spd->partial);
 }
 
-static int
-__generic_file_splice_read(struct file *in, loff_t *ppos,
-			   struct pipe_inode_info *pipe, size_t len,
-			   unsigned int flags)
+/**
+ * generic_file_splice_read - splice data from file to a pipe
+ * @in:		file to splice from
+ * @ppos:	position in @in
+ * @pipe:	pipe to splice to
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Description:
+ *    Will read pages from given file and fill them into a pipe. Can be
+ *    used as long as the address_space operations for the source implements
+ *    a readpage() hook.
+ *
+ */
+ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
 {
 	unsigned int loff, nr_pages, req_pages;
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct page *page;
 	pgoff_t index;
-	int error;
+	int ret = 0;
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -280,6 +293,9 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		.spd_release = spd_release_page,
 	};
 
+	if (IS_DAX(in->f_mapping->host))
+		return default_file_splice_read(in, ppos, pipe, len, flags);
+
 	if (splice_grow_spd(pipe, &spd))
 		return -ENOMEM;
 
@@ -288,62 +304,34 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 	req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	nr_pages = min(req_pages, spd.nr_pages_max);
 
-	error = 0;
 	while (spd.nr_pages < nr_pages && len) {
-		long ret;
+		int nr;
 
-		ret = get_page_for_read(in, loff, len, index, &page);
-		if (ret <= 0) {
-			error = ret;
+		nr = get_page_for_read(in, loff, len, index, &page);
+		if (nr <= 0) {
+			ret = nr;
 			break;
 		}
 
 		spd.pages[spd.nr_pages] = page;
 		spd.partial[spd.nr_pages].offset = loff;
-		spd.partial[spd.nr_pages].len = ret;
+		spd.partial[spd.nr_pages].len = nr;
 		spd.nr_pages++;
 		index++;
-		len -= ret;
+		len -= nr;
 		loff = 0;
 	}
 
 	in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
 
 	if (spd.nr_pages)
-		error = splice_to_pipe(pipe, &spd);
-
-	splice_shrink_spd(&spd);
-	return error;
-}
-
-/**
- * generic_file_splice_read - splice data from file to a pipe
- * @in:		file to splice from
- * @ppos:	position in @in
- * @pipe:	pipe to splice to
- * @len:	number of bytes to splice
- * @flags:	splice modifier flags
- *
- * Description:
- *    Will read pages from given file and fill them into a pipe. Can be
- *    used as long as the address_space operations for the source implements
- *    a readpage() hook.
- *
- */
-ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
-				 struct pipe_inode_info *pipe, size_t len,
-				 unsigned int flags)
-{
-	int ret;
-
-	if (IS_DAX(in->f_mapping->host))
-		return default_file_splice_read(in, ppos, pipe, len, flags);
+		ret = splice_to_pipe(pipe, &spd);
 
-	ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
 	if (ret > 0) {
 		*ppos += ret;
 		file_accessed(in);
 	}
+	splice_shrink_spd(&spd);
 
 	return ret;
 }
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 00/11] splice cleanups
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (10 preceding siblings ...)
  2016-09-14  8:37 ` [PATCH 11/11] splice: fold __generic_file_splice_read() into caller Miklos Szeredi
@ 2016-09-14  8:55 ` Cedric Blancher
  2016-09-14  9:30   ` Miklos Szeredi
  2016-09-16 11:24 ` Christoph Hellwig
  2016-09-27  3:55 ` Al Viro
  13 siblings, 1 reply; 21+ messages in thread
From: Cedric Blancher @ 2016-09-14  8:55 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, Linux Kernel Mailing List, Al Viro

Is there any shell which uses pipe splicing?

Ced

On 14 September 2016 at 10:37, Miklos Szeredi <mszeredi@redhat.com> wrote:
> This contains assorted cleanups in the splice area:
>
>  - add helpers for pipe buf ops instead of directly calling them
>
>  - page cache buf doesn't seem to need confirming (since ages)
>
>  - generic_file_splice_read() and generic_file_read() have lots of
>    duplication
>
> Git tree is here:
>
>  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git#splice
>
> Thanks,
> Miklos
>
> ---
> Miklos Szeredi (11):
>   pipe: add pipe_buf_get() helper
>   pipe: add pipe_buf_release() helper
>   pipe: add pipe_buf_confirm() helper
>   pipe: add pipe_buf_steal() helper
>   pipe: fix comment in pipe_buf_operations
>   pipe: no need to confirm page cache buf
>   pipe: remove generic_pipe_buf_confirm()
>   filemap: add get_page_for_read() helper
>   splice: use get_page_for_read()
>   splice: don't check i_size in generic_file_splice_read()
>   splice: fold __generic_file_splice_read() into caller
>
>  drivers/char/virtio_console.c |   2 +-
>  fs/fuse/dev.c                 |  15 +-
>  fs/pipe.c                     |  31 +---
>  fs/splice.c                   | 301 ++++++-------------------------------
>  include/linux/pagemap.h       |   3 +
>  include/linux/pipe_fs_i.h     |  73 ++++++---
>  kernel/relay.c                |   1 -
>  kernel/trace/trace.c          |   2 -
>  mm/filemap.c                  | 339 ++++++++++++++++++++++--------------------
>  9 files changed, 281 insertions(+), 486 deletions(-)
>
> --
> 2.5.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Cedric Blancher <cedric.blancher@gmail.com>
[https://plus.google.com/u/0/+CedricBlancher/]
Institute Pasteur

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 00/11] splice cleanups
  2016-09-14  8:55 ` [PATCH 00/11] splice cleanups Cedric Blancher
@ 2016-09-14  9:30   ` Miklos Szeredi
  0 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-14  9:30 UTC (permalink / raw)
  To: Cedric Blancher; +Cc: linux-fsdevel, Linux Kernel Mailing List, Al Viro

[-- Attachment #1: Type: text/plain, Size: 421 bytes --]

On Wed, Sep 14, 2016 at 10:55 AM, Cedric Blancher
<cedric.blancher@gmail.com> wrote:
> Is there any shell which uses pipe splicing?

for i in /usr/bin/*; do if file $i | grep -q ELF; then if nm -D $i |
grep -q splice; then echo $i; fi; fi; done

For me it does not yield anything by which you could easily try out splicing.

Attaching a simple cp-like program which uses splice if you want to try it out.

Thanks,
Miklos

[-- Attachment #2: cp-splice.c --]
[-- Type: text/x-csrc, Size: 1005 bytes --]

#define _GNU_SOURCE

#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	int to, from, res;
	int pip[2];
	int siz = 65536;

	if (argc != 3)
		err(1, "usage: %s from_file to_file", argv[0]);

	from = open(argv[1], O_RDONLY);
	if (from == -1)
		err(1, "opening %s", argv[1]);

	to = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (to == -1)
		err(1, "opening %s", argv[2]);

	res = pipe(pip);
	if (res == -1)
		err(1, "creating pipe");

	while (1) {
		int num;

		res = splice(from, NULL, pip[1], NULL, siz, 0);
		if (res == -1)
			err(1, "splicing from %s to pipe", argv[1]);

		num = res;
		if (num == 0)
			break;

		do {
			res = splice(pip[0], NULL, to, NULL, num, 0);
			if (res == -1)
				err(1, "splicing from pipe to %s", argv[2]);

			if (res == 0)
				break;

			num -= res;
		} while (num);
	}

	res = close(to);
	if (res == -1)
		err(1, "closing %s", argv[2]);

	res = close(from);
	if (res == -1)
		err(1, "closing %s", argv[1]);

	return 0;
}

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 07/11] pipe: remove generic_pipe_buf_confirm()
  2016-09-14  8:37 ` [PATCH 07/11] pipe: remove generic_pipe_buf_confirm() Miklos Szeredi
@ 2016-09-16 11:23   ` Christoph Hellwig
  0 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:23 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel, Al Viro

>  static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
>  				   struct pipe_buffer *buf)
>  {
> -	return buf->ops->confirm(pipe, buf);
> +	return 0;
>  }

Why do you keep pipe_buf_confirm around?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 00/11] splice cleanups
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (11 preceding siblings ...)
  2016-09-14  8:55 ` [PATCH 00/11] splice cleanups Cedric Blancher
@ 2016-09-16 11:24 ` Christoph Hellwig
  2016-09-27  3:55 ` Al Viro
  13 siblings, 0 replies; 21+ messages in thread
From: Christoph Hellwig @ 2016-09-16 11:24 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel, Al Viro

On Wed, Sep 14, 2016 at 10:37:05AM +0200, Miklos Szeredi wrote:
> This contains assorted cleanups in the splice area:
> 
>  - add helpers for pipe buf ops instead of directly calling them
> 
>  - page cache buf doesn't seem to need confirming (since ages)
> 
>  - generic_file_splice_read() and generic_file_read() have lots of
>    duplication

The whole series looks great to me:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 06/11] pipe: no need to confirm page cache buf
  2016-09-14  8:37 ` [PATCH 06/11] pipe: no need to confirm page cache buf Miklos Szeredi
@ 2016-09-27  3:40   ` Al Viro
  2016-09-27  7:34     ` Miklos Szeredi
  0 siblings, 1 reply; 21+ messages in thread
From: Al Viro @ 2016-09-27  3:40 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel, Jens Axboe, Linus Torvalds

On Wed, Sep 14, 2016 at 10:37:11AM +0200, Miklos Szeredi wrote:

> Things could happen to that page that make it not uptodate while sitting in
> the pipe, but it's questionable whether we should care about that.
> Checking for being uptodate in the face of such page state change is always
> going to be racy.

I'm not sure it's the right thing to do here; that area looks like a victim
of serious bitrot - once upon a time it was ->map(), which used to lock
page, verity that it's valid, and kmap it.  ->unmap() did kunmap + unlock.

Then the validate part got split off (->pin(), later renamed to ->confirm()),
with lock _not_ held over the kmap/kunmap.  That's the point when it got racy,
AFAICS.  OTOH, I would really hate to hold a page locked over e.g. copying to
userland - too easy to get deadlocks that way.

Jens, could you comment?  Pages definitely shouldn't be getting into pipe
without having been uptodate; the question is what (if anything) should be
done about having a page go non-uptodate (on truncate, hole-punching, etc.)
while a reference to it is sitting in a pipe buffer.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 08/11] filemap: add get_page_for_read() helper
  2016-09-14  8:37 ` [PATCH 08/11] filemap: add get_page_for_read() helper Miklos Szeredi
@ 2016-09-27  3:43   ` Al Viro
  0 siblings, 0 replies; 21+ messages in thread
From: Al Viro @ 2016-09-27  3:43 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel

On Wed, Sep 14, 2016 at 10:37:13AM +0200, Miklos Szeredi wrote:
> Getting the page for reading is pretty complicated.  This functionality is
> also duplicated between generic_file_read() generic_file_splice_read().
> 
> So first extract the common code from do_generic_file_read() into a
> separate helper function that takes a filp, the page index, the offset into
> the page and the byte count and returns the page ready for reading (or an
> error).
> 
> This makes do_generic_file_read() much easier to read as well.

__generic_file_splice_read() horrors are not going to survive - see the
patchset posted on fsdevel.  do_generic_file_read() getting easier to
read is certainly a good thing, provided that we don't screw the code
generation for what's a fairly hot path.  IOW, that one really needs
profiling.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 09/11] splice: use get_page_for_read()
  2016-09-14  8:37 ` [PATCH 09/11] splice: use get_page_for_read() Miklos Szeredi
@ 2016-09-27  3:45   ` Al Viro
  0 siblings, 0 replies; 21+ messages in thread
From: Al Viro @ 2016-09-27  3:45 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel

On Wed, Sep 14, 2016 at 10:37:14AM +0200, Miklos Szeredi wrote:
> What __generic_file_splice_read() does is get a series of uptodate pages
> and put them into the pipe buffer.
> 
> The get_page_for_read() helper can now be used to get the pages,
> simplifying the code and making sure the splice(2) stays in sync with
> read(2).
> 
> For example get_page_for_read() can handle partially uptodate pages and now
> splice can take advantage of these as well.

... or, better yet, kill __generic_file_splice_read() off.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 00/11] splice cleanups
  2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
                   ` (12 preceding siblings ...)
  2016-09-16 11:24 ` Christoph Hellwig
@ 2016-09-27  3:55 ` Al Viro
  13 siblings, 0 replies; 21+ messages in thread
From: Al Viro @ 2016-09-27  3:55 UTC (permalink / raw)
  To: Miklos Szeredi; +Cc: linux-fsdevel, linux-kernel

On Wed, Sep 14, 2016 at 10:37:05AM +0200, Miklos Szeredi wrote:
> This contains assorted cleanups in the splice area:
> 
>  - add helpers for pipe buf ops instead of directly calling them
> 
>  - page cache buf doesn't seem to need confirming (since ages)
> 
>  - generic_file_splice_read() and generic_file_read() have lots of
>    duplication
> 
> Git tree is here:
> 
>  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git#splice

My apologies for not replying back when it had been first posted, especially
since I'd been actively messing with splice-related code at that time
(it started in "xfs_file_splice_read: possible circular locking dependency
detected" thread on xfs list).  I've no objections against your inline
helpers.  I'm not so sure about ->confirm() and I really think that
__generic_file_splice_read() should simply die.

Could you rebase the beginning of that thing on top of #work.splice_read
in vfs.git?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 06/11] pipe: no need to confirm page cache buf
  2016-09-27  3:40   ` Al Viro
@ 2016-09-27  7:34     ` Miklos Szeredi
  0 siblings, 0 replies; 21+ messages in thread
From: Miklos Szeredi @ 2016-09-27  7:34 UTC (permalink / raw)
  To: Al Viro; +Cc: linux-fsdevel, lkml, Jens Axboe, Linus Torvalds

On Tue, Sep 27, 2016 at 5:40 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Wed, Sep 14, 2016 at 10:37:11AM +0200, Miklos Szeredi wrote:
>
>> Things could happen to that page that make it not uptodate while sitting in
>> the pipe, but it's questionable whether we should care about that.
>> Checking for being uptodate in the face of such page state change is always
>> going to be racy.
>
> I'm not sure it's the right thing to do here; that area looks like a victim
> of serious bitrot - once upon a time it was ->map(), which used to lock
> page, verity that it's valid, and kmap it.  ->unmap() did kunmap + unlock.
>
> Then the validate part got split off (->pin(), later renamed to ->confirm()),
> with lock _not_ held over the kmap/kunmap.  That's the point when it got racy,
> AFAICS.  OTOH, I would really hate to hold a page locked over e.g. copying to
> userland - too easy to get deadlocks that way.
>
> Jens, could you comment?  Pages definitely shouldn't be getting into pipe
> without having been uptodate; the question is what (if anything) should be
> done about having a page go non-uptodate (on truncate, hole-punching, etc.)
> while a reference to it is sitting in a pipe buffer.

Truncate/holepunch is interesting.  It doesn't make the page go
non-uptodate, just clears page->mapping.  Works like a charm, old data
can be read from the pipe just fine.

Partial truncate is even more interesting, because it will result in
partially cleared data (race is there with plain read() as well,
AFAICS, but very narrow).

Page invalidation by filesystems is similar to truncate.  Old data can
be read from the pipe.  And in fact this probably the way we want it
to work, since redoing the page lookup in ->confirm() would be really
messy.

Also just modifying the data sitting in the pipe will also result in
undefined behavior; either the old or the new data can be read out
from the other end of the pipe.

And while not explicitly documented, all the above cases are fine and
implicit in the non-copy behavior of splice.  Perhaps the man page
should note that modifying data after splice but before reading from
the other end of the pipe results in undefined behavior.

I haven't looked at filesystems, but generic code calls
ClearPageUptodate() from only a few places:

end_swap_bio_read(): does it on a non-uptodate page
page_endio(): AFAICS part of the page reading chain, again doing it on
a non-uptodate page
me_swapcache_dirty(): memory error on dirty swapcache page, this is
the only one that would make sense to trigger EIO on reading the pipe
buffer.  But then why only the dirty swapcache case?

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-09-27  7:35 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-14  8:37 [PATCH 00/11] splice cleanups Miklos Szeredi
2016-09-14  8:37 ` [PATCH 01/11] pipe: add pipe_buf_get() helper Miklos Szeredi
2016-09-14  8:37 ` [PATCH 02/11] pipe: add pipe_buf_release() helper Miklos Szeredi
2016-09-14  8:37 ` [PATCH 03/11] pipe: add pipe_buf_confirm() helper Miklos Szeredi
2016-09-14  8:37 ` [PATCH 04/11] pipe: add pipe_buf_steal() helper Miklos Szeredi
2016-09-14  8:37 ` [PATCH 05/11] pipe: fix comment in pipe_buf_operations Miklos Szeredi
2016-09-14  8:37 ` [PATCH 06/11] pipe: no need to confirm page cache buf Miklos Szeredi
2016-09-27  3:40   ` Al Viro
2016-09-27  7:34     ` Miklos Szeredi
2016-09-14  8:37 ` [PATCH 07/11] pipe: remove generic_pipe_buf_confirm() Miklos Szeredi
2016-09-16 11:23   ` Christoph Hellwig
2016-09-14  8:37 ` [PATCH 08/11] filemap: add get_page_for_read() helper Miklos Szeredi
2016-09-27  3:43   ` Al Viro
2016-09-14  8:37 ` [PATCH 09/11] splice: use get_page_for_read() Miklos Szeredi
2016-09-27  3:45   ` Al Viro
2016-09-14  8:37 ` [PATCH 10/11] splice: don't check i_size in generic_file_splice_read() Miklos Szeredi
2016-09-14  8:37 ` [PATCH 11/11] splice: fold __generic_file_splice_read() into caller Miklos Szeredi
2016-09-14  8:55 ` [PATCH 00/11] splice cleanups Cedric Blancher
2016-09-14  9:30   ` Miklos Szeredi
2016-09-16 11:24 ` Christoph Hellwig
2016-09-27  3:55 ` Al Viro

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.