All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Al Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>
Cc: David Howells <dhowells@redhat.com>,
	Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
	Jeff Layton <jlayton@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Jason Gunthorpe <jgg@nvidia.com>,
	Logan Gunthorpe <logang@deltatee.com>,
	Hillf Danton <hdanton@sina.com>,
	Christian Brauner <brauner@kernel.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Daniel Golle <daniel@makrotopia.org>,
	Guenter Roeck <groeck7@gmail.com>, Christoph Hellwig <hch@lst.de>,
	John Hubbard <jhubbard@nvidia.com>,
	Hugh Dickins <hughd@google.com>
Subject: [PATCH v22 09/31] shmem: Implement splice-read
Date: Mon, 22 May 2023 14:49:56 +0100	[thread overview]
Message-ID: <20230522135018.2742245-10-dhowells@redhat.com> (raw)
In-Reply-To: <20230522135018.2742245-1-dhowells@redhat.com>

The new filemap_splice_read() has an implicit expectation via
filemap_get_pages() that ->read_folio() exists if ->readahead() doesn't
fully populate the pagecache of the file it is reading from[1], potentially
leading to a jump to NULL if this doesn't exist.  shmem, however, (and by
extension, tmpfs, ramfs and rootfs), doesn't have ->read_folio(),

Work around this by equipping shmem with its own splice-read
implementation, based on filemap_splice_read(), but able to paste in
zero_page when there's a page missing.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Daniel Golle <daniel@makrotopia.org>
cc: Guenter Roeck <groeck7@gmail.com>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: John Hubbard <jhubbard@nvidia.com>
cc: David Hildenbrand <david@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Hugh Dickins <hughd@google.com>
cc: linux-block@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Link: https://lore.kernel.org/r/Y+pdHFFTk1TTEBsO@makrotopia.org/ [1]
---

Notes:
    ver #19)
     - Remove a missed get_page() on the zero page.
    
    ver #18)
     - Don't take/release a ref on the zero page.

 mm/shmem.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 133 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index e40a08c5c6d7..1f504ed982cf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2731,6 +2731,138 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	return retval ? retval : error;
 }
 
+static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
+			      struct pipe_buffer *buf)
+{
+	return true;
+}
+
+static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
+				  struct pipe_buffer *buf)
+{
+}
+
+static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
+	return false;
+}
+
+static const struct pipe_buf_operations zero_pipe_buf_ops = {
+	.release	= zero_pipe_buf_release,
+	.try_steal	= zero_pipe_buf_try_steal,
+	.get		= zero_pipe_buf_get,
+};
+
+static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
+					loff_t fpos, size_t size)
+{
+	size_t offset = fpos & ~PAGE_MASK;
+
+	size = min_t(size_t, size, PAGE_SIZE - offset);
+
+	if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
+		struct pipe_buffer *buf = pipe_head_buf(pipe);
+
+		*buf = (struct pipe_buffer) {
+			.ops	= &zero_pipe_buf_ops,
+			.page	= ZERO_PAGE(0),
+			.offset	= offset,
+			.len	= size,
+		};
+		pipe->head++;
+	}
+
+	return size;
+}
+
+static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
+				      struct pipe_inode_info *pipe,
+				      size_t len, unsigned int flags)
+{
+	struct inode *inode = file_inode(in);
+	struct address_space *mapping = inode->i_mapping;
+	struct folio *folio = NULL;
+	size_t total_spliced = 0, used, npages, n, part;
+	loff_t isize;
+	int error = 0;
+
+	/* Work out how much data we can actually add into the pipe */
+	used = pipe_occupancy(pipe->head, pipe->tail);
+	npages = max_t(ssize_t, pipe->max_usage - used, 0);
+	len = min_t(size_t, len, npages * PAGE_SIZE);
+
+	do {
+		if (*ppos >= i_size_read(inode))
+			break;
+
+		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
+		if (error) {
+			if (error == -EINVAL)
+				error = 0;
+			break;
+		}
+		if (folio) {
+			folio_unlock(folio);
+
+			if (folio_test_hwpoison(folio)) {
+				error = -EIO;
+				break;
+			}
+		}
+
+		/*
+		 * i_size must be checked after we know the pages are Uptodate.
+		 *
+		 * Checking i_size after the check allows us to calculate
+		 * the correct value for "nr", which means the zero-filled
+		 * part of the page is not copied back to userspace (unless
+		 * another truncate extends the file - this is desired though).
+		 */
+		isize = i_size_read(inode);
+		if (unlikely(*ppos >= isize))
+			break;
+		part = min_t(loff_t, isize - *ppos, len);
+
+		if (folio) {
+			/*
+			 * If users can be writing to this page using arbitrary
+			 * virtual addresses, take care about potential aliasing
+			 * before reading the page on the kernel side.
+			 */
+			if (mapping_writably_mapped(mapping))
+				flush_dcache_folio(folio);
+			folio_mark_accessed(folio);
+			/*
+			 * Ok, we have the page, and it's up-to-date, so we can
+			 * now splice it into the pipe.
+			 */
+			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
+			folio_put(folio);
+			folio = NULL;
+		} else {
+			n = splice_zeropage_into_pipe(pipe, *ppos, len);
+		}
+
+		if (!n)
+			break;
+		len -= n;
+		total_spliced += n;
+		*ppos += n;
+		in->f_ra.prev_pos = *ppos;
+		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+			break;
+
+		cond_resched();
+	} while (len);
+
+	if (folio)
+		folio_put(folio);
+
+	file_accessed(in);
+	return total_spliced ? total_spliced : error;
+}
+
 static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct address_space *mapping = file->f_mapping;
@@ -3971,7 +4103,7 @@ static const struct file_operations shmem_file_operations = {
 	.read_iter	= shmem_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.fsync		= noop_fsync,
-	.splice_read	= generic_file_splice_read,
+	.splice_read	= shmem_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= shmem_fallocate,
 #endif


  parent reply	other threads:[~2023-05-22 13:53 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-22 13:49 [PATCH v22 00/31] splice: Kill ITER_PIPE David Howells
2023-05-22 13:49 ` [PATCH v22 01/31] splice: Fix filemap_splice_read() to use the correct inode David Howells
2023-05-22 13:49 ` [PATCH v22 02/31] splice: Make filemap_splice_read() check s_maxbytes David Howells
2023-05-22 13:49 ` [PATCH v22 03/31] splice: Rename direct_splice_read() to copy_splice_read() David Howells
2023-05-22 13:49 ` [PATCH v22 04/31] splice: Clean up copy_splice_read() a bit David Howells
2023-05-22 13:49 ` [PATCH v22 05/31] splice: Make do_splice_to() generic and export it David Howells
2023-05-22 13:49 ` [PATCH v22 06/31] splice: Check for zero count in vfs_splice_read() David Howells
2023-05-22 13:49 ` [PATCH v22 07/31] splice: Make splice from an O_DIRECT fd use copy_splice_read() David Howells
2023-05-22 13:49 ` [PATCH v22 08/31] splice: Make splice from a DAX file " David Howells
2023-05-22 13:49   ` David Howells
2023-05-22 13:49 ` David Howells [this message]
2023-05-22 13:49 ` [PATCH v22 10/31] overlayfs: Implement splice-read David Howells
2023-05-22 13:49 ` [PATCH v22 11/31] coda: " David Howells
2023-05-22 13:49 ` [PATCH v22 12/31] tty, proc, kernfs, random: Use copy_splice_read() David Howells
2023-05-22 13:50 ` [PATCH v22 13/31] net: Make sock_splice_read() use copy_splice_read() by default David Howells
2023-05-22 13:50 ` [PATCH v22 14/31] 9p: Add splice_read wrapper David Howells
2023-05-22 13:50 ` [PATCH v22 15/31] afs: Provide a splice-read wrapper David Howells
2023-05-22 13:50 ` [PATCH v22 16/31] ceph: " David Howells
2023-05-22 13:50 ` [PATCH v22 17/31] ecryptfs: " David Howells
2023-05-22 13:50 ` [PATCH v22 18/31] ext4: " David Howells
2023-05-22 13:50 ` [PATCH v22 19/31] f2fs: " David Howells
2023-05-22 13:50   ` [f2fs-dev] " David Howells
2023-05-24  3:01   ` Chao Yu
2023-05-24  3:01     ` [f2fs-dev] " Chao Yu
2023-07-06  0:18   ` patchwork-bot+f2fs
2023-07-06  0:18     ` patchwork-bot+f2fs
2023-05-22 13:50 ` [PATCH v22 20/31] nfs: " David Howells
2023-05-22 13:50 ` [PATCH v22 21/31] ntfs3: " David Howells
2023-05-22 13:50 ` [PATCH v22 22/31] ocfs2: " David Howells
2023-05-22 13:50   ` [Ocfs2-devel] " David Howells via Ocfs2-devel
2023-05-22 13:50 ` [PATCH v22 23/31] orangefs: " David Howells
2023-05-22 13:50 ` [PATCH v22 24/31] xfs: " David Howells
2023-05-22 13:50 ` [PATCH v22 25/31] zonefs: " David Howells
2023-05-23  2:48   ` Damien Le Moal
2023-05-23 20:43   ` David Howells
2023-05-24 23:13     ` Damien Le Moal
2023-05-22 13:50 ` [PATCH v22 26/31] trace: Convert trace/seq to use copy_splice_read() David Howells
2023-05-22 13:50 ` [PATCH v22 27/31] cifs: Use filemap_splice_read() David Howells
2023-05-22 13:50 ` [PATCH v22 28/31] splice: Use filemap_splice_read() instead of generic_file_splice_read() David Howells
2023-05-22 13:50 ` [PATCH v22 29/31] splice: Remove generic_file_splice_read() David Howells
2023-05-22 13:50 ` [PATCH v22 30/31] iov_iter: Kill ITER_PIPE David Howells
2023-05-22 13:50 ` [PATCH v22 31/31] splice: kdoc for filemap_splice_read() and copy_splice_read() David Howells
2023-05-23 17:47 ` [PATCH v22 00/31] splice: Kill ITER_PIPE Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230522135018.2742245-10-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=daniel@makrotopia.org \
    --cc=david@redhat.com \
    --cc=groeck7@gmail.com \
    --cc=hch@infradead.org \
    --cc=hch@lst.de \
    --cc=hdanton@sina.com \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=jgg@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=logang@deltatee.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.