All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED
Date: Tue, 10 Dec 2019 09:24:52 -0700	[thread overview]
Message-ID: <20191210162454.8608-4-axboe@kernel.dk> (raw)
In-Reply-To: <20191210162454.8608-1-axboe@kernel.dk>

If RWF_UNCACHED is set for io_uring (or pwritev2(2)), we'll drop the
cache instantiated for buffered writes. If new pages aren't
instantiated, we leave them alone. This provides similar semantics to
reads with RWF_UNCACHED set.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/fs.h |  3 ++
 mm/filemap.c       | 78 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf58db1bc032..bcf486c132a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -285,6 +285,7 @@ enum positive_aop_returns {
 #define AOP_FLAG_NOFS			0x0002 /* used by filesystem to direct
 						* helper code (eg buffer layer)
 						* to clear GFP_FS from alloc */
+#define AOP_FLAG_UNCACHED		0x0004
 
 /*
  * oh the beauties of C type declarations.
@@ -3105,6 +3106,8 @@ extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *,
 				     struct kiocb *);
+extern void write_drop_cached_pages(struct page **,
+				    struct address_space *mapping, unsigned *);
 
 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
 		rwf_t flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index fe37bd2b2630..d6171bf705f9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3287,10 +3287,12 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
 	struct page *page;
-	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
+	int fgp_flags = FGP_LOCK|FGP_WRITE;
 
 	if (flags & AOP_FLAG_NOFS)
 		fgp_flags |= FGP_NOFS;
+	if (!(flags & AOP_FLAG_UNCACHED))
+		fgp_flags |= FGP_CREAT;
 
 	page = pagecache_get_page(mapping, index, fgp_flags,
 			mapping_gfp_mask(mapping));
@@ -3301,21 +3303,67 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
+/*
+ * Start writeback on the pages in pgs[], and then try and remove those pages
+ * from the page cached. Used with RWF_UNCACHED.
+ */
+void write_drop_cached_pages(struct page **pgs, struct address_space *mapping,
+			     unsigned *nr)
+{
+	loff_t start, end;
+	int i;
+
+	end = 0;
+	start = LLONG_MAX;
+	for (i = 0; i < *nr; i++) {
+		struct page *page = pgs[i];
+		loff_t off;
+
+		off = (loff_t) page_to_index(page) << PAGE_SHIFT;
+		if (off < start)
+			start = off;
+		if (off > end)
+			end = off;
+		get_page(page);
+	}
+
+	__filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
+
+	for (i = 0; i < *nr; i++) {
+		struct page *page = pgs[i];
+
+		lock_page(page);
+		if (page->mapping == mapping) {
+			wait_on_page_writeback(page);
+			if (!page_has_private(page) ||
+			    try_to_release_page(page, 0))
+				remove_mapping(mapping, page);
+		}
+		unlock_page(page);
+	}
+	*nr = 0;
+}
+EXPORT_SYMBOL_GPL(write_drop_cached_pages);
+
+#define GPW_PAGE_BATCH		16
+
 ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, struct kiocb *iocb)
 {
 	struct address_space *mapping = file->f_mapping;
 	const struct address_space_operations *a_ops = mapping->a_ops;
+	struct page *drop_pages[GPW_PAGE_BATCH];
 	loff_t pos = iocb->ki_pos;
 	long status = 0;
 	ssize_t written = 0;
-	unsigned int flags = 0;
+	unsigned int flags = 0, drop_nr = 0;
 
 	do {
 		struct page *page;
 		unsigned long offset;	/* Offset into pagecache page */
 		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
+		bool drop_page = false;	/* drop page after IO */
 		void *fsdata;
 
 		offset = (pos & (PAGE_SIZE - 1));
@@ -3323,6 +3371,9 @@ ssize_t generic_perform_write(struct file *file,
 						iov_iter_count(i));
 
 again:
+		if (iocb->ki_flags & IOCB_UNCACHED)
+			flags |= AOP_FLAG_UNCACHED;
+
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -3343,10 +3394,17 @@ ssize_t generic_perform_write(struct file *file,
 			break;
 		}
 
+retry:
 		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
 						&page, &fsdata);
-		if (unlikely(status < 0))
+		if (unlikely(status < 0)) {
+			if (status == -ENOMEM && (flags & AOP_FLAG_UNCACHED)) {
+				drop_page = true;
+				flags &= ~AOP_FLAG_UNCACHED;
+				goto retry;
+			}
 			break;
+		}
 
 		if (mapping_writably_mapped(mapping))
 			flush_dcache_page(page);
@@ -3376,12 +3434,22 @@ ssize_t generic_perform_write(struct file *file,
 						iov_iter_single_seg_count(i));
 			goto again;
 		}
+		if (drop_page &&
+		    ((pos >> PAGE_SHIFT) != ((pos + copied) >> PAGE_SHIFT))) {
+			drop_pages[drop_nr] = page;
+			if (++drop_nr == GPW_PAGE_BATCH)
+				write_drop_cached_pages(drop_pages, mapping,
+								&drop_nr);
+		} else
+			balance_dirty_pages_ratelimited(mapping);
+
 		pos += copied;
 		written += copied;
-
-		balance_dirty_pages_ratelimited(mapping);
 	} while (iov_iter_count(i));
 
+	if (drop_nr)
+		write_drop_cached_pages(drop_pages, mapping, &drop_nr);
+
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_perform_write);
-- 
2.24.0


  parent reply	other threads:[~2019-12-10 16:25 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-10 16:24 [PATCHSET 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-10 16:24 ` [PATCH 1/5] fs: add read support " Jens Axboe
2019-12-10 16:24 ` [PATCH 2/5] mm: make generic_perform_write() take a struct kiocb Jens Axboe
2019-12-10 16:24 ` Jens Axboe [this message]
2019-12-10 16:55   ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Matthew Wilcox
2019-12-10 17:02     ` Jens Axboe
2019-12-10 18:35       ` Chris Mason
2019-12-10 18:58       ` Matthew Wilcox
2019-12-10 19:10         ` Jens Axboe
2019-12-11  0:23   ` Dave Chinner
2019-12-11  0:28     ` Dave Chinner
2019-12-11 14:39     ` Jens Axboe
2019-12-10 16:24 ` [PATCH 4/5] iomap: pass in the write_begin/write_end flags to iomap_actor Jens Axboe
2019-12-10 16:24 ` [PATCH 5/5] iomap: support RWF_UNCACHED for buffered writes Jens Axboe
2019-12-10 21:17 ` [PATCHSET 0/5] Support for RWF_UNCACHED Andreas Dilger
2019-12-12 15:47 ` Christoph Hellwig
2019-12-12 15:52   ` Jens Axboe
2019-12-10 20:42 [PATCHSET v2 " Jens Axboe
2019-12-10 20:43 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-14  0:01   ` Andrew Morton
2019-12-11 15:29 [PATCHSET v3 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-11 15:29 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe
2019-12-12 19:01 [PATCHSET v4 0/5] Support for RWF_UNCACHED Jens Axboe
2019-12-12 19:01 ` [PATCH 3/5] mm: make buffered writes work with RWF_UNCACHED Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191210162454.8608-4-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.