Linux-Block Archive on lore.kernel.org
 help / color / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: linux-xfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	linux-mm@kvack.org
Subject: [PATCH 30/33] iomap: add initial support for writes without buffer heads
Date: Wed,  9 May 2018 09:48:27 +0200
Message-ID: <20180509074830.16196-31-hch@lst.de> (raw)
In-Reply-To: <20180509074830.16196-1-hch@lst.de>

For now this is limited to blocksize == PAGE_SIZE, where we can simply read
in the full page in write begin, and just set the whole page dirty after
copying data into it.  This code is enabled by default and XFS will now
be fed pages without buffer heads in ->writepage and ->writepages.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 9 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 967bd31540fe..a3861945504f 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -308,6 +308,56 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+		unsigned poff, unsigned plen, struct iomap *iomap)
+{
+	struct bio_vec bvec;
+	struct bio bio;
+	int ret;
+
+	bio_init(&bio, &bvec, 1);
+	bio.bi_opf = REQ_OP_READ;
+	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+	bio_set_dev(&bio, iomap->bdev);
+	__bio_add_page(&bio, page, plen, poff);
+	ret = submit_bio_wait(&bio);
+	if (ret < 0 && iomap_block_needs_zeroing(inode, block_start, iomap))
+		zero_user(page, poff, plen);
+	return ret;
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+		struct page *page, struct iomap *iomap)
+{
+	loff_t block_size = i_blocksize(inode);
+	loff_t block_start = pos & ~(block_size - 1);
+	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	unsigned poff = block_start & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+	int status;
+
+	if (PageUptodate(page))
+		return 0;
+
+	if (iomap_block_needs_zeroing(inode, block_start, iomap)) {
+		unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+		unsigned pend = poff + plen;
+
+		if (poff < from || pend > to)
+			zero_user_segments(page, poff, from, to, pend);
+	} else {
+		status = iomap_read_page_sync(inode, block_start, page,
+				poff, plen, iomap);
+		if (status < 0)
+			return status;
+		SetPageUptodate(page);
+	}
+
+	return 0;
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap)
@@ -325,7 +375,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	if (!page)
 		return -ENOMEM;
 
-	status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	if (i_blocksize(inode) == PAGE_SIZE)
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
+	else
+		status = __block_write_begin_int(page, pos, len, NULL, iomap);
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -338,12 +391,63 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+static int
+iomap_set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	/*
+	 * Lock out page->mem_cgroup migration to keep PageDirty
+	 * synchronized with per-memcg dirty page counters.
+	 */
+	lock_page_memcg(page);
+	newly_dirty = !TestSetPageDirty(page);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 0);
+	unlock_page_memcg(page);
+
+	if (newly_dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return newly_dirty;
+}
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+		unsigned copied, struct page *page, struct iomap *iomap)
+{
+	unsigned start = pos & (PAGE_SIZE - 1);
+	int ret;
+
+	if (unlikely(copied < len)) {
+		/* see block_write_end() for an explanation */
+		if (!PageUptodate(page))
+			copied = 0;
+		if (iomap_block_needs_zeroing(inode, pos, iomap))
+			zero_user(page, start + copied, len - copied);
+	}
+
+	flush_dcache_page(page);
+	SetPageUptodate(page);
+	iomap_set_page_dirty(page);
+	ret = __generic_write_end(inode, pos, copied, page);
+	if (ret < len)
+		iomap_write_failed(inode, pos, len);
+	return ret;
+}
+
 static int
 iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+		unsigned copied, struct page *page, struct iomap *iomap)
 {
 	int ret;
 
+	if (i_blocksize(inode) == PAGE_SIZE)
+		return __iomap_write_end(inode, pos, len, copied, page, iomap);
+
 	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
 			copied, page, NULL);
 	if (ret < len)
@@ -400,7 +504,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		flush_dcache_page(page);
 
-		status = iomap_write_end(inode, pos, bytes, copied, page);
+		status = iomap_write_end(inode, pos, bytes, copied, page,
+				iomap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@@ -494,7 +599,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		WARN_ON_ONCE(!PageUptodate(page));
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@@ -546,7 +651,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
 
-	return iomap_write_end(inode, pos, bytes, bytes, page);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 }
 
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@@ -632,11 +737,14 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-	if (ret)
-		return ret;
+	if (i_blocksize(inode) != PAGE_SIZE) {
+		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		if (ret)
+			return ret;
+
+		block_commit_write(page, 0, length);
+	}
 
-	block_commit_write(page, 0, length);
 	return length;
 }
 
@@ -663,6 +771,9 @@ int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 	else
 		length = PAGE_SIZE;
 
+	if (i_blocksize(inode) == PAGE_SIZE)
+		WARN_ON_ONCE(!PageUptodate(page));
+
 	offset = page_offset(page);
 	while (length > 0) {
 		ret = iomap_apply(inode, offset, length,
-- 
2.17.0

  parent reply index

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-09  7:47 stop using buffer heads in xfs and iomap Christoph Hellwig
2018-05-09  7:47 ` [PATCH 01/33] block: add a lower-level bio_add_page interface Christoph Hellwig
2018-05-09 15:12   ` Matthew Wilcox
2018-05-10  6:40     ` Christoph Hellwig
2018-05-10 21:49       ` Andreas Dilger
2018-05-11  6:29         ` Christoph Hellwig
2018-05-15 16:47           ` Jens Axboe
2018-05-10  8:52   ` Ming Lei
2018-05-11  6:24     ` Christoph Hellwig
2018-05-16  5:06   ` Ritesh Harjani
2018-05-16 18:05     ` Christoph Hellwig
2018-05-17  4:18       ` Ritesh Harjani
2018-05-09  7:47 ` [PATCH 02/33] fs: factor out a __generic_write_end helper Christoph Hellwig
2018-05-09 15:15   ` Matthew Wilcox
2018-05-10  6:40     ` Christoph Hellwig
2018-05-09  7:48 ` [PATCH 03/33] fs: move page_cache_seek_hole_data to iomap.c Christoph Hellwig
2018-05-09  7:48 ` [PATCH 04/33] fs: remove the buffer_unwritten check in page_seek_hole_data Christoph Hellwig
2018-05-17 11:33   ` Andreas Grünbacher
2018-05-09  7:48 ` [PATCH 05/33] fs: use ->is_partially_uptodate in page_cache_seek_hole_data Christoph Hellwig
2018-05-09  7:48 ` [PATCH 06/33] mm: give the 'ret' variable a better name __do_page_cache_readahead Christoph Hellwig
2018-05-09 15:45   ` Matthew Wilcox
2018-05-10  6:41     ` Christoph Hellwig
2018-05-09  7:48 ` [PATCH 07/33] mm: split ->readpages calls to avoid non-contiguous pages lists Christoph Hellwig
2018-05-09 15:46   ` Matthew Wilcox
2018-05-09  7:48 ` [PATCH 08/33] iomap: use __bio_add_page in iomap_dio_zero Christoph Hellwig
2018-05-09  7:48 ` [PATCH 09/33] iomap: add a iomap_sector helper Christoph Hellwig
2018-05-09  7:48 ` [PATCH 10/33] iomap: add an iomap-based bmap implementation Christoph Hellwig
2018-05-09 16:46   ` Darrick J. Wong
2018-05-10  6:42     ` Christoph Hellwig
2018-05-10 15:08       ` Darrick J. Wong
2018-05-11  6:25         ` Christoph Hellwig
2018-05-12  1:56           ` Darrick J. Wong
2018-05-09  7:48 ` [PATCH 11/33] iomap: add an iomap-based readpage and readpages implementation Christoph Hellwig
2018-05-10  1:17   ` Dave Chinner
2018-05-10  6:44     ` Christoph Hellwig
2018-05-09  7:48 ` [PATCH 12/33] xfs: use iomap_bmap Christoph Hellwig
2018-05-09  7:48 ` [PATCH 13/33] xfs: use iomap for blocksize == PAGE_SIZE readpage and readpages Christoph Hellwig
2018-05-09  7:48 ` [PATCH 14/33] xfs: simplify xfs_bmap_punch_delalloc_range Christoph Hellwig
2018-05-09  7:48 ` [PATCH 15/33] xfs: simplify xfs_aops_discard_page Christoph Hellwig
2018-05-09  7:48 ` [PATCH 16/33] xfs: move locking into xfs_bmap_punch_delalloc_range Christoph Hellwig
2018-05-09  7:48 ` [PATCH 17/33] xfs: make xfs_writepage_map extent map centric Christoph Hellwig
2018-05-09  7:48 ` [PATCH 18/33] xfs: remove the now unused XFS_BMAPI_IGSTATE flag Christoph Hellwig
2018-05-09  7:48 ` [PATCH 19/33] xfs: remove xfs_reflink_find_cow_mapping Christoph Hellwig
2018-05-09  7:48 ` [PATCH 20/33] xfs: remove xfs_reflink_trim_irec_to_next_cow Christoph Hellwig
2018-05-09  7:48 ` [PATCH 21/33] xfs: simplify xfs_map_blocks by using xfs_iext_lookup_extent directly Christoph Hellwig
2018-05-09  7:48 ` [PATCH 22/33] xfs: don't clear imap_valid for a non-uptodate buffers Christoph Hellwig
2018-05-09  7:48 ` [PATCH 23/33] xfs: remove the imap_valid flag Christoph Hellwig
2018-05-09  7:48 ` [PATCH 24/33] xfs: don't look at buffer heads in xfs_add_to_ioend Christoph Hellwig
2018-05-09  7:48 ` [PATCH 25/33] xfs: move all writeback buffer_head manipulation into xfs_map_at_offset Christoph Hellwig
2018-05-09  7:48 ` [PATCH 26/33] xfs: allow writeback on pages without buffer heads Christoph Hellwig
2018-05-09  7:48 ` [PATCH 27/33] xfs: remove xfs_start_page_writeback Christoph Hellwig
2018-05-09  7:48 ` [PATCH 28/33] xfs: refactor the tail of xfs_writepage_map Christoph Hellwig
2018-05-09  7:48 ` [PATCH 29/33] xfs: do not set the page uptodate in xfs_writepage_map Christoph Hellwig
2018-05-09  7:48 ` Christoph Hellwig [this message]
2018-05-09  7:48 ` [PATCH 31/33] iomap: add support for sub-pagesize buffered I/O without buffer heads Christoph Hellwig
2018-05-14 16:00   ` Goldwyn Rodrigues
2018-05-15  7:26     ` Christoph Hellwig
2018-05-15 13:47       ` Goldwyn Rodrigues
2018-05-16  5:46         ` Dave Chinner
2018-05-09  7:48 ` [PATCH 32/33] xfs: add support for sub-pagesize writeback without buffer_heads Christoph Hellwig
2018-05-09  7:48 ` [PATCH 33/33] fs: remove __block_write_begin and iomap_to_bh Christoph Hellwig
2018-05-10 15:13 ` stop using buffer heads in xfs and iomap Darrick J. Wong
2018-05-11  6:22   ` Christoph Hellwig
2018-05-11  6:39     ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180509074830.16196-31-hch@lst.de \
    --to=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Block Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-block/0 linux-block/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-block linux-block/ https://lore.kernel.org/linux-block \
		linux-block@vger.kernel.org
	public-inbox-index linux-block

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-block


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git