All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: willy@infradead.org, hch@lst.de, trond.myklebust@primarydata.com
Cc: "Darrick J. Wong" <djwong@kernel.org>,
	Trond Myklebust <trond.myklebust@hammerspace.com>,
	linux-nfs@vger.kernel.org, linux-block@vger.kernel.org,
	linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, dhowells@redhat.com, dhowells@redhat.com,
	darrick.wong@oracle.com, viro@zeniv.linux.org.uk,
	jlayton@kernel.org, torvalds@linux-foundation.org,
	linux-nfs@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3 6/9] mm: Make __swap_writepage() do async DIO if asked for it
Date: Fri, 24 Sep 2021 18:18:54 +0100	[thread overview]
Message-ID: <163250393435.2330363.12822795853508093546.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <163250387273.2330363.13240781819520072222.stgit@warthog.procyon.org.uk>

Make __swap_writepage()'s DIO path do sync DIO if the writeback control's
sync mode is WB_SYNC_ALL and async DIO if not.

Note that this causes processes to hang in sunrpc if the swapfile is on
NFS.  I'm not sure whether that is due to mis-scheduling or something else.

Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Christoph Hellwig <hch@lst.de>
cc: Darrick J. Wong <djwong@kernel.org>
cc: Trond Myklebust <trond.myklebust@hammerspace.com>
cc: linux-nfs@vger.kernel.org
cc: linux-block@vger.kernel.org
cc: linux-xfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---

 mm/page_io.c |  133 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 92 insertions(+), 41 deletions(-)

diff --git a/mm/page_io.c b/mm/page_io.c
index 6b1465699c72..8f1199d59162 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -298,6 +298,96 @@ static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 #define bio_associate_blkg_from_page(bio, page)		do { } while (0)
 #endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
 
+/*
+ * Finish a DIO write of a swapcache page to a swapfile.
+ *
+ * @page: the page that was written; writeback on it is ended here.
+ * @ret:  result of the write - a byte count, or a negative error code.
+ *
+ * Only a write of exactly thp_size(page) bytes is counted as a swap-out;
+ * anything else is handled as a failed write.
+ */
+static void swapfile_write_complete(struct page *page, long ret)
+{
+	if (ret != thp_size(page)) {
+		/*
+		 * With swap-over-NFS the failure may be only temporary, for
+		 * instance when memory is too tight to allocate transmit
+		 * buffers.  So redirty the page and clear PageReclaim to
+		 * keep it away from rotate_reclaimable_page(), and log a
+		 * rate-limited message.  Unlike the normal direct-to-bio
+		 * case, PageError is deliberately not set.
+		 */
+		set_page_dirty(page);
+		ClearPageReclaim(page);
+		pr_err_ratelimited("Write error (%ld) on dio swapfile (%llu)\n",
+				   ret, page_file_offset(page));
+	} else {
+		count_swpout_vm_event(page);
+	}
+	end_page_writeback(page);
+}
+
+/*
+ * kiocb completion callback for a swapfile write.
+ *
+ * @iocb: the kiocb embedded in a struct swapfile_kiocb.
+ * @ret:  result of the write, handed on to swapfile_write_complete().
+ * @ret2: unused.
+ *
+ * Completes the page recorded in the kiocb and then drops one reference
+ * on the containing swapfile_kiocb.
+ */
+static void __swapfile_write_complete(struct kiocb *iocb, long ret, long ret2)
+{
+	struct swapfile_kiocb *ki;
+
+	ki = container_of(iocb, struct swapfile_kiocb, iocb);
+	swapfile_write_complete(ki->iocb.ki_swap_page, ret);
+	swapfile_put_kiocb(ki);
+}
+
+/*
+ * Write a swapcache page to a swapfile with an on-stack (synchronous) kiocb.
+ *
+ * @sis:  swap area whose ->swap_file is written to.
+ * @page: locked page to write; it is marked under writeback and unlocked
+ *	  before the I/O is issued.
+ * @wbc:  writeback control (not consulted here).
+ * @from: iterator describing the data to write.
+ *
+ * Returns 0 if ->swap_rw() wrote exactly page_size(page) bytes, -ENODATA
+ * if it wrote some other non-negative amount, or the negative error code
+ * it returned.
+ */
+static int swapfile_write_sync(struct swap_info_struct *sis,
+			       struct page *page, struct writeback_control *wbc,
+			       struct iov_iter *from)
+{
+	struct file *file = sis->swap_file;
+	struct kiocb iocb;
+	int nr;
+
+	init_sync_kiocb(&iocb, file);
+	iocb.ki_swap_page = page;
+	iocb.ki_pos = page_file_offset(page);
+	iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP;
+
+	set_page_writeback(page);
+	unlock_page(page);
+
+	nr = file->f_mapping->a_ops->swap_rw(&iocb, from);
+	swapfile_write_complete(page, nr);
+
+	if (nr == page_size(page))
+		return 0;
+	if (nr >= 0)
+		return -ENODATA;
+	return nr;
+}
+
+/*
+ * Write a swapcache page to a swapfile through the filesystem's ->swap_rw().
+ *
+ * @sis:  swap area whose ->swap_file is written to.
+ * @page: locked page to write.  It is unlocked on every return path.
+ * @wbc:  writeback control; WB_SYNC_ALL selects a synchronous write, any
+ *	  other sync mode an asynchronous one.
+ *
+ * Returns 0 if the whole page was written or the write was queued for
+ * asynchronous completion, -ENOMEM if the kiocb could not be allocated,
+ * -ENODATA on a short write, or the negative error from ->swap_rw().
+ */
+static int swapfile_write(struct swap_info_struct *sis,
+			  struct page *page, struct writeback_control *wbc)
+{
+	struct swapfile_kiocb *ki;
+	struct file *swap_file = sis->swap_file;
+	size_t len = page_size(page);
+	/*
+	 * NOTE(review): bv lives on this stack frame.  If a ->swap_rw()
+	 * implementation can return -EIOCBQUEUED while still referring to
+	 * the bvec, it has to move into struct swapfile_kiocb - confirm.
+	 */
+	struct bio_vec bv = {
+		.bv_page	= page,
+		.bv_len		= len,
+		.bv_offset	= 0
+	};
+	struct iov_iter from;
+	int ret;
+
+	/*
+	 * Size the iterator from the bvec rather than PAGE_SIZE so that a
+	 * compound page is written in full; the completion handler only
+	 * accepts a write of the page's full size.
+	 */
+	iov_iter_bvec(&from, WRITE, &bv, 1, len);
+
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		return swapfile_write_sync(sis, page, wbc, &from);
+
+	/* We are on the swap-out path: do not start more I/O to get memory. */
+	ki = kzalloc(sizeof(*ki), GFP_NOIO);
+	if (!ki) {
+		/*
+		 * Nothing was submitted.  Keep the page dirty so that it is
+		 * retried, and unlock it as ->writepage() callers expect.
+		 */
+		set_page_dirty(page);
+		unlock_page(page);
+		return -ENOMEM;
+	}
+
+	/* One reference for the completion handler and one for us. */
+	refcount_set(&ki->ref, 2);
+	init_sync_kiocb(&ki->iocb, swap_file);
+	ki->iocb.ki_swap_page	= page;
+	ki->iocb.ki_pos		= page_file_offset(page);
+	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP;
+	ki->iocb.ki_complete	= __swapfile_write_complete;
+
+	set_page_writeback(page);
+	unlock_page(page);
+	ret = swap_file->f_mapping->a_ops->swap_rw(&ki->iocb, &from);
+
+	/* Not queued: the callback will not be invoked, so run it here. */
+	if (ret != -EIOCBQUEUED)
+		__swapfile_write_complete(&ki->iocb, ret, 0);
+	swapfile_put_kiocb(ki);
+
+	/*
+	 * A queued write is a successful submission; its outcome is dealt
+	 * with by the completion handler, so it is not an error here.
+	 */
+	if (ret == -EIOCBQUEUED)
+		return 0;
+	if (ret < 0)
+		return ret;
+	return ret == len ? 0 : -ENODATA;
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct bio *bio;
@@ -305,47 +395,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc)
 	struct swap_info_struct *sis = page_swap_info(page);
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-	if (data_race(sis->flags & SWP_FS_OPS)) {
-		struct kiocb kiocb;
-		struct file *swap_file = sis->swap_file;
-		struct address_space *mapping = swap_file->f_mapping;
-		struct bio_vec bv = {
-			.bv_page = page,
-			.bv_len  = PAGE_SIZE,
-			.bv_offset = 0
-		};
-		struct iov_iter from;
-
-		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
-		init_sync_kiocb(&kiocb, swap_file);
-		kiocb.ki_pos	= page_file_offset(page);
-		kiocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP;
-
-		set_page_writeback(page);
-		unlock_page(page);
-		ret = mapping->a_ops->swap_rw(&kiocb, &from);
-		if (ret == PAGE_SIZE) {
-			count_vm_event(PSWPOUT);
-			ret = 0;
-		} else {
-			/*
-			 * In the case of swap-over-nfs, this can be a
-			 * temporary failure if the system has limited
-			 * memory for allocating transmit buffers.
-			 * Mark the page dirty and avoid
-			 * rotate_reclaimable_page but rate-limit the
-			 * messages but do not flag PageError like
-			 * the normal direct-to-bio case as it could
-			 * be temporary.
-			 */
-			set_page_dirty(page);
-			ClearPageReclaim(page);
-			pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n",
-					   ret, page_file_offset(page));
-		}
-		end_page_writeback(page);
-		return ret;
-	}
+	if (data_race(sis->flags & SWP_FS_OPS))
+		return swapfile_write(sis, page, wbc);
 
 	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
 	if (!ret) {



  parent reply	other threads:[~2021-09-24 17:20 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-24 17:17 [RFC][PATCH v3 0/9] mm: Use DIO for swap and fix NFS swapfiles David Howells
2021-09-24 17:18 ` [PATCH v3 1/9] mm: Remove the callback func argument from __swap_writepage() David Howells
2021-09-24 17:18 ` [PATCH v3 2/9] mm: Add 'supports' field to the address_space_operations to list features David Howells
2021-09-24 20:10   ` Matthew Wilcox
2021-09-24 17:18 ` [PATCH v3 3/9] mm: Make swap_readpage() void David Howells
2021-09-24 22:07   ` Matthew Wilcox
2021-09-24 17:18 ` [PATCH v3 4/9] Introduce IOCB_SWAP kiocb flag to trigger REQ_SWAP David Howells
2021-09-26 21:56   ` Dave Chinner
2021-09-24 17:18 ` [PATCH v3 5/9] mm: Make swap_readpage() for SWP_FS_OPS use ->swap_rw() not ->readpage() David Howells
2021-09-24 17:18 ` David Howells [this message]
2021-09-24 17:19 ` [PATCH v3 7/9] nfs: Fix write to swapfile failure due to generic_write_checks() David Howells
2021-09-24 17:19 ` [PATCH v3 8/9] block, btrfs, ext4, xfs: Implement swap_rw David Howells
2021-09-24 17:19 ` [PATCH v3 9/9] mm: Remove swap BIO paths and only use DIO paths David Howells
2021-09-25 14:56   ` Matthew Wilcox
2021-09-25 15:36   ` David Howells
2021-09-25 17:09     ` Matthew Wilcox
2021-09-26 23:08       ` Damien Le Moal
2021-09-27  1:25         ` Dave Chinner
2021-09-27  1:41           ` Damien Le Moal
2021-09-27 20:03     ` David Sterba
2021-09-25 23:42 ` [RFC][PATCH v3 0/9] mm: Use DIO for swap and fix NFS swapfiles Dave Chinner
2021-09-26  3:10   ` Matthew Wilcox
2021-09-26 22:36     ` Dave Chinner
2021-09-27 20:07 ` David Sterba
2021-09-28  3:11 ` NeilBrown
2021-09-30 15:54   ` Steve French
2021-09-30 15:54     ` Steve French
2021-09-29 15:45 ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=163250393435.2330363.12822795853508093546.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=darrick.wong@oracle.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@hammerspace.com \
    --cc=trond.myklebust@primarydata.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.