All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: willy@infradead.org, hch@lst.de, trond.myklebust@primarydata.com
Cc: Jens Axboe <axboe@kernel.dk>,
	"Darrick J. Wong" <djwong@kernel.org>,
	linux-block@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	dhowells@redhat.com, dhowells@redhat.com,
	darrick.wong@oracle.com, viro@zeniv.linux.org.uk,
	jlayton@kernel.org, torvalds@linux-foundation.org,
	linux-nfs@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3 9/9] mm: Remove swap BIO paths and only use DIO paths
Date: Fri, 24 Sep 2021 18:19:23 +0100	[thread overview]
Message-ID: <163250396319.2330363.10564506508011638258.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <163250387273.2330363.13240781819520072222.stgit@warthog.procyon.org.uk>

Delete the BIO-generating swap read/write paths and always use ->swap_rw().
This puts the mapping layer in the filesystem.

[!] ALSO: Add a compile-time knob (ONLY_USE_SYNC_DIO) to disable swap by
    asynchronous DIO so that only synchronous DIO is used.  Async DIO
    doesn't seem to work: ATA errors are chucked out in both the
    swap-on-blockdev and swapfile-on-XFS cases.  It also misbehaves on NFS.

I have tested this with sync DIO on ext4-swapfile, xfs-swapfile, a raw
blockdev and NFS.  The first three work; NFS works for a while then grinds to
a halt, chucking out lists of blocked sunrpc operations (I suspect it can't
allocate memory somewhere).

Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Darrick J. Wong <djwong@kernel.org>
cc: linux-block@vger.kernel.org
cc: linux-xfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---

 mm/page_io.c  |  156 +++------------------------------------------------------
 mm/swapfile.c |    4 +
 2 files changed, 10 insertions(+), 150 deletions(-)

diff --git a/mm/page_io.c b/mm/page_io.c
index 8f1199d59162..b48318951380 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -26,6 +26,8 @@
 #include <linux/uio.h>
 #include <linux/sched/task.h>
 
+#define ONLY_USE_SYNC_DIO 1
+
 /*
  * Keep track of the kiocb we're using to do async DIO.  We have to
  * refcount it until various things stop looking at the kiocb *after*
@@ -42,30 +44,6 @@ static void swapfile_put_kiocb(struct swapfile_kiocb *ki)
 		kfree(ki);
 }
 
-static void end_swap_bio_write(struct bio *bio)
-{
-	struct page *page = bio_first_page_all(bio);
-
-	if (bio->bi_status) {
-		SetPageError(page);
-		/*
-		 * We failed to write the page out to swap-space.
-		 * Re-dirty the page in order to avoid it being reclaimed.
-		 * Also print a dire warning that things will go BAD (tm)
-		 * very quickly.
-		 *
-		 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
-		 */
-		set_page_dirty(page);
-		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
-				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-				     (unsigned long long)bio->bi_iter.bi_sector);
-		ClearPageReclaim(page);
-	}
-	end_page_writeback(page);
-	bio_put(bio);
-}
-
 static void swap_slot_free_notify(struct page *page)
 {
 	struct swap_info_struct *sis;
@@ -114,32 +92,6 @@ static void swap_slot_free_notify(struct page *page)
 	}
 }
 
-static void end_swap_bio_read(struct bio *bio)
-{
-	struct page *page = bio_first_page_all(bio);
-	struct task_struct *waiter = bio->bi_private;
-
-	if (bio->bi_status) {
-		SetPageError(page);
-		ClearPageUptodate(page);
-		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
-				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-				     (unsigned long long)bio->bi_iter.bi_sector);
-		goto out;
-	}
-
-	SetPageUptodate(page);
-	swap_slot_free_notify(page);
-out:
-	unlock_page(page);
-	WRITE_ONCE(bio->bi_private, NULL);
-	bio_put(bio);
-	if (waiter) {
-		blk_wake_io_task(waiter);
-		put_task_struct(waiter);
-	}
-}
-
 int generic_swapfile_activate(struct swap_info_struct *sis,
 				struct file *swap_file,
 				sector_t *span)
@@ -279,25 +231,6 @@ static inline void count_swpout_vm_event(struct page *page)
 	count_vm_events(PSWPOUT, thp_nr_pages(page));
 }
 
-#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
-{
-	struct cgroup_subsys_state *css;
-	struct mem_cgroup *memcg;
-
-	memcg = page_memcg(page);
-	if (!memcg)
-		return;
-
-	rcu_read_lock();
-	css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
-	bio_associate_blkg_from_css(bio, css);
-	rcu_read_unlock();
-}
-#else
-#define bio_associate_blkg_from_page(bio, page)		do { } while (0)
-#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
-
 static void swapfile_write_complete(struct page *page, long ret)
 {
 	if (ret == thp_size(page)) {
@@ -364,7 +297,7 @@ static int swapfile_write(struct swap_info_struct *sis,
 
 	iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 
-	if (wbc->sync_mode == WB_SYNC_ALL)
+	if (ONLY_USE_SYNC_DIO || wbc->sync_mode == WB_SYNC_ALL)
 		return swapfile_write_sync(sis, page, wbc, &from);
 
 	ki = kzalloc(sizeof(*ki), GFP_KERNEL);
@@ -390,40 +323,17 @@ static int swapfile_write(struct swap_info_struct *sis,
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct bio *bio;
-	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-	if (data_race(sis->flags & SWP_FS_OPS))
-		return swapfile_write(sis, page, wbc);
-
-	ret = bdev_write_page(sis->bdev, swap_page_sector(page), page, wbc);
-	if (!ret) {
-		count_swpout_vm_event(page);
-		return 0;
-	}
-
-	bio = bio_alloc(GFP_NOIO, 1);
-	bio_set_dev(bio, sis->bdev);
-	bio->bi_iter.bi_sector = swap_page_sector(page);
-	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-	bio->bi_end_io = end_swap_bio_write;
-	bio_add_page(bio, page, thp_size(page), 0);
-
-	bio_associate_blkg_from_page(bio, page);
-	count_swpout_vm_event(page);
-	set_page_writeback(page);
-	unlock_page(page);
-	submit_bio(bio);
-
-	return 0;
+	return swapfile_write(sis, page, wbc);
 }
 
 static void swapfile_read_complete(struct page *page, long ret)
 {
 	if (ret == page_size(page)) {
 		count_vm_event(PSWPIN);
+		swap_slot_free_notify(page);
 		SetPageUptodate(page);
 	} else {
 		SetPageError(page);
@@ -473,7 +383,7 @@ static void swapfile_read(struct swap_info_struct *sis, struct page *page,
 
 	iov_iter_bvec(&to, READ, &bv, 1, thp_size(page));
 
-	if (synchronous)
+	if (ONLY_USE_SYNC_DIO || synchronous)
 		return swapfile_read_sync(sis, page, &to);
 
 	ki = kzalloc(sizeof(*ki), GFP_KERNEL);
@@ -495,10 +405,7 @@ static void swapfile_read(struct swap_info_struct *sis, struct page *page,
 
 void swap_readpage(struct page *page, bool synchronous)
 {
-	struct bio *bio;
 	struct swap_info_struct *sis = page_swap_info(page);
-	blk_qc_t qc;
-	struct gendisk *disk;
 	unsigned long pflags;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
@@ -515,58 +422,9 @@ void swap_readpage(struct page *page, bool synchronous)
 	if (frontswap_load(page) == 0) {
 		SetPageUptodate(page);
 		unlock_page(page);
-		goto out;
-	}
-
-	if (data_race(sis->flags & SWP_FS_OPS)) {
+	} else {
 		swapfile_read(sis, page, synchronous);
-		goto out;
 	}
-
-	if (sis->flags & SWP_SYNCHRONOUS_IO) {
-		if (!bdev_read_page(sis->bdev, swap_page_sector(page), page)) {
-			if (trylock_page(page)) {
-				swap_slot_free_notify(page);
-				unlock_page(page);
-			}
-
-			count_vm_event(PSWPIN);
-			goto out;
-		}
-	}
-
-	bio = bio_alloc(GFP_KERNEL, 1);
-	bio_set_dev(bio, sis->bdev);
-	bio->bi_opf = REQ_OP_READ;
-	bio->bi_iter.bi_sector = swap_page_sector(page);
-	bio->bi_end_io = end_swap_bio_read;
-	bio_add_page(bio, page, thp_size(page), 0);
-
-	disk = bio->bi_bdev->bd_disk;
-	/*
-	 * Keep this task valid during swap readpage because the oom killer may
-	 * attempt to access it in the page fault retry time check.
-	 */
-	if (synchronous) {
-		bio->bi_opf |= REQ_HIPRI;
-		get_task_struct(current);
-		bio->bi_private = current;
-	}
-	count_vm_event(PSWPIN);
-	bio_get(bio);
-	qc = submit_bio(bio);
-	while (synchronous) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!READ_ONCE(bio->bi_private))
-			break;
-
-		if (!blk_poll(disk->queue, qc, true))
-			blk_io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	bio_put(bio);
-
-out:
 	psi_memstall_leave(&pflags);
 }
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 22d10f713848..95d2571e3727 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2918,6 +2918,8 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 			return -EINVAL;
 		p->flags |= SWP_BLKDEV;
 	} else if (S_ISREG(inode->i_mode)) {
+		if (!inode->i_mapping->a_ops->swap_rw)
+			return -EINVAL;
 		p->bdev = inode->i_sb->s_bdev;
 	}
 
@@ -3165,7 +3167,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		name = NULL;
 		goto bad_swap;
 	}
-	swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0);
+	swap_file = file_open_name(name, O_RDWR | O_LARGEFILE | O_DIRECT, 0);
 	if (IS_ERR(swap_file)) {
 		error = PTR_ERR(swap_file);
 		swap_file = NULL;



  parent reply	other threads:[~2021-09-24 17:20 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-24 17:17 [RFC][PATCH v3 0/9] mm: Use DIO for swap and fix NFS swapfiles David Howells
2021-09-24 17:18 ` [PATCH v3 1/9] mm: Remove the callback func argument from __swap_writepage() David Howells
2021-09-24 17:18 ` [PATCH v3 2/9] mm: Add 'supports' field to the address_space_operations to list features David Howells
2021-09-24 20:10   ` Matthew Wilcox
2021-09-24 17:18 ` [PATCH v3 3/9] mm: Make swap_readpage() void David Howells
2021-09-24 22:07   ` Matthew Wilcox
2021-09-24 17:18 ` [PATCH v3 4/9] Introduce IOCB_SWAP kiocb flag to trigger REQ_SWAP David Howells
2021-09-26 21:56   ` Dave Chinner
2021-09-24 17:18 ` [PATCH v3 5/9] mm: Make swap_readpage() for SWP_FS_OPS use ->swap_rw() not ->readpage() David Howells
2021-09-24 17:18 ` [PATCH v3 6/9] mm: Make __swap_writepage() do async DIO if asked for it David Howells
2021-09-24 17:19 ` [PATCH v3 7/9] nfs: Fix write to swapfile failure due to generic_write_checks() David Howells
2021-09-24 17:19 ` [PATCH v3 8/9] block, btrfs, ext4, xfs: Implement swap_rw David Howells
2021-09-24 17:19 ` David Howells [this message]
2021-09-25 14:56   ` [PATCH v3 9/9] mm: Remove swap BIO paths and only use DIO paths Matthew Wilcox
2021-09-25 15:36   ` David Howells
2021-09-25 17:09     ` Matthew Wilcox
2021-09-26 23:08       ` Damien Le Moal
2021-09-27  1:25         ` Dave Chinner
2021-09-27  1:41           ` Damien Le Moal
2021-09-27 20:03     ` David Sterba
2021-09-25 23:42 ` [RFC][PATCH v3 0/9] mm: Use DIO for swap and fix NFS swapfiles Dave Chinner
2021-09-26  3:10   ` Matthew Wilcox
2021-09-26 22:36     ` Dave Chinner
2021-09-27 20:07 ` David Sterba
2021-09-28  3:11 ` NeilBrown
2021-09-30 15:54   ` Steve French
2021-09-30 15:54     ` Steve French
2021-09-29 15:45 ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=163250396319.2330363.10564506508011638258.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=darrick.wong@oracle.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@primarydata.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.