All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: willy@infradead.org, hch@lst.de, trond.myklebust@primarydata.com
Cc: Jens Axboe <axboe@kernel.dk>,
	"Darrick J. Wong" <djwong@kernel.org>,
	linux-block@vger.kernel.org, linux-xfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-mm@kvack.org,
	dhowells@redhat.com, dhowells@redhat.com,
	darrick.wong@oracle.com, viro@zeniv.linux.org.uk,
	jlayton@kernel.org, torvalds@linux-foundation.org,
	linux-nfs@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3 5/9] mm: Make swap_readpage() for SWP_FS_OPS use ->swap_rw() not ->readpage()
Date: Fri, 24 Sep 2021 18:18:41 +0100	[thread overview]
Message-ID: <163250392134.2330363.2715808422502485629.stgit@warthog.procyon.org.uk> (raw)
In-Reply-To: <163250387273.2330363.13240781819520072222.stgit@warthog.procyon.org.uk>

Make swap_readpage() use the ->swap_rw() method on the filesystem to do
direct I/O rather than ->readpage() when accessing a swap file
(SWP_FS_OPS).

Make swap_writepage() similarly use ->swap_rw() rather than the
->direct_IO() method.

Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Matthew Wilcox <willy@infradead.org>
cc: Christoph Hellwig <hch@lst.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Darrick J. Wong <djwong@kernel.org>
cc: linux-block@vger.kernel.org
cc: linux-xfs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---

 include/linux/fs.h |    2 +
 mm/page_io.c       |  106 +++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c20f4423e2f1..c8f7724ecded 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,7 @@ struct kiocb {
 	union {
 		unsigned int		ki_cookie; /* for ->iopoll */
 		struct wait_page_queue	*ki_waitq; /* for async buffered IO */
+		struct page	*ki_swap_page;	/* For swapfile_read/write */
 	};
 
 	randomized_struct_fields_end
@@ -404,6 +405,7 @@ struct address_space_operations {
 	int (*releasepage) (struct page *, gfp_t);
 	void (*freepage)(struct page *);
 	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
+	ssize_t (*swap_rw)(struct kiocb *, struct iov_iter *);
 	/*
 	 * migrate the contents of a page to the specified target. If
 	 * migrate_mode is MIGRATE_ASYNC, it must not block.
diff --git a/mm/page_io.c b/mm/page_io.c
index b9fe25101a39..6b1465699c72 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -4,7 +4,7 @@
  *
  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
  *
- *  Swap reorganised 29.12.95, 
+ *  Swap reorganised 29.12.95,
  *  Asynchronous swapping added 30.12.95. Stephen Tweedie
  *  Removed race in async swapping. 14.4.1996. Bruno Haible
  *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
@@ -26,6 +26,22 @@
 #include <linux/uio.h>
 #include <linux/sched/task.h>
 
+/*
+ * Keep track of the kiocb we're using to do async DIO.  We have to
+ * refcount it until various things stop looking at the kiocb *after*
+ * calling ->ki_complete().
+ */
+struct swapfile_kiocb {
+	struct kiocb		iocb;
+	refcount_t		ref;
+};
+
+static void swapfile_put_kiocb(struct swapfile_kiocb *ki)
+{
+	if (refcount_dec_and_test(&ki->ref))
+		kfree(ki);
+}
+
 static void end_swap_bio_write(struct bio *bio)
 {
 	struct page *page = bio_first_page_all(bio);
@@ -302,11 +318,12 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc)
 
 		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
-		kiocb.ki_pos = page_file_offset(page);
+		kiocb.ki_pos	= page_file_offset(page);
+		kiocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP;
 
 		set_page_writeback(page);
 		unlock_page(page);
-		ret = mapping->a_ops->direct_IO(&kiocb, &from);
+		ret = mapping->a_ops->swap_rw(&kiocb, &from);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
 			ret = 0;
@@ -323,8 +340,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc)
 			 */
 			set_page_dirty(page);
 			ClearPageReclaim(page);
-			pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
-					   page_file_offset(page));
+			pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n",
+					   ret, page_file_offset(page));
 		}
 		end_page_writeback(page);
 		return ret;
@@ -352,6 +369,79 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc)
 	return 0;
 }
 
+static void swapfile_read_complete(struct page *page, long ret)
+{
+	if (ret == page_size(page)) {
+		count_vm_event(PSWPIN);
+		SetPageUptodate(page);
+	} else {
+		SetPageError(page);
+		pr_err_ratelimited("Read error (%ld) on dio swapfile (%llu)\n",
+				   ret, page_file_offset(page));
+	}
+
+	unlock_page(page);
+}
+
+static void __swapfile_read_complete(struct kiocb *iocb, long ret, long ret2)
+{
+	struct swapfile_kiocb *ki = container_of(iocb, struct swapfile_kiocb, iocb);
+
+	swapfile_read_complete(iocb->ki_swap_page, ret);
+	swapfile_put_kiocb(ki);
+}
+
+static void swapfile_read_sync(struct swap_info_struct *sis, struct page *page,
+			       struct iov_iter *to)
+{
+	struct kiocb kiocb;
+	struct file *swap_file = sis->swap_file;
+	int ret;
+
+	init_sync_kiocb(&kiocb, swap_file);
+	kiocb.ki_swap_page	= page;
+	kiocb.ki_pos		= page_file_offset(page);
+	kiocb.ki_flags		= IOCB_DIRECT | IOCB_SWAP;
+	ret = swap_file->f_mapping->a_ops->swap_rw(&kiocb, to);
+
+	swapfile_read_complete(page, ret);
+}
+
+static void swapfile_read(struct swap_info_struct *sis, struct page *page,
+			  bool synchronous)
+{
+	struct swapfile_kiocb *ki;
+	struct file *swap_file = sis->swap_file;
+	struct bio_vec bv = {
+		.bv_page = page,
+		.bv_len  = thp_size(page),
+		.bv_offset = 0
+	};
+	struct iov_iter to;
+	int ret;
+
+	iov_iter_bvec(&to, READ, &bv, 1, thp_size(page));
+
+	if (synchronous)
+		return swapfile_read_sync(sis, page, &to);
+
+	ki = kzalloc(sizeof(*ki), GFP_KERNEL);
+	if (!ki)
+		return;
+
+	refcount_set(&ki->ref, 2);
+	init_sync_kiocb(&ki->iocb, swap_file);
+	ki->iocb.ki_swap_page	= page;
+	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_SWAP;
+	ki->iocb.ki_pos		= page_file_offset(page);
+	ki->iocb.ki_complete	= __swapfile_read_complete;
+
+	ret = swap_file->f_mapping->a_ops->swap_rw(&ki->iocb, &to);
+	if (ret != -EIOCBQUEUED)
+		__swapfile_read_complete(&ki->iocb, ret, 0);
+	swapfile_put_kiocb(ki);
+}
+
 void swap_readpage(struct page *page, bool synchronous)
 {
 	struct bio *bio;
@@ -378,11 +468,7 @@ void swap_readpage(struct page *page, bool synchronous)
 	}
 
 	if (data_race(sis->flags & SWP_FS_OPS)) {
-		struct file *swap_file = sis->swap_file;
-		struct address_space *mapping = swap_file->f_mapping;
-
-		if (!mapping->a_ops->readpage(swap_file, page))
-			count_vm_event(PSWPIN);
+		swapfile_read(sis, page, synchronous);
 		goto out;
 	}
 



  parent reply	other threads:[~2021-09-24 17:20 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-24 17:17 [RFC][PATCH v3 0/9] mm: Use DIO for swap and fix NFS swapfiles David Howells
2021-09-24 17:18 ` [PATCH v3 1/9] mm: Remove the callback func argument from __swap_writepage() David Howells
2021-09-24 17:18 ` [PATCH v3 2/9] mm: Add 'supports' field to the address_space_operations to list features David Howells
2021-09-24 20:10   ` Matthew Wilcox
2021-09-24 17:18 ` [PATCH v3 3/9] mm: Make swap_readpage() void David Howells
2021-09-24 22:07   ` Matthew Wilcox
2021-09-24 17:18 ` [PATCH v3 4/9] Introduce IOCB_SWAP kiocb flag to trigger REQ_SWAP David Howells
2021-09-26 21:56   ` Dave Chinner
2021-09-24 17:18 ` David Howells [this message]
2021-09-24 17:18 ` [PATCH v3 6/9] mm: Make __swap_writepage() do async DIO if asked for it David Howells
2021-09-24 17:19 ` [PATCH v3 7/9] nfs: Fix write to swapfile failure due to generic_write_checks() David Howells
2021-09-24 17:19 ` [PATCH v3 8/9] block, btrfs, ext4, xfs: Implement swap_rw David Howells
2021-09-24 17:19 ` [PATCH v3 9/9] mm: Remove swap BIO paths and only use DIO paths David Howells
2021-09-25 14:56   ` Matthew Wilcox
2021-09-25 15:36   ` David Howells
2021-09-25 17:09     ` Matthew Wilcox
2021-09-26 23:08       ` Damien Le Moal
2021-09-27  1:25         ` Dave Chinner
2021-09-27  1:41           ` Damien Le Moal
2021-09-27 20:03     ` David Sterba
2021-09-25 23:42 ` [RFC][PATCH v3 0/9] mm: Use DIO for swap and fix NFS swapfiles Dave Chinner
2021-09-26  3:10   ` Matthew Wilcox
2021-09-26 22:36     ` Dave Chinner
2021-09-27 20:07 ` David Sterba
2021-09-28  3:11 ` NeilBrown
2021-09-30 15:54   ` Steve French
2021-09-30 15:54     ` Steve French
2021-09-29 15:45 ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=163250392134.2330363.2715808422502485629.stgit@warthog.procyon.org.uk \
    --to=dhowells@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=darrick.wong@oracle.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=jlayton@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=trond.myklebust@primarydata.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.