All of lore.kernel.org
 help / color / mirror / Atom feed
From: NeilBrown <neilb@suse.de>
To: Trond Myklebust <trond.myklebust@hammerspace.com>,
	Anna Schumaker <anna.schumaker@netapp.com>,
	Chuck Lever <chuck.lever@oracle.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mel Gorman <mgorman@suse.de>,
	Christoph Hellwig <hch@infradead.org>,
	David Howells <dhowells@redhat.com>
Cc: linux-nfs@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 06/23] MM: introduce ->swap_rw and use it for reads from SWP_FS_OPS swap-space
Date: Mon, 24 Jan 2022 14:48:32 +1100	[thread overview]
Message-ID: <164299611276.26253.13667789323141516970.stgit@noble.brown> (raw)
In-Reply-To: <164299573337.26253.7538614611220034049.stgit@noble.brown>

swap currently uses ->readpage to read swap pages.  This can only
request one page at a time from the filesystem, which is not most
efficient.

swap uses ->direct_IO for writes which while this is adequate is an
inappropriate over-loading.  ->direct_IO may need to had handle allocate
space for holes or other details that are not relevant for swap.

So this patch introduces a new address_space operation: ->swap_rw.
In this patch it is used for reads, and a subsequent patch will switch
writes to use it.

No filesystem yet supports ->swap_rw, but that is not a problem because
no filesystem actually works with filesystem-based swap.
Only two filesystems set SWP_FS_OPS:
- cifs sets the flag, but ->direct_IO always fails so swap cannot work.
- nfs sets the flag, but ->direct_IO calls generic_write_checks()
  which has failed on swap files for several releases.

To ensure that a NULL ->swap_rw isn't called, ->activate_swap() for both
NFS and cifs are changed to fail if ->swap_rw is not set.  This can be
removed if/when the function is added.

Future patches will restore swap-over-NFS functionality.

To submit an async read with ->swap_rw() we need to allocate a structure
to hold the kiocb and other details.  swap_readpage() cannot handle
transient failure, so we create a mempool to provide the structures.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/cifs/file.c     |    4 +++
 fs/nfs/file.c      |    4 +++
 include/linux/fs.h |    1 +
 mm/page_io.c       |   68 +++++++++++++++++++++++++++++++++++++++++++++++-----
 mm/swap.h          |    1 +
 mm/swapfile.c      |    5 ++++
 6 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c795d4a9ec4a..b3898c4aa5ad 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -4946,6 +4946,10 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
 
 	cifs_dbg(FYI, "swap activate\n");
 
+	if (!swap_file->f_mapping->a_ops->swap_rw)
+		/* Cannot support swap */
+		return -EINVAL;
+
 	spin_lock(&inode->i_lock);
 	blocks = inode->i_blocks;
 	isize = inode->i_size;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d5aa55c7edb0..3dbef2c31567 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -492,6 +492,10 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
 	struct inode *inode = file->f_mapping->host;
 
+	if (!file->f_mapping->a_ops->swap_rw)
+		/* Cannot support swap */
+		return -EINVAL;
+
 	spin_lock(&inode->i_lock);
 	blocks = inode->i_blocks;
 	isize = inode->i_size;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f3daaea16554..4fade3b20c87 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -409,6 +409,7 @@ struct address_space_operations {
 	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
 				sector_t *span);
 	void (*swap_deactivate)(struct file *file);
+	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
 };
 
 extern const struct address_space_operations empty_aops;
diff --git a/mm/page_io.c b/mm/page_io.c
index 34b12d6f94d7..e90a3231f225 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -284,6 +284,25 @@ static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 #define bio_associate_blkg_from_page(bio, page)		do { } while (0)
 #endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */
 
+struct swap_iocb {
+	struct kiocb		iocb;
+	struct bio_vec		bvec;
+};
+static mempool_t *sio_pool;
+
+int sio_pool_init(void)
+{
+	if (!sio_pool) {
+		mempool_t *pool = mempool_create_kmalloc_pool(
+			SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
+		if (cmpxchg(&sio_pool, NULL, pool))
+			mempool_destroy(pool);
+	}
+	if (!sio_pool)
+		return -ENOMEM;
+	return 0;
+}
+
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		bio_end_io_t end_write_func)
 {
@@ -355,6 +374,48 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	return 0;
 }
 
+static void sio_read_complete(struct kiocb *iocb, long ret)
+{
+	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
+	struct page *page = sio->bvec.bv_page;
+
+	if (ret != 0 && ret != PAGE_SIZE) {
+		SetPageError(page);
+		ClearPageUptodate(page);
+		pr_alert_ratelimited("Read-error on swap-device\n");
+	} else {
+		SetPageUptodate(page);
+		count_vm_event(PSWPIN);
+	}
+	unlock_page(page);
+	mempool_free(sio, sio_pool);
+}
+
+static int swap_readpage_fs(struct page *page)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+	struct file *swap_file = sis->swap_file;
+	struct address_space *mapping = swap_file->f_mapping;
+	struct iov_iter from;
+	struct swap_iocb *sio;
+	loff_t pos = page_file_offset(page);
+	int ret;
+
+	sio = mempool_alloc(sio_pool, GFP_KERNEL);
+	init_sync_kiocb(&sio->iocb, swap_file);
+	sio->iocb.ki_pos = pos;
+	sio->iocb.ki_complete = sio_read_complete;
+	sio->bvec.bv_page = page;
+	sio->bvec.bv_len = PAGE_SIZE;
+	sio->bvec.bv_offset = 0;
+
+	iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE);
+	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+	if (ret != -EIOCBQUEUED)
+		sio_read_complete(&sio->iocb, ret);
+	return ret;
+}
+
 int swap_readpage(struct page *page, bool synchronous)
 {
 	struct bio *bio;
@@ -381,12 +442,7 @@ int swap_readpage(struct page *page, bool synchronous)
 	}
 
 	if (data_race(sis->flags & SWP_FS_OPS)) {
-		struct file *swap_file = sis->swap_file;
-		struct address_space *mapping = swap_file->f_mapping;
-
-		ret = mapping->a_ops->readpage(swap_file, page);
-		if (!ret)
-			count_vm_event(PSWPIN);
+		ret = swap_readpage_fs(page);
 		goto out;
 	}
 
diff --git a/mm/swap.h b/mm/swap.h
index 5c676e55f288..e8ee995cf8d8 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -3,6 +3,7 @@
 #include <linux/blk_types.h> /* for bio_end_io_t */
 
 /* linux/mm/page_io.c */
+int sio_pool_init(void);
 int swap_readpage(struct page *page, bool do_poll);
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void end_swap_bio_write(struct bio *bio);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ed6028aea8bf..c800c17bf0c8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2350,6 +2350,11 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 		if (ret < 0)
 			return ret;
 		sis->flags |= SWP_ACTIVATED;
+		if ((sis->flags & SWP_FS_OPS) &&
+		    sio_pool_init() != 0) {
+			destroy_swap_extents(sis);
+			return -ENOMEM;
+		}
 		return ret;
 	}
 



  parent reply	other threads:[~2022-01-24  3:51 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-24  3:48 [PATCH 00/23 V3] Repair SWAP-over_NFS NeilBrown
2022-01-24  3:48 ` [PATCH 05/23] MM: reclaim mustn't enter FS for SWP_FS_OPS swap-space NeilBrown
2022-01-24  7:31   ` Christoph Hellwig
2022-01-24  3:48 ` [PATCH 03/23] MM: drop swap_set_page_dirty NeilBrown
2022-01-24  7:28   ` Christoph Hellwig
2022-01-24  3:48 ` [PATCH 14/23] NFS: swap IO handling is slightly different for O_DIRECT IO NeilBrown
2022-01-24  8:58   ` Christoph Hellwig
2022-01-24 13:22   ` Mark Hemment
2022-01-26 22:51     ` NeilBrown
2022-01-24  3:48 ` [PATCH 22/23] NFS: swap-out must always use STABLE writes NeilBrown
2022-01-26  3:45   ` Trond Myklebust
2022-01-26 21:42     ` NeilBrown
2022-01-24  3:48 ` [PATCH 23/23] SUNRPC: lock against ->sock changing during sysfs read NeilBrown
2022-01-24  3:48 ` [PATCH 08/23] DOC: update documentation for swap_activate and swap_rw NeilBrown
2022-01-24  8:50   ` Christoph Hellwig
2022-01-24  3:48 ` [PATCH 07/23] MM: perform async writes to SWP_FS_OPS swap-space using ->swap_rw NeilBrown
2022-01-24  8:49   ` Christoph Hellwig
2022-01-24  3:48 ` [PATCH 02/23] MM: extend block-plugging to cover all swap reads with read-ahead NeilBrown
2022-01-24  7:27   ` Christoph Hellwig
2022-01-26 21:47     ` NeilBrown
2022-01-26 23:09       ` Hugh Dickins
2022-01-27  0:32         ` NeilBrown
2022-01-24  3:48 ` [PATCH 16/23] SUNRPC/auth: async tasks mustn't block waiting for memory NeilBrown
2022-01-24  3:48 ` [PATCH 04/23] MM: move responsibility for setting SWP_FS_OPS to ->swap_activate NeilBrown
2022-01-24  7:30   ` Christoph Hellwig
2022-01-24  3:48 ` NeilBrown [this message]
2022-01-24  8:48   ` [PATCH 06/23] MM: introduce ->swap_rw and use it for reads from SWP_FS_OPS swap-space Christoph Hellwig
2022-01-24  3:48 ` [PATCH 15/23] SUNRPC/call_alloc: async tasks mustn't block waiting for memory NeilBrown
2022-01-24  3:48 ` [PATCH 20/23] SUNRPC: improve 'swap' handling: scheduling and PF_MEMALLOC NeilBrown
2022-01-24  3:48 ` [PATCH 01/23] MM: create new mm/swap.h header file NeilBrown
2022-02-07 13:51   ` Geert Uytterhoeven
2022-01-24  3:48 ` [PATCH 09/23] MM: submit multipage reads for SWP_FS_OPS swap-space NeilBrown
2022-01-24  8:25   ` kernel test robot
2022-01-24  8:25     ` kernel test robot
2022-01-24  8:52   ` Christoph Hellwig
2022-01-24  9:27   ` kernel test robot
2022-01-24  9:27     ` kernel test robot
2022-01-24 13:16   ` Mark Hemment
2022-01-26 22:04     ` NeilBrown
2022-02-08 11:07   ` Geert Uytterhoeven
2022-01-24  3:48 ` [PATCH 12/23] NFS: remove IS_SWAPFILE hack NeilBrown
2022-01-24  8:56   ` Christoph Hellwig
2022-01-24  3:48 ` [PATCH 19/23] NFS: discard NFS_RPC_SWAPFLAGS and RPC_TASK_ROOTCREDS NeilBrown
2022-01-24  3:48 ` [PATCH 17/23] SUNRPC/xprt: async tasks mustn't block waiting for memory NeilBrown
2022-01-24  3:48 ` [PATCH 18/23] SUNRPC: remove scheduling boost for "SWAPPER" tasks NeilBrown
2022-01-24  3:48 ` [PATCH 21/23] NFSv4: keep state manager thread active if swap is enabled NeilBrown
2022-01-24  3:48 ` [PATCH 11/23] VFS: Add FMODE_CAN_ODIRECT file flag NeilBrown
2022-01-24  8:56   ` Christoph Hellwig
2022-01-26 22:14     ` NeilBrown
2022-01-24  3:48 ` [PATCH 10/23] MM: submit multipage write for SWP_FS_OPS swap-space NeilBrown
2022-01-24  8:55   ` Christoph Hellwig
2022-01-24 10:29   ` kernel test robot
2022-01-24 10:29     ` kernel test robot
2022-01-24  3:48 ` [PATCH 13/23] NFS: rename nfs_direct_IO and use as ->swap_rw NeilBrown
2022-01-24  8:57   ` Christoph Hellwig
2022-02-07 17:55 ` [PATCH 00/23 V3] Repair SWAP-over_NFS Geert Uytterhoeven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=164299611276.26253.13667789323141516970.stgit@noble.brown \
    --to=neilb@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=anna.schumaker@netapp.com \
    --cc=chuck.lever@oracle.com \
    --cc=dhowells@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.