All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Jeff Layton <jlayton@kernel.org>, Steve French <smfrench@gmail.com>
Cc: David Howells <dhowells@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Marc Dionne <marc.dionne@auristor.com>,
	Paulo Alcantara <pc@manguebit.com>,
	Shyam Prasad N <sprasad@microsoft.com>,
	Tom Talpey <tom@talpey.com>,
	Dominique Martinet <asmadeus@codewreck.org>,
	Ilya Dryomov <idryomov@gmail.com>,
	Christian Brauner <christian@brauner.io>,
	linux-afs@lists.infradead.org, linux-cifs@vger.kernel.org,
	linux-nfs@vger.kernel.org, ceph-devel@vger.kernel.org,
	v9fs@lists.linux.dev, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-cachefs@redhat.com
Subject: [RFC PATCH 09/53] netfs: Implement unbuffered/DIO vs buffered I/O locking
Date: Fri, 13 Oct 2023 17:03:38 +0100	[thread overview]
Message-ID: <20231013160423.2218093-10-dhowells@redhat.com> (raw)
In-Reply-To: <20231013160423.2218093-1-dhowells@redhat.com>

Borrow NFS's direct-vs-buffered I/O locking into netfslib.  Similar code is
also used in ceph.

Modify it to have the correct checker annotations for i_rwsem lock
acquisition/release and to return -ERESTARTSYS if waits are interrupted.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
---
 fs/netfs/Makefile     |   1 +
 fs/netfs/locking.c    | 209 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/netfs.h |  10 ++
 3 files changed, 220 insertions(+)
 create mode 100644 fs/netfs/locking.c

diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index cd22554d9048..647ce1935674 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -4,6 +4,7 @@ netfs-y := \
 	buffered_read.o \
 	io.o \
 	iterator.o \
+	locking.o \
 	main.o \
 	misc.o \
 	objects.o
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
new file mode 100644
index 000000000000..fecca8ea6322
--- /dev/null
+++ b/fs/netfs/locking.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I/O and data path helper functionality.
+ *
+ * Borrowed from NFS Copyright (c) 2016 Trond Myklebust
+ */
+
+#include <linux/kernel.h>
+#include <linux/netfs.h>
+
+/*
+ * inode_dio_wait_interruptible - wait for outstanding DIO requests to finish
+ * @inode: inode to wait for
+ *
+ * Waits for all pending direct I/O requests to finish so that we can
+ * proceed with a truncate or equivalent operation.
+ *
+ * Must be called under a lock that serializes taking new references
+ * to i_dio_count, usually by inode->i_mutex.
+ */
+static int inode_dio_wait_interruptible(struct inode *inode)
+{
+	if (!atomic_read(&inode->i_dio_count))
+		return 0;
+
+	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
+	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
+
+	for (;;) {
+		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
+		if (!atomic_read(&inode->i_dio_count))
+			break;
+		if (signal_pending(current))
+			break;
+		schedule();
+	}
+	finish_wait(wq, &q.wq_entry);
+
+	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+}
+
+/* Call with exclusively locked inode->i_rwsem */
+static int netfs_block_o_direct(struct netfs_inode *ictx)
+{
+	if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags))
+		return 0;
+	clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+	return inode_dio_wait_interruptible(&ictx->inode);
+}
+
+/**
+ * netfs_start_io_read - declare the file is being used for buffered reads
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is unset,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that buffered read operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas direct I/O
+ * operations need to wait to grab an exclusive lock in order to set
+ * NETFS_ICTX_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
+ */
+int netfs_start_io_read(struct inode *inode)
+	__acquires(inode->i_rwsem)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	/* Be an optimist! */
+	if (down_read_interruptible(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) == 0)
+		return 0;
+	up_read(&inode->i_rwsem);
+
+	/* Slow path.... */
+	if (down_write_killable(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (netfs_block_o_direct(ictx) < 0) {
+		up_write(&inode->i_rwsem);
+		return -ERESTARTSYS;
+	}
+	downgrade_write(&inode->i_rwsem);
+	return 0;
+}
+
+/**
+ * netfs_end_io_read - declare that the buffered read operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void netfs_end_io_read(struct inode *inode)
+	__releases(inode->i_rwsem)
+{
+	up_read(&inode->i_rwsem);
+}
+
+/**
+ * netfs_start_io_write - declare the file is being used for buffered writes
+ * @inode: file inode
+ *
+ * Declare that a buffered read operation is about to start, and ensure
+ * that we block all direct I/O.
+ */
+int netfs_start_io_write(struct inode *inode)
+	__acquires(inode->i_rwsem)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	if (down_write_killable(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (netfs_block_o_direct(ictx) < 0) {
+		up_write(&inode->i_rwsem);
+		return -ERESTARTSYS;
+	}
+	return 0;
+}
+
+/**
+ * netfs_end_io_write - declare that the buffered write operation is done
+ * @inode: file inode
+ *
+ * Declare that a buffered write operation is done, and release the
+ * lock on inode->i_rwsem.
+ */
+void netfs_end_io_write(struct inode *inode)
+	__releases(inode->i_rwsem)
+{
+	up_write(&inode->i_rwsem);
+}
+
+/* Call with exclusively locked inode->i_rwsem */
+static int netfs_block_buffered(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+	int ret;
+
+	if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) {
+		set_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+		if (inode->i_mapping->nrpages != 0) {
+			unmap_mapping_range(inode->i_mapping, 0, 0, 0);
+			ret = filemap_fdatawait(inode->i_mapping);
+			if (ret < 0) {
+				clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
+				return ret;
+			}
+		}
+	}
+	return 0;
+}
+
+/**
+ * netfs_start_io_direct - declare the file is being used for direct i/o
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is about to start, and ensure
+ * that we block all buffered I/O.
+ * On exit, the function ensures that the NETFS_ICTX_ODIRECT flag is set,
+ * and holds a shared lock on inode->i_rwsem to ensure that the flag
+ * cannot be changed.
+ * In practice, this means that direct I/O operations are allowed to
+ * execute in parallel, thanks to the shared lock, whereas buffered I/O
+ * operations need to wait to grab an exclusive lock in order to clear
+ * NETFS_ICTX_ODIRECT.
+ * Note that buffered writes and truncates both take a write lock on
+ * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
+ */
+int netfs_start_io_direct(struct inode *inode)
+	__acquires(inode->i_rwsem)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+	int ret;
+
+	/* Be an optimist! */
+	if (down_read_interruptible(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	if (test_bit(NETFS_ICTX_ODIRECT, &ictx->flags) != 0)
+		return 0;
+	up_read(&inode->i_rwsem);
+
+	/* Slow path.... */
+	if (down_write_killable(&inode->i_rwsem) < 0)
+		return -ERESTARTSYS;
+	ret = netfs_block_buffered(inode);
+	if (ret < 0) {
+		up_write(&inode->i_rwsem);
+		return ret;
+	}
+	downgrade_write(&inode->i_rwsem);
+	return 0;
+}
+
+/**
+ * netfs_end_io_direct - declare that the direct i/o operation is done
+ * @inode: file inode
+ *
+ * Declare that a direct I/O operation is done, and release the shared
+ * lock on inode->i_rwsem.
+ */
+void netfs_end_io_direct(struct inode *inode)
+	__releases(inode->i_rwsem)
+{
+	up_read(&inode->i_rwsem);
+}
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 02e888c170da..33d4487a91e9 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -131,6 +131,8 @@ struct netfs_inode {
 	loff_t			remote_i_size;	/* Size of the remote file */
 	loff_t			zero_point;	/* Size after which we assume there's no data
 						 * on the server */
+	unsigned long		flags;
+#define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 };
 
 /*
@@ -315,6 +317,13 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
 				struct iov_iter *new,
 				iov_iter_extraction_t extraction_flags);
 
+int netfs_start_io_read(struct inode *inode);
+void netfs_end_io_read(struct inode *inode);
+int netfs_start_io_write(struct inode *inode);
+void netfs_end_io_write(struct inode *inode);
+int netfs_start_io_direct(struct inode *inode);
+void netfs_end_io_direct(struct inode *inode);
+
 /**
  * netfs_inode - Get the netfs inode context from the inode
  * @inode: The inode to query
@@ -341,6 +350,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
 	ctx->ops = ops;
 	ctx->remote_i_size = i_size_read(&ctx->inode);
 	ctx->zero_point = ctx->remote_i_size;
+	ctx->flags = 0;
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif


  parent reply	other threads:[~2023-10-13 16:05 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-13 16:03 [RFC PATCH 00/53] netfs, afs, cifs: Delegate high-level I/O to netfslib David Howells
2023-10-13 16:03 ` [RFC PATCH 01/53] netfs: Add a procfile to list in-progress requests David Howells
2023-10-13 16:03 ` [RFC PATCH 02/53] netfs: Track the fpos above which the server has no data David Howells
2023-10-13 16:03 ` [RFC PATCH 03/53] netfs: Note nonblockingness in the netfs_io_request struct David Howells
2023-10-13 16:03 ` [RFC PATCH 04/53] netfs: Allow the netfs to make the io (sub)request alloc larger David Howells
2023-10-13 16:03 ` [RFC PATCH 05/53] netfs: Add a ->free_subrequest() op David Howells
2023-10-13 16:03 ` [RFC PATCH 06/53] afs: Don't use folio->private to record partial modification David Howells
2023-10-13 16:03 ` [RFC PATCH 07/53] netfs: Provide invalidate_folio and release_folio calls David Howells
2023-10-16 15:50   ` Jeff Layton
2023-10-13 16:03 ` [RFC PATCH 08/53] netfs: Add rsize to netfs_io_request David Howells
2023-10-16 15:54   ` Jeff Layton
2023-10-16 16:19   ` David Howells
2023-10-13 16:03 ` David Howells [this message]
2023-10-16 15:56   ` [RFC PATCH 09/53] netfs: Implement unbuffered/DIO vs buffered I/O locking Jeff Layton
2023-10-16 16:09   ` David Howells
2023-10-13 16:03 ` [RFC PATCH 10/53] netfs: Add iov_iters to (sub)requests to describe various buffers David Howells
2023-10-13 16:03 ` [RFC PATCH 11/53] netfs: Add support for DIO buffering David Howells
2023-10-16 16:10   ` Jeff Layton
2023-11-17 20:20   ` David Howells
2023-10-13 16:03 ` [RFC PATCH 12/53] netfs: Provide tools to create a buffer in an xarray David Howells
2023-10-13 17:27   ` Matthew Wilcox
2023-10-18 15:03   ` Jeff Layton
2023-11-17 20:11   ` David Howells
2023-10-13 16:03 ` [RFC PATCH 13/53] netfs: Add bounce buffering support David Howells
2023-10-13 16:03 ` [RFC PATCH 14/53] netfs: Add func to calculate pagecount/size-limited span of an iterator David Howells
2023-10-13 16:03 ` [RFC PATCH 15/53] netfs: Limit subrequest by size or number of segments David Howells
2023-10-13 16:03 ` [RFC PATCH 16/53] netfs: Export netfs_put_subrequest() and some tracepoints David Howells
2023-10-13 16:03 ` [RFC PATCH 17/53] netfs: Extend the netfs_io_*request structs to handle writes David Howells
2023-10-13 16:03 ` [RFC PATCH 18/53] netfs: Add a hook to allow tell the netfs to update its i_size David Howells
2023-10-13 16:03 ` [RFC PATCH 19/53] netfs: Make netfs_put_request() handle a NULL pointer David Howells
2023-10-13 16:03 ` [RFC PATCH 20/53] fscache: Add a function to begin an cache op from a netfslib request David Howells
2023-10-13 16:03 ` [RFC PATCH 21/53] netfs: Make the refcounting of netfs_begin_read() easier to use David Howells
2023-10-13 16:03 ` [RFC PATCH 22/53] netfs: Prep to use folio->private for write grouping and streaming write David Howells
2023-10-13 16:03 ` [RFC PATCH 23/53] netfs: Dispatch write requests to process a writeback slice David Howells
2023-10-13 16:03 ` [RFC PATCH 24/53] netfs: Provide func to copy data to pagecache for buffered write David Howells
2023-10-13 16:03 ` [RFC PATCH 25/53] netfs: Make netfs_read_folio() handle streaming-write pages David Howells
2023-10-13 16:03 ` [RFC PATCH 26/53] netfs: Allocate multipage folios in the writepath David Howells
2023-10-13 16:03 ` [RFC PATCH 27/53] netfs: Implement support for unbuffered/DIO read David Howells
2023-10-13 16:03 ` [RFC PATCH 28/53] netfs: Implement unbuffered/DIO write support David Howells
2023-10-13 16:03 ` [RFC PATCH 29/53] netfs: Implement buffered write API David Howells
2023-10-13 16:03 ` [RFC PATCH 30/53] netfs: Allow buffered shared-writeable mmap through netfs_page_mkwrite() David Howells
2023-10-13 16:04 ` [RFC PATCH 31/53] netfs: Provide netfs_file_read_iter() David Howells
2023-10-13 16:04 ` [RFC PATCH 32/53] netfs: Provide a writepages implementation David Howells
2023-10-13 16:04 ` [RFC PATCH 33/53] netfs: Provide minimum blocksize parameter David Howells
2023-10-13 16:04 ` [RFC PATCH 34/53] netfs: Make netfs_skip_folio_read() take account of blocksize David Howells
2023-10-13 16:04 ` [RFC PATCH 35/53] netfs: Perform content encryption David Howells
2023-10-13 16:04 ` [RFC PATCH 36/53] netfs: Decrypt encrypted content David Howells
2023-10-13 16:04 ` [RFC PATCH 37/53] netfs: Support decryption on ubuffered/DIO read David Howells
2023-10-13 16:04 ` [RFC PATCH 38/53] netfs: Support encryption on Unbuffered/DIO write David Howells
2023-10-13 16:04 ` [RFC PATCH 39/53] netfs: Provide a launder_folio implementation David Howells
2023-10-13 16:04 ` [RFC PATCH 40/53] netfs: Implement a write-through caching option David Howells
2023-10-13 16:04 ` [RFC PATCH 41/53] netfs: Rearrange netfs_io_subrequest to put request pointer first David Howells
2023-10-13 16:04 ` [RFC PATCH 42/53] afs: Use the netfs write helpers David Howells
2023-10-13 16:04 ` [RFC PATCH 43/53] cifs: Replace cifs_readdata with a wrapper around netfs_io_subrequest David Howells
2023-10-13 16:04 ` [RFC PATCH 44/53] cifs: Share server EOF pos with netfslib David Howells
2023-10-13 16:04 ` [RFC PATCH 45/53] cifs: Replace cifs_writedata with a wrapper around netfs_io_subrequest David Howells
2023-10-13 16:04 ` [RFC PATCH 46/53] cifs: Use more fields from netfs_io_subrequest David Howells
2023-10-13 16:04 ` [RFC PATCH 47/53] cifs: Make wait_mtu_credits take size_t args David Howells
2023-10-13 16:04 ` [RFC PATCH 48/53] cifs: Implement netfslib hooks David Howells
2023-10-13 16:04 ` [RFC PATCH 49/53] cifs: Move cifs_loose_read_iter() and cifs_file_write_iter() to file.c David Howells
2023-10-13 16:04 ` [RFC PATCH 50/53] cifs: Cut over to using netfslib David Howells
2023-10-13 16:04 ` [RFC PATCH 51/53] cifs: Remove some code that's no longer used, part 1 David Howells
2023-10-13 16:04 ` [RFC PATCH 52/53] cifs: Remove some code that's no longer used, part 2 David Howells
2023-10-13 16:04 ` [RFC PATCH 53/53] cifs: Remove some code that's no longer used, part 3 David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231013160423.2218093-10-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=asmadeus@codewreck.org \
    --cc=ceph-devel@vger.kernel.org \
    --cc=christian@brauner.io \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=linux-afs@lists.infradead.org \
    --cc=linux-cachefs@redhat.com \
    --cc=linux-cifs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=marc.dionne@auristor.com \
    --cc=netdev@vger.kernel.org \
    --cc=pc@manguebit.com \
    --cc=smfrench@gmail.com \
    --cc=sprasad@microsoft.com \
    --cc=tom@talpey.com \
    --cc=v9fs@lists.linux.dev \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.