All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	Waiman Long <longman@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Will Deacon <will@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-ext4@vger.kernel.org, cluster-devel@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 07/12] iomap: allow holding i_rwsem until aio completion
Date: Tue, 14 Jan 2020 17:12:20 +0100	[thread overview]
Message-ID: <20200114161225.309792-8-hch@lst.de> (raw)
In-Reply-To: <20200114161225.309792-1-hch@lst.de>

The direct I/O code currently uses a hand crafted i_dio_count that needs
to be incremented under i_rwsem and then is decremented when I/O
completes.  That scheme means file system code needs to be very careful
to wait for i_dio_count to reach zero under i_rwsem in various places
that are very cumbersome to get rid of.  It also means we can't get the
effect of an exclusive i_rwsem for actually asynchronous I/O, forcing
pointless synchronous execution of sub-blocksize writes.

Replace the i_dio_count scheme with holding i_rwsem over the duration
of the whole I/O.  While this introduces a non-owner unlock that isn't
nice to RT workloads, the open coded locking primitive using i_dio_count
isn't any better.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/direct-io.c  | 44 +++++++++++++++++++++++++++++++++++++------
 include/linux/iomap.h |  2 ++
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index e706329d71a0..0113ac33b0a0 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -70,7 +70,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 	dio->submit.cookie = submit_bio(bio);
 }
 
-static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+static ssize_t iomap_dio_complete(struct iomap_dio *dio, bool unlock)
 {
 	const struct iomap_dio_ops *dops = dio->dops;
 	struct kiocb *iocb = dio->iocb;
@@ -112,6 +112,13 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 			dio_warn_stale_pagecache(iocb->ki_filp);
 	}
 
+	if (unlock) {
+		if (dio->flags & IOMAP_DIO_RWSEM_EXCL)
+			up_write(&inode->i_rwsem);
+		else if (dio->flags & IOMAP_DIO_RWSEM_SHARED)
+			up_read(&inode->i_rwsem);
+	}
+
 	/*
 	 * If this is a DSYNC write, make sure we push it to stable storage now
 	 * that we've written data.
@@ -129,8 +136,22 @@ static void iomap_dio_complete_work(struct work_struct *work)
 {
 	struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
 	struct kiocb *iocb = dio->iocb;
+	struct inode *inode = file_inode(iocb->ki_filp);
 
-	iocb->ki_complete(iocb, iomap_dio_complete(dio), 0);
+	/*
+	 * XXX: For reads this code is directly called from bio ->end_io, which
+	 * often is hard or softirq context.  In that case lockdep records the
+	 * below as lock acquisitions from irq context and causes warnings.
+	 */
+	if (dio->flags & IOMAP_DIO_RWSEM_EXCL) {
+		rwsem_acquire(&inode->i_rwsem.dep_map, 0, 0, _THIS_IP_);
+		if (IS_ENABLED(CONFIG_RWSEM_SPIN_ON_OWNER))
+			atomic_long_set(&inode->i_rwsem.owner, (long)current);
+	} else if (dio->flags & IOMAP_DIO_RWSEM_SHARED) {
+		rwsem_acquire_read(&inode->i_rwsem.dep_map, 0, 0, _THIS_IP_);
+	}
+
+	iocb->ki_complete(iocb, iomap_dio_complete(dio, true), 0);
 }
 
 /*
@@ -430,7 +451,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->i_size = i_size_read(inode);
 	dio->dops = dops;
 	dio->error = 0;
-	dio->flags = 0;
+	dio->flags = dio_flags;
 
 	dio->submit.iter = iter;
 	dio->submit.waiter = current;
@@ -551,8 +572,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->wait_for_completion = wait_for_completion;
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!wait_for_completion)
-			return -EIOCBQUEUED;
-
+			goto async_completion;
 		for (;;) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			if (!READ_ONCE(dio->submit.waiter))
@@ -567,10 +587,22 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		__set_current_state(TASK_RUNNING);
 	}
 
-	return iomap_dio_complete(dio);
+	return iomap_dio_complete(dio, false);
 
 out_free_dio:
 	kfree(dio);
 	return ret;
+
+async_completion:
+	/*
+	 * We are returning to userspace now, but i_rwsem is still held until
+	 * the I/O completion comes back.
+	 */
+	if (dio_flags & (IOMAP_DIO_RWSEM_EXCL | IOMAP_DIO_RWSEM_SHARED))
+		rwsem_release(&inode->i_rwsem.dep_map, _THIS_IP_);
+	if ((dio_flags & IOMAP_DIO_RWSEM_EXCL) &&
+	    IS_ENABLED(CONFIG_RWSEM_SPIN_ON_OWNER))
+		atomic_long_set(&inode->i_rwsem.owner, RWSEM_OWNER_UNKNOWN);
+	return -EIOCBQUEUED;
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3faeb8fd0961..f259bb979d7f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -249,6 +249,8 @@ int iomap_writepages(struct address_space *mapping,
 #define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
 #define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
 #define IOMAP_DIO_SYNCHRONOUS	(1 << 2)	/* no async completion */
+#define IOMAP_DIO_RWSEM_EXCL	(1 << 3)	/* holds exclusive i_rwsem */
+#define IOMAP_DIO_RWSEM_SHARED	(1 << 4)	/* holds shared i_rwsem */
 
 struct iomap_dio_ops {
 	int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
-- 
2.24.1


  parent reply	other threads:[~2020-01-14 16:13 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-14 16:12 RFC: hold i_rwsem until aio completes Christoph Hellwig
2020-01-14 16:12 ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 01/12] mm: fix a comment in sys_swapon Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-02-10 23:29   ` Andrew Morton
2020-02-10 23:29     ` [Cluster-devel] " Andrew Morton
2020-02-12  7:37     ` Christoph Hellwig
2020-02-12  7:37       ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 02/12] locking/rwsem: Exit early when held by an anonymous owner Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 18:17   ` Waiman Long
2020-01-14 18:17     ` [Cluster-devel] " Waiman Long
2020-01-14 18:25     ` Christoph Hellwig
2020-01-14 18:25       ` [Cluster-devel] " Christoph Hellwig
2020-01-14 18:33       ` Waiman Long
2020-01-14 18:33         ` [Cluster-devel] " Waiman Long
2020-01-14 18:55       ` Waiman Long
2020-01-14 18:55         ` [Cluster-devel] " Waiman Long
2020-01-14 16:12 ` [PATCH 03/12] xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 04/12] gfs2: move setting current->backing_dev_info Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 05/12] gfs2: fix O_SYNC write handling Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-27  9:03   ` Christoph Hellwig
2020-01-28 16:57     ` Bob Peterson
2020-02-06 15:31   ` Andreas Gruenbacher
2020-02-06 15:31     ` Andreas Gruenbacher
2020-02-06 15:31     ` Andreas Gruenbacher
2020-01-14 16:12 ` [PATCH 06/12] iomap: pass a flags value to iomap_dio_rw Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` Christoph Hellwig [this message]
2020-01-14 16:12 ` [PATCH 08/12] ext4: hold i_rwsem until AIO completes Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 21:50   ` Theodore Y. Ts'o
2020-01-14 21:50     ` [Cluster-devel] " Theodore Y. Ts'o
2020-01-15  6:48     ` Christoph Hellwig
2020-01-15  6:48       ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 09/12] gfs2: " Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 10/12] xfs: " Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 11/12] xfs: don't set IOMAP_DIO_SYNCHRONOUS for unaligned I/O Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 16:12 ` [PATCH 12/12] iomap: remove the inode_dio_begin/end calls Christoph Hellwig
2020-01-14 16:12   ` [Cluster-devel] " Christoph Hellwig
2020-01-14 18:47 ` RFC: hold i_rwsem until aio completes Matthew Wilcox
2020-01-14 18:47   ` [Cluster-devel] " Matthew Wilcox
2020-01-15  6:54   ` Christoph Hellwig
2020-01-15  6:54     ` [Cluster-devel] " Christoph Hellwig
2020-01-14 19:27 ` Jason Gunthorpe
2020-01-14 19:27   ` [Cluster-devel] " Jason Gunthorpe
2020-01-15  6:56   ` Christoph Hellwig
2020-01-15  6:56     ` [Cluster-devel] " Christoph Hellwig
2020-01-15 13:24     ` Jason Gunthorpe
2020-01-15 13:24       ` [Cluster-devel] " Jason Gunthorpe
2020-01-15 14:33       ` Peter Zijlstra
2020-01-15 14:33         ` [Cluster-devel] " Peter Zijlstra
2020-01-15 14:49         ` Jason Gunthorpe
2020-01-15 14:49           ` [Cluster-devel] " Jason Gunthorpe
2020-01-15 19:03           ` Waiman Long
2020-01-15 19:03             ` [Cluster-devel] " Waiman Long
2020-01-15 19:07             ` Christoph Hellwig
2020-01-15 19:07               ` [Cluster-devel] " Christoph Hellwig
2020-01-18 22:40         ` Matthew Wilcox
2020-01-18 22:40           ` [Cluster-devel] " Matthew Wilcox
2020-01-15 15:36       ` Christoph Hellwig
2020-01-15 15:36         ` [Cluster-devel] " Christoph Hellwig
2020-01-15 16:26         ` Jason Gunthorpe
2020-01-15 16:26           ` [Cluster-devel] " Jason Gunthorpe
2020-01-16 14:00 ` Jan Kara
2020-01-16 14:00   ` [Cluster-devel] " Jan Kara
2020-02-03 17:44   ` Christoph Hellwig
2020-02-03 17:44     ` [Cluster-devel] " Christoph Hellwig
2020-01-18  9:28 ` Dave Chinner
2020-01-18  9:28   ` [Cluster-devel] " Dave Chinner
2020-02-03 17:46   ` Christoph Hellwig
2020-02-03 17:46     ` [Cluster-devel] " Christoph Hellwig
2020-02-03 23:02     ` Dave Chinner
2020-02-03 23:02       ` [Cluster-devel] " Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200114161225.309792-8-hch@lst.de \
    --to=hch@lst.de \
    --cc=akpm@linux-foundation.org \
    --cc=cluster-devel@redhat.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.