* [PATCH 2/3] fs: use RCU for freeze_super() vs. __sb_start_write()
[not found] <20150630220455.7DE1D236@viggo.jf.intel.com>
@ 2015-06-30 22:04 ` Dave Hansen
2015-06-30 22:04 ` [PATCH 3/3] fs: replace memory barrier in __sb_end_write() with RCU Dave Hansen
1 sibling, 0 replies; 2+ messages in thread
From: Dave Hansen @ 2015-06-30 22:04 UTC (permalink / raw)
To: dave; +Cc: dave.hansen, viro, linux-fsdevel, linux-kernel, paulmck, ak
From: Dave Hansen <dave.hansen@linux.intel.com>
Currently, __sb_start_write() and freeze_super() can race with
each other. __sb_start_write() uses a smp_mb() to ensure that
freeze_super() can see its write to sb->s_writers.counter and
that it can see freeze_super()'s update to sb->s_writers.frozen.
This all seems to work fine.
But, this smp_mb() makes __sb_start_write() the single hottest
function in the kernel if I sit in a loop and do tiny write()s to
tmpfs over and over. This is on a very small 2-core system, so
it will only get worse on larger systems.
This _seems_ like an ideal case for RCU. __sb_start_write() is
the RCU read-side and is in a very fast, performance-sensitive
path. freeze_super() is the RCU writer and is in an extremely
rare non-performance-sensitive path.
Instead of doing an smp_mb() in __sb_start_write(), we do
rcu_read_lock(). This ensures that a CPU doing freeze_super()
can not proceed past its synchronize_rcu() until the grace
period has ended and the 's_writers.frozen = SB_FREEZE_WRITE'
is visible to __sb_start_write().
This patch increases the number of writes/second that I can do
by 5.6% over the last patch. The combined total increase is
19.7%.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
---
b/fs/super.c | 63 ++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 39 insertions(+), 24 deletions(-)
diff -puN fs/super.c~rcu-__sb_start_write fs/super.c
--- a/fs/super.c~rcu-__sb_start_write 2015-06-30 15:03:57.191416186 -0700
+++ b/fs/super.c 2015-06-30 15:03:57.195416366 -0700
@@ -1190,27 +1190,21 @@ static void acquire_freeze_lock(struct s
*/
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
-retry:
- if (unlikely(sb->s_writers.frozen >= level)) {
+ rcu_read_lock();
+ while (unlikely(sb->s_writers.frozen >= level)) {
+ rcu_read_unlock();
if (!wait)
return 0;
wait_event(sb->s_writers.wait_unfrozen,
sb->s_writers.frozen < level);
+ rcu_read_lock();
}
#ifdef CONFIG_LOCKDEP
acquire_freeze_lock(sb, level, !wait, _RET_IP_);
#endif
percpu_counter_inc(&sb->s_writers.counter[level-1]);
- /*
- * Make sure counter is updated before we check for frozen.
- * freeze_super() first sets frozen and then checks the counter.
- */
- smp_mb();
- if (unlikely(sb->s_writers.frozen >= level)) {
- __sb_end_write(sb, level);
- goto retry;
- }
+ rcu_read_unlock();
return 1;
}
EXPORT_SYMBOL(__sb_start_write);
@@ -1254,6 +1248,29 @@ static void sb_wait_write(struct super_b
} while (writers);
}
+static void __thaw_super(struct super_block *sb)
+{
+ sb->s_writers.frozen = SB_UNFROZEN;
+ /*
+ * RCU protects us against races where we are taking
+ * s_writers.frozen in to a less permissive state. When
+ * that happens, __sb_start_write() might not yet have
+ * seen our write and might still increment
+ * s_writers.counter.
+ *
+ * Here, however, we are transitioning to a _more_
+ * permissive state. The filesystem is frozen and no
+ * writes to s_writers.counter are being permitted.
+ *
+ * A smp_wmb() is sufficient here because we just need
+ * to ensure that new calls __sb_start_write() are
+ * allowed, not that _concurrent_ calls have finished.
+ */
+ smp_wmb();
+ wake_up(&sb->s_writers.wait_unfrozen);
+ deactivate_locked_super(sb);
+}
+
/**
* freeze_super - lock the filesystem and force it into a consistent state
* @sb: the super to lock
@@ -1312,7 +1329,13 @@ int freeze_super(struct super_block *sb)
/* From now on, no new normal writers can start */
sb->s_writers.frozen = SB_FREEZE_WRITE;
- smp_wmb();
+ /*
+ * After we synchronize_rcu(), we have ensured that everyone
+ * who reads sb->s_writers.frozen under rcu_read_lock() can
+ * now see our update. This pretty much means that
+ * __sb_start_write() will not allow any new writers.
+ */
+ synchronize_rcu();
/* Release s_umount to preserve sb_start_write -> s_umount ordering */
up_write(&sb->s_umount);
@@ -1322,7 +1345,7 @@ int freeze_super(struct super_block *sb)
/* Now we go and block page faults... */
down_write(&sb->s_umount);
sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
- smp_wmb();
+ synchronize_rcu();
sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
@@ -1331,7 +1354,7 @@ int freeze_super(struct super_block *sb)
/* Now wait for internal filesystem counter */
sb->s_writers.frozen = SB_FREEZE_FS;
- smp_wmb();
+ synchronize_rcu();
sb_wait_write(sb, SB_FREEZE_FS);
if (sb->s_op->freeze_fs) {
@@ -1339,11 +1362,7 @@ int freeze_super(struct super_block *sb)
if (ret) {
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
- sb->s_writers.frozen = SB_UNFROZEN;
- smp_wmb();
- wake_up(&sb->s_writers.wait_unfrozen);
- deactivate_locked_super(sb);
- return ret;
+ __thaw_super(sb);
}
}
/*
@@ -1386,11 +1405,7 @@ int thaw_super(struct super_block *sb)
}
out:
- sb->s_writers.frozen = SB_UNFROZEN;
- smp_wmb();
- wake_up(&sb->s_writers.wait_unfrozen);
- deactivate_locked_super(sb);
-
+ __thaw_super(sb);
return 0;
}
EXPORT_SYMBOL(thaw_super);
_
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH 3/3] fs: replace memory barrier in __sb_end_write() with RCU
[not found] <20150630220455.7DE1D236@viggo.jf.intel.com>
2015-06-30 22:04 ` [PATCH 2/3] fs: use RCU for freeze_super() vs. __sb_start_write() Dave Hansen
@ 2015-06-30 22:04 ` Dave Hansen
1 sibling, 0 replies; 2+ messages in thread
From: Dave Hansen @ 2015-06-30 22:04 UTC (permalink / raw)
To: dave; +Cc: dave.hansen, viro, linux-fsdevel, linux-kernel, paulmck, ak
From: Dave Hansen <dave.hansen@linux.intel.com>
If I sit in a loop and do write()s to small tmpfs files,
__sb_end_write() is the third-hottest kernel function due to its
smp_mb().
__sb_end_write() uses the barrier to avoid races with freeze_super()
and its calls to sb_wait_write(). But, now that freeze_super() is
calling synchronize_rcu() before each sb_wait_write() call, we can
use that to our advantage.
The synchronize_rcu() ensures that all __sb_end_write() callers will see
freeze_super()'s updates to s_writers.frozen. That, in turn,
guarantees that __sb_end_write() will try to wake up any subsequent
call by freeze_super() to sb_wait_write().
This improves the number of writes/second I can do by 6.1% on top
of the previous patch. The total improvement is 27.1% over a
completely unpatched kernel.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
---
b/fs/super.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff -puN fs/super.c~selectively-do-barriers-in-__sb_end_write fs/super.c
--- a/fs/super.c~selectively-do-barriers-in-__sb_end_write 2015-06-30 15:03:57.565433061 -0700
+++ b/fs/super.c 2015-06-30 15:03:57.568433196 -0700
@@ -1146,14 +1146,23 @@ out:
*/
void __sb_end_write(struct super_block *sb, int level)
{
+ rcu_read_lock();
percpu_counter_dec(&sb->s_writers.counter[level-1]);
/*
- * Make sure s_writers are updated before we wake up waiters in
- * freeze_super().
+ * We are racing here with freeze_super()'s calls to
+ * sb_wait_write(). We want to ensure that we call
+ * wake_up() whenever one of those calls _might_ be
+ * in sb_wait_write().
+ *
+ * Since freeze_super() does a synchronize_rcu() before
+ * each of its sb_wait_write() calls, it can guarantee
+ * that it sees our update to s_writers.counter as well
+ * as that we see its update to s_writers.frozen.
*/
- smp_mb();
- if (waitqueue_active(&sb->s_writers.wait))
+ if (unlikely(sb->s_writers.frozen >= level))
wake_up(&sb->s_writers.wait);
+ rcu_read_unlock();
+
rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
}
EXPORT_SYMBOL(__sb_end_write);
_
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-06-30 22:04 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20150630220455.7DE1D236@viggo.jf.intel.com>
2015-06-30 22:04 ` [PATCH 2/3] fs: use RCU for freeze_super() vs. __sb_start_write() Dave Hansen
2015-06-30 22:04 ` [PATCH 3/3] fs: replace memory barrier in __sb_end_write() with RCU Dave Hansen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).