All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-4.14] xfs: fix AIM7 regression
@ 2017-10-19  7:47 Christoph Hellwig
  2017-10-19 11:38 ` Brian Foster
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2017-10-19  7:47 UTC (permalink / raw)
  To: linux-xfs

Apparently our current rwsem code doesn't like doing the trylock, then
lock for real scheme.  So change our read/write methods to just do the
trylock for the RWF_NOWAIT case.  This fixes a ~25% regression in
AIM7.

Fixes: 91f9943e ("fs: support RWF_NOWAIT for buffered reads")
Reported-by: kernel test robot <xiaolong.ye@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 309e26c9dddb..f40b5da5d467 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -237,11 +237,13 @@ xfs_file_dax_read(
 	if (!count)
 		return 0; /* skip atime */
 
-	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
 			return -EAGAIN;
+	} else {
 		xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	}
+
 	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
@@ -259,9 +261,10 @@ xfs_file_buffered_aio_read(
 
 	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
 
-	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
 			return -EAGAIN;
+	} else {
 		xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	}
 	ret = generic_file_read_iter(iocb, to);
@@ -552,9 +555,10 @@ xfs_file_dio_aio_write(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	if (!xfs_ilock_nowait(ip, iolock)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!xfs_ilock_nowait(ip, iolock))
 			return -EAGAIN;
+	} else {
 		xfs_ilock(ip, iolock);
 	}
 
@@ -606,9 +610,10 @@ xfs_file_dax_write(
 	size_t			count;
 	loff_t			pos;
 
-	if (!xfs_ilock_nowait(ip, iolock)) {
-		if (iocb->ki_flags & IOCB_NOWAIT)
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!xfs_ilock_nowait(ip, iolock))
 			return -EAGAIN;
+	} else {
 		xfs_ilock(ip, iolock);
 	}
 
-- 
2.14.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.14] xfs: fix AIM7 regression
  2017-10-19  7:47 [PATCH for-4.14] xfs: fix AIM7 regression Christoph Hellwig
@ 2017-10-19 11:38 ` Brian Foster
  2017-10-19 13:14   ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: Brian Foster @ 2017-10-19 11:38 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-xfs

On Thu, Oct 19, 2017 at 09:47:05AM +0200, Christoph Hellwig wrote:
> Apparently our current rwsem code doesn't like doing the trylock, then
> lock for real scheme.  So change our read/write methods to just do the
> trylock for the RWF_NOWAIT case.  This fixes a ~25% regression in
> AIM7.
> 

The code looks fine, but this seems really strange. If the trylock
fails, then wouldn't the blocking lock have slept anyways if done
initially? Is there any more background info available on this, or
perhaps a theory on why there is such a significant regression..?

Brian

> Fixes: 91f9943e ("fs: support RWF_NOWAIT for buffered reads")
> Reported-by: kernel test robot <xiaolong.ye@intel.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 309e26c9dddb..f40b5da5d467 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -237,11 +237,13 @@ xfs_file_dax_read(
>  	if (!count)
>  		return 0; /* skip atime */
>  
> -	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
> -		if (iocb->ki_flags & IOCB_NOWAIT)
> +	if (iocb->ki_flags & IOCB_NOWAIT) {
> +		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
>  			return -EAGAIN;
> +	} else {
>  		xfs_ilock(ip, XFS_IOLOCK_SHARED);
>  	}
> +
>  	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
>  	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
>  
> @@ -259,9 +261,10 @@ xfs_file_buffered_aio_read(
>  
>  	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
>  
> -	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
> -		if (iocb->ki_flags & IOCB_NOWAIT)
> +	if (iocb->ki_flags & IOCB_NOWAIT) {
> +		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
>  			return -EAGAIN;
> +	} else {
>  		xfs_ilock(ip, XFS_IOLOCK_SHARED);
>  	}
>  	ret = generic_file_read_iter(iocb, to);
> @@ -552,9 +555,10 @@ xfs_file_dio_aio_write(
>  		iolock = XFS_IOLOCK_SHARED;
>  	}
>  
> -	if (!xfs_ilock_nowait(ip, iolock)) {
> -		if (iocb->ki_flags & IOCB_NOWAIT)
> +	if (iocb->ki_flags & IOCB_NOWAIT) {
> +		if (!xfs_ilock_nowait(ip, iolock))
>  			return -EAGAIN;
> +	} else {
>  		xfs_ilock(ip, iolock);
>  	}
>  
> @@ -606,9 +610,10 @@ xfs_file_dax_write(
>  	size_t			count;
>  	loff_t			pos;
>  
> -	if (!xfs_ilock_nowait(ip, iolock)) {
> -		if (iocb->ki_flags & IOCB_NOWAIT)
> +	if (iocb->ki_flags & IOCB_NOWAIT) {
> +		if (!xfs_ilock_nowait(ip, iolock))
>  			return -EAGAIN;
> +	} else {
>  		xfs_ilock(ip, iolock);
>  	}
>  
> -- 
> 2.14.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.14] xfs: fix AIM7 regression
  2017-10-19 11:38 ` Brian Foster
@ 2017-10-19 13:14   ` Christoph Hellwig
  2017-10-19 22:44     ` Darrick J. Wong
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2017-10-19 13:14 UTC (permalink / raw)
  To: Brian Foster; +Cc: Christoph Hellwig, linux-xfs

On Thu, Oct 19, 2017 at 07:38:48AM -0400, Brian Foster wrote:
> On Thu, Oct 19, 2017 at 09:47:05AM +0200, Christoph Hellwig wrote:
> > Apparently our current rwsem code doesn't like doing the trylock, then
> > lock for real scheme.  So change our read/write methods to just do the
> > trylock for the RWF_NOWAIT case.  This fixes a ~25% regression in
> > AIM7.
> > 
> 
> The code looks fine, but this seems really strange. If the trylock
> fails, then wouldn't the blocking lock have slept anyways if done
> initially? Is there any more background info available on this, or
> perhaps a theory on why there is such a significant regression..?

No, unfortunately I don't have a theory, but I agree it is odd
behavior in the rwsem code.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.14] xfs: fix AIM7 regression
  2017-10-19 13:14   ` Christoph Hellwig
@ 2017-10-19 22:44     ` Darrick J. Wong
  2017-10-20  6:55       ` Christoph Hellwig
  2017-11-07 10:22       ` Jan Kara
  0 siblings, 2 replies; 6+ messages in thread
From: Darrick J. Wong @ 2017-10-19 22:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Brian Foster, linux-xfs

On Thu, Oct 19, 2017 at 03:14:07PM +0200, Christoph Hellwig wrote:
> On Thu, Oct 19, 2017 at 07:38:48AM -0400, Brian Foster wrote:
> > On Thu, Oct 19, 2017 at 09:47:05AM +0200, Christoph Hellwig wrote:
> > > Apparently our current rwsem code doesn't like doing the trylock, then
> > > lock for real scheme.  So change our read/write methods to just do the
> > > trylock for the RWF_NOWAIT case.  This fixes a ~25% regression in
> > > AIM7.
> > > 
> > 
> > The code looks fine, but this seems really strange. If the trylock
> > fails, then wouldn't the blocking lock have slept anyways if done
> > initially? Is there any more background info available on this, or
> > perhaps a theory on why there is such a significant regression..?
> 
> No, unfortunately I don't have a theory, but I agree it is odd
> behavior in the rwsem code.

<shrug> I want to know a little more about why there's a performance hit
in the down_read_trylock -> down_read case.  Are we getting penalized
for that?  Is it some weird interaction with lockdep?

--D

> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.14] xfs: fix AIM7 regression
  2017-10-19 22:44     ` Darrick J. Wong
@ 2017-10-20  6:55       ` Christoph Hellwig
  2017-11-07 10:22       ` Jan Kara
  1 sibling, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2017-10-20  6:55 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, Brian Foster, Ye Xiaolong, linux-xfs,
	Peter Zijlstra, Ingo Molnar, linux-kernel

On Thu, Oct 19, 2017 at 03:44:31PM -0700, Darrick J. Wong wrote:
> > > The code looks fine, but this seems really strange. If the trylock
> > > fails, then wouldn't the blocking lock have slept anyways if done
> > > initially? Is there any more background info available on this, or
> > > perhaps a theory on why there is such a significant regression..?
> > 
> > No, unfortunately I don't have a theory, but I agree it is odd
> > behavior in the rwsem code.
> 
> <shrug> I want to know a little more about why there's a performance hit
> in the down_read_trylock -> down_read case.  Are we getting penalized
> for that?  Is it some weird interaction with lockdep?

I don't think the test bot did run with lockdep.  But feel free to take
a look at the mail thread titled

[lkp-robot] [fs]  91f9943e1c:  aim7.jobs-per-min -26.6% regression

on lkml.  Note that synthetic benchmarks on XFS always saw weird
effects from rwsem details.  I remember that a few years ago I had
to back to the mainline patch to move the rwsem fastpath out of line
because that caused a major performance regression on CIFS file
serving benchmarks on a very low end ARM NAS box.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH for-4.14] xfs: fix AIM7 regression
  2017-10-19 22:44     ` Darrick J. Wong
  2017-10-20  6:55       ` Christoph Hellwig
@ 2017-11-07 10:22       ` Jan Kara
  1 sibling, 0 replies; 6+ messages in thread
From: Jan Kara @ 2017-11-07 10:22 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, Brian Foster, linux-xfs, Ingo Molnar,
	Peter Zijlstra, x86

On Thu 19-10-17 15:44:31, Darrick J. Wong wrote:
> On Thu, Oct 19, 2017 at 03:14:07PM +0200, Christoph Hellwig wrote:
> > On Thu, Oct 19, 2017 at 07:38:48AM -0400, Brian Foster wrote:
> > > On Thu, Oct 19, 2017 at 09:47:05AM +0200, Christoph Hellwig wrote:
> > > > Apparently our current rwsem code doesn't like doing the trylock, then
> > > > lock for real scheme.  So change our read/write methods to just do the
> > > > trylock for the RWF_NOWAIT case.  This fixes a ~25% regression in
> > > > AIM7.
> > > > 
> > > 
> > > The code looks fine, but this seems really strange. If the trylock
> > > fails, then wouldn't the blocking lock have slept anyways if done
> > > initially? Is there any more background info available on this, or
> > > perhaps a theory on why there is such a significant regression..?
> > 
> > No, unfortunately I don't have a theory, but I agree it is odd
> > behavior in the rwsem code.
> 
> <shrug> I want to know a little more about why there's a performance hit
> in the down_read_trylock -> down_read case.  Are we getting penalized
> for that?  Is it some weird interaction with lockdep?

At least on x86, __down_read_trylock() is implemented very differently
from __down_read(). In particular, if there's heavy contention on the
semaphore from readers, the __down_read_trylock() implementation seems to
be prone to going through its cmpxchg loop several times, which could
explain the observed performance data. But I'm just guessing... Adding
some x86 people to CC just in case they have more to say.

								Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-11-07 10:22 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-19  7:47 [PATCH for-4.14] xfs: fix AIM7 regression Christoph Hellwig
2017-10-19 11:38 ` Brian Foster
2017-10-19 13:14   ` Christoph Hellwig
2017-10-19 22:44     ` Darrick J. Wong
2017-10-20  6:55       ` Christoph Hellwig
2017-11-07 10:22       ` Jan Kara

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.