All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()
@ 2014-07-25  3:59 Trond Myklebust
  2014-07-25  3:59 ` [PATCH 2/3] SUNRPC: svc_tcp_write_space: don't clear SOCK_NOSPACE prematurely Trond Myklebust
  2014-07-29 19:31   ` Bruce Fields
  0 siblings, 2 replies; 6+ messages in thread
From: Trond Myklebust @ 2014-07-25  3:59 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-nfs

Ensure that all calls to svc_xprt_enqueue() except svc_xprt_received()
check the value of XPT_BUSY, before attempting to grab spinlocks etc.
This is to avoid situations such as the following "perf" trace,
which shows heavy contention on the pool spinlock:

    54.15%            nfsd  [kernel.kallsyms]        [k] _raw_spin_lock_bh
                      |
                      --- _raw_spin_lock_bh
                         |
                         |--71.43%-- svc_xprt_enqueue
                         |          |
                         |          |--50.31%-- svc_reserve
                         |          |
                         |          |--31.35%-- svc_xprt_received
                         |          |
                         |          |--18.34%-- svc_tcp_data_ready
...

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/svc_xprt.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b4737fbdec13..54a761fa6351 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -23,6 +23,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
 static void svc_delete_xprt(struct svc_xprt *xprt);
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
 	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
 		return;
 	/* As soon as we clear busy, the xprt could be closed and
-	 * 'put', so we need a reference to call svc_xprt_enqueue with:
+	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
 	 */
 	svc_xprt_get(xprt);
+	smp_mb__before_clear_bit();
 	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_enqueue(xprt);
+	svc_xprt_do_enqueue(xprt);
 	svc_xprt_put(xprt);
 }
 
@@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
 	return false;
 }
 
-/*
- * Queue up a transport with data pending. If there are idle nfsd
- * processes, wake 'em up.
- *
- */
-void svc_xprt_enqueue(struct svc_xprt *xprt)
+static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 {
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
@@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 out_unlock:
 	spin_unlock_bh(&pool->sp_lock);
 }
+
+/*
+ * Queue up a transport with data pending. If there are idle nfsd
+ * processes, wake 'em up.
+ *
+ */
+void svc_xprt_enqueue(struct svc_xprt *xprt)
+{
+	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
+		return;
+	svc_xprt_do_enqueue(xprt);
+}
 EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
 
 /*
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] SUNRPC: svc_tcp_write_space: don't clear SOCK_NOSPACE prematurely
  2014-07-25  3:59 [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue() Trond Myklebust
@ 2014-07-25  3:59 ` Trond Myklebust
  2014-07-25  3:59   ` [PATCH 3/3] SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed Trond Myklebust
  2014-07-29 19:31   ` Bruce Fields
  1 sibling, 1 reply; 6+ messages in thread
From: Trond Myklebust @ 2014-07-25  3:59 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-nfs

If requests are queued in the socket inbuffer waiting for an
svc_tcp_has_wspace() requirement to be satisfied, then we do not want
to clear the SOCK_NOSPACE flag until we've satisfied that requirement.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/svcsock.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b507cd327d9b..7322ea1164fd 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -446,11 +446,31 @@ static void svc_write_space(struct sock *sk)
 	}
 }
 
+static int svc_tcp_has_wspace(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
+	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
+	int required;
+
+	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
+		return 1;
+	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
+	if (sk_stream_wspace(svsk->sk_sk) >= required ||
+	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
+	     atomic_read(&xprt->xpt_reserved) == 0))
+		return 1;
+	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	return 0;
+}
+
 static void svc_tcp_write_space(struct sock *sk)
 {
+	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
 	struct socket *sock = sk->sk_socket;
 
-	if (sk_stream_is_writeable(sk) && sock)
+	if (!sk_stream_is_writeable(sk) || !sock)
+		return;
+	if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 	svc_write_space(sk);
 }
@@ -1197,23 +1217,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
 	svc_putnl(resv, 0);
 }
 
-static int svc_tcp_has_wspace(struct svc_xprt *xprt)
-{
-	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
-	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
-	int required;
-
-	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
-		return 1;
-	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
-	if (sk_stream_wspace(svsk->sk_sk) >= required ||
-	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
-	     atomic_read(&xprt->xpt_reserved) == 0))
-		return 1;
-	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-	return 0;
-}
-
 static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
 				       struct net *net,
 				       struct sockaddr *sa, int salen,
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed
  2014-07-25  3:59 ` [PATCH 2/3] SUNRPC: svc_tcp_write_space: don't clear SOCK_NOSPACE prematurely Trond Myklebust
@ 2014-07-25  3:59   ` Trond Myklebust
  0 siblings, 0 replies; 6+ messages in thread
From: Trond Myklebust @ 2014-07-25  3:59 UTC (permalink / raw)
  To: Bruce Fields; +Cc: linux-nfs

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 +
 net/sunrpc/svc_xprt.c           | 2 ++
 net/sunrpc/svcsock.c            | 9 +++++++++
 3 files changed, 12 insertions(+)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 7235040a19b2..8f241ac6934b 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -25,6 +25,7 @@ struct svc_xprt_ops {
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
 	int		(*xpo_secure_port)(struct svc_rqst *);
+	void		(*xpo_adjust_wspace)(struct svc_xprt *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 54a761fa6351..32647b2a6a34 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -448,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
 		rqstp->rq_reserved = space;
 
+		if (xprt->xpt_ops->xpo_adjust_wspace)
+			xprt->xpt_ops->xpo_adjust_wspace(xprt);
 		svc_xprt_enqueue(xprt);
 	}
 }
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7322ea1164fd..72597d7fe60a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -475,6 +475,14 @@ static void svc_tcp_write_space(struct sock *sk)
 	svc_write_space(sk);
 }
 
+static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+	if (svc_tcp_has_wspace(xprt))
+		clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+}
+
 /*
  * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
  */
@@ -1288,6 +1296,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
 	.xpo_secure_port = svc_sock_secure_port,
+	.xpo_adjust_wspace = svc_tcp_adjust_wspace,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
-- 
1.9.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()
  2014-07-25  3:59 [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue() Trond Myklebust
@ 2014-07-29 19:31   ` Bruce Fields
  2014-07-29 19:31   ` Bruce Fields
  1 sibling, 0 replies; 6+ messages in thread
From: Bruce Fields @ 2014-07-29 19:31 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs, Peter Zijlstra, Paul E. McKenney, Ingo Molnar,
	linux-kernel, linux-arch

All three patches look good to me, thanks.

From private email, this:

On Thu, Jul 24, 2014 at 11:59:31PM -0400, Trond Myklebust wrote:
> @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
>  	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
>  		return;
>  	/* As soon as we clear busy, the xprt could be closed and
> -	 * 'put', so we need a reference to call svc_xprt_enqueue with:
> +	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
>  	 */
>  	svc_xprt_get(xprt);
> +	smp_mb__before_clear_bit();

triggered a warning about smp_mb__before_clear_bit noticed by the kbuild
robot.  Looks like that was due to
febdbfe8a91ce0d11939d4940b592eb0dba8d663 "arch: Prepare for
smp_mb__{before,after}_atomic()".

You questioned whether deprecating smp_mb__{before,after}_clear_bit was
an unnecessary burden on people maintaining stable kernels or doing
backports more generally.  Cc'ing some addresses from that commit.

Whatever--I'll probably just do the clear_bit->before_atomic
replacement and apply unless there's some objection.

--b.

>  	clear_bit(XPT_BUSY, &xprt->xpt_flags);
> -	svc_xprt_enqueue(xprt);
> +	svc_xprt_do_enqueue(xprt);
>  	svc_xprt_put(xprt);
>  }
>  
> @@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
>  	return false;
>  }
>  
> -/*
> - * Queue up a transport with data pending. If there are idle nfsd
> - * processes, wake 'em up.
> - *
> - */
> -void svc_xprt_enqueue(struct svc_xprt *xprt)
> +static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
>  {
>  	struct svc_pool *pool;
>  	struct svc_rqst	*rqstp;
> @@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
>  out_unlock:
>  	spin_unlock_bh(&pool->sp_lock);
>  }
> +
> +/*
> + * Queue up a transport with data pending. If there are idle nfsd
> + * processes, wake 'em up.
> + *
> + */
> +void svc_xprt_enqueue(struct svc_xprt *xprt)
> +{
> +	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
> +		return;
> +	svc_xprt_do_enqueue(xprt);
> +}
>  EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
>  
>  /*
> -- 
> 1.9.3
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()
@ 2014-07-29 19:31   ` Bruce Fields
  0 siblings, 0 replies; 6+ messages in thread
From: Bruce Fields @ 2014-07-29 19:31 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: linux-nfs, Peter Zijlstra, Paul E. McKenney, Ingo Molnar,
	linux-kernel, linux-arch

All three patches look good to me, thanks.

From private email, this:

On Thu, Jul 24, 2014 at 11:59:31PM -0400, Trond Myklebust wrote:
> @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
>  	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
>  		return;
>  	/* As soon as we clear busy, the xprt could be closed and
> -	 * 'put', so we need a reference to call svc_xprt_enqueue with:
> +	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
>  	 */
>  	svc_xprt_get(xprt);
> +	smp_mb__before_clear_bit();

triggered a warning about smp_mb__before_clear_bit noticed by the kbuild
robot.  Looks like that was due to
febdbfe8a91ce0d11939d4940b592eb0dba8d663 "arch: Prepare for
smp_mb__{before,after}_atomic()".

You questioned whether deprecating smp_mb__{before,after}_clear_bit was
an unnecessary burden on people maintaining stable kernels or doing
backports more generally.  Cc'ing some addresses from that commit.

Whatever--I'll probably just do the clear_bit->before_atomic
replacement and apply unless there's some objection.

--b.

>  	clear_bit(XPT_BUSY, &xprt->xpt_flags);
> -	svc_xprt_enqueue(xprt);
> +	svc_xprt_do_enqueue(xprt);
>  	svc_xprt_put(xprt);
>  }
>  
> @@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
>  	return false;
>  }
>  
> -/*
> - * Queue up a transport with data pending. If there are idle nfsd
> - * processes, wake 'em up.
> - *
> - */
> -void svc_xprt_enqueue(struct svc_xprt *xprt)
> +static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
>  {
>  	struct svc_pool *pool;
>  	struct svc_rqst	*rqstp;
> @@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
>  out_unlock:
>  	spin_unlock_bh(&pool->sp_lock);
>  }
> +
> +/*
> + * Queue up a transport with data pending. If there are idle nfsd
> + * processes, wake 'em up.
> + *
> + */
> +void svc_xprt_enqueue(struct svc_xprt *xprt)
> +{
> +	if (test_bit(XPT_BUSY, &xprt->xpt_flags))
> +		return;
> +	svc_xprt_do_enqueue(xprt);
> +}
>  EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
>  
>  /*
> -- 
> 1.9.3
> 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue()
  2014-07-29 19:31   ` Bruce Fields
  (?)
@ 2014-07-29 19:57   ` Peter Zijlstra
  -1 siblings, 0 replies; 6+ messages in thread
From: Peter Zijlstra @ 2014-07-29 19:57 UTC (permalink / raw)
  To: Bruce Fields
  Cc: Trond Myklebust, linux-nfs, Paul E. McKenney, Ingo Molnar,
	linux-kernel, linux-arch

On Tue, Jul 29, 2014 at 03:31:08PM -0400, Bruce Fields wrote:
> All three patches look good to me, thanks.
> 
> From private email, this:
> 
> On Thu, Jul 24, 2014 at 11:59:31PM -0400, Trond Myklebust wrote:
> > @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt)
> >  	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
> >  		return;
> >  	/* As soon as we clear busy, the xprt could be closed and
> > -	 * 'put', so we need a reference to call svc_xprt_enqueue with:
> > +	 * 'put', so we need a reference to call svc_xprt_do_enqueue with:
> >  	 */
> >  	svc_xprt_get(xprt);
> > +	smp_mb__before_clear_bit();
> 
> triggered a warning about smp_mb__before_clear_bit noticed by the kbuild
> robot.  Looks like that was due to
> febdbfe8a91ce0d11939d4940b592eb0dba8d663 "arch: Prepare for
> smp_mb__{before,after}_atomic()".
> 
> You questioned whether deprecating smp_mb__{before,after}_clear_bit was
> an unnecessary burden on people maintaining stable kernels or doing
> backports more generally.  Cc'ing some addresses from that commit.

I absolutely do not care one whit for that. The kernel lives, deal with
it.

Memory barriers are hard enough, we do not need multiple versions of the
same thing just to confuse people.

> Whatever--I'll probably just do the clear_bit->before_atomic
> replacement and apply unless there's some objection.

The old API was preserved to allow for non-flag-day migration to the new
API. I should do a final sweep and then kill the old API.

The conversion shrank the memory barrier API by 4 variants while
covering more cases, which is an absolute win in my book.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-07-29 19:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-25  3:59 [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue() Trond Myklebust
2014-07-25  3:59 ` [PATCH 2/3] SUNRPC: svc_tcp_write_space: don't clear SOCK_NOSPACE prematurely Trond Myklebust
2014-07-25  3:59   ` [PATCH 3/3] SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed Trond Myklebust
2014-07-29 19:31 ` [PATCH 1/3] SUNRPC: Reduce contention in svc_xprt_enqueue() Bruce Fields
2014-07-29 19:31   ` Bruce Fields
2014-07-29 19:57   ` Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.