* [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
@ 2016-06-24 14:55 Trond Myklebust
  2016-06-24 14:55 ` [PATCH 02/10] SUNRPC: Don't allocate a full sockaddr_storage for tracing Trond Myklebust
  2016-06-24 18:28 ` [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status J. Bruce Fields
  0 siblings, 2 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

There is no guarantee that either the request or the svc_xprt still exists
by the time we get round to printing the trace message.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/trace/events/sunrpc.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 003dca933803..e82493d07a86 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -455,20 +455,22 @@ TRACE_EVENT(svc_recv,
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(struct sockaddr *, addr)
 		__field(__be32, xid)
 		__field(int, status)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
 	),
 
 	TP_fast_assign(
-		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
 		__entry->xid = status > 0 ? rqst->rq_xid : 0;
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+			&rqst->rq_addr, rqst->rq_addrlen);
 	),
 
-	TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr,
+	TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s",
+			(struct sockaddr *)__get_dynamic_array(addr),
 			be32_to_cpu(__entry->xid), __entry->status,
 			show_rqstp_flags(__entry->flags))
 );
@@ -480,22 +482,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
 	TP_ARGS(rqst, status),
 
 	TP_STRUCT__entry(
-		__field(struct sockaddr *, addr)
 		__field(__be32, xid)
-		__field(int, dropme)
 		__field(int, status)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
 	),
 
 	TP_fast_assign(
-		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
 		__entry->xid = rqst->rq_xid;
 		__entry->status = status;
 		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+			&rqst->rq_addr, rqst->rq_addrlen);
 	),
 
 	TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s",
-		__entry->addr, be32_to_cpu(__entry->xid),
+		(struct sockaddr *)__get_dynamic_array(addr),
+		be32_to_cpu(__entry->xid),
 		__entry->status, show_rqstp_flags(__entry->flags))
 );
 
-- 
2.7.4



* [PATCH 02/10] SUNRPC: Don't allocate a full sockaddr_storage for tracing
  2016-06-24 14:55 [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status Trond Myklebust
@ 2016-06-24 14:55 ` Trond Myklebust
  2016-06-24 14:55   ` [PATCH 03/10] SUNRPC: Add a tracepoint for server socket out-of-space conditions Trond Myklebust
  2016-06-24 18:28 ` [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status J. Bruce Fields
  1 sibling, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

We're always tracing IPv4 or IPv6 addresses, so we can save a lot of space
in the ring buffer by allocating only the actual sockaddr size (16 or 28
bytes) instead of a full 128-byte sockaddr_storage.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/trace/events/sunrpc.h | 47 +++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index e82493d07a86..a01a076ea060 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -532,20 +532,27 @@ TRACE_EVENT(svc_xprt_do_enqueue,
 
 	TP_STRUCT__entry(
 		__field(struct svc_xprt *, xprt)
-		__field_struct(struct sockaddr_storage, ss)
 		__field(int, pid)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, xprt != NULL ?
+			xprt->xpt_remotelen : 0)
 	),
 
 	TP_fast_assign(
 		__entry->xprt = xprt;
-		xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
 		__entry->pid = rqst? rqst->rq_task->pid : 0;
-		__entry->flags = xprt ? xprt->xpt_flags : 0;
+		if (xprt) {
+			memcpy(__get_dynamic_array(addr),
+				&xprt->xpt_remote,
+				xprt->xpt_remotelen);
+			__entry->flags = xprt->xpt_flags;
+		} else
+			__entry->flags = 0;
 	),
 
 	TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt,
-		(struct sockaddr *)&__entry->ss,
+		__get_dynamic_array_len(addr) != 0 ?
+			(struct sockaddr *)__get_dynamic_array(addr) : NULL,
 		__entry->pid, show_svc_xprt_flags(__entry->flags))
 );
 
@@ -556,18 +563,25 @@ TRACE_EVENT(svc_xprt_dequeue,
 
 	TP_STRUCT__entry(
 		__field(struct svc_xprt *, xprt)
-		__field_struct(struct sockaddr_storage, ss)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, xprt != NULL ?
+			xprt->xpt_remotelen : 0)
 	),
 
 	TP_fast_assign(
-		__entry->xprt = xprt,
-		xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
-		__entry->flags = xprt ? xprt->xpt_flags : 0;
+		__entry->xprt = xprt;
+		if (xprt) {
+			memcpy(__get_dynamic_array(addr),
+					&xprt->xpt_remote,
+					xprt->xpt_remotelen);
+			__entry->flags = xprt->xpt_flags;
+		} else
+			__entry->flags = 0;
 	),
 
 	TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt,
-		(struct sockaddr *)&__entry->ss,
+		__get_dynamic_array_len(addr) != 0 ?
+			(struct sockaddr *)__get_dynamic_array(addr) : NULL,
 		show_svc_xprt_flags(__entry->flags))
 );
 
@@ -595,19 +609,26 @@ TRACE_EVENT(svc_handle_xprt,
 	TP_STRUCT__entry(
 		__field(struct svc_xprt *, xprt)
 		__field(int, len)
-		__field_struct(struct sockaddr_storage, ss)
 		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, xprt != NULL ?
+			xprt->xpt_remotelen : 0)
 	),
 
 	TP_fast_assign(
 		__entry->xprt = xprt;
-		xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss));
 		__entry->len = len;
-		__entry->flags = xprt ? xprt->xpt_flags : 0;
+		if (xprt) {
+			memcpy(__get_dynamic_array(addr),
+					&xprt->xpt_remote,
+					xprt->xpt_remotelen);
+			__entry->flags = xprt->xpt_flags;
+		} else
+			__entry->flags = 0;
 	),
 
 	TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt,
-		(struct sockaddr *)&__entry->ss,
+		__get_dynamic_array_len(addr) != 0 ?
+			(struct sockaddr *)__get_dynamic_array(addr) : NULL,
 		__entry->len, show_svc_xprt_flags(__entry->flags))
 );
 #endif /* _TRACE_SUNRPC_H */
-- 
2.7.4



* [PATCH 03/10] SUNRPC: Add a tracepoint for server socket out-of-space conditions
  2016-06-24 14:55 ` [PATCH 02/10] SUNRPC: Don't allocate a full sockaddr_storage for tracing Trond Myklebust
@ 2016-06-24 14:55   ` Trond Myklebust
  2016-06-24 14:55     ` [PATCH 04/10] SUNRPC: Add tracepoints for dropped and deferred requests Trond Myklebust
  0 siblings, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Add a tracepoint to track when the processing of incoming RPC data gets
deferred due to out-of-space issues on the outgoing transport.
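
The new event can then be enabled through the usual tracefs interface; a
rough usage sketch (assuming tracefs is mounted at /sys/kernel/tracing;
older setups expose the same files under /sys/kernel/debug/tracing):

  echo 1 > /sys/kernel/tracing/events/sunrpc/svc_xprt_no_write_space/enable
  # run the workload suspected of hitting the write-space limit
  cat /sys/kernel/tracing/trace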

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/trace/events/sunrpc.h | 10 +++++++++-
 net/sunrpc/svc_xprt.c         |  8 ++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index a01a076ea060..f06701b3235c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -556,7 +556,7 @@ TRACE_EVENT(svc_xprt_do_enqueue,
 		__entry->pid, show_svc_xprt_flags(__entry->flags))
 );
 
-TRACE_EVENT(svc_xprt_dequeue,
+DECLARE_EVENT_CLASS(svc_xprt_event,
 	TP_PROTO(struct svc_xprt *xprt),
 
 	TP_ARGS(xprt),
@@ -585,6 +585,14 @@ TRACE_EVENT(svc_xprt_dequeue,
 		show_svc_xprt_flags(__entry->flags))
 );
 
+DEFINE_EVENT(svc_xprt_event, svc_xprt_dequeue,
+	TP_PROTO(struct svc_xprt *xprt),
+	TP_ARGS(xprt));
+
+DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
+	TP_PROTO(struct svc_xprt *xprt),
+	TP_ARGS(xprt));
+
 TRACE_EVENT(svc_wake_up,
 	TP_PROTO(int pid),
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index f5572e31d518..58372076a41a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -331,8 +331,12 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
 {
 	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
 		return true;
-	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
-		return xprt->xpt_ops->xpo_has_wspace(xprt);
+	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
+		if (xprt->xpt_ops->xpo_has_wspace(xprt))
+			return true;
+		trace_svc_xprt_no_write_space(xprt);
+		return false;
+	}
 	return false;
 }
 
-- 
2.7.4



* [PATCH 04/10] SUNRPC: Add tracepoints for dropped and deferred requests
  2016-06-24 14:55   ` [PATCH 03/10] SUNRPC: Add a tracepoint for server socket out-of-space conditions Trond Myklebust
@ 2016-06-24 14:55     ` Trond Myklebust
  2016-06-24 14:55       ` [PATCH 05/10] SUNRPC: lock the socket while detaching it Trond Myklebust
  0 siblings, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Dropping and/or deferring requests has an impact on performance. Let's
make sure we can trace those events.
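
To capture these while reproducing a performance problem, something like the
following sketch should do (assuming trace-cmd is available; the raw tracefs
files under events/sunrpc/ work just as well):

  trace-cmd record -e sunrpc:svc_drop -e sunrpc:svc_defer \
      -e sunrpc:svc_drop_deferred -e sunrpc:svc_revisit_deferred
  trace-cmd report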

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/trace/events/sunrpc.h | 61 +++++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/svc_xprt.c         |  4 +++
 2 files changed, 65 insertions(+)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index f06701b3235c..8a13e3903839 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -475,6 +475,39 @@ TRACE_EVENT(svc_recv,
 			show_rqstp_flags(__entry->flags))
 );
 
+DECLARE_EVENT_CLASS(svc_rqst_event,
+
+	TP_PROTO(struct svc_rqst *rqst),
+
+	TP_ARGS(rqst),
+
+	TP_STRUCT__entry(
+		__field(__be32, xid)
+		__field(unsigned long, flags)
+		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
+	),
+
+	TP_fast_assign(
+		__entry->xid = rqst->rq_xid;
+		__entry->flags = rqst->rq_flags;
+		memcpy(__get_dynamic_array(addr),
+			&rqst->rq_addr, rqst->rq_addrlen);
+	),
+
+	TP_printk("addr=%pIScp rq_xid=0x%x flags=%s",
+		(struct sockaddr *)__get_dynamic_array(addr),
+		be32_to_cpu(__entry->xid),
+		show_rqstp_flags(__entry->flags))
+);
+
+DEFINE_EVENT(svc_rqst_event, svc_defer,
+	TP_PROTO(struct svc_rqst *rqst),
+	TP_ARGS(rqst));
+
+DEFINE_EVENT(svc_rqst_event, svc_drop,
+	TP_PROTO(struct svc_rqst *rqst),
+	TP_ARGS(rqst));
+
 DECLARE_EVENT_CLASS(svc_rqst_status,
 
 	TP_PROTO(struct svc_rqst *rqst, int status),
@@ -639,6 +672,34 @@ TRACE_EVENT(svc_handle_xprt,
 			(struct sockaddr *)__get_dynamic_array(addr) : NULL,
 		__entry->len, show_svc_xprt_flags(__entry->flags))
 );
+
+
+DECLARE_EVENT_CLASS(svc_deferred_event,
+	TP_PROTO(struct svc_deferred_req *dr),
+
+	TP_ARGS(dr),
+
+	TP_STRUCT__entry(
+		__field(__be32, xid)
+		__dynamic_array(unsigned char, addr, dr->addrlen)
+	),
+
+	TP_fast_assign(
+		__entry->xid = *(__be32 *)(dr->args + (dr->xprt_hlen>>2));
+		memcpy(__get_dynamic_array(addr), &dr->addr, dr->addrlen);
+	),
+
+	TP_printk("addr=%pIScp xid=0x%x",
+		(struct sockaddr *)__get_dynamic_array(addr),
+		be32_to_cpu(__entry->xid))
+);
+
+DEFINE_EVENT(svc_deferred_event, svc_drop_deferred,
+	TP_PROTO(struct svc_deferred_req *dr),
+	TP_ARGS(dr));
+DEFINE_EVENT(svc_deferred_event, svc_revisit_deferred,
+	TP_PROTO(struct svc_deferred_req *dr),
+	TP_ARGS(dr));
 #endif /* _TRACE_SUNRPC_H */
 
 #include <trace/define_trace.h>
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 58372076a41a..eccc61237ca9 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -873,6 +873,7 @@ EXPORT_SYMBOL_GPL(svc_recv);
  */
 void svc_drop(struct svc_rqst *rqstp)
 {
+	trace_svc_drop(rqstp);
 	dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
 	svc_xprt_release(rqstp);
 }
@@ -1150,6 +1151,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 		spin_unlock(&xprt->xpt_lock);
 		dprintk("revisit canceled\n");
 		svc_xprt_put(xprt);
+		trace_svc_drop_deferred(dr);
 		kfree(dr);
 		return;
 	}
@@ -1207,6 +1209,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 	set_bit(RQ_DROPME, &rqstp->rq_flags);
 
 	dr->handle.revisit = svc_revisit;
+	trace_svc_defer(rqstp);
 	return &dr->handle;
 }
 
@@ -1247,6 +1250,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 				struct svc_deferred_req,
 				handle.recent);
 		list_del_init(&dr->handle.recent);
+		trace_svc_revisit_deferred(dr);
 	} else
 		clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
 	spin_unlock(&xprt->xpt_lock);
-- 
2.7.4



* [PATCH 05/10] SUNRPC: lock the socket while detaching it
  2016-06-24 14:55     ` [PATCH 04/10] SUNRPC: Add tracepoints for dropped and deferred requests Trond Myklebust
@ 2016-06-24 14:55       ` Trond Myklebust
  2016-06-24 14:55         ` [PATCH 06/10] SUNRPC: Call the default socket callbacks instead of open coding Trond Myklebust
  2016-06-24 21:06         ` [PATCH 05/10] SUNRPC: lock the socket while detaching it J. Bruce Fields
  0 siblings, 2 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Prevent callbacks from triggering while we're detaching the socket.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/svcsock.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index dadfec66dbd8..abe2da602fb8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1611,9 +1611,12 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 	dprintk("svc: svc_sock_detach(%p)\n", svsk);
 
 	/* put back the old socket callbacks */
+	lock_sock(sk);
 	sk->sk_state_change = svsk->sk_ostate;
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
+	sk->sk_user_data = NULL;
+	release_sock(sk);
 
 	wq = sk_sleep(sk);
 	if (sunrpc_waitqueue_active(wq))
-- 
2.7.4



* [PATCH 06/10] SUNRPC: Call the default socket callbacks instead of open coding
  2016-06-24 14:55       ` [PATCH 05/10] SUNRPC: lock the socket while detaching it Trond Myklebust
@ 2016-06-24 14:55         ` Trond Myklebust
  2016-06-24 14:55           ` [PATCH 07/10] SUNRPC: Micro optimisation for svc_data_ready Trond Myklebust
  2016-06-24 21:06         ` [PATCH 05/10] SUNRPC: lock the socket while detaching it J. Bruce Fields
  1 sibling, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Rather than code up our own versions of the socket callbacks, just
call the defaults.
This also allows us to merge svc_udp_data_ready() and svc_tcp_data_ready().

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/svcsock.c | 88 ++++++++++++----------------------------------------
 1 file changed, 19 insertions(+), 69 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index abe2da602fb8..03134708deeb 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -60,7 +60,6 @@
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
 					 int flags);
-static void		svc_udp_data_ready(struct sock *);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
 static void		svc_sock_detach(struct svc_xprt *);
@@ -398,48 +397,21 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
 	return svc_port_is_privileged(svc_addr(rqstp));
 }
 
-static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
-{
-	if (!wq)
-		return false;
-	/*
-	 * There should normally be a memory * barrier here--see
-	 * wq_has_sleeper().
-	 *
-	 * It appears that isn't currently necessary, though, basically
-	 * because callers all appear to have sufficient memory barriers
-	 * between the time the relevant change is made and the
-	 * time they call these callbacks.
-	 *
-	 * The nfsd code itself doesn't actually explicitly wait on
-	 * these waitqueues, but it may wait on them for example in
-	 * sendpage() or sendmsg() calls.  (And those may be the only
-	 * places, since it it uses nonblocking reads.)
-	 *
-	 * Maybe we should add the memory barriers anyway, but these are
-	 * hot paths so we'd need to be convinced there's no sigificant
-	 * penalty.
-	 */
-	return waitqueue_active(wq);
-}
-
 /*
  * INET callback when data has been received on the socket.
  */
-static void svc_udp_data_ready(struct sock *sk)
+static void svc_data_ready(struct sock *sk)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
-	wait_queue_head_t *wq = sk_sleep(sk);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), busy=%d\n",
 			svsk, sk,
 			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+		svsk->sk_odata(sk);
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sunrpc_waitqueue_active(wq))
-		wake_up_interruptible(wq);
 }
 
 /*
@@ -448,19 +420,13 @@ static void svc_udp_data_ready(struct sock *sk)
 static void svc_write_space(struct sock *sk)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data);
-	wait_queue_head_t *wq = sk_sleep(sk);
 
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
 			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+		svsk->sk_owspace(sk);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-
-	if (sunrpc_waitqueue_active(wq)) {
-		dprintk("RPC svc_write_space: someone sleeping on %p\n",
-		       svsk);
-		wake_up_interruptible(wq);
-	}
 }
 
 static int svc_tcp_has_wspace(struct svc_xprt *xprt)
@@ -485,11 +451,15 @@ static void svc_tcp_write_space(struct sock *sk)
 	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
 	struct socket *sock = sk->sk_socket;
 
+	if (!svsk)
+		return;
+
 	if (!sk_stream_is_writeable(sk) || !sock)
 		return;
-	if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
+	if (svc_tcp_has_wspace(&svsk->sk_xprt)) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
-	svc_write_space(sk);
+		svc_write_space(sk);
+	}
 }
 
 static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
@@ -746,7 +716,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
 		      &svsk->sk_xprt, serv);
 	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
-	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
+	svsk->sk_sk->sk_data_ready = svc_data_ready;
 	svsk->sk_sk->sk_write_space = svc_write_space;
 
 	/* initialise setting must have enough space to
@@ -786,11 +756,12 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 static void svc_tcp_listen_data_ready(struct sock *sk)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
-	wait_queue_head_t *wq;
 
 	dprintk("svc: socket %p TCP (listen) state change %d\n",
 		sk, sk->sk_state);
 
+	if (svsk)
+		svsk->sk_odata(sk);
 	/*
 	 * This callback may called twice when a new connection
 	 * is established as a child socket inherits everything
@@ -808,10 +779,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 		} else
 			printk("svc: socket %p: no user data\n", sk);
 	}
-
-	wq = sk_sleep(sk);
-	if (sunrpc_waitqueue_active(wq))
-		wake_up_interruptible_all(wq);
 }
 
 /*
@@ -820,7 +787,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 static void svc_tcp_state_change(struct sock *sk)
 {
 	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
-	wait_queue_head_t *wq = sk_sleep(sk);
 
 	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
 		sk, sk->sk_state, sk->sk_user_data);
@@ -828,26 +794,10 @@ static void svc_tcp_state_change(struct sock *sk)
 	if (!svsk)
 		printk("svc: socket %p: no user data\n", sk);
 	else {
+		svsk->sk_ostate(sk);
 		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sunrpc_waitqueue_active(wq))
-		wake_up_interruptible_all(wq);
-}
-
-static void svc_tcp_data_ready(struct sock *sk)
-{
-	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
-	wait_queue_head_t *wq = sk_sleep(sk);
-
-	dprintk("svc: socket %p TCP data ready (svsk %p)\n",
-		sk, sk->sk_user_data);
-	if (svsk) {
-		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-		svc_xprt_enqueue(&svsk->sk_xprt);
-	}
-	if (sunrpc_waitqueue_active(wq))
-		wake_up_interruptible(wq);
 }
 
 /*
@@ -901,6 +851,11 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	dprintk("%s: connect from %s\n", serv->sv_name,
 		__svc_print_addr(sin, buf, sizeof(buf)));
 
+	/* Reset the inherited callbacks before calling svc_setup_socket */
+	newsock->sk->sk_state_change = svsk->sk_ostate;
+	newsock->sk->sk_data_ready = svsk->sk_odata;
+	newsock->sk->sk_write_space = svsk->sk_owspace;
+
 	/* make sure that a write doesn't block forever when
 	 * low on memory
 	 */
@@ -1357,7 +1312,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 	} else {
 		dprintk("setting up TCP socket for reading\n");
 		sk->sk_state_change = svc_tcp_state_change;
-		sk->sk_data_ready = svc_tcp_data_ready;
+		sk->sk_data_ready = svc_data_ready;
 		sk->sk_write_space = svc_tcp_write_space;
 
 		svsk->sk_reclen = 0;
@@ -1606,7 +1561,6 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 	struct sock *sk = svsk->sk_sk;
-	wait_queue_head_t *wq;
 
 	dprintk("svc: svc_sock_detach(%p)\n", svsk);
 
@@ -1617,10 +1571,6 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 	sk->sk_write_space = svsk->sk_owspace;
 	sk->sk_user_data = NULL;
 	release_sock(sk);
-
-	wq = sk_sleep(sk);
-	if (sunrpc_waitqueue_active(wq))
-		wake_up_interruptible(wq);
 }
 
 /*
-- 
2.7.4



* [PATCH 07/10] SUNRPC: Micro optimisation for svc_data_ready
  2016-06-24 14:55         ` [PATCH 06/10] SUNRPC: Call the default socket callbacks instead of open coding Trond Myklebust
@ 2016-06-24 14:55           ` Trond Myklebust
  2016-06-24 14:55             ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit Trond Myklebust
  0 siblings, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Don't call svc_xprt_enqueue() if the XPT_DATA flag is already set.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/svcsock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03134708deeb..338d6fe1103d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -409,8 +409,8 @@ static void svc_data_ready(struct sock *sk)
 			svsk, sk,
 			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
 		svsk->sk_odata(sk);
-		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-		svc_xprt_enqueue(&svsk->sk_xprt);
+		if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
+			svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 }
 
-- 
2.7.4



* [PATCH 08/10] SUNRPC: Add a server side per-connection limit
  2016-06-24 14:55           ` [PATCH 07/10] SUNRPC: Micro optimisation for svc_data_ready Trond Myklebust
@ 2016-06-24 14:55             ` Trond Myklebust
  2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
                                 ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Allow the user to limit the number of requests serviced through a single
connection.
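
As a usage sketch (the value 16 is only an illustration, not a recommended
setting), the limit can be given on the kernel command line:

  sunrpc.svc_rpc_per_connection_limit=16

or, since the module parameter is created with mode 0644, changed at runtime:

  echo 16 > /sys/module/sunrpc/parameters/svc_rpc_per_connection_limit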

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 Documentation/kernel-parameters.txt |  6 ++++++
 include/linux/sunrpc/svc.h          |  1 +
 include/linux/sunrpc/svc_xprt.h     |  1 +
 net/sunrpc/svc_xprt.c               | 39 ++++++++++++++++++++++++++++++++++---
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 82b42c958d1c..48ba6d2e670a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3832,6 +3832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			using these two parameters to set the minimum and
 			maximum port values.
 
+	sunrpc.svc_rpc_per_connection_limit=
+			[NFS,SUNRPC]
+			Limit the number of requests that the server will
+			process in parallel from a single connection.
+			The default value is 0 (no limit).
+
 	sunrpc.pool_mode=
 			[NFS]
 			Control how the NFS server code allocates CPUs to
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 7ca44fb5b675..7321ae933867 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -268,6 +268,7 @@ struct svc_rqst {
 						 * cache pages */
 #define	RQ_VICTIM	(5)			/* about to be shut down */
 #define	RQ_BUSY		(6)			/* request is busy */
+#define	RQ_DATA		(7)			/* request has data */
 	unsigned long		rq_flags;	/* flags field */
 
 	void *			rq_argp;	/* decoded arguments */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b7dabc4baafd..555c004de51e 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -69,6 +69,7 @@ struct svc_xprt {
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
+	atomic_t		xpt_nr_rqsts;	/* Number of requests */
 	struct mutex		xpt_mutex;	/* to serialize sending data */
 	spinlock_t		xpt_lock;	/* protects sk_deferred
 						 * and xpt_auth_cache */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index eccc61237ca9..cd26d0d9e41f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -21,6 +21,10 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+static unsigned int svc_rpc_per_connection_limit __read_mostly;
+module_param(svc_rpc_per_connection_limit, uint, 0644);
+
+
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -327,12 +331,41 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
 }
 EXPORT_SYMBOL_GPL(svc_print_addr);
 
+static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
+{
+	unsigned int limit = svc_rpc_per_connection_limit;
+	int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
+
+	return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
+}
+
+static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+{
+	if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
+		if (!svc_xprt_slots_in_range(xprt))
+			return false;
+		atomic_inc(&xprt->xpt_nr_rqsts);
+		set_bit(RQ_DATA, &rqstp->rq_flags);
+	}
+	return true;
+}
+
+static void svc_xprt_release_slot(struct svc_rqst *rqstp)
+{
+	struct svc_xprt	*xprt = rqstp->rq_xprt;
+	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
+		atomic_dec(&xprt->xpt_nr_rqsts);
+		svc_xprt_enqueue(xprt);
+	}
+}
+
 static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
 {
 	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
 		return true;
 	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
-		if (xprt->xpt_ops->xpo_has_wspace(xprt))
+		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
+		    svc_xprt_slots_in_range(xprt))
 			return true;
 		trace_svc_xprt_no_write_space(xprt);
 		return false;
@@ -514,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
 
 	rqstp->rq_res.head[0].iov_len = 0;
 	svc_reserve(rqstp, 0);
+	svc_xprt_release_slot(rqstp);
 	rqstp->rq_xprt = NULL;
-
 	svc_xprt_put(xprt);
 }
 
@@ -783,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 			svc_add_new_temp_xprt(serv, newxpt);
 		else
 			module_put(xprt->xpt_class->xcl_owner);
-	} else {
+	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
 		/* XPT_DATA|XPT_DEFERRED case: */
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
 			rqstp, rqstp->rq_pool->sp_id, xprt,
-- 
2.7.4



* [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-06-24 14:55             ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit Trond Myklebust
@ 2016-06-24 14:55               ` Trond Myklebust
  2016-06-24 14:55                 ` [PATCH 10/10] SUNRPC: Remove unused callback xpo_adjust_wspace() Trond Myklebust
                                   ` (2 more replies)
  2016-06-24 21:15               ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit J. Bruce Fields
  2016-07-06 16:16               ` J. Bruce Fields
  2 siblings, 3 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Instead of trying (and failing) to predict how much writeable socket space
will be available to the RPC call, just fall back to the simple model of
deferring processing until the socket is uncongested.

If a limit is needed, then set the hard per-connection limit.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 net/sunrpc/svcsock.c | 47 ++++-------------------------------------------
 1 file changed, 4 insertions(+), 43 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 338d6fe1103d..bc3ef0734f2f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -431,43 +431,11 @@ static void svc_write_space(struct sock *sk)
 
 static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 {
-	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
-	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
-	int required;
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 
 	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
 		return 1;
-	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
-	if (sk_stream_wspace(svsk->sk_sk) >= required ||
-	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
-	     atomic_read(&xprt->xpt_reserved) == 0))
-		return 1;
-	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-	return 0;
-}
-
-static void svc_tcp_write_space(struct sock *sk)
-{
-	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
-	struct socket *sock = sk->sk_socket;
-
-	if (!svsk)
-		return;
-
-	if (!sk_stream_is_writeable(sk) || !sock)
-		return;
-	if (svc_tcp_has_wspace(&svsk->sk_xprt)) {
-		clear_bit(SOCK_NOSPACE, &sock->flags);
-		svc_write_space(sk);
-	}
-}
-
-static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
-{
-	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-
-	if (svc_tcp_has_wspace(xprt))
-		clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 }
 
 /*
@@ -1272,7 +1240,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
 	.xpo_secure_port = svc_sock_secure_port,
-	.xpo_adjust_wspace = svc_tcp_adjust_wspace,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
@@ -1313,7 +1280,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 		dprintk("setting up TCP socket for reading\n");
 		sk->sk_state_change = svc_tcp_state_change;
 		sk->sk_data_ready = svc_data_ready;
-		sk->sk_write_space = svc_tcp_write_space;
+		sk->sk_write_space = svc_write_space;
 
 		svsk->sk_reclen = 0;
 		svsk->sk_tcplen = 0;
@@ -1383,14 +1350,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)
 		svc_udp_init(svsk, serv);
-	else {
-		/* initialise setting must have enough space to
-		 * receive and respond to one request.
-		 */
-		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
-					4 * serv->sv_max_mesg);
+	else
 		svc_tcp_init(svsk, serv);
-	}
 
 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
 				svsk, svsk->sk_sk);
-- 
2.7.4



* [PATCH 10/10] SUNRPC: Remove unused callback xpo_adjust_wspace()
  2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
@ 2016-06-24 14:55                 ` Trond Myklebust
  2016-06-24 21:18                 ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation J. Bruce Fields
  2016-07-06 19:53                 ` J. Bruce Fields
  2 siblings, 0 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 14:55 UTC (permalink / raw)
  To: bfields; +Cc: linux-nfs

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/sunrpc/svc_xprt.h | 1 -
 net/sunrpc/svc_xprt.c           | 2 --
 2 files changed, 3 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 555c004de51e..cb6e4037e590 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -25,7 +25,6 @@ struct svc_xprt_ops {
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
 	int		(*xpo_secure_port)(struct svc_rqst *);
-	void		(*xpo_adjust_wspace)(struct svc_xprt *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cd26d0d9e41f..2503392b95ad 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -515,8 +515,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 		atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
 		rqstp->rq_reserved = space;
 
-		if (xprt->xpt_ops->xpo_adjust_wspace)
-			xprt->xpt_ops->xpo_adjust_wspace(xprt);
 		svc_xprt_enqueue(xprt);
 	}
 }
-- 
2.7.4



* Re: [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
  2016-06-24 14:55 [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status Trond Myklebust
  2016-06-24 14:55 ` [PATCH 02/10] SUNRPC: Don't allocate a full sockaddr_storage for tracing Trond Myklebust
@ 2016-06-24 18:28 ` J. Bruce Fields
  1 sibling, 0 replies; 22+ messages in thread
From: J. Bruce Fields @ 2016-06-24 18:28 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Jun 24, 2016 at 10:55:43AM -0400, Trond Myklebust wrote:
> There is no guarantee that either the request or the svc_xprt still exists
> by the time we get round to printing the trace message.

Thanks.  I'll also add:

  Cc: stable@vger.kernel.org
  Fixes: 83a712e0afef "sunrpc: add some tracepoints around ..."

--b.

> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  include/trace/events/sunrpc.h | 17 ++++++++++-------
>  1 file changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index 003dca933803..e82493d07a86 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -455,20 +455,22 @@ TRACE_EVENT(svc_recv,
>  	TP_ARGS(rqst, status),
>  
>  	TP_STRUCT__entry(
> -		__field(struct sockaddr *, addr)
>  		__field(__be32, xid)
>  		__field(int, status)
>  		__field(unsigned long, flags)
> +		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
>  	),
>  
>  	TP_fast_assign(
> -		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
>  		__entry->xid = status > 0 ? rqst->rq_xid : 0;
>  		__entry->status = status;
>  		__entry->flags = rqst->rq_flags;
> +		memcpy(__get_dynamic_array(addr),
> +			&rqst->rq_addr, rqst->rq_addrlen);
>  	),
>  
> -	TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr,
> +	TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s",
> +			(struct sockaddr *)__get_dynamic_array(addr),
>  			be32_to_cpu(__entry->xid), __entry->status,
>  			show_rqstp_flags(__entry->flags))
>  );
> @@ -480,22 +482,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
>  	TP_ARGS(rqst, status),
>  
>  	TP_STRUCT__entry(
> -		__field(struct sockaddr *, addr)
>  		__field(__be32, xid)
> -		__field(int, dropme)
>  		__field(int, status)
>  		__field(unsigned long, flags)
> +		__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
>  	),
>  
>  	TP_fast_assign(
> -		__entry->addr = (struct sockaddr *)&rqst->rq_addr;
>  		__entry->xid = rqst->rq_xid;
>  		__entry->status = status;
>  		__entry->flags = rqst->rq_flags;
> +		memcpy(__get_dynamic_array(addr),
> +			&rqst->rq_addr, rqst->rq_addrlen);
>  	),
>  
>  	TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s",
> -		__entry->addr, be32_to_cpu(__entry->xid),
> +		(struct sockaddr *)__get_dynamic_array(addr),
> +		be32_to_cpu(__entry->xid),
>  		__entry->status, show_rqstp_flags(__entry->flags))
>  );
>  
> -- 
> 2.7.4


* Re: [PATCH 05/10] SUNRPC: lock the socket while detaching it
  2016-06-24 14:55       ` [PATCH 05/10] SUNRPC: lock the socket while detaching it Trond Myklebust
  2016-06-24 14:55         ` [PATCH 06/10] SUNRPC: Call the default socket callbacks instead of open coding Trond Myklebust
@ 2016-06-24 21:06         ` J. Bruce Fields
  2016-06-24 21:21           ` Trond Myklebust
  1 sibling, 1 reply; 22+ messages in thread
From: J. Bruce Fields @ 2016-06-24 21:06 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Jun 24, 2016 at 10:55:47AM -0400, Trond Myklebust wrote:
> Prevent callbacks from triggering while we're detaching the socket.

What motivated this?  Do we have a bug without it?

--b.

> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  net/sunrpc/svcsock.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index dadfec66dbd8..abe2da602fb8 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -1611,9 +1611,12 @@ static void svc_sock_detach(struct svc_xprt *xprt)
>  	dprintk("svc: svc_sock_detach(%p)\n", svsk);
>  
>  	/* put back the old socket callbacks */
> +	lock_sock(sk);
>  	sk->sk_state_change = svsk->sk_ostate;
>  	sk->sk_data_ready = svsk->sk_odata;
>  	sk->sk_write_space = svsk->sk_owspace;
> +	sk->sk_user_data = NULL;
> +	release_sock(sk);
>  
>  	wq = sk_sleep(sk);
>  	if (sunrpc_waitqueue_active(wq))
> -- 
> 2.7.4


* Re: [PATCH 08/10] SUNRPC: Add a server side per-connection limit
  2016-06-24 14:55             ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit Trond Myklebust
  2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
@ 2016-06-24 21:15               ` J. Bruce Fields
  2016-06-24 21:24                 ` Trond Myklebust
  2016-07-06 16:16               ` J. Bruce Fields
  2 siblings, 1 reply; 22+ messages in thread
From: J. Bruce Fields @ 2016-06-24 21:15 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Jun 24, 2016 at 10:55:50AM -0400, Trond Myklebust wrote:
> Allow the user to limit the number of requests serviced through a single
> connection.

Could you give a little detail about the problem you encountered and how
this solved it?

--b.

> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  Documentation/kernel-parameters.txt |  6 ++++++
>  include/linux/sunrpc/svc.h          |  1 +
>  include/linux/sunrpc/svc_xprt.h     |  1 +
>  net/sunrpc/svc_xprt.c               | 39 ++++++++++++++++++++++++++++++++++---
>  4 files changed, 44 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 82b42c958d1c..48ba6d2e670a 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -3832,6 +3832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  			using these two parameters to set the minimum and
>  			maximum port values.
>  
> +	sunrpc.svc_rpc_per_connection_limit=
> +			[NFS,SUNRPC]
> +			Limit the number of requests that the server will
> +			process in parallel from a single connection.
> +			The default value is 0 (no limit).
> +
>  	sunrpc.pool_mode=
>  			[NFS]
>  			Control how the NFS server code allocates CPUs to
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 7ca44fb5b675..7321ae933867 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -268,6 +268,7 @@ struct svc_rqst {
>  						 * cache pages */
>  #define	RQ_VICTIM	(5)			/* about to be shut down */
>  #define	RQ_BUSY		(6)			/* request is busy */
> +#define	RQ_DATA		(7)			/* request has data */
>  	unsigned long		rq_flags;	/* flags field */
>  
>  	void *			rq_argp;	/* decoded arguments */
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index b7dabc4baafd..555c004de51e 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -69,6 +69,7 @@ struct svc_xprt {
>  
>  	struct svc_serv		*xpt_server;	/* service for transport */
>  	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
> +	atomic_t		xpt_nr_rqsts;	/* Number of requests */
>  	struct mutex		xpt_mutex;	/* to serialize sending data */
>  	spinlock_t		xpt_lock;	/* protects sk_deferred
>  						 * and xpt_auth_cache */
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index eccc61237ca9..cd26d0d9e41f 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -21,6 +21,10 @@
>  
>  #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
>  
> +static unsigned int svc_rpc_per_connection_limit __read_mostly;
> +module_param(svc_rpc_per_connection_limit, uint, 0644);
> +
> +
>  static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
>  static int svc_deferred_recv(struct svc_rqst *rqstp);
>  static struct cache_deferred_req *svc_defer(struct cache_req *req);
> @@ -327,12 +331,41 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
>  }
>  EXPORT_SYMBOL_GPL(svc_print_addr);
>  
> +static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
> +{
> +	unsigned int limit = svc_rpc_per_connection_limit;
> +	int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
> +
> +	return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
> +}
> +
> +static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> +{
> +	if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
> +		if (!svc_xprt_slots_in_range(xprt))
> +			return false;
> +		atomic_inc(&xprt->xpt_nr_rqsts);
> +		set_bit(RQ_DATA, &rqstp->rq_flags);
> +	}
> +	return true;
> +}
> +
> +static void svc_xprt_release_slot(struct svc_rqst *rqstp)
> +{
> +	struct svc_xprt	*xprt = rqstp->rq_xprt;
> +	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
> +		atomic_dec(&xprt->xpt_nr_rqsts);
> +		svc_xprt_enqueue(xprt);
> +	}
> +}
> +
>  static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
>  {
>  	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
>  		return true;
>  	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
> -		if (xprt->xpt_ops->xpo_has_wspace(xprt))
> +		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
> +		    svc_xprt_slots_in_range(xprt))
>  			return true;
>  		trace_svc_xprt_no_write_space(xprt);
>  		return false;
> @@ -514,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
>  
>  	rqstp->rq_res.head[0].iov_len = 0;
>  	svc_reserve(rqstp, 0);
> +	svc_xprt_release_slot(rqstp);
>  	rqstp->rq_xprt = NULL;
> -
>  	svc_xprt_put(xprt);
>  }
>  
> @@ -783,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>  			svc_add_new_temp_xprt(serv, newxpt);
>  		else
>  			module_put(xprt->xpt_class->xcl_owner);
> -	} else {
> +	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
>  		/* XPT_DATA|XPT_DEFERRED case: */
>  		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
>  			rqstp, rqstp->rq_pool->sp_id, xprt,
> -- 
> 2.7.4


* Re: [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
  2016-06-24 14:55                 ` [PATCH 10/10] SUNRPC: Remove unused callback xpo_adjust_wspace() Trond Myklebust
@ 2016-06-24 21:18                 ` J. Bruce Fields
  2016-06-24 21:31                   ` Trond Myklebust
  2016-07-06 19:53                 ` J. Bruce Fields
  2 siblings, 1 reply; 22+ messages in thread
From: J. Bruce Fields @ 2016-06-24 21:18 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Jun 24, 2016 at 10:55:51AM -0400, Trond Myklebust wrote:
> Instead of trying (and failing) to predict how much writeable socket space
> will be available to the RPC call, just fall back to the simple model of
> deferring processing until the socket is uncongested.

OK, it would be a relief to get rid of that, I guess that explains the
previous patch.

But was there some specific reason you were running into this?

In general I could use a little more idea of the motivation for these
patches.

--b.

> 
> If a limit is needed, then set the hard per-connection limit.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  net/sunrpc/svcsock.c | 47 ++++-------------------------------------------
>  1 file changed, 4 insertions(+), 43 deletions(-)
> 
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 338d6fe1103d..bc3ef0734f2f 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -431,43 +431,11 @@ static void svc_write_space(struct sock *sk)
>  
>  static int svc_tcp_has_wspace(struct svc_xprt *xprt)
>  {
> -	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
> -	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
> -	int required;
> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>  
>  	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
>  		return 1;
> -	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
> -	if (sk_stream_wspace(svsk->sk_sk) >= required ||
> -	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
> -	     atomic_read(&xprt->xpt_reserved) == 0))
> -		return 1;
> -	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -	return 0;
> -}
> -
> -static void svc_tcp_write_space(struct sock *sk)
> -{
> -	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
> -	struct socket *sock = sk->sk_socket;
> -
> -	if (!svsk)
> -		return;
> -
> -	if (!sk_stream_is_writeable(sk) || !sock)
> -		return;
> -	if (svc_tcp_has_wspace(&svsk->sk_xprt)) {
> -		clear_bit(SOCK_NOSPACE, &sock->flags);
> -		svc_write_space(sk);
> -	}
> -}
> -
> -static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
> -{
> -	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> -
> -	if (svc_tcp_has_wspace(xprt))
> -		clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>  }
>  
>  /*
> @@ -1272,7 +1240,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_has_wspace = svc_tcp_has_wspace,
>  	.xpo_accept = svc_tcp_accept,
>  	.xpo_secure_port = svc_sock_secure_port,
> -	.xpo_adjust_wspace = svc_tcp_adjust_wspace,
>  };
>  
>  static struct svc_xprt_class svc_tcp_class = {
> @@ -1313,7 +1280,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
>  		dprintk("setting up TCP socket for reading\n");
>  		sk->sk_state_change = svc_tcp_state_change;
>  		sk->sk_data_ready = svc_data_ready;
> -		sk->sk_write_space = svc_tcp_write_space;
> +		sk->sk_write_space = svc_write_space;
>  
>  		svsk->sk_reclen = 0;
>  		svsk->sk_tcplen = 0;
> @@ -1383,14 +1350,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>  	/* Initialize the socket */
>  	if (sock->type == SOCK_DGRAM)
>  		svc_udp_init(svsk, serv);
> -	else {
> -		/* initialise setting must have enough space to
> -		 * receive and respond to one request.
> -		 */
> -		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
> -					4 * serv->sv_max_mesg);
> +	else
>  		svc_tcp_init(svsk, serv);
> -	}
>  
>  	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
>  				svsk, svsk->sk_sk);
> -- 
> 2.7.4


* Re: [PATCH 05/10] SUNRPC: lock the socket while detaching it
  2016-06-24 21:06         ` [PATCH 05/10] SUNRPC: lock the socket while detaching it J. Bruce Fields
@ 2016-06-24 21:21           ` Trond Myklebust
  0 siblings, 0 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 21:21 UTC (permalink / raw)
  To: Fields Bruce; +Cc: linux-nfs


> On Jun 24, 2016, at 17:06, J. Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Fri, Jun 24, 2016 at 10:55:47AM -0400, Trond Myklebust wrote:
>> Prevent callbacks from triggering while we're detaching the socket.
> 
> What motivated this?  Do we have a bug without it?

Code inspection. Without locking, what ensures that you don't have another processor calling into, say, svc_tcp_state_change() while this processor is tearing down your svc_sock?

> 
> --b.
> 
>> 
>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> ---
>> net/sunrpc/svcsock.c | 3 +++
>> 1 file changed, 3 insertions(+)
>> 
>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>> index dadfec66dbd8..abe2da602fb8 100644
>> --- a/net/sunrpc/svcsock.c
>> +++ b/net/sunrpc/svcsock.c
>> @@ -1611,9 +1611,12 @@ static void svc_sock_detach(struct svc_xprt *xprt)
>> 	dprintk("svc: svc_sock_detach(%p)\n", svsk);
>> 
>> 	/* put back the old socket callbacks */
>> +	lock_sock(sk);
>> 	sk->sk_state_change = svsk->sk_ostate;
>> 	sk->sk_data_ready = svsk->sk_odata;
>> 	sk->sk_write_space = svsk->sk_owspace;
>> +	sk->sk_user_data = NULL;
>> +	release_sock(sk);
>> 
>> 	wq = sk_sleep(sk);
>> 	if (sunrpc_waitqueue_active(wq))
>> -- 
>> 2.7.4
> 



* Re: [PATCH 08/10] SUNRPC: Add a server side per-connection limit
  2016-06-24 21:15               ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit J. Bruce Fields
@ 2016-06-24 21:24                 ` Trond Myklebust
  0 siblings, 0 replies; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 21:24 UTC (permalink / raw)
  To: Fields Bruce; +Cc: linux-nfs


> On Jun 24, 2016, at 17:15, J. Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Fri, Jun 24, 2016 at 10:55:50AM -0400, Trond Myklebust wrote:
>> Allow the user to limit the number of requests serviced through a single
>> connection.
> 
> Could you give a little detail about the problem you encountered and how
> this solved it?

If you have a large number of clients, some of which are faster than others, then you can end up with a starvation problem, with the fast clients hogging all the threads by virtue of being able to pump out lots more requests. Limiting the number of threads each client (or at least each connection) may use helps to mitigate the problem.

> 
> --b.
> 
>> 
>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> ---
>> Documentation/kernel-parameters.txt |  6 ++++++
>> include/linux/sunrpc/svc.h          |  1 +
>> include/linux/sunrpc/svc_xprt.h     |  1 +
>> net/sunrpc/svc_xprt.c               | 39 ++++++++++++++++++++++++++++++++++---
>> 4 files changed, 44 insertions(+), 3 deletions(-)
>> 
>> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
>> index 82b42c958d1c..48ba6d2e670a 100644
>> --- a/Documentation/kernel-parameters.txt
>> +++ b/Documentation/kernel-parameters.txt
>> @@ -3832,6 +3832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>> 			using these two parameters to set the minimum and
>> 			maximum port values.
>> 
>> +	sunrpc.svc_rpc_per_connection_limit=
>> +			[NFS,SUNRPC]
>> +			Limit the number of requests that the server will
>> +			process in parallel from a single connection.
>> +			The default value is 0 (no limit).
>> +
>> 	sunrpc.pool_mode=
>> 			[NFS]
>> 			Control how the NFS server code allocates CPUs to
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index 7ca44fb5b675..7321ae933867 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -268,6 +268,7 @@ struct svc_rqst {
>> 						 * cache pages */
>> #define	RQ_VICTIM	(5)			/* about to be shut down */
>> #define	RQ_BUSY		(6)			/* request is busy */
>> +#define	RQ_DATA		(7)			/* request has data */
>> 	unsigned long		rq_flags;	/* flags field */
>> 
>> 	void *			rq_argp;	/* decoded arguments */
>> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
>> index b7dabc4baafd..555c004de51e 100644
>> --- a/include/linux/sunrpc/svc_xprt.h
>> +++ b/include/linux/sunrpc/svc_xprt.h
>> @@ -69,6 +69,7 @@ struct svc_xprt {
>> 
>> 	struct svc_serv		*xpt_server;	/* service for transport */
>> 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
>> +	atomic_t		xpt_nr_rqsts;	/* Number of requests */
>> 	struct mutex		xpt_mutex;	/* to serialize sending data */
>> 	spinlock_t		xpt_lock;	/* protects sk_deferred
>> 						 * and xpt_auth_cache */
>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>> index eccc61237ca9..cd26d0d9e41f 100644
>> --- a/net/sunrpc/svc_xprt.c
>> +++ b/net/sunrpc/svc_xprt.c
>> @@ -21,6 +21,10 @@
>> 
>> #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
>> 
>> +static unsigned int svc_rpc_per_connection_limit __read_mostly;
>> +module_param(svc_rpc_per_connection_limit, uint, 0644);
>> +
>> +
>> static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
>> static int svc_deferred_recv(struct svc_rqst *rqstp);
>> static struct cache_deferred_req *svc_defer(struct cache_req *req);
>> @@ -327,12 +331,41 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
>> }
>> EXPORT_SYMBOL_GPL(svc_print_addr);
>> 
>> +static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
>> +{
>> +	unsigned int limit = svc_rpc_per_connection_limit;
>> +	int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
>> +
>> +	return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
>> +}
>> +
>> +static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>> +{
>> +	if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
>> +		if (!svc_xprt_slots_in_range(xprt))
>> +			return false;
>> +		atomic_inc(&xprt->xpt_nr_rqsts);
>> +		set_bit(RQ_DATA, &rqstp->rq_flags);
>> +	}
>> +	return true;
>> +}
>> +
>> +static void svc_xprt_release_slot(struct svc_rqst *rqstp)
>> +{
>> +	struct svc_xprt	*xprt = rqstp->rq_xprt;
>> +	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
>> +		atomic_dec(&xprt->xpt_nr_rqsts);
>> +		svc_xprt_enqueue(xprt);
>> +	}
>> +}
>> +
>> static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
>> {
>> 	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
>> 		return true;
>> 	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
>> -		if (xprt->xpt_ops->xpo_has_wspace(xprt))
>> +		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
>> +		    svc_xprt_slots_in_range(xprt))
>> 			return true;
>> 		trace_svc_xprt_no_write_space(xprt);
>> 		return false;
>> @@ -514,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
>> 
>> 	rqstp->rq_res.head[0].iov_len = 0;
>> 	svc_reserve(rqstp, 0);
>> +	svc_xprt_release_slot(rqstp);
>> 	rqstp->rq_xprt = NULL;
>> -
>> 	svc_xprt_put(xprt);
>> }
>> 
>> @@ -783,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>> 			svc_add_new_temp_xprt(serv, newxpt);
>> 		else
>> 			module_put(xprt->xpt_class->xcl_owner);
>> -	} else {
>> +	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
>> 		/* XPT_DATA|XPT_DEFERRED case: */
>> 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
>> 			rqstp, rqstp->rq_pool->sp_id, xprt,
>> -- 
>> 2.7.4
> 



* Re: [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-06-24 21:18                 ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation J. Bruce Fields
@ 2016-06-24 21:31                   ` Trond Myklebust
  2016-07-06 20:17                     ` Fields Bruce
  0 siblings, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-06-24 21:31 UTC (permalink / raw)
  To: Fields Bruce; +Cc: linux-nfs


> On Jun 24, 2016, at 17:18, J. Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Fri, Jun 24, 2016 at 10:55:51AM -0400, Trond Myklebust wrote:
>> Instead of trying (and failing) to predict how much writeable socket space
>> will be available to the RPC call, just fall back to the simple model of
>> deferring processing until the socket is uncongested.
> 
> OK, it would be a relief to get rid of that, I guess that explains the
> previous patch.
> 
> But was there some specific reason you were running into this?

I’ve been testing on a 40GigE network, which easily hits this bottleneck. The result is that each connection saturates long before we’re anywhere near saturating the network or even the disk. So we find ourselves with a server that can keep scaling up to several hundred clients, but with each client seeing the same low performance irrespective of the total number of clients.

> 
> In general I could use a little more idea of the motivation for these
> patches.
> 
> --b.
> 
>> 
>> If a limit is needed, then set the hard per-connection limit.
>> 
>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
>> ---
>> net/sunrpc/svcsock.c | 47 ++++-------------------------------------------
>> 1 file changed, 4 insertions(+), 43 deletions(-)
>> 
>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>> index 338d6fe1103d..bc3ef0734f2f 100644
>> --- a/net/sunrpc/svcsock.c
>> +++ b/net/sunrpc/svcsock.c
>> @@ -431,43 +431,11 @@ static void svc_write_space(struct sock *sk)
>> 
>> static int svc_tcp_has_wspace(struct svc_xprt *xprt)
>> {
>> -	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
>> -	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
>> -	int required;
>> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>> 
>> 	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
>> 		return 1;
>> -	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
>> -	if (sk_stream_wspace(svsk->sk_sk) >= required ||
>> -	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
>> -	     atomic_read(&xprt->xpt_reserved) == 0))
>> -		return 1;
>> -	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>> -	return 0;
>> -}
>> -
>> -static void svc_tcp_write_space(struct sock *sk)
>> -{
>> -	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
>> -	struct socket *sock = sk->sk_socket;
>> -
>> -	if (!svsk)
>> -		return;
>> -
>> -	if (!sk_stream_is_writeable(sk) || !sock)
>> -		return;
>> -	if (svc_tcp_has_wspace(&svsk->sk_xprt)) {
>> -		clear_bit(SOCK_NOSPACE, &sock->flags);
>> -		svc_write_space(sk);
>> -	}
>> -}
>> -
>> -static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
>> -{
>> -	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>> -
>> -	if (svc_tcp_has_wspace(xprt))
>> -		clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>> +	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>> }
>> 
>> /*
>> @@ -1272,7 +1240,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
>> 	.xpo_has_wspace = svc_tcp_has_wspace,
>> 	.xpo_accept = svc_tcp_accept,
>> 	.xpo_secure_port = svc_sock_secure_port,
>> -	.xpo_adjust_wspace = svc_tcp_adjust_wspace,
>> };
>> 
>> static struct svc_xprt_class svc_tcp_class = {
>> @@ -1313,7 +1280,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
>> 		dprintk("setting up TCP socket for reading\n");
>> 		sk->sk_state_change = svc_tcp_state_change;
>> 		sk->sk_data_ready = svc_data_ready;
>> -		sk->sk_write_space = svc_tcp_write_space;
>> +		sk->sk_write_space = svc_write_space;
>> 
>> 		svsk->sk_reclen = 0;
>> 		svsk->sk_tcplen = 0;
>> @@ -1383,14 +1350,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>> 	/* Initialize the socket */
>> 	if (sock->type == SOCK_DGRAM)
>> 		svc_udp_init(svsk, serv);
>> -	else {
>> -		/* initialise setting must have enough space to
>> -		 * receive and respond to one request.
>> -		 */
>> -		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
>> -					4 * serv->sv_max_mesg);
>> +	else
>> 		svc_tcp_init(svsk, serv);
>> -	}
>> 
>> 	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
>> 				svsk, svsk->sk_sk);
>> -- 
>> 2.7.4
> 


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 08/10] SUNRPC: Add a server side per-connection limit
  2016-06-24 14:55             ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit Trond Myklebust
  2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
  2016-06-24 21:15               ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit J. Bruce Fields
@ 2016-07-06 16:16               ` J. Bruce Fields
  2 siblings, 0 replies; 22+ messages in thread
From: J. Bruce Fields @ 2016-07-06 16:16 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

The patch looks good; just a couple of notes to myself:

On Fri, Jun 24, 2016 at 10:55:50AM -0400, Trond Myklebust wrote:
> Allow the user to limit the number of requests serviced through a single
> connection.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  Documentation/kernel-parameters.txt |  6 ++++++
>  include/linux/sunrpc/svc.h          |  1 +
>  include/linux/sunrpc/svc_xprt.h     |  1 +
>  net/sunrpc/svc_xprt.c               | 39 ++++++++++++++++++++++++++++++++++---
>  4 files changed, 44 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index 82b42c958d1c..48ba6d2e670a 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -3832,6 +3832,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>  			using these two parameters to set the minimum and
>  			maximum port values.
>  
> +	sunrpc.svc_rpc_per_connection_limit=
> +			[NFS,SUNRPC]
> +			Limit the number of requests that the server will
> +			process in parallel from a single connection.
> +			The default value is 0 (no limit).
> +

Since every connection belongs to only one network namespace, we could
easily impose a different limit per network namespace.  But module
parameters are stuck being global.

On the other hand, a module parameter may be simpler than the
alternatives, and we can always add a second interface later in the
unlikely event somebody cares about this, so.... OK.
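
As a rough illustration of the interface in question (the value here is
arbitrary): the limit can be set at boot time with

	sunrpc.svc_rpc_per_connection_limit=16

and, because the parameter is declared with 0644 permissions, it should
also be adjustable at run time through

	/sys/module/sunrpc/parameters/svc_rpc_per_connection_limit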

>  	sunrpc.pool_mode=
>  			[NFS]
>  			Control how the NFS server code allocates CPUs to
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 7ca44fb5b675..7321ae933867 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -268,6 +268,7 @@ struct svc_rqst {
>  						 * cache pages */
>  #define	RQ_VICTIM	(5)			/* about to be shut down */
>  #define	RQ_BUSY		(6)			/* request is busy */
> +#define	RQ_DATA		(7)			/* request has data */
>  	unsigned long		rq_flags;	/* flags field */
>  
>  	void *			rq_argp;	/* decoded arguments */
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index b7dabc4baafd..555c004de51e 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -69,6 +69,7 @@ struct svc_xprt {
>  
>  	struct svc_serv		*xpt_server;	/* service for transport */
>  	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
> +	atomic_t		xpt_nr_rqsts;	/* Number of requests */
>  	struct mutex		xpt_mutex;	/* to serialize sending data */
>  	spinlock_t		xpt_lock;	/* protects sk_deferred
>  						 * and xpt_auth_cache */
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index eccc61237ca9..cd26d0d9e41f 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -21,6 +21,10 @@
>  
>  #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
>  
> +static unsigned int svc_rpc_per_connection_limit __read_mostly;
> +module_param(svc_rpc_per_connection_limit, uint, 0644);
> +
> +
>  static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
>  static int svc_deferred_recv(struct svc_rqst *rqstp);
>  static struct cache_deferred_req *svc_defer(struct cache_req *req);
> @@ -327,12 +331,41 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
>  }
>  EXPORT_SYMBOL_GPL(svc_print_addr);
>  
> +static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
> +{
> +	unsigned int limit = svc_rpc_per_connection_limit;
> +	int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
> +
> +	return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
> +}
> +
> +static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> +{
> +	if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
> +		if (!svc_xprt_slots_in_range(xprt))
> +			return false;
> +		atomic_inc(&xprt->xpt_nr_rqsts);
> +		set_bit(RQ_DATA, &rqstp->rq_flags);

The separate test_bit() and set_bit() had me scratching my head for a
moment....  OK, all of the RQ_DATA uses are only from the unique thread
associated with this rqstp, so there should be no races (and the
test_and_clear_bit() below is just for convenience, I guess, we don't
really need it to be atomic).
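
A commented paraphrase of the reservation path above may make that
argument easier to follow (this is a sketch restating the patch, not new
kernel code; it assumes each svc_rqst is only ever touched by the one
nfsd thread that owns it):

static bool reserve_slot_sketch(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
	/* RQ_DATA lives in rqstp->rq_flags, which only the owning thread
	 * reads or writes, so the unlocked test_bit()/set_bit() pair
	 * cannot race with itself. */
	if (test_bit(RQ_DATA, &rqstp->rq_flags))
		return true;			/* slot already charged */

	if (!svc_xprt_slots_in_range(xprt))
		return false;			/* per-connection limit reached */

	/* xpt_nr_rqsts is shared between threads, so it does need to be
	 * manipulated atomically. */
	atomic_inc(&xprt->xpt_nr_rqsts);
	set_bit(RQ_DATA, &rqstp->rq_flags);
	return true;
}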

--b.

> +	}
> +	return true;
> +}
> +
> +static void svc_xprt_release_slot(struct svc_rqst *rqstp)
> +{
> +	struct svc_xprt	*xprt = rqstp->rq_xprt;
> +	if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
> +		atomic_dec(&xprt->xpt_nr_rqsts);
> +		svc_xprt_enqueue(xprt);
> +	}
> +}
> +
>  static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
>  {
>  	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
>  		return true;
>  	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
> -		if (xprt->xpt_ops->xpo_has_wspace(xprt))
> +		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
> +		    svc_xprt_slots_in_range(xprt))
>  			return true;
>  		trace_svc_xprt_no_write_space(xprt);
>  		return false;
> @@ -514,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
>  
>  	rqstp->rq_res.head[0].iov_len = 0;
>  	svc_reserve(rqstp, 0);
> +	svc_xprt_release_slot(rqstp);
>  	rqstp->rq_xprt = NULL;
> -
>  	svc_xprt_put(xprt);
>  }
>  
> @@ -783,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
>  			svc_add_new_temp_xprt(serv, newxpt);
>  		else
>  			module_put(xprt->xpt_class->xcl_owner);
> -	} else {
> +	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
>  		/* XPT_DATA|XPT_DEFERRED case: */
>  		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
>  			rqstp, rqstp->rq_pool->sp_id, xprt,
> -- 
> 2.7.4

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
  2016-06-24 14:55                 ` [PATCH 10/10] SUNRPC: Remove unused callback xpo_adjust_wspace() Trond Myklebust
  2016-06-24 21:18                 ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation J. Bruce Fields
@ 2016-07-06 19:53                 ` J. Bruce Fields
  2016-07-06 20:11                   ` Trond Myklebust
  2 siblings, 1 reply; 22+ messages in thread
From: J. Bruce Fields @ 2016-07-06 19:53 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Jun 24, 2016 at 10:55:51AM -0400, Trond Myklebust wrote:
> Instead of trying (and failing) to predict how much writeable socket space
> will be available to the RPC call, just fall back to the simple model of
> deferring processing until the socket is uncongested.
> 
> If a limit is needed, then set the hard per-connection limit.

I was hoping this would be an opportunity to get rid of even more code,
but there's still the udp case.  Do we actually need that?

--b.


> 
> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
> ---
>  net/sunrpc/svcsock.c | 47 ++++-------------------------------------------
>  1 file changed, 4 insertions(+), 43 deletions(-)
> 
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 338d6fe1103d..bc3ef0734f2f 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -431,43 +431,11 @@ static void svc_write_space(struct sock *sk)
>  
>  static int svc_tcp_has_wspace(struct svc_xprt *xprt)
>  {
> -	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
> -	struct svc_serv *serv = svsk->sk_xprt.xpt_server;
> -	int required;
> +	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>  
>  	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
>  		return 1;
> -	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
> -	if (sk_stream_wspace(svsk->sk_sk) >= required ||
> -	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
> -	     atomic_read(&xprt->xpt_reserved) == 0))
> -		return 1;
> -	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -	return 0;
> -}
> -
> -static void svc_tcp_write_space(struct sock *sk)
> -{
> -	struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
> -	struct socket *sock = sk->sk_socket;
> -
> -	if (!svsk)
> -		return;
> -
> -	if (!sk_stream_is_writeable(sk) || !sock)
> -		return;
> -	if (svc_tcp_has_wspace(&svsk->sk_xprt)) {
> -		clear_bit(SOCK_NOSPACE, &sock->flags);
> -		svc_write_space(sk);
> -	}
> -}
> -
> -static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
> -{
> -	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
> -
> -	if (svc_tcp_has_wspace(xprt))
> -		clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
>  }
>  
>  /*
> @@ -1272,7 +1240,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_has_wspace = svc_tcp_has_wspace,
>  	.xpo_accept = svc_tcp_accept,
>  	.xpo_secure_port = svc_sock_secure_port,
> -	.xpo_adjust_wspace = svc_tcp_adjust_wspace,
>  };
>  
>  static struct svc_xprt_class svc_tcp_class = {
> @@ -1313,7 +1280,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
>  		dprintk("setting up TCP socket for reading\n");
>  		sk->sk_state_change = svc_tcp_state_change;
>  		sk->sk_data_ready = svc_data_ready;
> -		sk->sk_write_space = svc_tcp_write_space;
> +		sk->sk_write_space = svc_write_space;
>  
>  		svsk->sk_reclen = 0;
>  		svsk->sk_tcplen = 0;
> @@ -1383,14 +1350,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>  	/* Initialize the socket */
>  	if (sock->type == SOCK_DGRAM)
>  		svc_udp_init(svsk, serv);
> -	else {
> -		/* initialise setting must have enough space to
> -		 * receive and respond to one request.
> -		 */
> -		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
> -					4 * serv->sv_max_mesg);
> +	else
>  		svc_tcp_init(svsk, serv);
> -	}
>  
>  	dprintk("svc: svc_setup_socket created %p (inet %p)\n",
>  				svsk, svsk->sk_sk);
> -- 
> 2.7.4

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-07-06 19:53                 ` J. Bruce Fields
@ 2016-07-06 20:11                   ` Trond Myklebust
  2016-07-06 20:30                     ` Fields Bruce
  0 siblings, 1 reply; 22+ messages in thread
From: Trond Myklebust @ 2016-07-06 20:11 UTC (permalink / raw)
  To: Fields Bruce; +Cc: linux-nfs


> On Jul 6, 2016, at 15:53, J. Bruce Fields <bfields@fieldses.org> wrote:
> 
> On Fri, Jun 24, 2016 at 10:55:51AM -0400, Trond Myklebust wrote:
>> Instead of trying (and failing) to predict how much writeable socket space
>> will be available to the RPC call, just fall back to the simple model of
>> deferring processing until the socket is uncongested.
>> 
>> If a limit is needed, then set the hard per-connection limit.
> 
> I was hoping this would be an opportunity to get rid of even more code,
> but there's still the udp case.  Do we actually need that?
> 

I wasn’t really too concerned about UDP; I consider it to be legacy code.

That said, I’d argue the existing system makes a little more sense in the case of UDP, as you don’t have any equivalent of the TCP window size. That means the socket buffer sizes are predictable, and so are more easily modelled as a pipeline.
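
Concretely, the pipeline model amounts to something like the following
sketch (simplified and approximate, not a verbatim copy of the in-tree
UDP check; names follow the server socket code):

static int udp_has_wspace_sketch(struct svc_xprt *xprt)
{
	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
	struct svc_serv *serv = xprt->xpt_server;
	unsigned long required;

	/* Space already reserved for in-flight replies, plus room for one
	 * more maximum-sized reply, must fit in the datagram send buffer. */
	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
	if (required > sock_wspace(svsk->sk_sk))
		return 0;
	return 1;
}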

Cheers
  Trond


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-06-24 21:31                   ` Trond Myklebust
@ 2016-07-06 20:17                     ` Fields Bruce
  0 siblings, 0 replies; 22+ messages in thread
From: Fields Bruce @ 2016-07-06 20:17 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Fri, Jun 24, 2016 at 09:31:07PM +0000, Trond Myklebust wrote:
> 
> > On Jun 24, 2016, at 17:18, J. Bruce Fields <bfields@fieldses.org> wrote:
> > 
> > On Fri, Jun 24, 2016 at 10:55:51AM -0400, Trond Myklebust wrote:
> >> Instead of trying (and failing) to predict how much writeable socket space
> >> will be available to the RPC call, just fall back to the simple model of
> >> deferring processing until the socket is uncongested.
> > 
> > OK, it would be a relief to get rid of that, I guess that explains the
> > previous patch.
> > 
> > But was there some specific reason you were running into this?
> 
> I’ve been testing on a 40GigE network, which easily hits this bottleneck. The result is that each connection saturates long before we’re anywhere near saturating the network or even the disk. So we find ourselves with a server that can keep scaling up to several hundred clients, but with each client seeing the same low performance irrespective of the total number of clients.

OK, I'm stealing some of your responses for a slightly more verbose
changelog (see below), but otherwise committing the patches unchanged
for 4.8.  Thanks!

--b.

    SUNRPC: Change TCP socket space reservation
    
    The current server rpc tcp code attempts to predict how much writeable
    socket space will be available to a given RPC call before accepting it
    for processing.  On a 40GigE network, we've found this throttles
    individual clients long before the network or disk is saturated.  The
    server may handle more clients easily, but the bandwidth of individual
    clients is still artificially limited.
    
    Instead of trying (and failing) to predict how much writeable socket space
    will be available to the RPC call, just fall back to the simple model of
    deferring processing until the socket is uncongested.
    
    This may increase the risk of fast clients starving slower clients; in
    such cases, the previous patch allows setting a hard per-connection
    limit.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 09/10] SUNRPC: Change TCP socket space reservation
  2016-07-06 20:11                   ` Trond Myklebust
@ 2016-07-06 20:30                     ` Fields Bruce
  0 siblings, 0 replies; 22+ messages in thread
From: Fields Bruce @ 2016-07-06 20:30 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs

On Wed, Jul 06, 2016 at 08:11:53PM +0000, Trond Myklebust wrote:
> 
> > On Jul 6, 2016, at 15:53, J. Bruce Fields <bfields@fieldses.org>
> > wrote:
> > 
> > On Fri, Jun 24, 2016 at 10:55:51AM -0400, Trond Myklebust wrote:
> >> Instead of trying (and failing) to predict how much writeable
> >> socket space will be available to the RPC call, just fall back to
> >> the simple model of deferring processing until the socket is
> >> uncongested.
> >> 
> >> If a limit is needed, then set the hard per-connection limit.
> > 
> > I was hoping this would be an opportunity to get rid of even more
> > code, but there's still the udp case.  Do we actually need that?
> > 
> 
> I wasn’t really too concerned about UDP; I consider it to be legacy
> code.

Sure, but by the same token maybe it's not worth carrying the extra code
for a udp-only optimization; quick untested diff looks like:

 fs/nfsd/nfs3proc.c              |  1 -
 fs/nfsd/nfs4state.c             |  1 -
 fs/nfsd/nfs4xdr.c               |  1 -
 fs/nfsd/nfsproc.c               |  1 -
 include/linux/sunrpc/svc.h      | 16 ----------------
 include/linux/sunrpc/svc_xprt.h |  1 -
 net/sunrpc/svc.c                |  6 ------
 net/sunrpc/svc_xprt.c           | 37 -------------------------------------
 net/sunrpc/svcsock.c            | 13 +------------
 9 files changed, 1 insertion(+), 76 deletions(-)

> That said, I’d argue the existing system makes a little more sense in
> the case of UDP, as you don’t have any equivalent of the TCP window
> size. That means the socket buffer sizes are predictable, and so are
> more easily modelled as a pipeline.

OK, could be.

--b.

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2016-07-06 20:31 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-24 14:55 [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status Trond Myklebust
2016-06-24 14:55 ` [PATCH 02/10] SUNRPC: Don't allocate a full sockaddr_storage for tracing Trond Myklebust
2016-06-24 14:55   ` [PATCH 03/10] SUNRPC: Add a tracepoint for server socket out-of-space conditions Trond Myklebust
2016-06-24 14:55     ` [PATCH 04/10] SUNRPC: Add tracepoints for dropped and deferred requests Trond Myklebust
2016-06-24 14:55       ` [PATCH 05/10] SUNRPC: lock the socket while detaching it Trond Myklebust
2016-06-24 14:55         ` [PATCH 06/10] SUNRPC: Call the default socket callbacks instead of open coding Trond Myklebust
2016-06-24 14:55           ` [PATCH 07/10] SUNRPC: Micro optimisation for svc_data_ready Trond Myklebust
2016-06-24 14:55             ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit Trond Myklebust
2016-06-24 14:55               ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation Trond Myklebust
2016-06-24 14:55                 ` [PATCH 10/10] SUNRPC: Remove unused callback xpo_adjust_wspace() Trond Myklebust
2016-06-24 21:18                 ` [PATCH 09/10] SUNRPC: Change TCP socket space reservation J. Bruce Fields
2016-06-24 21:31                   ` Trond Myklebust
2016-07-06 20:17                     ` Fields Bruce
2016-07-06 19:53                 ` J. Bruce Fields
2016-07-06 20:11                   ` Trond Myklebust
2016-07-06 20:30                     ` Fields Bruce
2016-06-24 21:15               ` [PATCH 08/10] SUNRPC: Add a server side per-connection limit J. Bruce Fields
2016-06-24 21:24                 ` Trond Myklebust
2016-07-06 16:16               ` J. Bruce Fields
2016-06-24 21:06         ` [PATCH 05/10] SUNRPC: lock the socket while detaching it J. Bruce Fields
2016-06-24 21:21           ` Trond Myklebust
2016-06-24 18:28 ` [PATCH 01/10] SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status J. Bruce Fields
