All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] SUNRPC: Fix disconnection races
@ 2018-12-17 20:05 Trond Myklebust
  2018-12-17 22:06 ` Chuck Lever
  0 siblings, 1 reply; 3+ messages in thread
From: Trond Myklebust @ 2018-12-17 20:05 UTC (permalink / raw)
  To: Dave Wysochanski, Scott Mayhew; +Cc: Chuck Lever, linux-nfs

When the socket is closed, we need to call xprt_disconnect_done() in order
to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.

However, we also want to ensure that we don't wake them up before the socket
is closed, since that would cause thundering herd issues with everyone
piling up to retransmit before the TCP shutdown dance has completed.
Only the task that holds XPRT_LOCKED needs to wake up early in order to
allow the close to complete.

Reported-by: Dave Wysochanski <dwysocha@redhat.com>
Reported-by: Scott Mayhew <smayhew@redhat.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprt.c     |  4 +++-
 net/sunrpc/xprtsock.c | 10 ++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce927002862a..089e53a87379 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
 	/* Try to schedule an autoclose RPC call */
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-	xprt_wake_pending_tasks(xprt, -EAGAIN);
+	else if (xprt->snd_task)
+		rpc_wake_up_queued_task_set_status(&xprt->pending,
+				xprt->snd_task, -ENOTCONN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_force_disconnect);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8a5e823e0b33..f0b3700cec95 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
 
 	trace_rpc_socket_close(xprt, sock);
 	sock_release(sock);
+
+	xprt_disconnect_done(xprt);
 }
 
 /**
@@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
 
 	xs_reset_transport(transport);
 	xprt->reestablish_timeout = 0;
-
-	xprt_disconnect_done(xprt);
 }
 
 static void xs_inject_disconnect(struct rpc_xprt *xprt)
@@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
 					&transport->sock_state))
 			xprt_clear_connecting(xprt);
 		clear_bit(XPRT_CLOSING, &xprt->state);
-		if (sk->sk_err)
-			xprt_wake_pending_tasks(xprt, -sk->sk_err);
 		/* Trigger the socket release */
 		xs_tcp_force_close(xprt);
 	}
@@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
 	trace_rpc_socket_connect(xprt, sock, 0);
 	status = 0;
 out:
-	xprt_unlock_connect(xprt, transport);
 	xprt_clear_connecting(xprt);
+	xprt_unlock_connect(xprt, transport);
 	xprt_wake_pending_tasks(xprt, status);
 }
 
@@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
 	}
 	status = -EAGAIN;
 out:
-	xprt_unlock_connect(xprt, transport);
 	xprt_clear_connecting(xprt);
+	xprt_unlock_connect(xprt, transport);
 	xprt_wake_pending_tasks(xprt, status);
 }
 
-- 
2.19.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] SUNRPC: Fix disconnection races
  2018-12-17 20:05 [PATCH] SUNRPC: Fix disconnection races Trond Myklebust
@ 2018-12-17 22:06 ` Chuck Lever
  2018-12-17 22:24   ` Trond Myklebust
  0 siblings, 1 reply; 3+ messages in thread
From: Chuck Lever @ 2018-12-17 22:06 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Dave Wysochanski, Scott Mayhew, Linux NFS Mailing List



> On Dec 17, 2018, at 3:05 PM, Trond Myklebust <trondmy@gmail.com> wrote:
> 
> When the socket is closed, we need to call xprt_disconnect_done() in order
> to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.
> 
> However, we also want to ensure that we don't wake them up before the socket
> is closed, since that would cause thundering herd issues with everyone
> piling up to retransmit before the TCP shutdown dance has completed.
> Only the task that holds XPRT_LOCKED needs to wake up early in order to
> allow the close to complete.
> 
> Reported-by: Dave Wysochanski <dwysocha@redhat.com>
> Reported-by: Scott Mayhew <smayhew@redhat.com>
> Cc: Chuck Lever <chuck.lever@oracle.com>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> net/sunrpc/xprt.c     |  4 +++-
> net/sunrpc/xprtsock.c | 10 ++++------
> 2 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index ce927002862a..089e53a87379 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
> 	/* Try to schedule an autoclose RPC call */
> 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> -	xprt_wake_pending_tasks(xprt, -EAGAIN);
> +	else if (xprt->snd_task)
> +		rpc_wake_up_queued_task_set_status(&xprt->pending,
> +				xprt->snd_task, -ENOTCONN);
> 	spin_unlock_bh(&xprt->transport_lock);
> }
> EXPORT_SYMBOL_GPL(xprt_force_disconnect);

Hrm, well I'm not sure xprt_connect_status is ready to deal with -ENOTCONN.

     kworker/2:3-2342  [002]  3706.246289: rpc_task_wakeup:      task:39992@5 flags=0001 state=001e status=-107 timeout=120000 queue=xprt_pending
     kworker/0:7-2424  [000]  3706.246296: xprtrdma_reconnect:   peer=[192.168.2.55]:20049 r_xprt=0xffff88886c465000
  kworker/u25:28-2270  [004]  3706.246321: rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-107 action=xprt_connect_status
  kworker/u25:28-2270  [004]  3706.246323: rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5 action=call_connect_status
  kworker/u25:28-2270  [004]  3706.246323: rpc_connect_status:   task:39992@5 status=-5
  kworker/u25:28-2270  [004]  3706.246324: rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5 action=rpc_exit_task
  kworker/u25:28-2270  [004]  3706.246327: rpc_stats_latency:    task:39992@5 xid=0x2f1ad350 nfsv3 READ backlog=0 rtt=0 execute=13028
  kworker/u25:28-2270  [004]  3706.246333: xprtrdma_rpc_done:    task:39992@5 req=0xffff88886a438000 rep=(nil)

static void xprt_connect_status(struct rpc_task *task)
{
        switch (task->tk_status) {
        case 0:
                dprintk("RPC: %5u xprt_connect_status: connection established\n",
                                task->tk_pid);
                break;
        case -ECONNREFUSED:
        case -ECONNRESET:
        case -ECONNABORTED:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
        case -EPIPE:
        case -EAGAIN:
                dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
                break;
        case -ETIMEDOUT:
                dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
                                "out\n", task->tk_pid);
                break;
        default:
                dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
                                "server %s\n", task->tk_pid, -task->tk_status,
                                task->tk_rqstp->rq_xprt->servername);
                task->tk_status = -EIO;
        }    
}

Maybe the wake-up status should remain -EAGAIN ?


> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 8a5e823e0b33..f0b3700cec95 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
> 
> 	trace_rpc_socket_close(xprt, sock);
> 	sock_release(sock);
> +
> +	xprt_disconnect_done(xprt);
> }
> 
> /**
> @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
> 
> 	xs_reset_transport(transport);
> 	xprt->reestablish_timeout = 0;
> -
> -	xprt_disconnect_done(xprt);
> }
> 
> static void xs_inject_disconnect(struct rpc_xprt *xprt)
> @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
> 					&transport->sock_state))
> 			xprt_clear_connecting(xprt);
> 		clear_bit(XPRT_CLOSING, &xprt->state);
> -		if (sk->sk_err)
> -			xprt_wake_pending_tasks(xprt, -sk->sk_err);
> 		/* Trigger the socket release */
> 		xs_tcp_force_close(xprt);
> 	}
> @@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
> 	trace_rpc_socket_connect(xprt, sock, 0);
> 	status = 0;
> out:
> -	xprt_unlock_connect(xprt, transport);
> 	xprt_clear_connecting(xprt);
> +	xprt_unlock_connect(xprt, transport);
> 	xprt_wake_pending_tasks(xprt, status);
> }
> 
> @@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
> 	}
> 	status = -EAGAIN;
> out:
> -	xprt_unlock_connect(xprt, transport);
> 	xprt_clear_connecting(xprt);
> +	xprt_unlock_connect(xprt, transport);
> 	xprt_wake_pending_tasks(xprt, status);
> }
> 
> -- 
> 2.19.2
> 

--
Chuck Lever




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] SUNRPC: Fix disconnection races
  2018-12-17 22:06 ` Chuck Lever
@ 2018-12-17 22:24   ` Trond Myklebust
  0 siblings, 0 replies; 3+ messages in thread
From: Trond Myklebust @ 2018-12-17 22:24 UTC (permalink / raw)
  To: chuck.lever; +Cc: smayhew, dwysocha, linux-nfs

On Mon, 2018-12-17 at 17:06 -0500, Chuck Lever wrote:
> > On Dec 17, 2018, at 3:05 PM, Trond Myklebust <trondmy@gmail.com>
> > wrote:
> > 
> > When the socket is closed, we need to call xprt_disconnect_done()
> > in order
> > to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping
> > tasks.
> > 
> > However, we also want to ensure that we don't wake them up before
> > the socket
> > is closed, since that would cause thundering herd issues with
> > everyone
> > piling up to retransmit before the TCP shutdown dance has
> > completed.
> > Only the task that holds XPRT_LOCKED needs to wake up early in
> > order to
> > allow the close to complete.
> > 
> > Reported-by: Dave Wysochanski <dwysocha@redhat.com>
> > Reported-by: Scott Mayhew <smayhew@redhat.com>
> > Cc: Chuck Lever <chuck.lever@oracle.com>
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> > net/sunrpc/xprt.c     |  4 +++-
> > net/sunrpc/xprtsock.c | 10 ++++------
> > 2 files changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> > index ce927002862a..089e53a87379 100644
> > --- a/net/sunrpc/xprt.c
> > +++ b/net/sunrpc/xprt.c
> > @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt
> > *xprt)
> > 	/* Try to schedule an autoclose RPC call */
> > 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> > 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> > -	xprt_wake_pending_tasks(xprt, -EAGAIN);
> > +	else if (xprt->snd_task)
> > +		rpc_wake_up_queued_task_set_status(&xprt->pending,
> > +				xprt->snd_task, -ENOTCONN);
> > 	spin_unlock_bh(&xprt->transport_lock);
> > }
> > EXPORT_SYMBOL_GPL(xprt_force_disconnect);
> 
> Hrm, well I'm not sure xprt_connect_status is ready to deal with
> -ENOTCONN.
> 
>      kworker/2:3-2342  [002]  3706.246289:
> rpc_task_wakeup:      task:39992@5 flags=0001 state=001e status=-107
> timeout=120000 queue=xprt_pending
>      kworker/0:7-2424  [000]  3706.246296:
> xprtrdma_reconnect:   peer=[192.168.2.55]:20049
> r_xprt=0xffff88886c465000
>   kworker/u25:28-2270  [004]  3706.246321:
> rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-107
> action=xprt_connect_status
>   kworker/u25:28-2270  [004]  3706.246323:
> rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5
> action=call_connect_status
>   kworker/u25:28-2270  [004]  3706.246323:
> rpc_connect_status:   task:39992@5 status=-5
>   kworker/u25:28-2270  [004]  3706.246324:
> rpc_task_run_action:  task:39992@5 flags=0001 state=001d status=-5
> action=rpc_exit_task
>   kworker/u25:28-2270  [004]  3706.246327:
> rpc_stats_latency:    task:39992@5 xid=0x2f1ad350 nfsv3 READ
> backlog=0 rtt=0 execute=13028
>   kworker/u25:28-2270  [004]  3706.246333:
> xprtrdma_rpc_done:    task:39992@5 req=0xffff88886a438000 rep=(nil)
> 
> static void xprt_connect_status(struct rpc_task *task)
> {
>         switch (task->tk_status) {
>         case 0:
>                 dprintk("RPC: %5u xprt_connect_status: connection
> established\n",
>                                 task->tk_pid);
>                 break;
>         case -ECONNREFUSED:
>         case -ECONNRESET:
>         case -ECONNABORTED:
>         case -ENETUNREACH:
>         case -EHOSTUNREACH:
>         case -EPIPE:
>         case -EAGAIN:
>                 dprintk("RPC: %5u xprt_connect_status: retrying\n",
> task->tk_pid);
>                 break;
>         case -ETIMEDOUT:
>                 dprintk("RPC: %5u xprt_connect_status: connect
> attempt timed "
>                                 "out\n", task->tk_pid);
>                 break;
>         default:
>                 dprintk("RPC: %5u xprt_connect_status: error %d
> connecting to "
>                                 "server %s\n", task->tk_pid, -task-
> >tk_status,
>                                 task->tk_rqstp->rq_xprt->servername);
>                 task->tk_status = -EIO;
>         }    
> }
> 
> Maybe the wake-up status should remain -EAGAIN ?


Actually, that brings up a very good point. Given that a number of
things can wake up tasks on xprt->pending, and set an error code, why
do we want xprt_connect_status() to change the error codes on us? That
looks like it can go wrong in quite a few different ways.

Furthermore, call_connect_status() is supposed to perform more or less
the same service, and already knows how to deal with more errors.

> 
> > diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> > index 8a5e823e0b33..f0b3700cec95 100644
> > --- a/net/sunrpc/xprtsock.c
> > +++ b/net/sunrpc/xprtsock.c
> > @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct
> > sock_xprt *transport)
> > 
> > 	trace_rpc_socket_close(xprt, sock);
> > 	sock_release(sock);
> > +
> > +	xprt_disconnect_done(xprt);
> > }
> > 
> > /**
> > @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
> > 
> > 	xs_reset_transport(transport);
> > 	xprt->reestablish_timeout = 0;
> > -
> > -	xprt_disconnect_done(xprt);
> > }
> > 
> > static void xs_inject_disconnect(struct rpc_xprt *xprt)
> > @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock
> > *sk)
> > 					&transport->sock_state))
> > 			xprt_clear_connecting(xprt);
> > 		clear_bit(XPRT_CLOSING, &xprt->state);
> > -		if (sk->sk_err)
> > -			xprt_wake_pending_tasks(xprt, -sk->sk_err);
> > 		/* Trigger the socket release */
> > 		xs_tcp_force_close(xprt);
> > 	}
> > @@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct
> > work_struct *work)
> > 	trace_rpc_socket_connect(xprt, sock, 0);
> > 	status = 0;
> > out:
> > -	xprt_unlock_connect(xprt, transport);
> > 	xprt_clear_connecting(xprt);
> > +	xprt_unlock_connect(xprt, transport);
> > 	xprt_wake_pending_tasks(xprt, status);
> > }
> > 
> > @@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct
> > work_struct *work)
> > 	}
> > 	status = -EAGAIN;
> > out:
> > -	xprt_unlock_connect(xprt, transport);
> > 	xprt_clear_connecting(xprt);
> > +	xprt_unlock_connect(xprt, transport);
> > 	xprt_wake_pending_tasks(xprt, status);
> > }
> > 
> > -- 
> > 2.19.2
> > 
> 
> --
> Chuck Lever
> 
> 
> 
-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-12-17 22:24 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-17 20:05 [PATCH] SUNRPC: Fix disconnection races Trond Myklebust
2018-12-17 22:06 ` Chuck Lever
2018-12-17 22:24   ` Trond Myklebust

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.