linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2]
@ 2019-08-29 13:12 David Howells
  2019-08-30 22:07 ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: David Howells @ 2019-08-29 13:12 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, marc.dionne, linux-afs, linux-kernel

When a local endpoint ceases to be in use, such as when the kafs module
is unloaded, the kernel will emit an assertion failure if there are any
outstanding client connections:

	rxrpc: Assertion failed
	------------[ cut here ]------------
	kernel BUG at net/rxrpc/local_object.c:433!

and even beyond that, will evince other oopses if there are service
connections still present.

Fix this by:

 (1) Removing the triggering of connection reaping when an rxrpc socket is
     released.  These don't actually clean up the connections anyway - and
     further, the local endpoint may still be in use through another
     socket.

 (2) Mark the local endpoint as dead when we start the process of tearing
     it down.

 (3) When destroying a local endpoint, strip all of its client connections
     from the idle list and discard the ref on each that the list was
     holding.

 (4) When destroying a local endpoint, call the service connection reaper
     directly (rather than through a workqueue) to immediately kill off all
     outstanding service connections.

 (5) Make the service connection reaper reap connections for which the
     local endpoint is marked dead.

Only after destroying the connections can we close the socket lest we get
an oops in a workqueue that's looking at a connection or a peer.

Fixes: 3d18cbb7fd0c ("rxrpc: Fix conn expiry timers")
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
---

 net/rxrpc/af_rxrpc.c     |    3 ---
 net/rxrpc/ar-internal.h  |    1 +
 net/rxrpc/conn_client.c  |   44 ++++++++++++++++++++++++++++++++++++++++++++
 net/rxrpc/conn_object.c  |    2 +-
 net/rxrpc/local_object.c |    5 ++++-
 5 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0dbbfd1b6487..d72ddb67bb74 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -862,7 +862,6 @@ static void rxrpc_sock_destructor(struct sock *sk)
 static int rxrpc_release_sock(struct sock *sk)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
-	struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
 
 	_enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
 
@@ -898,8 +897,6 @@ static int rxrpc_release_sock(struct sock *sk)
 	rxrpc_release_calls_on_socket(rx);
 	flush_workqueue(rxrpc_workqueue);
 	rxrpc_purge_queue(&sk->sk_receive_queue);
-	rxrpc_queue_work(&rxnet->service_conn_reaper);
-	rxrpc_queue_work(&rxnet->client_conn_reaper);
 
 	rxrpc_unuse_local(rx->local);
 	rx->local = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 852e58781fda..8051dfdcf26d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -910,6 +910,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *);
 void rxrpc_put_client_conn(struct rxrpc_connection *);
 void rxrpc_discard_expired_client_conns(struct work_struct *);
 void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
+void rxrpc_clean_up_local_conns(struct rxrpc_local *);
 
 /*
  * conn_event.c
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index aea82f909c60..3f1da1b49f69 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -1162,3 +1162,47 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
 
 	_leave("");
 }
+
+/*
+ * Clean up the client connections on a local endpoint.
+ */
+void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
+{
+	struct rxrpc_connection *conn, *tmp;
+	struct rxrpc_net *rxnet = local->rxnet;
+	unsigned int nr_active;
+	LIST_HEAD(graveyard);
+
+	_enter("");
+
+	spin_lock(&rxnet->client_conn_cache_lock);
+	nr_active = rxnet->nr_active_client_conns;
+
+	list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
+				 cache_link) {
+		if (conn->params.local == local) {
+			ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_IDLE);
+
+			trace_rxrpc_client(conn, -1, rxrpc_client_discard);
+			if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+				BUG();
+			conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+			list_move(&conn->cache_link, &graveyard);
+			nr_active--;
+		}
+	}
+
+	rxnet->nr_active_client_conns = nr_active;
+	spin_unlock(&rxnet->client_conn_cache_lock);
+	ASSERTCMP(nr_active, >=, 0);
+
+	while (!list_empty(&graveyard)) {
+		conn = list_entry(graveyard.next,
+				  struct rxrpc_connection, cache_link);
+		list_del_init(&conn->cache_link);
+
+		rxrpc_put_connection(conn);
+	}
+
+	_leave(" [culled]");
+}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 434ef392212b..ed05b6922132 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -398,7 +398,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
 		if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
 			continue;
 
-		if (rxnet->live) {
+		if (rxnet->live && !conn->params.local->dead) {
 			idle_timestamp = READ_ONCE(conn->idle_timestamp);
 			expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
 			if (conn->params.local->service_closed)
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 72a6e12a9304..36587260cabd 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -426,11 +426,14 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
 
 	_enter("%d", local->debug_id);
 
+	local->dead = true;
+
 	mutex_lock(&rxnet->local_mutex);
 	list_del_init(&local->link);
 	mutex_unlock(&rxnet->local_mutex);
 
-	ASSERT(RB_EMPTY_ROOT(&local->client_conns));
+	rxrpc_clean_up_local_conns(local);
+	rxrpc_service_connection_reaper(&rxnet->service_conn_reaper);
 	ASSERT(!local->service);
 
 	if (socket) {


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net] rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2]
  2019-08-29 13:12 [PATCH net] rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2] David Howells
@ 2019-08-30 22:07 ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2019-08-30 22:07 UTC (permalink / raw)
  To: dhowells; +Cc: netdev, marc.dionne, linux-afs, linux-kernel

From: David Howells <dhowells@redhat.com>
Date: Thu, 29 Aug 2019 14:12:11 +0100

> When a local endpoint ceases to be in use, such as when the kafs module
> is unloaded, the kernel will emit an assertion failure if there are any
> outstanding client connections:
> 
> 	rxrpc: Assertion failed
> 	------------[ cut here ]------------
> 	kernel BUG at net/rxrpc/local_object.c:433!
> 
> and even beyond that, will evince other oopses if there are service
> connections still present.
> 
> Fix this by:
 ...
> Only after destroying the connections can we close the socket lest we get
> an oops in a workqueue that's looking at a connection or a peer.
> 
> Fixes: 3d18cbb7fd0c ("rxrpc: Fix conn expiry timers")
> Signed-off-by: David Howells <dhowells@redhat.com>
> Tested-by: Marc Dionne <marc.dionne@auristor.com>

Applied.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net] rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2]
       [not found] ` <20190831135906.6028-1-hdanton@sina.com>
  2019-08-31 16:45   ` David Howells
@ 2019-09-01  7:14   ` David Howells
  1 sibling, 0 replies; 4+ messages in thread
From: David Howells @ 2019-09-01  7:14 UTC (permalink / raw)
  To: Hillf Danton; +Cc: dhowells, netdev, marc.dionne, linux-afs, linux-kernel

Hillf Danton <hdanton@sina.com> wrote:

> > It's certainly possible that that can happen.  The reaper is per
> > network-namespace.
> > 
> > conn->params.local holds a ref on the local endpoint.
> > 
> Then local endpoint can not become dead without connection reaper
> running first, because of the ref held by connection. When it is
> dead, however, there is no need to run reaper directly (rather than
> through a workqueue).

The reaper is per-net_ns, not per-local.  There may be more than one local
endpoint in a net_ns and they share the list of service connections.

David

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net] rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2]
       [not found] ` <20190831135906.6028-1-hdanton@sina.com>
@ 2019-08-31 16:45   ` David Howells
  2019-09-01  7:14   ` David Howells
  1 sibling, 0 replies; 4+ messages in thread
From: David Howells @ 2019-08-31 16:45 UTC (permalink / raw)
  To: Hillf Danton; +Cc: dhowells, netdev, marc.dionne, linux-afs, linux-kernel

Hillf Danton <hdanton@sina.com> wrote:

> > -		if (rxnet->live) {
> > +		if (rxnet->live && !conn->params.local->dead) {
> >  			idle_timestamp = READ_ONCE(conn->idle_timestamp);
> >  			expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
> >  			if (conn->params.local->service_closed)
> 
> Is there any chance out there that this reaper starts running one minute
> after the dead local went into graveyard?

It's certainly possible that that can happen.  The reaper is per
network-namespace.

conn->params.local holds a ref on the local endpoint.

It may be worth wrapping the "local->dead = true;" in rxrpc_local_destroyer()
in rxnet->conn_lock.

David

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-09-01  7:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-29 13:12 [PATCH net] rxrpc: Fix lack of conn cleanup when local endpoint is cleaned up [ver #2] David Howells
2019-08-30 22:07 ` David Miller
     [not found] <20190901020519.2392-1-hdanton@sina.com>
     [not found] ` <20190831135906.6028-1-hdanton@sina.com>
2019-08-31 16:45   ` David Howells
2019-09-01  7:14   ` David Howells

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).