All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH mptcp-next 0/4] mptcp: listen dump support
@ 2022-03-24 13:57 Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 1/4] mptcp: diag: switch to context structure Florian Westphal
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 13:57 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Done in a not-very-elegant way:
Iterate over tcp listen hash, then pick out the tcp sockets
with mptcp-ctx structure attached, then take the conn->sk for dumping.

First patch is preparation/cleanup.
Second patch gets rid of locking to avoid a lockdep splat.
If the socket lock is really needed (I don't see where) I can workaround
this by dropping locks temporarily when iterating the listen hash table,
but its a bit more awkward.

Last patch is needed so that 'Send-Q' has expected content instead of 0.

Sample output:
ss -Mil
State        Recv-Q Send-Q Local Address:Port  Peer Address:Port
LISTEN       0      20     127.0.0.1:12000     0.0.0.0:*
         subflows_max:2

Florian Westphal (4):
  mptcp: diag: switch to context structure
  mptcp: remove locking in mptcp_diag_fill_info
  mptcp: listen diag dump support
  mptcp: let mptcp listen dump show listen backlog size

 net/mptcp/mptcp_diag.c | 91 +++++++++++++++++++++++++++++++++++++++---
 net/mptcp/protocol.c   |  5 ++-
 net/mptcp/sockopt.c    |  6 ---
 3 files changed, 90 insertions(+), 12 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next 1/4] mptcp: diag: switch to context structure
  2022-03-24 13:57 [PATCH mptcp-next 0/4] mptcp: listen dump support Florian Westphal
@ 2022-03-24 13:57 ` Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 2/4] mptcp: remove locking in mptcp_diag_fill_info Florian Westphal
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 13:57 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Raw access to cb->arg[] is deprecated, use a context structure.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/mptcp_diag.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f44125dd6697..c4992eeb67d8 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -66,20 +66,28 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
 	return err;
 }
 
+struct mptcp_diag_ctx {
+	long s_slot;
+	long s_num;
+};
+
 static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			    const struct inet_diag_req_v2 *r)
 {
 	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+	struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
 	struct net *net = sock_net(skb->sk);
 	struct inet_diag_dump_data *cb_data;
 	struct mptcp_sock *msk;
 	struct nlattr *bc;
 
+	BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
+
 	cb_data = cb->data;
 	bc = cb_data->inet_diag_nla_bc;
 
-	while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
-	       NULL) {
+	while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
+					    &diag_ctx->s_num)) != NULL) {
 		struct inet_sock *inet = (struct inet_sock *)msk;
 		struct sock *sk = (struct sock *)msk;
 		int ret = 0;
@@ -101,7 +109,7 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		sock_put(sk);
 		if (ret < 0) {
 			/* will retry on the same position */
-			cb->args[1]--;
+			diag_ctx->s_num--;
 			break;
 		}
 		cond_resched();
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next 2/4] mptcp: remove locking in mptcp_diag_fill_info
  2022-03-24 13:57 [PATCH mptcp-next 0/4] mptcp: listen dump support Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 1/4] mptcp: diag: switch to context structure Florian Westphal
@ 2022-03-24 13:57 ` Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 3/4] mptcp: listen diag dump support Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size Florian Westphal
  3 siblings, 0 replies; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 13:57 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Problem is that listener iteration would call this from atomic context
so this locking is not allowed.

One way is to drop locks before calling the helper, but afaics the lock
isn't really needed, all values are fetched via READ_ONCE().

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/sockopt.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index f949d22f52bd..826b0c1dae98 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -853,15 +853,11 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int
 
 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 {
-	struct sock *sk = &msk->sk.icsk_inet.sk;
 	u32 flags = 0;
-	bool slow;
 	u8 val;
 
 	memset(info, 0, sizeof(*info));
 
-	slow = lock_sock_fast(sk);
-
 	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
 	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
 	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
@@ -882,8 +878,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 	info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
 	info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
 	info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
-
-	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next 3/4] mptcp: listen diag dump support
  2022-03-24 13:57 [PATCH mptcp-next 0/4] mptcp: listen dump support Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 1/4] mptcp: diag: switch to context structure Florian Westphal
  2022-03-24 13:57 ` [PATCH mptcp-next 2/4] mptcp: remove locking in mptcp_diag_fill_info Florian Westphal
@ 2022-03-24 13:57 ` Florian Westphal
  2022-03-24 16:52   ` Paolo Abeni
  2022-03-24 13:57 ` [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size Florian Westphal
  3 siblings, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 13:57 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

makes 'ss -Ml' show mptcp listen sockets.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/mptcp_diag.c | 67 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index c4992eeb67d8..6a6dfc7eac33 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -69,6 +69,8 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
 struct mptcp_diag_ctx {
 	long s_slot;
 	long s_num;
+	unsigned int l_slot;
+	unsigned int l_num;
 };
 
 static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -114,6 +116,71 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		}
 		cond_resched();
 	}
+
+	if (r->idiag_states & TCPF_LISTEN) {
+		int i;
+
+		for (i = diag_ctx->l_slot; i < INET_LHTABLE_SIZE; i++) {
+			struct inet_listen_hashbucket *ilb;
+			struct hlist_nulls_node *node;
+			struct sock *sk;
+			int num = 0;
+
+			ilb = &tcp_hashinfo.listening_hash[i];
+
+			rcu_read_lock();
+			spin_lock(&ilb->lock);
+			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+				const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
+				struct inet_sock *inet = inet_sk(sk);
+				int ret;
+
+				if (num < diag_ctx->l_num)
+					goto next_listen;
+
+				if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
+					goto next_listen;
+
+				sk = ctx->conn;
+				if (!sk || !net_eq(sock_net(sk), net))
+					goto next_listen;
+
+				if (r->sdiag_family != AF_UNSPEC &&
+				    sk->sk_family != r->sdiag_family)
+					goto next_listen;
+
+				if (r->id.idiag_sport != inet->inet_sport &&
+				    r->id.idiag_sport)
+					goto next_listen;
+
+				if (!refcount_inc_not_zero(&sk->sk_refcnt))
+					goto next_listen;
+
+				ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+				sock_put(sk);
+
+				if (ret < 0) {
+					spin_unlock(&ilb->lock);
+					rcu_read_unlock();
+					diag_ctx->l_slot = i;
+					diag_ctx->l_num = num;
+					return;
+				}
+				diag_ctx->l_num = num + 1;
+				num = 0;
+next_listen:
+				++num;
+			}
+			spin_unlock(&ilb->lock);
+			rcu_read_unlock();
+
+			cond_resched();
+			diag_ctx->l_num = 0;
+		}
+
+		diag_ctx->l_num = 0;
+		diag_ctx->l_slot = i;
+	}
 }
 
 static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size
  2022-03-24 13:57 [PATCH mptcp-next 0/4] mptcp: listen dump support Florian Westphal
                   ` (2 preceding siblings ...)
  2022-03-24 13:57 ` [PATCH mptcp-next 3/4] mptcp: listen diag dump support Florian Westphal
@ 2022-03-24 13:57 ` Florian Westphal
  2022-03-24 16:45   ` Paolo Abeni
  3 siblings, 1 reply; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 13:57 UTC (permalink / raw)
  To: mptcp; +Cc: Florian Westphal

Before:
State   Recv-Q Send-Q  ..
LISTEN       0      0

After:
LISTEN       0     20

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/mptcp/mptcp_diag.c | 10 ++++++++--
 net/mptcp/protocol.c   |  5 ++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 6a6dfc7eac33..e1babab45e86 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -189,8 +189,14 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_info *info = _info;
 
-	r->idiag_rqueue = sk_rmem_alloc_get(sk);
-	r->idiag_wqueue = sk_wmem_alloc_get(sk);
+	if (inet_sk_state_load(sk) == TCP_LISTEN) {
+		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
+		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
+	} else {
+		r->idiag_rqueue = sk_rmem_alloc_get(sk);
+		r->idiag_wqueue = sk_wmem_alloc_get(sk);
+	}
+
 	if (!info)
 		return;
 
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d3887f628b54..a29cfc4c44a1 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3589,8 +3589,11 @@ static int mptcp_listen(struct socket *sock, int backlog)
 
 	err = ssock->ops->listen(ssock, backlog);
 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
-	if (!err)
+	if (!err) {
 		mptcp_copy_inaddrs(sock->sk, ssock->sk);
+		WRITE_ONCE(sock->sk->sk_max_ack_backlog,
+			   READ_ONCE(ssock->sk->sk_max_ack_backlog));
+	}
 
 unlock:
 	release_sock(sock->sk);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size
  2022-03-24 13:57 ` [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size Florian Westphal
@ 2022-03-24 16:45   ` Paolo Abeni
  2022-03-24 17:35     ` Florian Westphal
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Abeni @ 2022-03-24 16:45 UTC (permalink / raw)
  To: Florian Westphal, mptcp

On Thu, 2022-03-24 at 14:57 +0100, Florian Westphal wrote:
> Before:
> State   Recv-Q Send-Q  ..
> LISTEN       0      0
> 
> After:
> LISTEN       0     20
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/mptcp/mptcp_diag.c | 10 ++++++++--
>  net/mptcp/protocol.c   |  5 ++++-
>  2 files changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
> index 6a6dfc7eac33..e1babab45e86 100644
> --- a/net/mptcp/mptcp_diag.c
> +++ b/net/mptcp/mptcp_diag.c
> @@ -189,8 +189,14 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
>  	struct mptcp_sock *msk = mptcp_sk(sk);
>  	struct mptcp_info *info = _info;
>  
> -	r->idiag_rqueue = sk_rmem_alloc_get(sk);
> -	r->idiag_wqueue = sk_wmem_alloc_get(sk);
> +	if (inet_sk_state_load(sk) == TCP_LISTEN) {
> +		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
> +		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);

What about accessing msk->first->sk*ack_backlog instead ? Are there any
possible subtle races? Otherwise sk_ack_backlog will always be 0,
right?

Thanks!

Paolo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: listen diag dump support
  2022-03-24 13:57 ` [PATCH mptcp-next 3/4] mptcp: listen diag dump support Florian Westphal
@ 2022-03-24 16:52   ` Paolo Abeni
  2022-03-24 17:07     ` Florian Westphal
  0 siblings, 1 reply; 9+ messages in thread
From: Paolo Abeni @ 2022-03-24 16:52 UTC (permalink / raw)
  To: Florian Westphal, mptcp

On Thu, 2022-03-24 at 14:57 +0100, Florian Westphal wrote:
> makes 'ss -Ml' show mptcp listen sockets.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  net/mptcp/mptcp_diag.c | 67 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
> 
> diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
> index c4992eeb67d8..6a6dfc7eac33 100644
> --- a/net/mptcp/mptcp_diag.c
> +++ b/net/mptcp/mptcp_diag.c
> @@ -69,6 +69,8 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
>  struct mptcp_diag_ctx {
>  	long s_slot;
>  	long s_num;
> +	unsigned int l_slot;
> +	unsigned int l_num;
>  };
>  
>  static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
> @@ -114,6 +116,71 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
>  		}
>  		cond_resched();
>  	}
> +
> +	if (r->idiag_states & TCPF_LISTEN) {
> +		int i;

perhaps here we need to add something alike:

		if (r->id.idiag_dport)
			return;

???

Also, what about moving the following chunk in separate helper -
mptcp_diag_dump_listener()?

Thanks!

Paolo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH mptcp-next 3/4] mptcp: listen diag dump support
  2022-03-24 16:52   ` Paolo Abeni
@ 2022-03-24 17:07     ` Florian Westphal
  0 siblings, 0 replies; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 17:07 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: Florian Westphal, mptcp

Paolo Abeni <pabeni@redhat.com> wrote:
> On Thu, 2022-03-24 at 14:57 +0100, Florian Westphal wrote:
> > makes 'ss -Ml' show mptcp listen sockets.
> > 
> > Signed-off-by: Florian Westphal <fw@strlen.de>
> > ---
> >  net/mptcp/mptcp_diag.c | 67 ++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 67 insertions(+)
> > 
> > diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
> > index c4992eeb67d8..6a6dfc7eac33 100644
> > --- a/net/mptcp/mptcp_diag.c
> > +++ b/net/mptcp/mptcp_diag.c
> > @@ -69,6 +69,8 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
> >  struct mptcp_diag_ctx {
> >  	long s_slot;
> >  	long s_num;
> > +	unsigned int l_slot;
> > +	unsigned int l_num;
> >  };
> >  
> >  static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
> > @@ -114,6 +116,71 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
> >  		}
> >  		cond_resched();
> >  	}
> > +
> > +	if (r->idiag_states & TCPF_LISTEN) {
> > +		int i;
> 
> perhaps here we need to add something alike:
> 
> 		if (r->id.idiag_dport)
> 			return;
[..]

> Also, what about moving the following chunk in separate helper -
> mptcp_diag_dump_listener()?
Changed to:

if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
            mptcp_diag_dump_listeners(skb, cb, r, net_admin);

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size
  2022-03-24 16:45   ` Paolo Abeni
@ 2022-03-24 17:35     ` Florian Westphal
  0 siblings, 0 replies; 9+ messages in thread
From: Florian Westphal @ 2022-03-24 17:35 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: Florian Westphal, mptcp

Paolo Abeni <pabeni@redhat.com> wrote:
> > -	r->idiag_rqueue = sk_rmem_alloc_get(sk);
> > -	r->idiag_wqueue = sk_wmem_alloc_get(sk);
> > +	if (inet_sk_state_load(sk) == TCP_LISTEN) {
> > +		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
> > +		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
> 
> What about accessing msk->first->sk*ack_backlog instead ? Are there any
> possible subtle races? Otherwise sk_ack_backlog will always be 0,
> right?

Yes, msk->first would be better here, will drop this patch
and merge the msk->first bit into preceeding one.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-03-24 17:35 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-24 13:57 [PATCH mptcp-next 0/4] mptcp: listen dump support Florian Westphal
2022-03-24 13:57 ` [PATCH mptcp-next 1/4] mptcp: diag: switch to context structure Florian Westphal
2022-03-24 13:57 ` [PATCH mptcp-next 2/4] mptcp: remove locking in mptcp_diag_fill_info Florian Westphal
2022-03-24 13:57 ` [PATCH mptcp-next 3/4] mptcp: listen diag dump support Florian Westphal
2022-03-24 16:52   ` Paolo Abeni
2022-03-24 17:07     ` Florian Westphal
2022-03-24 13:57 ` [PATCH mptcp-next 4/4] mptcp: let mptcp listen dump show listen backlog size Florian Westphal
2022-03-24 16:45   ` Paolo Abeni
2022-03-24 17:35     ` Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.