linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yuchung Cheng <ycheng@google.com>
To: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	"David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Eric Dumazet <edumazet@google.com>,
	Martin KaFai Lau <kafai@fb.com>,
	Kuniyuki Iwashima <kuni1840@gmail.com>
Subject: Re: [PATCH net-next] tcp: Add stats for socket migration.
Date: Wed, 23 Jun 2021 09:34:24 -0700	[thread overview]
Message-ID: <CAK6E8=e_nP2_2OidZ0QZutieqORROTz-+X-NCGEr+uxM1W_btw@mail.gmail.com> (raw)
In-Reply-To: <20210622233529.65158-1-kuniyu@amazon.co.jp>

On Tue, Jun 22, 2021 at 4:36 PM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
>
> This commit adds two stats for the socket migration feature to evaluate the
> effectiveness: LINUX_MIB_TCPMIGRATEREQ(SUCCESS|FAILURE).
>
> If the migration fails because of the own_req race in receiving ACK and
> sending SYN+ACK paths, we do not increment the failure stat. Then another
> CPU is responsible for the req.
>
> Link: https://lore.kernel.org/bpf/CAK6E8=cgFKuGecTzSCSQ8z3YJ_163C0uwO9yRvfDSE7vOe9mJA@mail.gmail.com/
> Suggested-by: Yuchung Cheng <ycheng@google.com>
Looks good. thanks

Acked-by: Yuchung Cheng <ycheng@google.com>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>  include/uapi/linux/snmp.h       |  2 ++
>  net/core/sock_reuseport.c       | 15 +++++++++++----
>  net/ipv4/inet_connection_sock.c | 15 +++++++++++++--
>  net/ipv4/proc.c                 |  2 ++
>  net/ipv4/tcp_minisocks.c        |  3 +++
>  5 files changed, 31 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 26fc60ce9298..904909d020e2 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -290,6 +290,8 @@ enum
>         LINUX_MIB_TCPDUPLICATEDATAREHASH,       /* TCPDuplicateDataRehash */
>         LINUX_MIB_TCPDSACKRECVSEGS,             /* TCPDSACKRecvSegs */
>         LINUX_MIB_TCPDSACKIGNOREDDUBIOUS,       /* TCPDSACKIgnoredDubious */
> +       LINUX_MIB_TCPMIGRATEREQSUCCESS,         /* TCPMigrateReqSuccess */
> +       LINUX_MIB_TCPMIGRATEREQFAILURE,         /* TCPMigrateReqFailure */
>         __LINUX_MIB_MAX
>  };
>
> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> index de5ee3ae86d5..3f00a28fe762 100644
> --- a/net/core/sock_reuseport.c
> +++ b/net/core/sock_reuseport.c
> @@ -6,6 +6,7 @@
>   * selecting the socket index from the array of available sockets.
>   */
>
> +#include <net/ip.h>
>  #include <net/sock_reuseport.h>
>  #include <linux/bpf.h>
>  #include <linux/idr.h>
> @@ -536,7 +537,7 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
>
>         socks = READ_ONCE(reuse->num_socks);
>         if (unlikely(!socks))
> -               goto out;
> +               goto failure;
>
>         /* paired with smp_wmb() in __reuseport_add_sock() */
>         smp_rmb();
> @@ -546,13 +547,13 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
>         if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
>                 if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
>                         goto select_by_hash;
> -               goto out;
> +               goto failure;
>         }
>
>         if (!skb) {
>                 skb = alloc_skb(0, GFP_ATOMIC);
>                 if (!skb)
> -                       goto out;
> +                       goto failure;
>                 allocated = true;
>         }
>
> @@ -565,12 +566,18 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
>         if (!nsk)
>                 nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
>
> -       if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
> +       if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
>                 nsk = NULL;
> +               goto failure;
> +       }
>
>  out:
>         rcu_read_unlock();
>         return nsk;
> +
> +failure:
> +       __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
> +       goto out;
>  }
>  EXPORT_SYMBOL(reuseport_migrate_sock);
>
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 0eea878edc30..754013fa393b 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -703,6 +703,8 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
>
>         nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
>         if (!nreq) {
> +               __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
> +
>                 /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
>                 sock_put(sk);
>                 return NULL;
> @@ -876,9 +878,10 @@ static void reqsk_timer_handler(struct timer_list *t)
>                 if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
>                         /* delete timer */
>                         inet_csk_reqsk_queue_drop(sk_listener, nreq);
> -                       goto drop;
> +                       goto no_ownership;
>                 }
>
> +               __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
>                 reqsk_migrate_reset(oreq);
>                 reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
>                 reqsk_put(oreq);
> @@ -887,17 +890,19 @@ static void reqsk_timer_handler(struct timer_list *t)
>                 return;
>         }
>
> -drop:
>         /* Even if we can clone the req, we may need not retransmit any more
>          * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
>          * CPU may win the "own_req" race so that inet_ehash_insert() fails.
>          */
>         if (nreq) {
> +               __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
> +no_ownership:
>                 reqsk_migrate_reset(nreq);
>                 reqsk_queue_removed(queue, nreq);
>                 __reqsk_free(nreq);
>         }
>
> +drop:
>         inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
>  }
>
> @@ -1135,11 +1140,13 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
>
>                         refcount_set(&nreq->rsk_refcnt, 1);
>                         if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
> +                               __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
>                                 reqsk_migrate_reset(req);
>                                 reqsk_put(req);
>                                 return child;
>                         }
>
> +                       __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
>                         reqsk_migrate_reset(nreq);
>                         __reqsk_free(nreq);
>                 } else if (inet_csk_reqsk_queue_add(sk, req, child)) {
> @@ -1188,8 +1195,12 @@ void inet_csk_listen_stop(struct sock *sk)
>                                 refcount_set(&nreq->rsk_refcnt, 1);
>
>                                 if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
> +                                       __NET_INC_STATS(sock_net(nsk),
> +                                                       LINUX_MIB_TCPMIGRATEREQSUCCESS);
>                                         reqsk_migrate_reset(req);
>                                 } else {
> +                                       __NET_INC_STATS(sock_net(nsk),
> +                                                       LINUX_MIB_TCPMIGRATEREQFAILURE);
>                                         reqsk_migrate_reset(nreq);
>                                         __reqsk_free(nreq);
>                                 }
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index 6d46297a99f8..b0d3a09dc84e 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -295,6 +295,8 @@ static const struct snmp_mib snmp4_net_list[] = {
>         SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
>         SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
>         SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
> +       SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
> +       SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
>         SNMP_MIB_SENTINEL
>  };
>
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index f258a4c0da71..0a4f3f16140a 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -786,6 +786,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
>         return inet_csk_complete_hashdance(sk, child, req, own_req);
>
>  listen_overflow:
> +       if (sk != req->rsk_listener)
> +               __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
> +
>         if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
>                 inet_rsk(req)->acked = 1;
>                 return NULL;
> --
> 2.30.2
>

  reply	other threads:[~2021-06-23 16:35 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-22 23:35 [PATCH net-next] tcp: Add stats for socket migration Kuniyuki Iwashima
2021-06-23 16:34 ` Yuchung Cheng [this message]
2021-06-23 20:00 ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAK6E8=e_nP2_2OidZ0QZutieqORROTz-+X-NCGEr+uxM1W_btw@mail.gmail.com' \
    --to=ycheng@google.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kafai@fb.com \
    --cc=kuba@kernel.org \
    --cc=kuni1840@gmail.com \
    --cc=kuniyu@amazon.co.jp \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).