All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tom Herbert <tom@herbertland.com>
To: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Linux Kernel Network Developers <netdev@vger.kernel.org>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	Stephen Hemminger <stephen@networkplumber.org>
Subject: Re: [PATCH net-next v2] net: Add sysctl to toggle early demux for tcp and udp
Date: Thu, 9 Mar 2017 19:42:53 -0800	[thread overview]
Message-ID: <CALx6S360Y3tmKMHVMUEuRKCskuwXi+9JBfce6WiSpko1sHj=Yg@mail.gmail.com> (raw)
In-Reply-To: <1489116660-4244-1-git-send-email-subashab@codeaurora.org>

On Thu, Mar 9, 2017 at 7:31 PM, Subash Abhinov Kasiviswanathan
<subashab@codeaurora.org> wrote:
> Certain systems process significant unconnected UDP workloads.
> It would be preferable to disable UDP early demux for those systems
> and enable it for TCP only.
>
Presumably you want this for performance reasons. Can you provide some
before and after numbers?

> v1->v2: Change function pointer instead of adding conditional as
> suggested by Stephen.
>
> Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Cc: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  include/net/netns/ipv4.h   |  2 ++
>  include/net/tcp.h          |  2 ++
>  include/net/udp.h          |  2 ++
>  net/ipv4/af_inet.c         | 22 ++++++++++++++++++++--
>  net/ipv4/sysctl_net_ipv4.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
>  net/ipv6/tcp_ipv6.c        | 10 +++++++++-
>  6 files changed, 82 insertions(+), 3 deletions(-)
>
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 0378e88..1e74da23 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -86,6 +86,8 @@ struct netns_ipv4 {
>         /* Shall we try to damage output packets if routing dev changes? */
>         int sysctl_ip_dynaddr;
>         int sysctl_ip_early_demux;
> +       int sysctl_tcp_early_demux;
> +       int sysctl_udp_early_demux;
>
>         int sysctl_fwmark_reflect;
>         int sysctl_tcp_fwmark_accept;
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 6061963..3b6446d 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1953,4 +1953,6 @@ static inline void tcp_listendrop(const struct sock *sk)
>         __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
>  }
>
> +void tcp_v4_early_demux_configure(int enable);
> +void tcp_v6_early_demux_configure(int enable);
>  #endif /* _TCP_H */
> diff --git a/include/net/udp.h b/include/net/udp.h
> index 1661791..7de31d5 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -373,4 +373,6 @@ struct udp_iter_state {
>  #if IS_ENABLED(CONFIG_IPV6)
>  void udpv6_encap_enable(void);
>  #endif
> +
> +void udp_v4_early_demux_configure(int enable);
>  #endif /* _UDP_H */
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index f750698..3e11d74 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1579,7 +1579,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
>  };
>  #endif
>
> -static const struct net_protocol tcp_protocol = {
> +static struct net_protocol tcp_protocol = {
>         .early_demux    =       tcp_v4_early_demux,
>         .handler        =       tcp_v4_rcv,
>         .err_handler    =       tcp_v4_err,
> @@ -1588,7 +1588,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
>         .icmp_strict_tag_validation = 1,
>  };
>
> -static const struct net_protocol udp_protocol = {
> +static struct net_protocol udp_protocol = {
>         .early_demux =  udp_v4_early_demux,
>         .handler =      udp_rcv,
>         .err_handler =  udp_err,
> @@ -1596,6 +1596,22 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
>         .netns_ok =     1,
>  };
>
> +void tcp_v4_early_demux_configure(int enable)
> +{
> +       if (enable)
> +               tcp_protocol.early_demux = tcp_v4_early_demux;
> +       else
> +               tcp_protocol.early_demux = NULL;
> +}
> +
> +void udp_v4_early_demux_configure(int enable)
> +{
> +       if (enable)
> +               udp_protocol.early_demux = udp_v4_early_demux;
> +       else
> +               udp_protocol.early_demux = NULL;
> +}
> +
>  static const struct net_protocol icmp_protocol = {
>         .handler =      icmp_rcv,
>         .err_handler =  icmp_err,
> @@ -1700,6 +1716,8 @@ static __net_init int inet_init_net(struct net *net)
>         net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
>         net->ipv4.sysctl_ip_dynaddr = 0;
>         net->ipv4.sysctl_ip_early_demux = 1;
> +       net->ipv4.sysctl_udp_early_demux = 1;
> +       net->ipv4.sysctl_tcp_early_demux = 1;
>
>         return 0;
>  }
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index b2fa498..c61383b 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -253,6 +253,39 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
>         return ret;
>  }
>
> +static int proc_tcp_early_demux(struct ctl_table *table, int write,
> +                               void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +       int ret = 0;
> +
> +       ret = proc_dointvec(table, write, buffer, lenp, ppos);
> +
> +       if (write && !ret) {
> +               int enabled = init_net.ipv4.sysctl_tcp_early_demux;
> +
> +               tcp_v4_early_demux_configure(enabled);
> +               tcp_v6_early_demux_configure(enabled);
> +       }
> +
> +       return ret;
> +}
> +
> +static int proc_udp_early_demux(struct ctl_table *table, int write,
> +                               void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +       int ret = 0;
> +
> +       ret = proc_dointvec(table, write, buffer, lenp, ppos);
> +
> +       if (write && !ret) {
> +               int enabled = init_net.ipv4.sysctl_udp_early_demux;
> +
> +               udp_v4_early_demux_configure(enabled);
> +       }
> +
> +       return ret;
> +}
> +
>  static struct ctl_table ipv4_table[] = {
>         {
>                 .procname       = "tcp_timestamps",
> @@ -737,6 +770,20 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
>                 .proc_handler   = proc_dointvec
>         },
>         {
> +               .procname       = "udp_early_demux",
> +               .data           = &init_net.ipv4.sysctl_udp_early_demux,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = proc_udp_early_demux
> +       },
> +       {
> +               .procname       = "tcp_early_demux",
> +               .data           = &init_net.ipv4.sysctl_tcp_early_demux,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = proc_tcp_early_demux
> +       },
> +       {
>                 .procname       = "ip_default_ttl",
>                 .data           = &init_net.ipv4.sysctl_ip_default_ttl,
>                 .maxlen         = sizeof(int),
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 4c60c6f..0dd761c 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1926,13 +1926,21 @@ struct proto tcpv6_prot = {
>         .diag_destroy           = tcp_abort,
>  };
>
> -static const struct inet6_protocol tcpv6_protocol = {
> +static struct inet6_protocol tcpv6_protocol = {
>         .early_demux    =       tcp_v6_early_demux,
>         .handler        =       tcp_v6_rcv,
>         .err_handler    =       tcp_v6_err,
>         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
>  };
>
> +void tcp_v6_early_demux_configure(int enable)
> +{
> +       if (enable)
> +               tcpv6_protocol.early_demux = tcp_v6_early_demux;
> +       else
> +               tcpv6_protocol.early_demux = NULL;
> +}
> +
>  static struct inet_protosw tcpv6_protosw = {
>         .type           =       SOCK_STREAM,
>         .protocol       =       IPPROTO_TCP,
> --
> 1.9.1
>

  reply	other threads:[~2017-03-10  3:42 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-10  3:31 [PATCH net-next v2] net: Add sysctl to toggle early demux for tcp and udp Subash Abhinov Kasiviswanathan
2017-03-10  3:42 ` Tom Herbert [this message]
2017-03-10  5:26   ` Subash Abhinov Kasiviswanathan
2017-03-10 16:33     ` Tom Herbert
2017-03-11  0:22       ` Eric Dumazet
2017-03-11  0:49         ` Tom Herbert
2017-03-18 17:32           ` Subash Abhinov Kasiviswanathan
2017-03-18 17:44             ` Tom Herbert
2017-03-19  2:07               ` Subash Abhinov Kasiviswanathan
2017-03-19 19:18                 ` Eric Dumazet
2017-03-21 22:49                 ` Tom Herbert
2017-03-10  4:25 ` Eric Dumazet
2017-03-10  7:34   ` Subash Abhinov Kasiviswanathan
2017-03-10 12:42 ` kbuild test robot
2017-03-10 12:44 ` kbuild test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CALx6S360Y3tmKMHVMUEuRKCskuwXi+9JBfce6WiSpko1sHj=Yg@mail.gmail.com' \
    --to=tom@herbertland.com \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=stephen@networkplumber.org \
    --cc=subashab@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.