* [PATCH net-next 1/2] ipv4: add sysctl to skip route notify on nexthop changes
2020-04-23 5:21 [PATCH net-next 0/2] nexthop: sysctl to skip route notifications on nexthop changes Roopa Prabhu
@ 2020-04-23 5:21 ` Roopa Prabhu
2020-04-23 5:21 ` [PATCH net-next 2/2] ipv6: " Roopa Prabhu
[not found] ` <CAJieiUgHMjVozdSE_DM1yDnGuUEXkamDgmKwUfdBbvhTdx3Eqg@mail.gmail.com>
2 siblings, 0 replies; 5+ messages in thread
From: Roopa Prabhu @ 2020-04-23 5:21 UTC (permalink / raw)
To: dsahern, davem; +Cc: netdev, nikolay, bpoirier
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Route notifications on nexthop changes exist for backward
compatibility. In systems which have moved to the new
nexthop API, these route update notifications cancel the
performance benefits provided by the new nexthop API.
This patch adds a sysctl to disable these route notifications
generated for changes to nexthop objects.
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
include/net/netns/ipv4.h | 2 ++
net/ipv4/af_inet.c | 1 +
net/ipv4/nexthop.c | 3 ++-
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
4 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 154b8f0..59a190c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -111,6 +111,8 @@ struct netns_ipv4 {
int sysctl_tcp_early_demux;
int sysctl_udp_early_demux;
+ int sysctl_nexthop_skip_route_notify;
+
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
#ifdef CONFIG_NET_L3_MASTER_DEV
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c618e24..7c1db4b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1835,6 +1835,7 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_early_demux = 1;
net->ipv4.sysctl_udp_early_demux = 1;
net->ipv4.sysctl_tcp_early_demux = 1;
+ net->ipv4.sysctl_nexthop_skip_route_notify = 0;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index fdfca53..fc6c76b 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -895,7 +895,8 @@ static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
{
struct fib6_info *f6i;
- if (!list_empty(&nh->fi_list)) {
+ if (!net->ipv4.sysctl_nexthop_skip_route_notify &&
+ !list_empty(&nh->fi_list)) {
struct fib_info *fi;
/* expectation is a few fib_info per nexthop and then
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 81b267e..1cd010d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -711,6 +711,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_tcp_early_demux
},
{
+ .procname = "nexthop_skip_route_notify",
+ .data = &init_net.ipv4.sysctl_nexthop_skip_route_notify,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
--
2.1.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH net-next 2/2] ipv6: add sysctl to skip route notify on nexthop changes
2020-04-23 5:21 [PATCH net-next 0/2] nexthop: sysctl to skip route notifications on nexthop changes Roopa Prabhu
2020-04-23 5:21 ` [PATCH net-next 1/2] ipv4: add sysctl to skip route notify " Roopa Prabhu
@ 2020-04-23 5:21 ` Roopa Prabhu
[not found] ` <CAJieiUgHMjVozdSE_DM1yDnGuUEXkamDgmKwUfdBbvhTdx3Eqg@mail.gmail.com>
2 siblings, 0 replies; 5+ messages in thread
From: Roopa Prabhu @ 2020-04-23 5:21 UTC (permalink / raw)
To: dsahern, davem; +Cc: netdev, nikolay, bpoirier
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Route notifications on nexthop changes exist for backward
compatibility. In systems which have moved to the new
nexthop API, these route update notifications cancel
the performance benefits provided by the new nexthop API.
This patch adds a sysctl to disable these route notifications.
The sysctl check is added in fib6_rt_update, which seems
like the least intrusive approach. I have considered adding the
sysctl check in the nexthop code that calls fib6_rt_update, but
that requires the sysctl access to be via ipv6_stub, which
seems like overkill. I have also considered making the fib6_rt_update
ipv6_stub op take a nexthop, but that creates more problems
by exposing the nexthop object to ipv6_stub.
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
include/net/netns/ipv6.h | 1 +
net/ipv6/route.c | 14 ++++++++++++++
2 files changed, 15 insertions(+)
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5ec0544..25818493 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -51,6 +51,7 @@ struct netns_sysctl_ipv6 {
int max_hbh_opts_len;
int seg6_flowlabel;
bool skip_notify_on_dev_down;
+ bool nexthop_skip_route_notify;
};
struct netns_ipv6 {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 310cbdd..d023ba0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6006,6 +6006,9 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct sk_buff *skb;
int err = -ENOBUFS;
+ if (net->ipv6.sysctl.nexthop_skip_route_notify)
+ return;
+
/* call_fib6_entry_notifiers will be removed when in-kernel notifier
* is implemented and supported for nexthop objects
*/
@@ -6188,6 +6191,15 @@ static struct ctl_table ipv6_route_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "nexthop_skip_route_notify",
+ .data = &init_net.ipv6.sysctl.nexthop_skip_route_notify,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ }
};
@@ -6212,6 +6224,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
+ table[11].data = &net->ipv6.sysctl.nexthop_skip_route_notify;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -6283,6 +6296,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.skip_notify_on_dev_down = 0;
+ net->ipv6.sysctl.nexthop_skip_route_notify = 0;
net->ipv6.ip6_rt_gc_expire = 30*HZ;
--
2.1.4
^ permalink raw reply related [flat|nested] 5+ messages in thread