All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
@ 2021-07-06  5:25 Ryoga Saito
  2021-07-08  1:31 ` Andrea Mayer
  0 siblings, 1 reply; 12+ messages in thread
From: Ryoga Saito @ 2021-07-06  5:25 UTC (permalink / raw)
  To: netdev; +Cc: netfilter-devel, davem, Ryoga Saito

Tunneling protocols such as VXLAN or IPIP are implemented using virtual
network devices (vxlan0 or ipip0). Therefore, conntrack can record both
inner flows and outer flows correctly. In contrast, SRv6 is implemented
using lightweight tunnel infrastructure. Therefore, SRv6 packets are
encapsulated and decapsulated without passing through virtual network
device. Due to the following problems caused by this, conntrack can't
record both inner flows and outer flows correctly.

First problem is caused when SRv6 packets are encapsulated. In VXLAN, at
first, packets received are passed to nf_conntrack_in called from
ip_rcv/ipv6_rcv. These packets are sent to virtual network device and these
flows are confirmed in ip_output/ip6_output. However, in SRv6, at first,
packets are passed to nf_conntrack_in, encapsulated and flows are confirmed
in ipv6_output even if inner packets are IPv4. Therefore, IPv6 conntrack
needs to be enabled to track IPv4 inner flow.

Second problem is caused when SRv6 packets are decapsulated. If IPv6
conntrack is enabled, SRv6 packets are passed to nf_conntrack_in called
from ipv6_rcv. Even if inner packets are passed to nf_conntrack_in after
packets are decapsulated, flow aren't tracked because skb->_nfct is already
set. Therefore, IPv6 conntrack needs to be disabled to track IPv4 flow
when packets are decapsulated.

This patch solves these problems and allows conntrack to record inner flows
correctly. It introduces netfilter hooks to srv6 lwtunnel and srv6local
lwtunnel. Before decapsulation/encapsulation, packets are hooked with
POST_ROUTING chain and after decapsulation/encapsulation, packets are
hooked with LOCAL_OUT chain.

Signed-off-by: Ryoga Saito <proelbtn@gmail.com>
---
 net/ipv6/seg6_iptunnel.c |  54 ++++++++++++++++++++----
 net/ipv6/seg6_local.c    | 104 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 113 insertions(+), 45 deletions(-)

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59..ce638da 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,6 +26,7 @@
 #ifdef CONFIG_IPV6_SEG6_HMAC
 #include <net/seg6_hmac.h>
 #endif
+#include <net/netfilter/nf_conntrack.h>

 static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
 {
@@ -295,15 +296,23 @@ static int seg6_do_srh(struct sk_buff *skb)

 	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+	nf_reset_ct(skb);

 	return 0;
 }

-static int seg6_input(struct sk_buff *skb)
+static int seg6_input_finish(struct net *net, struct sock *sk,
+			     struct sk_buff *skb)
+{
+	return dst_input(skb);
+}
+
+static int seg6_input_core(struct net *net, struct sock *sk,
+			   struct sk_buff *skb)
 {
 	struct dst_entry *orig_dst = skb_dst(skb);
 	struct dst_entry *dst = NULL;
-	struct seg6_lwt *slwt;
+	struct seg6_lwt *slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 	int err;

 	err = seg6_do_srh(skb);
@@ -312,8 +321,6 @@ static int seg6_input(struct sk_buff *skb)
 		return err;
 	}

-	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
-
 	preempt_disable();
 	dst = dst_cache_get(&slwt->cache);
 	preempt_enable();
@@ -337,10 +344,27 @@ static int seg6_input(struct sk_buff *skb)
 	if (unlikely(err))
 		return err;

-	return dst_input(skb);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, dev_net(skb->dev), NULL,
+		       skb, NULL, skb_dst(skb)->dev, seg6_input_finish);
 }

-static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int seg6_input(struct sk_buff *skb)
+{
+	int proto;
+
+	if (skb->protocol == htons(ETH_P_IPV6))
+		proto = NFPROTO_IPV6;
+	else if (skb->protocol == htons(ETH_P_IP))
+		proto = NFPROTO_IPV4;
+	else
+		return -EINVAL;
+
+	return NF_HOOK(proto, NF_INET_POST_ROUTING, dev_net(skb->dev), NULL,
+		       skb, NULL, skb_dst(skb)->dev, seg6_input_core);
+}
+
+static int seg6_output_core(struct net *net, struct sock *sk,
+			    struct sk_buff *skb)
 {
 	struct dst_entry *orig_dst = skb_dst(skb);
 	struct dst_entry *dst = NULL;
@@ -387,12 +411,28 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (unlikely(err))
 		goto drop;

-	return dst_output(net, sk, skb);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
 drop:
 	kfree_skb(skb);
 	return err;
 }

+static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	int proto;
+
+	if (skb->protocol == htons(ETH_P_IPV6))
+		proto = NFPROTO_IPV6;
+	else if (skb->protocol == htons(ETH_P_IP))
+		proto = NFPROTO_IPV4;
+	else
+		return -EINVAL;
+
+	return NF_HOOK(proto, NF_INET_POST_ROUTING, net, sk, skb, NULL,
+		       skb_dst(skb)->dev, seg6_output_core);
+}
+
 static int seg6_build_state(struct net *net, struct nlattr *nla,
 			    unsigned int family, const void *cfg,
 			    struct lwtunnel_state **ts,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 60bf3b8..dd29acc 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
 #include <net/seg6_local.h>
 #include <linux/etherdevice.h>
 #include <linux/bpf.h>
+#include <net/netfilter/nf_conntrack.h>

 #define SEG6_F_ATTR(i)		BIT(i)

@@ -413,12 +414,31 @@ static int input_action_end_dx2(struct sk_buff *skb,
 	return -EINVAL;
 }

+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+				       struct sk_buff *skb)
+{
+	struct dst_entry *orig_dst = skb_dst(skb);
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+	struct in6_addr *nhaddr = NULL;
+
+	/* The inner packet is not associated to any local interface,
+	 * so we do not call netif_rx().
+	 *
+	 * If slwt->nh6 is set to ::, then lookup the nexthop for the
+	 * inner packet's DA. Otherwise, use the specified nexthop.
+	 */
+	if (!ipv6_addr_any(&slwt->nh6))
+		nhaddr = &slwt->nh6;
+
+	seg6_lookup_nexthop(skb, nhaddr, 0);
+
+	return dst_input(skb);
+}
+
 /* decapsulate and forward to specified nexthop */
 static int input_action_end_dx6(struct sk_buff *skb,
 				struct seg6_local_lwt *slwt)
 {
-	struct in6_addr *nhaddr = NULL;
-
 	/* this function accepts IPv6 encapsulated packets, with either
 	 * an SRH with SL=0, or no SRH.
 	 */
@@ -429,33 +449,43 @@ static int input_action_end_dx6(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto drop;

-	/* The inner packet is not associated to any local interface,
-	 * so we do not call netif_rx().
-	 *
-	 * If slwt->nh6 is set to ::, then lookup the nexthop for the
-	 * inner packet's DA. Otherwise, use the specified nexthop.
-	 */
-
-	if (!ipv6_addr_any(&slwt->nh6))
-		nhaddr = &slwt->nh6;
-
 	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+	nf_reset_ct(skb);

-	seg6_lookup_nexthop(skb, nhaddr, 0);
-
-	return dst_input(skb);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, dev_net(skb->dev), NULL,
+		       skb, NULL, skb_dst(skb)->dev,
+		       input_action_end_dx6_finish);
 drop:
 	kfree_skb(skb);
 	return -EINVAL;
 }

-static int input_action_end_dx4(struct sk_buff *skb,
-				struct seg6_local_lwt *slwt)
+static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
+				       struct sk_buff *skb)
 {
+	struct dst_entry *orig_dst = skb_dst(skb);
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
 	struct iphdr *iph;
 	__be32 nhaddr;
 	int err;

+	iph = ip_hdr(skb);
+	nhaddr = slwt->nh4.s_addr ?: iph->daddr;
+
+	skb_dst_drop(skb);
+
+	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+	if (err) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return dst_input(skb);
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+				struct seg6_local_lwt *slwt)
+{
 	if (!decap_and_validate(skb, IPPROTO_IPIP))
 		goto drop;

@@ -463,20 +493,12 @@ static int input_action_end_dx4(struct sk_buff *skb,
 		goto drop;

 	skb->protocol = htons(ETH_P_IP);
-
-	iph = ip_hdr(skb);
-
-	nhaddr = slwt->nh4.s_addr ?: iph->daddr;
-
-	skb_dst_drop(skb);
-
 	skb_set_transport_header(skb, sizeof(struct iphdr));
+	nf_reset_ct(skb);

-	err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
-	if (err)
-		goto drop;
-
-	return dst_input(skb);
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, dev_net(skb->dev), NULL,
+		       skb, NULL, skb_dst(skb)->dev,
+		       input_action_end_dx4_finish);

 drop:
 	kfree_skb(skb);
@@ -645,6 +667,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
 	skb_dst_drop(skb);

 	skb_set_transport_header(skb, hdrlen);
+	nf_reset_ct(skb);

 	return end_dt_vrf_rcv(skb, family, vrf);

@@ -1078,22 +1101,16 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
 	u64_stats_update_end(&pcounters->syncp);
 }

-static int seg6_local_input(struct sk_buff *skb)
+static int seg6_local_input_core(struct net *net, struct sock *sk,
+				 struct sk_buff *skb)
 {
 	struct dst_entry *orig_dst = skb_dst(skb);
+	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
 	struct seg6_action_desc *desc;
-	struct seg6_local_lwt *slwt;
 	unsigned int len = skb->len;
 	int rc;

-	if (skb->protocol != htons(ETH_P_IPV6)) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
-	slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
 	desc = slwt->desc;
-
 	rc = desc->input(skb, slwt);

 	if (!seg6_lwtunnel_counters_enabled(slwt))
@@ -1104,6 +1121,17 @@ static int seg6_local_input(struct sk_buff *skb)
 	return rc;
 }

+static int seg6_local_input(struct sk_buff *skb)
+{
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, dev_net(skb->dev), NULL,
+		       skb, skb->dev, NULL, seg6_local_input_core);
+}
+
 static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
 	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
 	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
--
1.8.3.1


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-06  5:25 [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows Ryoga Saito
@ 2021-07-08  1:31 ` Andrea Mayer
  2021-07-08 13:38   ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Andrea Mayer @ 2021-07-08  1:31 UTC (permalink / raw)
  To: Ryoga Saito
  Cc: davem, yoshfuji, dsahern, kuba, netfilter-devel,
	Stefano Salsano <stefano.salsano@uniroma2.it>,
	Paolo Lungaroni <paolo.lungaroni@uniroma2.it>,
	Andrea Mayer <andrea.mayer@uniroma2.it>

Dear Ryoga,
looking at your patch I noted several issues.
I start from the decap part but the same critical aspects are present also in
the encap one.

On Tue, 6 Jul 2021 14:25:48 +0900
Ryoga Saito <proelbtn@gmail.com> wrote:
> [...]
>
>
> +static int seg6_local_input(struct sk_buff *skb)
> +{
> +    if (skb->protocol != htons(ETH_P_IPV6)) {
> +        kfree_skb(skb);
> +        return -EINVAL;
> +    }
> +
> +    return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, dev_net(skb->dev), NULL,
> +               skb, skb->dev, NULL, seg6_local_input_core);
> +}
> +

The main problem here concerns the use that has been made of the netfilter
HOOKs combined with the SRv6 processing logic.

In seg6_local_input, it is not absolutely guaranteed that the packet is
intended to be processed and delivered locally. In fact, depending on the given
configuration and behavior, the packet can either be i) processed and delivered
locally or ii) processed and forwarded to another node.

In seg6_local_input, depending on the given configuration and behavior, the
packet can either be i) processed and delivered locally or ii) processed and
forwarded to another node. On the other hand, your code assumes that the packet
is intended to be processed and delivered locally.


Calling the nfhook NFPROTO_IPV6 with NF_INET_LOCAL_IN can have several side
effects. I'll show you one below with an example:

suppose you have a transit SRv6 node (which we call T) configured with an SRv6
Behavior End (in other words, node T receives SRv6 traffic to be processed by
SRv6 End and forwarded to another node). Such node T is configured with
firewall rules on the INPUT CHAIN that prevent it from receiving traffic that
was *NOT* generated by the node itself (speaking of conntrack...). This
configuration can be enforced either through an explicit rule (i.e. XXX -j
DROP) or by setting the default INPUT CHAIN policy to DROP (as it would be done
in a traditional firewall configuration).

In this patch, what happens is that when an SRv6 packet passes through the
node, the call to the nfhook with NF_INET_LOCAL_IN triggers the call to the
firewall and the DROP policy on INPUT kicks in. As a result, the packet is
discarded. What makes the situation even worse is that using the nfhook in this
way breaks the SRv6 Behavior counter system (making that totally unusable).


> +static int input_action_end_dx4(struct sk_buff *skb,
> +                struct seg6_local_lwt *slwt)
> +{
[...]

Similar problems with the inappropriate use of the hook also exist in
action_end_dx4.

> +static int seg6_input(struct sk_buff *skb)
> +{
> +    int proto;
> +
> +    if (skb->protocol == htons(ETH_P_IPV6))
> +        proto = NFPROTO_IPV6;
> +    else if (skb->protocol == htons(ETH_P_IP))
> +        proto = NFPROTO_IPV4;
> +    else
> +        return -EINVAL;
> +
> +    return NF_HOOK(proto, NF_INET_POST_ROUTING, dev_net(skb->dev), NULL,
> +               skb, NULL, skb_dst(skb)->dev, seg6_input_core);
> +}
> +
>


Another example where the normal processing flow is altered is in the
seg6_input() function (on the encap side). The seg6_input function should be
called in case of i) local processing and delivery or ii) local processing and
forwarding of the packet to another node. However, in this case a nfhook with
POST_ROUTING is called.

> +static int seg6_output_core(struct net *net, struct sock *sk,
> +                struct sk_buff *skb)
> {
>     struct dst_entry *orig_dst = skb_dst(skb);
>     struct dst_entry *dst = NULL;
> @@ -387,12 +411,28 @@ static int seg6_output(struct net *net, struct sock > > *sk, struct sk_buff *skb)
>     if (unlikely(err))
>         goto drop;
>
> -    return dst_output(net, sk, skb);
> +    return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL,
> +               skb_dst(skb)->dev, dst_output);
>

In turn, the seg6_input_core function calls the nfhook set with
NF_INET_LOCAL_OUT. Doing that side effects may be expected, because the natural
order of packet processing in netfilter, or more specifically in the SRv6
framework, has been changed.

There are also some minor issues, such as trying to follow the coding style of
the SRv6 Networking subsystem (this applies also to the Networking subsystem in
general). For example here:

+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+                       struct sk_buff *skb)
+{
+    struct dst_entry *orig_dst = skb_dst(skb);
+    struct seg6_local_lwt *slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+    struct in6_addr *nhaddr = NULL;
+    [...]

The code should respect the Reverse Christmas tree, i.e:

    struct dst_entry *orig_dst = skb_dst(skb);
    struct in6_addr *nhaddr = NULL;
    struct seg6_local_lwt *slwt;

    slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
    [...]


Ciao,
Andrea

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-08  1:31 ` Andrea Mayer
@ 2021-07-08 13:38   ` Pablo Neira Ayuso
  2021-07-08 16:32     ` Andrea Mayer
  0 siblings, 1 reply; 12+ messages in thread
From: Pablo Neira Ayuso @ 2021-07-08 13:38 UTC (permalink / raw)
  To: Andrea Mayer; +Cc: Ryoga Saito, davem, yoshfuji, dsahern, kuba, netfilter-devel

Dear Andrea,

On Thu, Jul 08, 2021 at 03:31:38AM +0200, Andrea Mayer wrote:
> Dear Ryoga,
> looking at your patch I noted several issues.
> I start from the decap part but the same critical aspects are present also in
> the encap one.
> 
> On Tue, 6 Jul 2021 14:25:48 +0900
> Ryoga Saito <proelbtn@gmail.com> wrote:
> > [...]
> >
> >
> > +static int seg6_local_input(struct sk_buff *skb)
> > +{
> > +    if (skb->protocol != htons(ETH_P_IPV6)) {
> > +        kfree_skb(skb);
> > +        return -EINVAL;
> > +    }
> > +
> > +    return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, dev_net(skb->dev), NULL,
> > +               skb, skb->dev, NULL, seg6_local_input_core);
> > +}
> > +
> 
> The main problem here concerns the use that has been made of the netfilter
> HOOKs combined with the SRv6 processing logic.
> 
> In seg6_local_input, it is not absolutely guaranteed that the packet is
> intended to be processed and delivered locally. In fact, depending on the given
> configuration and behavior, the packet can either be i) processed and delivered
> locally or ii) processed and forwarded to another node.

What SRv6 decides to do with the packet is irrelevant, see below.

> In seg6_local_input, depending on the given configuration and behavior, the
> packet can either be i) processed and delivered locally or ii) processed and
> forwarded to another node. On the other hand, your code assumes that the packet
> is intended to be processed and delivered locally.
>
> Calling the nfhook NFPROTO_IPV6 with NF_INET_LOCAL_IN can have several side
> effects.

This is how UDP tunnel encap_rcv infrastructure works: the packet
follows the INPUT path. The encap_rcv() might decide to reinject the
decapsulated packet to stack or forward it somewhere else.

> I'll show you one below with an example:
> 
> suppose you have a transit SRv6 node (which we call T) configured with an SRv6
> Behavior End (in other words, node T receives SRv6 traffic to be processed by
> SRv6 End and forwarded to another node). Such node T is configured with
> firewall rules on the INPUT CHAIN that prevent it from receiving traffic that
> was *NOT* generated by the node itself (speaking of conntrack...). This
> configuration can be enforced either through an explicit rule (i.e. XXX -j
> DROP) or by setting the default INPUT CHAIN policy to DROP (as it would be done
> in a traditional firewall configuration).
> 
> In this patch, what happens is that when an SRv6 packet passes through the
> node, the call to the nfhook with NF_INET_LOCAL_IN triggers the call to the
> firewall and the DROP policy on INPUT kicks in. As a result, the packet is
> discarded. What makes the situation even worse is that using the nfhook in this
> way breaks the SRv6 Behavior counter system (making that totally unusable).

By default there are no registered hooks, ie. no filtering policy in
place, the user needs to explicity specify a filtering policy, the
mechanism is not breaking anything, the user policy needs to be
consistent, that's all.

> > +static int input_action_end_dx4(struct sk_buff *skb,
> > +                struct seg6_local_lwt *slwt)
> > +{
> [...]
> 
> Similar problems with the inappropriate use of the hook also exist in
> action_end_dx4.
> 
> > +static int seg6_input(struct sk_buff *skb)
> > +{
> > +    int proto;
> > +
> > +    if (skb->protocol == htons(ETH_P_IPV6))
> > +        proto = NFPROTO_IPV6;
> > +    else if (skb->protocol == htons(ETH_P_IP))
> > +        proto = NFPROTO_IPV4;
> > +    else
> > +        return -EINVAL;
> > +
> > +    return NF_HOOK(proto, NF_INET_POST_ROUTING, dev_net(skb->dev), NULL,
> > +               skb, NULL, skb_dst(skb)->dev, seg6_input_core);
> > +}
> > +
> 
> Another example where the normal processing flow is altered is in the
> seg6_input() function (on the encap side). The seg6_input function should be
> called in case of i) local processing and delivery or ii) local processing and
> forwarding of the packet to another node. However, in this case a nfhook with
> POST_ROUTING is called.

By "local processing" here, you mean that the packet is going to enter
the tunnel. If this packet is locally generated, then calling
NF_INET_POST_ROUTING on the original packet (before being tunneled)
looks correct to me.

> > +static int seg6_output_core(struct net *net, struct sock *sk,
> > +                struct sk_buff *skb)
> > {
> >     struct dst_entry *orig_dst = skb_dst(skb);
> >     struct dst_entry *dst = NULL;
> > @@ -387,12 +411,28 @@ static int seg6_output(struct net *net, struct sock > > *sk, struct sk_buff *skb)
> >     if (unlikely(err))
> >         goto drop;
> >
> > -    return dst_output(net, sk, skb);
> > +    return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL,
> > +               skb_dst(skb)->dev, dst_output);
> >
> 
> In turn, the seg6_input_core function calls the nfhook set with
> NF_INET_LOCAL_OUT.

This looks consistent to me: the tunneled packet follows the output
path just like the UDP-based tunnel drivers, ie.
iudp_tunnel_xmit_skb() -> ip6tunnel_xmit() -> ip6_local_out().

> Doing that side effects may be expected, because the natural order
> of packet processing in netfilter, or more specifically in the SRv6
> framework, has been changed.
>
> There are also some minor issues, such as trying to follow the coding style of
> the SRv6 Networking subsystem (this applies also to the Networking subsystem in
> general). For example here:
> 
> +static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
> +                       struct sk_buff *skb)
> +{
> +    struct dst_entry *orig_dst = skb_dst(skb);
> +    struct seg6_local_lwt *slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
> +    struct in6_addr *nhaddr = NULL;
> +    [...]
> 
> The code should respect the Reverse Christmas tree, i.e:
> 
>     struct dst_entry *orig_dst = skb_dst(skb);
>     struct in6_addr *nhaddr = NULL;
>     struct seg6_local_lwt *slwt;
> 
>     slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
>     [...]

Indeed.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-08 13:38   ` Pablo Neira Ayuso
@ 2021-07-08 16:32     ` Andrea Mayer
       [not found]       ` <CALPAGbJt_rb_r3M2AEJ_6VRsG+zXrEOza0U-6SxFGsERGipT4w@mail.gmail.com>
  0 siblings, 1 reply; 12+ messages in thread
From: Andrea Mayer @ 2021-07-08 16:32 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Ryoga Saito, davem, yoshfuji, dsahern, kuba, netfilter-devel,
	Stefano Salsano, Paolo Lungaroni, Andrea Mayer

Dear Pablo,
thanks for your time.

On Thu, 8 Jul 2021 15:38:59 +0200
Pablo Neira Ayuso <pablo@netfilter.org> wrote:

> Dear Andrea,
> 
> On Thu, Jul 08, 2021 at 03:31:38AM +0200, Andrea Mayer wrote:
> > Dear Ryoga,
> > looking at your patch I noted several issues.
> > I start from the decap part but the same critical aspects are present also in
> > the encap one.
> > 
> > On Tue, 6 Jul 2021 14:25:48 +0900
> > Ryoga Saito <proelbtn@gmail.com> wrote:
> > > [...]
> > >
> > >
> > > +static int seg6_local_input(struct sk_buff *skb)
> > > +{
> > > +    if (skb->protocol != htons(ETH_P_IPV6)) {
> > > +        kfree_skb(skb);
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, dev_net(skb->dev), NULL,
> > > +               skb, skb->dev, NULL, seg6_local_input_core);
> > > +}
> > > +
> > 
> > The main problem here concerns the use that has been made of the netfilter
> > HOOKs combined with the SRv6 processing logic.
> > 
> > In seg6_local_input, it is not absolutely guaranteed that the packet is
> > intended to be processed and delivered locally. In fact, depending on the given
> > configuration and behavior, the packet can either be i) processed and delivered
> > locally or ii) processed and forwarded to another node.
> 
> What SRv6 decides to do with the packet is irrelevant, see below.
> 
> > In seg6_local_input, depending on the given configuration and behavior, the
> > packet can either be i) processed and delivered locally or ii) processed and
> > forwarded to another node. On the other hand, your code assumes that the packet
> > is intended to be processed and delivered locally.
> >
> > Calling the nfhook NFPROTO_IPV6 with NF_INET_LOCAL_IN can have several side
> > effects.
> 
> This is how UDP tunnel encap_rcv infrastructure works: the packet
> follows the INPUT path. The encap_rcv() might decide to reinject the
> decapsulated packet to stack or forward it somewhere else.
> 

the problem is that in SRv6 a packet to be processed by a node with an 
SRv6 End Behavior does not follow the INPUT path and its processing is 
different from the UDP tunnel example that you have in mind (more info 
below)

Note that I'm referring to the SRv6 Behaviors as implemented in seg6_local.c

> > I'll show you one below with an example:
> > 
> > suppose you have a transit SRv6 node (which we call T) configured with an SRv6
> > Behavior End (in other words, node T receives SRv6 traffic to be processed by
> > SRv6 End and forwarded to another node). Such node T is configured with
> > firewall rules on the INPUT CHAIN that prevent it from receiving traffic that
> > was *NOT* generated by the node itself (speaking of conntrack...). This
> > configuration can be enforced either through an explicit rule (i.e. XXX -j
> > DROP) or by setting the default INPUT CHAIN policy to DROP (as it would be done
> > in a traditional firewall configuration).
> > 
> > In this patch, what happens is that when an SRv6 packet passes through the
> > node, the call to the nfhook with NF_INET_LOCAL_IN triggers the call to the
> > firewall and the DROP policy on INPUT kicks in. As a result, the packet is
> > discarded. What makes the situation even worse is that using the nfhook in this
> > way breaks the SRv6 Behavior counter system (making that totally unusable).
> 
> By default there are no registered hooks, ie. no filtering policy in
> place, the user needs to explicity specify a filtering policy, the
> mechanism is not breaking anything, the user policy needs to be
> consistent, that's all.
> 

An example of consistent user policies in node T that can be installed
now and are broken by the patch is the following:
 
   ip6tables -A INPUT -i lo -j ACCEPT

   ip6tables -A INPUT \
          -p icmpv6 --icmpv6-type neighbor-solicitation -j ACCEPT                 
   ip6tables -A INPUT \
          -p icmpv6 --icmpv6-type neighbor-advertisement -j ACCEPT
                                                              
   ip6tables -A INPUT \
          -p icmpv6 --icmpv6-type router-solicitation -j ACCEPT
   ip6tables -A INPUT \
          -p icmpv6 --icmpv6-type router-advertisement -j ACCEPT

   ip6tables -A INPUT \
          -p icmpv6 --icmpv6-type echo-request -j ACCEPT
   ip6tables -A INPUT \
          -p icmpv6 --icmpv6-type echo-reply -j ACCEPT

   ip6tables -P INPUT DROP  

The IPv6 destination address fc01::2 (SRv6 SID) is configured as an SRv6 End
Behavior in node T. On node T we expect to receive an IPv6+SRH packet with
DA equals to fc01::2.
Please note that the fc01::2 is NOT a local address for the T node.

In node T only the following protocols are allowed in INPUT:

  icmpv6 RA, neigh adv/sol and echo request/reply

while all other protocols in INPUT are discarded.

with this consistent configuration, node T is able to correctly process and
forward packets with IPv6+SRH destination address fc01::2 because the INPUT
path is not taken when SRv6 implementation is processing an SRv6 End Behavior.

After the introduction of the patch, this correct behavior is broken.

If you want I can provide a diagram with the full description of this 
scenario and all the scripts to reproduce the issue.

Andrea

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
       [not found]       ` <CALPAGbJt_rb_r3M2AEJ_6VRsG+zXrEOza0U-6SxFGsERGipT4w@mail.gmail.com>
@ 2021-07-08 20:52         ` Ryoga Saito
  2021-07-09 18:48           ` Andrea Mayer
  0 siblings, 1 reply; 12+ messages in thread
From: Ryoga Saito @ 2021-07-08 20:52 UTC (permalink / raw)
  To: Andrea Mayer
  Cc: Pablo Neira Ayuso, davem, yoshfuji, dsahern, kuba,
	netfilter-devel, Stefano Salsano, Paolo Lungaroni

Dear Andrea
Thanks for your reviews and comments.


> On Jul 9, 2021, at 1:32, Andrea Mayer <andrea.mayer@uniroma2.it> wrote:
> 
> Dear Pablo,
> thanks for your time.
> 
> On Thu, 8 Jul 2021 15:38:59 +0200
> Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> 
>> Dear Andrea,
>> 
>> On Thu, Jul 08, 2021 at 03:31:38AM +0200, Andrea Mayer wrote:
>>> Dear Ryoga,
>>> looking at your patch I noted several issues.
>>> I start from the decap part but the same critical aspects are present also in
>>> the encap one.
>>> 
>>> On Tue, 6 Jul 2021 14:25:48 +0900
>>> Ryoga Saito <proelbtn@gmail.com> wrote:
>>>> [...]
>>>> 
>>>> 
>>>> +static int seg6_local_input(struct sk_buff *skb)
>>>> +{
>>>> +    if (skb->protocol != htons(ETH_P_IPV6)) {
>>>> +        kfree_skb(skb);
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>> +    return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, dev_net(skb->dev), NULL,
>>>> +               skb, skb->dev, NULL, seg6_local_input_core);
>>>> +}
>>>> +
>>> 
>>> The main problem here concerns the use that has been made of the netfilter
>>> HOOKs combined with the SRv6 processing logic.
>>> 
>>> In seg6_local_input, it is not absolutely guaranteed that the packet is
>>> intended to be processed and delivered locally. In fact, depending on the given
>>> configuration and behavior, the packet can either be i) processed and delivered
>>> locally or ii) processed and forwarded to another node.
>> 
>> What SRv6 decides to do with the packet is irrelevant, see below.
>> 
>>> In seg6_local_input, depending on the given configuration and behavior, the
>>> packet can either be i) processed and delivered locally or ii) processed and
>>> forwarded to another node. On the other hand, your code assumes that the packet
>>> is intended to be processed and delivered locally.
>>> 
>>> Calling the nfhook NFPROTO_IPV6 with NF_INET_LOCAL_IN can have several side
>>> effects.
>> 
>> This is how UDP tunnel encap_rcv infrastructure works: the packet
>> follows the INPUT path. The encap_rcv() might decide to reinject the
>> decapsulated packet to stack or forward it somewhere else.
>> 
> 
> the problem is that in SRv6 a packet to be processed by a node with an 
> SRv6 End Behavior does not follow the INPUT path and its processing is 
> different from the UDP tunnel example that you have in mind (more info 
> below)
> 
> Note that I'm referring to the SRv6 Behaviors as implemented in seg6_local.c

I thought SRv6 processing to be the same as encapsulation/decapsulation
processing and I implemented patch this way intentionally.

>>> I'll show you one below with an example:
>>> 
>>> suppose you have a transit SRv6 node (which we call T) configured with an SRv6
>>> Behavior End (in other words, node T receives SRv6 traffic to be processed by
>>> SRv6 End and forwarded to another node). Such node T is configured with
>>> firewall rules on the INPUT CHAIN that prevent it from receiving traffic that
>>> was *NOT* generated by the node itself (speaking of conntrack...). This
>>> configuration can be enforced either through an explicit rule (i.e. XXX -j
>>> DROP) or by setting the default INPUT CHAIN policy to DROP (as it would be done
>>> in a traditional firewall configuration).
>>> 
>>> In this patch, what happens is that when an SRv6 packet passes through the
>>> node, the call to the nfhook with NF_INET_LOCAL_IN triggers the call to the
>>> firewall and the DROP policy on INPUT kicks in. As a result, the packet is
>>> discarded. What makes the situation even worse is that using the nfhook in this
>>> way breaks the SRv6 Behavior counter system (making that totally unusable).
>> 
>> By default there are no registered hooks, ie. no filtering policy in
>> place, the user needs to explicity specify a filtering policy, the
>> mechanism is not breaking anything, the user policy needs to be
>> consistent, that's all.
>> 
> 
> An example of consistent user policies in node T that can be installed
> now and are broken by the patch is the following:
> 
>   ip6tables -A INPUT -i lo -j ACCEPT
> 
>   ip6tables -A INPUT \
>          -p icmpv6 --icmpv6-type neighbor-solicitation -j ACCEPT                 
>   ip6tables -A INPUT \
>          -p icmpv6 --icmpv6-type neighbor-advertisement -j ACCEPT
> 
>   ip6tables -A INPUT \
>          -p icmpv6 --icmpv6-type router-solicitation -j ACCEPT
>   ip6tables -A INPUT \
>          -p icmpv6 --icmpv6-type router-advertisement -j ACCEPT
> 
>   ip6tables -A INPUT \
>          -p icmpv6 --icmpv6-type echo-request -j ACCEPT
>   ip6tables -A INPUT \
>          -p icmpv6 --icmpv6-type echo-reply -j ACCEPT
> 
>   ip6tables -P INPUT DROP  
> 
> The IPv6 destination address fc01::2 (SRv6 SID) is configured as an SRv6 End
> Behavior in node T. On node T we expect to receive an IPv6+SRH packet with
> DA equals to fc01::2.
> Please note that the fc01::2 is NOT a local address for the T node.
> 
> In node T only the following protocols are allowed in INPUT:
> 
>  icmpv6 RA, neigh adv/sol and echo request/reply
> 
> while all other protocols in INPUT are discarded.
> 
> with this consistent configuration, node T is able to correctly process and
> forward packets with IPv6+SRH destination address fc01::2 because the INPUT
> path is not taken when SRv6 implementation is processing an SRv6 End Behavior.
> 
> After the introduction of the patch, this correct behavior is broken.

I considered srv6 Behaviors (e.g. T.Encaps) to be the same as the encapsulation
in other tunneling protocols, and srv6local Behaviors (e.g. End, End.DT4,
End.DT6, ...) to be the same as the decapsulation in other tunneling protocols
even if decapsulation isn't happened. I'm intended that SRv6 packets whose
destination address is SRv6 End Behavior go through INPUT path.

I think it works correctly on your situation with the following rule:

ip6tables -A INPUT --dst fc01::2 -j ACCEPT

To say more generally, SRv6 locator blocks should be allowed with ip6tables if
you want to change default policy of INPUT chain to DROP/REJECT.
 
Ryoga

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-08 20:52         ` Ryoga Saito
@ 2021-07-09 18:48           ` Andrea Mayer
  2021-07-11  7:12             ` Ryoga Saito
  0 siblings, 1 reply; 12+ messages in thread
From: Andrea Mayer @ 2021-07-09 18:48 UTC (permalink / raw)
  To: Ryoga Saito
  Cc: Pablo Neira Ayuso, davem, yoshfuji, dsahern, kuba,
	netfilter-devel, Stefano Salsano, Paolo Lungaroni, Andrea Mayer

Hi Ryoga,
please, see my answers below.

On Fri, 9 Jul 2021 05:52:45 +0900
Ryoga Saito <proelbtn@gmail.com> wrote:

> Dear Andrea
> Thanks for your reviews and comments.
>

You're welcome, always happy to help if I can.


> >
> > the problem is that in SRv6 a packet to be processed by a node with an
> > SRv6 End Behavior does not follow the INPUT path and its processing is
> > different from the UDP tunnel example that you have in mind (more info
> > below)
> >
> > Note that I'm referring to the SRv6 Behaviors as implemented in seg6_local.c
>
> I thought SRv6 processing to be the same as encapsulation/decapsulation
> processing and I implemented patch this way intentionally.
>

Segment routing is *not* just another protocol to carry encapsulated traffic
and, thus, to perform *only* encap/decap operations.

Directly quoting the abstract of the RFC 8986:
  ---
  The Segment Routing over IPv6 (SRv6) Network Programming framework
  enables a network operator or an application to specify a packet
  processing program by encoding a sequence of instructions in the IPv6
  packet header.

  Each instruction is implemented on one or several nodes in the
  network and identified by an SRv6 Segment Identifier in the packet.
  ---

In principle, each instruction can lead to a different processing than the
other.


>
> I considered srv6 Behaviors (e.g. T.Encaps) to be the same as the encapsulation
> in other tunneling protocols, and srv6local Behaviors (e.g. End, End.DT4,
> End.DT6, ...) to be the same as the decapsulation in other tunneling protocols
> even if decapsulation isn't happened. 

This is the point: SRv6 End, End.T, End.X with their flavors are *not* encap/
decap operations. As SRv6 is not a protocol meant only for encap/decap, we
cannot apply to this the same logic found in other protocols that perform
encap/decap operations.


> I'm intended that SRv6 packets whose
> destination address is SRv6 End Behavior go through INPUT path.
>

No, it does not work this way and this was an important design choice in the
SRv6 implementation in Linux. Basically, the SIDs corresponding to End
behaviors (srv6local) are *not* considered local addresses bound to a local
interface. This has important advantages in terms of the forwarding performance
of SRv6. We cannot change this design choice without causing a strong
performance impact on SRv6 forwarding.


> I think it works correctly on your situation with the following rule:
>
> ip6tables -A INPUT --dst fc01::2 -j ACCEPT
>
> To say more generally, SRv6 locator blocks should be allowed with ip6tables if
> you want to change default policy of INPUT chain to DROP/REJECT.
>  

No, this is a way to fix the problem that you introduce by changing the current
srv6local forwarding model. If you have 100 End SIDs should you add 100 accept
rules? The root problem is that SRv6 (srv6local) traffic should not go through
the INPUT path.

Have you set the traffic to flow through INPUT to confirm a connection (for
conntrack)? If this is the only reason, before changing the srv6local
processing model in such a disruptive way, you can investigate different ways
to do connection confirmation without going directly through nfhook with INPUT.
I can help with some hints if you are interested.

Andrea

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-09 18:48           ` Andrea Mayer
@ 2021-07-11  7:12             ` Ryoga Saito
  2021-07-12 23:31               ` Andrea Mayer
  0 siblings, 1 reply; 12+ messages in thread
From: Ryoga Saito @ 2021-07-11  7:12 UTC (permalink / raw)
  To: Andrea Mayer
  Cc: Pablo Neira Ayuso, davem, yoshfuji, dsahern, kuba,
	netfilter-devel, Stefano Salsano, Paolo Lungaroni

Thanks for your reply.

>> I considered srv6 Behaviors (e.g. T.Encaps) to be the same as the encapsulation
>> in other tunneling protocols, and srv6local Behaviors (e.g. End, End.DT4,
>> End.DT6, ...) to be the same as the decapsulation in other tunneling protocols
>> even if decapsulation isn't happened. 
> 
> This is the point: SRv6 End, End.T, End.X with their flavors are *not* encap/
> decap operations. As SRv6 is not a protocol meant only for encap/decap, we
> cannot apply to this the same logic found in other protocols that perform
> encap/decap operations.

Okay, I understood.

>> I think it works correctly on your situation with the following rule:
>> 
>> ip6tables -A INPUT --dst fc01::2 -j ACCEPT
>> 
>> To say more generally, SRv6 locator blocks should be allowed with ip6tables if
>> you want to change default policy of INPUT chain to DROP/REJECT.
>> 
> 
> No, this is a way to fix the problem that you introduce by changing the current
> srv6local forwarding model. If you have 100 End SIDs should you add 100 accept
> rules? The root problem is that SRv6 (srv6local) traffic should not go through
> the INPUT path.

No, you only need to add single rule for accepting these SIDs. Here is the
quote of RFC 8986.

---
This document defines an SRv6 SID as consisting of LOC:FUNCT:ARG, where
a locator (LOC) is encoded in the L most significant bits of the SID,
followed by F bits of function (FUNCT) and A bits of arguments (ARG). L,
the locator length, is flexible, and an operator is free to use the locator
length of their choice. F and A may be any value as long as L+F+A <= 128.
When L+F+A is less than 128, then the remaining bits of the SID MUST be
zero.

A locator may be represented as B:N where B is the SRv6 SID block (IPv6
prefix allocated for SRv6 SIDs by the operator) and N is the identifier of
the parent node instantiating the SID.

When the LOC part of the SRv6 SIDs is routable, it leads to the node, which
instantiates the SID.
---

If there are 100 SIDs, but these SIDs are for the same node, the locators
of these SIDs also should be same. so, you can allow SRv6 flows by adding
only single rule.

> Have you set the traffic to flow through INPUT to confirm a connection (for
> conntrack)? If this is the only reason, before changing the srv6local
> processing model in such a disruptive way, you can investigate different ways
> to do connection confirmation without going directly through nfhook with INPUT.
> I can help with some hints if you are interested.

You stated this patch isn't acceptable because NF_HOOK is called even when
End behavior is processing, aren't you? So, do you think it’s natural that
NF_HOOK is called *only* when SRv6 behavior is encap/decap operation. The
problem I stated first is that netfilter couldn't track inner flows of
SRv6-encapsulated packets regardless of the status of IPv6 conntrack. If
yes, I will fix and resubmit patch.

Ryoga

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-11  7:12             ` Ryoga Saito
@ 2021-07-12 23:31               ` Andrea Mayer
  2021-07-15 22:13                 ` Pablo Neira Ayuso
  0 siblings, 1 reply; 12+ messages in thread
From: Andrea Mayer @ 2021-07-12 23:31 UTC (permalink / raw)
  To: Ryoga Saito
  Cc: Pablo Neira Ayuso, davem, yoshfuji, dsahern, kuba,
	netfilter-devel, Stefano Salsano, Paolo Lungaroni, Andrea Mayer

Hi Ryoga,
Thanks for considering my points.

Please, see below for my answers.

> On Sun, 11 Jul 2021 16:12:48 +0900
> Ryoga Saito <proelbtn@gmail.com> wrote:

> >
> > No, this is a way to fix the problem that you introduce by changing the current
> > srv6local forwarding model. If you have 100 End SIDs should you add 100 accept
> > rules? The root problem is that SRv6 (srv6local) traffic should not go through
> > the INPUT path.
>
> No, you only need to add single rule for accepting these SIDs. Here is the
> quote of RFC 8986.
>
> ---
> This document defines an SRv6 SID as consisting of LOC:FUNCT:ARG, where
> a locator (LOC) is encoded in the L most significant bits of the SID,
> followed by F bits of function (FUNCT) and A bits of arguments (ARG). L,
> the locator length, is flexible, and an operator is free to use the locator
> length of their choice. F and A may be any value as long as L+F+A <= 128.
> When L+F+A is less than 128, then the remaining bits of the SID MUST be
> zero.
>
> A locator may be represented as B:N where B is the SRv6 SID block (IPv6
> prefix allocated for SRv6 SIDs by the operator) and N is the identifier of
> the parent node instantiating the SID.
>
> When the LOC part of the SRv6 SIDs is routable, it leads to the node, which
> instantiates the SID.
> ---
>
> If there are 100 SIDs, but these SIDs are for the same node, the locators
> of these SIDs also should be same. so, you can allow SRv6 flows by adding
> only single rule.

No, you cannot rely on this assumption.
Operators can choose to assign different locators to the same node.
The document you mention does not prescribe how the SIDs should be allocated on
the nodes, nor whether they should be part of one or more locators.
Consequently, no one imposes on us that those 100 SIDs must belong all to the
same locator.


> > Have you set the traffic to flow through INPUT to confirm a connection (for
> > conntrack)? If this is the only reason, before changing the srv6local
> > processing model in such a disruptive way, you can investigate different ways
> > to do connection confirmation without going directly through nfhook with INPUT.
> > I can help with some hints if you are interested.
>
> You stated this patch isn't acceptable because NF_HOOK is called even when
> End behavior is processing, aren't you? 

Yes, since the SRv6 processing (seg6_local) is applied to traffic with DAs not
necessarily associated with local addresses, it should not pass through INPUT.


> So, do you think it’s natural that
> NF_HOOK is called *only* when SRv6 behavior is encap/decap operation. The
> problem I stated first is that netfilter couldn't track inner flows of
> SRv6-encapsulated packets regardless of the status of IPv6 conntrack. If
> yes, I will fix and resubmit patch.
>

Let's consider encap/decap operation. The first important consideration is that
encap and decap are two different beasts.

Encap (T.Encap) is done in seg6_input (seg6_iptunnel) when a packet is
received on the IPv6 receive path and in seg6_output if the packet to be
encapsulated is locally generated.
Then you will have decap operations that are performed in seg6_local, according
to several different decap behaviors.

For the moment, let's consider the encap operation applied to a packet received
on the IPv6 receive path. If your plan is to call NF_HOOK set on OUTPUT, you
will have a similar problem to what I have already described for
seg6_local_input (seg6_local). However, this time the OUTPUT is involved rather
than the INPUT.

The SRv6 encap operation (seg6_input) for packets received on the IPv6 receive
path has been designed and implemented so that packets are not steered through
the OUTPUT. For this reason, if you change this design you will cause: 

 1) possible traffic loss due to some already existing policies in OUTPUT.
    In other words you will break existing working configuration;

 2) a performance drop in SRv6 encapsulation, which I have measured below.

---

I set up a testbed with the purpose of quickly and preliminarily testing the
performance (throughput) of a couple of patched processing functions you
proposed:

  i) SRv6 End (since the seg6_local_input function was patched);

 ii) SRv6 T.Encap (seg6_iptunnel).


The following scenarios were tested:

 1.a) vanilla kernel with a single SRv6 End Behavior and only 1 ip6tables
      (filter) rule to fill the INPUT (although not necessary, see below);
 
 1.b) vanilla kernel with a single SRv6 T.Encap and 0 ip6tables (filter)
      rules on OUTPUT;

 2.a) patched kernel with a single SRv6 End Behavior and only 1 ip6tables
      (filter) rule in INPUT to do accept (necessary to accept the SID);
 
 2.b) patched kernel with a single SRv6 T.Encap and 0 ip6tables (filter)
      rules on OUTPUT.

In 1.a and 2.a, I considered the *ideal* case where we have a single locator
and a single accept rule for the whole locator+SIDs. In the vanilla kernel this
is not necessary, but I wanted to simulate the presence of at least one INPUT
rule in both cases for being as fair as possible.
Indeed, in 2.a the accept rule is mandatory if the default policy in INPUT is
set to drop.

In 1.b and 2.b, I consider a use case where a SRv6 router can generate traffic
(of any kind) to communicate with another node in the network. Although this is
*not* often the case in reality, I considered the *ideal* situation where there
are no restrictions on OUTPUT at all.
However in the real world for case 2.b, we have to consider fixing OUTPUT
policies to handle the IPv6+SRH outer header in case there are some restrictions
on locally generated traffic by the router. Otherwise, this can break
configurations where OUTPUT policies are strict.


Results of tests run in the 4 scenarios:

  Achieved Throughput
  ===================
  1.a) avg. 939.34  kpps, std. dev. 1.09 kpps;
  1.b) avg. 1095.35 kpps, std. dev. 3.64 kpps;
  2.a) avg. 863.07  kpps, std. dev. 1.74 kpps;
  2.b) avg. 924.93  kpps, std. dev. 4.33 kpps.

The changes slowed down performance by about 8.1% in the case of SRv6 End
Behavior and 15.6% in the case of SRv6 T.Encap Behavior.

The performance drop is significant and is always experienced even when we are
not interested in tracking internal packets whatsoever.

I hope I have been of help.

Andrea

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-12 23:31               ` Andrea Mayer
@ 2021-07-15 22:13                 ` Pablo Neira Ayuso
  2021-07-19 10:12                   ` Ryoga Saito
  2021-07-19 11:55                   ` Andrea Mayer
  0 siblings, 2 replies; 12+ messages in thread
From: Pablo Neira Ayuso @ 2021-07-15 22:13 UTC (permalink / raw)
  To: Andrea Mayer
  Cc: Ryoga Saito, davem, yoshfuji, dsahern, kuba, netfilter-devel,
	Stefano Salsano, Paolo Lungaroni

Hi Andrea,

On Tue, Jul 13, 2021 at 01:31:16AM +0200, Andrea Mayer wrote:
[...]
> > On Sun, 11 Jul 2021 16:12:48 +0900
> > Ryoga Saito <proelbtn@gmail.com> wrote:
> 
> > >
> > > No, this is a way to fix the problem that you introduce by changing the current
> > > srv6local forwarding model. If you have 100 End SIDs should you add 100 accept
> > > rules? The root problem is that SRv6 (srv6local) traffic should not go through
> > > the INPUT path.
> >
> > No, you only need to add single rule for accepting these SIDs. Here is the
> > quote of RFC 8986.
> >
> > ---
> > This document defines an SRv6 SID as consisting of LOC:FUNCT:ARG, where
> > a locator (LOC) is encoded in the L most significant bits of the SID,
> > followed by F bits of function (FUNCT) and A bits of arguments (ARG). L,
> > the locator length, is flexible, and an operator is free to use the locator
> > length of their choice. F and A may be any value as long as L+F+A <= 128.
> > When L+F+A is less than 128, then the remaining bits of the SID MUST be
> > zero.
> >
> > A locator may be represented as B:N where B is the SRv6 SID block (IPv6
> > prefix allocated for SRv6 SIDs by the operator) and N is the identifier of
> > the parent node instantiating the SID.
> >
> > When the LOC part of the SRv6 SIDs is routable, it leads to the node, which
> > instantiates the SID.
> > ---
> >
> > If there are 100 SIDs, but these SIDs are for the same node, the locators
> > of these SIDs also should be same. so, you can allow SRv6 flows by adding
> > only single rule.
> 
> No, you cannot rely on this assumption.
> Operators can choose to assign different locators to the same node.
> The document you mention does not prescribe how the SIDs should be allocated on
> the nodes, nor whether they should be part of one or more locators.
> Consequently, no one imposes on us that those 100 SIDs must belong all to the
> same locator.

It is possible to filter 100 SIDs with one single rule and one set,
even if they are different SIDs.

> > > Have you set the traffic to flow through INPUT to confirm a connection (for
> > > conntrack)? If this is the only reason, before changing the srv6local
> > > processing model in such a disruptive way, you can investigate different ways
> > > to do connection confirmation without going directly through nfhook with INPUT.
> > > I can help with some hints if you are interested.
> >
> > You stated this patch isn't acceptable because NF_HOOK is called even when
> > End behavior is processing, aren't you? 
> 
> Yes, since the SRv6 processing (seg6_local) is applied to traffic with DAs not
> necessarily associated with local addresses, it should not pass through INPUT.

See below.

> > So, do you think it’s natural that
> > NF_HOOK is called *only* when SRv6 behavior is encap/decap operation. The
> > problem I stated first is that netfilter couldn't track inner flows of
> > SRv6-encapsulated packets regardless of the status of IPv6 conntrack. If
> > yes, I will fix and resubmit patch.
> >
> 
> Let's consider encap/decap operation. The first important consideration is that
> encap and decap are two different beasts.
> 
> Encap (T.Encap) is done in seg6_input (seg6_iptunnel) when a packet is
> received on the IPv6 receive path and in seg6_output if the packet to be
> encapsulated is locally generated.
> Then you will have decap operations that are performed in seg6_local, according
> to several different decap behaviors.
> 
> For the moment, let's consider the encap operation applied to a packet received
> on the IPv6 receive path. If your plan is to call NF_HOOK set on OUTPUT, you
> will have a similar problem to what I have already described for
> seg6_local_input (seg6_local). However, this time the OUTPUT is involved rather
> than the INPUT.

If this is a real concern, then it should be to possible to add new
hooks such as NF_INET_LWT_LOCAL_IN and NF_INET_LWT_LOCAL_OUT, and extend
conntrack to also register handlers for those new hooks.

> The SRv6 encap operation (seg6_input) for packets received on the IPv6 receive
> path has been designed and implemented so that packets are not steered through
> the OUTPUT. For this reason, if you change this design you will cause:
> 
>  1) possible traffic loss due to some already existing policies in OUTPUT.
>     In other words you will break existing working configuration;
>
>  2) a performance drop in SRv6 encapsulation, which I have measured below.
> 
> ---
> 
> I set up a testbed with the purpose of quickly and preliminarily testing the
> performance (throughput) of a couple of patched processing functions you
> proposed:
> 
>   i) SRv6 End (since the seg6_local_input function was patched);
> 
>  ii) SRv6 T.Encap (seg6_iptunnel).
> 
> 
> The following scenarios were tested:
> 
>  1.a) vanilla kernel with a single SRv6 End Behavior and only 1 ip6tables
>       (filter) rule to fill the INPUT (although not necessary, see below);
>  
>  1.b) vanilla kernel with a single SRv6 T.Encap and 0 ip6tables (filter)
>       rules on OUTPUT;
> 
>  2.a) patched kernel with a single SRv6 End Behavior and only 1 ip6tables
>       (filter) rule in INPUT to do accept (necessary to accept the SID);
>  
>  2.b) patched kernel with a single SRv6 T.Encap and 0 ip6tables (filter)
>       rules on OUTPUT.

This is not correct, you are evaluating here the cost of the
filtering, not the cost of the new hooks. If your concern that the new
hooks might slow down the IPv6 SRv6 datapath, then you should repeat
your experiment with and without the patch that adds the hooks.

And you should also provide more information on how you're collecting
any performance number to allow us to scrutinize that your performance
evaluation is correct.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-15 22:13                 ` Pablo Neira Ayuso
@ 2021-07-19 10:12                   ` Ryoga Saito
  2021-07-26 21:29                     ` Pablo Neira Ayuso
  2021-07-19 11:55                   ` Andrea Mayer
  1 sibling, 1 reply; 12+ messages in thread
From: Ryoga Saito @ 2021-07-19 10:12 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Andrea Mayer, davem, yoshfuji, dsahern, kuba, netfilter-devel,
	Stefano Salsano, Paolo Lungaroni

Hi Pablo

I would like your comments for it.

I have 2 implementation ideas about fixing this patch:

1.) fix only coding style pointed out in previous mail
2.) add sysctl parameter and change NF_HOOK to NF_HOOK_COND for user to
    select behavior of hook call

I believed SRv6 encaps/decaps operations should be tracked with conntrack
like any other virtual net-device based tunneling protocols (e.g. VXLAN,
IPIP), even if the forwarding performance slows down because occurred by
lack of considerations. and any other tunnels also have this overhead.

Therefore, I support 1st idea. However, 2nd idea is ok if the overhead
caused by adding new hook isn't acceptable.

Ryoga

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-15 22:13                 ` Pablo Neira Ayuso
  2021-07-19 10:12                   ` Ryoga Saito
@ 2021-07-19 11:55                   ` Andrea Mayer
  1 sibling, 0 replies; 12+ messages in thread
From: Andrea Mayer @ 2021-07-19 11:55 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: Ryoga Saito, davem, yoshfuji, dsahern, kuba, netfilter-devel,
	Stefano Salsano, Paolo Lungaroni, Ahmed Abdelsalam, Andrea Mayer

Hi Pablo,
Please see my answers below.

On Fri, 16 Jul 2021 00:13:42 +0200
Pablo Neira Ayuso <pablo@netfilter.org> wrote:

> Hi Andrea,
>
> On Tue, Jul 13, 2021 at 01:31:16AM +0200, Andrea Mayer wrote:
> [...]
> > > On Sun, 11 Jul 2021 16:12:48 +0900
> > > Ryoga Saito <proelbtn@gmail.com> wrote:
> >
> > > >
> > >
> > >
> > > If there are 100 SIDs, but these SIDs are for the same node, the locators
> > > of these SIDs also should be same. so, you can allow SRv6 flows by adding
> > > only single rule.
> >
> > No, you cannot rely on this assumption.
> > Operators can choose to assign different locators to the same node.
> > The document you mention does not prescribe how the SIDs should be allocated on
> > the nodes, nor whether they should be part of one or more locators.
> > Consequently, no one imposes on us that those 100 SIDs must belong all to the
> > same locator.
>
> It is possible to filter 100 SIDs with one single rule and one set,
> even if they are different SIDs.
>

Yes, with ipset you can avoid to increase the cost linearly with the number of
SIDs (at the prize of increased configuration complexity IMO, because the
network administrator should learn to use ipset).

Anyway, our concern is valid for the case in which you have only a single SID
and you are forced to add a single rule to explicitly allow that SID. In fact,
the measurement results we have discussed only consider one SID.


> > > > Have you set the traffic to flow through INPUT to confirm a connection (for
> > > > conntrack)? If this is the only reason, before changing the srv6local
> > > > processing model in such a disruptive way, you can investigate different ways
> > > > to do connection confirmation without going directly through nfhook with INPUT.
> > > > I can help with some hints if you are interested.
> > >
> > > You stated this patch isn't acceptable because NF_HOOK is called even when
> > > End behavior is processing, aren't you?
> >
> > Yes, since the SRv6 processing (seg6_local) is applied to traffic with DAs not
> > necessarily associated with local addresses, it should not pass through INPUT.
>
> See below.
>
> > > So, do you think it’s natural that
> > > NF_HOOK is called *only* when SRv6 behavior is encap/decap operation. The
> > > problem I stated first is that netfilter couldn't track inner flows of
> > > SRv6-encapsulated packets regardless of the status of IPv6 conntrack. If
> > > yes, I will fix and resubmit patch.
> > >
> >
> > Let's consider encap/decap operation. The first important consideration is that
> > encap and decap are two different beasts.
> >
> > Encap (T.Encap) is done in seg6_input (seg6_iptunnel) when a packet is
> > received on the IPv6 receive path and in seg6_output if the packet to be
> > encapsulated is locally generated.
> > Then you will have decap operations that are performed in seg6_local, according
> > to several different decap behaviors.
> >
> > For the moment, let's consider the encap operation applied to a packet received
> > on the IPv6 receive path. If your plan is to call NF_HOOK set on OUTPUT, you
> > will have a similar problem to what I have already described for
> > seg6_local_input (seg6_local). However, this time the OUTPUT is involved rather
> > than the INPUT.
>
> If this is a real concern, then it should be to possible to add new
> hooks such as NF_INET_LWT_LOCAL_IN and NF_INET_LWT_LOCAL_OUT, and extend
> conntrack to also register handlers for those new hooks.

That seems to be a promising approach to be explored; I'm looking forward to
receiving more details on how you will design the packet processing paths for
the various scenarios considering these new hooks.

> > The SRv6 encap operation (seg6_input) for packets received on the IPv6 receive
> > path has been designed and implemented so that packets are not steered through
> > the OUTPUT. For this reason, if you change this design you will cause:
> >
> >  1) possible traffic loss due to some already existing policies in OUTPUT.
> >     In other words you will break existing working configuration;
> >
> >  2) a performance drop in SRv6 encapsulation, which I have measured below.
> >
> > ---
> >
> > I set up a testbed with the purpose of quickly and preliminarily testing the
> > performance (throughput) of a couple of patched processing functions you
> > proposed:
> >
> >   i) SRv6 End (since the seg6_local_input function was patched);
> >
> >  ii) SRv6 T.Encap (seg6_iptunnel).
> >
> >
> > The following scenarios were tested:
> >
> >  1.a) vanilla kernel with a single SRv6 End Behavior and only 1 ip6tables
> >       (filter) rule to fill the INPUT (although not necessary, see below);
> >  
> >  1.b) vanilla kernel with a single SRv6 T.Encap and 0 ip6tables (filter)
> >       rules on OUTPUT;
> >
> >  2.a) patched kernel with a single SRv6 End Behavior and only 1 ip6tables
> >       (filter) rule in INPUT to do accept (necessary to accept the SID);
> >  
> >  2.b) patched kernel with a single SRv6 T.Encap and 0 ip6tables (filter)
> >       rules on OUTPUT.
>
> This is not correct, you are evaluating here the cost of the
> filtering, not the cost of the new hooks. If your concern that the new
> hooks might slow down the IPv6 SRv6 datapath, then you should repeat
> your experiment with and without the patch that adds the hooks.
>

The problem is that the patch forces us to add an explicit accept rule when the
default policy is set to DROP. So, we are measuring the performance penalty for
an existing and typical scenario when filtering is a requirement.

We could also measure as you suggest the performance penalty due to the simple
addition of the hooks. This is an interesting test to be performed that we can
definitely consider. However this gives us the penalty for the "best case" not
even for the "average" one.

> And you should also provide more information on how you're collecting
> any performance number to allow us to scrutinize that your performance
> evaluation is correct.

You are right.
Sorry for not having included details about the test earlier.
Please see below, thanks.

---

Details on Testbed and measurements
===================================

We set up a testbed using the SRPerf framework as discussed in [1]. 
Figure 1 depicts the testbed architecture that we used: it comprises two nodes
denoted as Traffic Generator and Receiver (TGR) and System Under Test (SUT)
respectively. 


+--------------+                           +--------------+
|              |(Sender port)     (IN port)|              |
|     enp6s0f0 +-------------------------->| enp6s0f0     |
|   12:1::1/64 |          10 Gbps          | 12:1::2/64   |
|              |                           |              |
|     TGR      |                           |     SUT      |
|              |          10 Gbps          |              |
|     enp6s0f1 |<--------------------------+ enp6s0f1     |
|   12:2::1/64 |(Receiver port)  (OUT port)| 12:2::2/64   |
|              |                           |              |
+--------------+                           +--------------+

Figure 1: Testbed architecture

The packets are generated by the TGR on the Sender port, enter the SUT from the
IN port, exit the SUT from the OUT port and then they are received back by the
TGR on the Receiver port.

SRPerf measures the maximum throughput, defined as the maximum packet rate in
Packet Per Seconds (pps) for which the packet drop ratio is smaller than or
equal to a given threshold (in our case we chose 0.5%). This is also referred
to as Partial Drop Rate (PDR) at a 0.5% drop ratio (in short PDR@0.5%). Further
details on PDR can be found in [1].

SRPerf uses the TRex [2] generator in order to evaluate the maximum throughput
that can be processed by the SUT Node. The source code of SRPerf is available
at [3].

The testbed is deployed on the CloudLab facilities [4], a flexible
infrastructure dedicated to scientific research on the future of Cloud
Computing. Both the TGR and the SUT are two identical bare metal servers whose
hardware characteristics are shown below:

-----------------------------------------------------------
 CPU:   2x Intel E5-2630 (8 Core 16 Thread) at 2.40 GHz
 RAM:   128 GB of ECC RAM
 Disks: 2x 1.2 TB HDD SAS 6Gbps 10K rpm
        1x 480 GB SSD SAS 6Gbps
 NICs:  Intel Corporation 82599ES 10-Gigabit SFI/SFP
        Intel I350 1Gb Dual Port
-----------------------------------------------------------

Each bare metal server uses the two Intel 82599ES 10-Gigabit network interface
cards to provide back-to-back connectivity between testbed nodes like described
in Figure 1.

On the SUT Node we deploy and execute experiments for studying the proposed
scenarios.
Here we run:
  i) a vanilla Linux kernel 5.13.0, for scenarios 1.a and 2.a;
 ii) the same kernel release of (i) with the patch under discussion
     applied to, for scenarios 1.b and 2.b.

In any case, the SUT Node is configured as an SRv6 Node in an SRv6 Network.


SUT Node and experiment parameters
==================================

On the SUT Node we compiled both the vanilla kernel 5.13.0 and the patched
kernel 5.13.0 using the same config file. In particular, we enabled the
SR-related options as well as the Netfilter support. 

We disabled GRO, GSO and the hardware transmitting and receiving offloading
capabilities on the 82599ES 10-Gigabit network interfaces that are used to
connect the two testbed servers. We changed the IRQ settings so that all the
queues of the 10-Gigabit NICs were served by a single CPU and Hyper-Threading
was disabled. 


Scenarios 1.a, 2.a (SRv6 End behavior)
-------------------------------------

Two experiments have been carried out respectively for scenario 1.a and 2.a.
Either way, they both make use of the same SUT configuration and process the
same kind of traffic generated by the TGR.

TGR generates IPv6+SRH traffic which is sent directly to the SUT IN port. Every
single packet sent by the TGR is has the same format:


+-----------+------------+-----+------------+-----+-----------+
|           |            |     |            |     |           |
| MAC Layer | IPv6 Outer | SRH | IPv6 Inner | UDP | Raw bytes |
|           |            |     |            |     |           |
+-----------+------------+-----+------------+-----+-----------+

/-------------------------- 162 bytes ------------------------/

Where:
 - IPv6 Outer SA: 1:2:1::1, DA: f1::
 - SRH SID List: f1::,f2::
 - IPv6 Inner SA: 1:2:1::1, IPv6 DA: b::2 
 - UDP sport: 50701, dport: 5001 | Raw bytes (16 Bytes)

In the SUT Node, a single SRv6 End Behavior is set for the SID f1::

On SUT Node, incoming traffic at enp6s0f0 (IN port) matches the IPv6 DA with
the SID f1:: and the SRv6 End Behavior is executed. After that, processed
packets are then sent back to the TGR through the enp6s0f1 (OUT port).

Please note that SIDs f1:: and f2:: are *not* assigned to any interface of the
SUT.

The SUT Node is also configured with some firewall policies. The default rule
for IPv6 INPUT is set as DROP and an explicit ACCEPT rule for SID f1:: is also
configured.


Scenarios 1.b, 2.b (SRv6 T.Encap Behavior)
------------------------------------------

Two experiments have been carried out respectively for scenario 1.b and 2.b.
Either way, they both make use of the same SUT configuration and process the
same kind of traffic generated by the TGR.

TGR generates plain IPv6 traffic which is sent directly to the SUT IN port.
Every single packet sent by the TGR follows the same format:

+-----------+------------+-----+-----------+
|           |            |     |           |
| MAC Layer |    IPv6    | UDP | Raw bytes |
|           |            |     |           |
+-----------+------------+-----+-----------+

/----------------- 82 bytes ---------------/

Where:
 - IPv6 SA: 1:2:1::1, IPv6 DA: b::2 
 - UDP sport: 39892, dport: 5001 | Raw bytes (16 Bytes)


In the SUT Node, a single SRv6 T.Encap Behavior is set for the IPv6 DA b::2.

On SUT Node, incoming traffic at enp6s0f0 (IN port) matches the IPv6 DA with
b::2 and the SRv6 T.Encap Behavior is executed. Then, the whole
IPv6+SRH packet (SID List [f1::]) is sent back to the TGR through the
enp6s0f1 (OUT Port).

Please note that b::2 and SID f1:: are *not* assigned to any interface of the
SUT.

The SUT Node does not restrict the locally generated traffic.
The default OUTPUT policy for IPv6 is set to ACCEPT.

---

[1] A. Abdelsalam et al. "SRPerf: a Performance Evaluation Framework for
    IPv6 Segment Routing", IEEE Transactions on Network and Service
    Management (Volume: 18, Issue: 2, June 2021). 
    Available: https://arxiv.org/pdf/2001.06182.pdf

[2] TRex realistic traffic generator. https://trex-tgn.cisco.com/

[3] SRPerf - Performance Evaluation Framework for Segment Routing.
    Available: https://github.com/SRouting/SRPerf

[4] CloudLab home page. Available: https://www.cloudlab.us/

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows
  2021-07-19 10:12                   ` Ryoga Saito
@ 2021-07-26 21:29                     ` Pablo Neira Ayuso
  0 siblings, 0 replies; 12+ messages in thread
From: Pablo Neira Ayuso @ 2021-07-26 21:29 UTC (permalink / raw)
  To: Ryoga Saito
  Cc: Andrea Mayer, davem, yoshfuji, dsahern, kuba, netfilter-devel,
	Stefano Salsano, Paolo Lungaroni

Hi Ryoga,

On Mon, Jul 19, 2021 at 07:12:46PM +0900, Ryoga Saito wrote:
> Hi Pablo
> 
> I would like your comments for it.
> 
> I have 2 implementation ideas about fixing this patch:
> 
> 1.) fix only coding style pointed out in previous mail
> 2.) add sysctl parameter and change NF_HOOK to NF_HOOK_COND for user to
>     select behavior of hook call
> 
> I believed SRv6 encaps/decaps operations should be tracked with conntrack
> like any other virtual net-device based tunneling protocols (e.g. VXLAN,
> IPIP)

Agreed, users will be expecting consistent behaviour with the existing
net-device based tunneling infrastructure. The hook order you are
proposing look correct to me.

> even if the forwarding performance slows down because occurred by
> lack of considerations. and any other tunnels also have this overhead.

If you go for option 2, you can add a new specific static_key for
the lightweight tunnel netfilter hooks, this static key could be
turned on via sysctl. But I think this sysctl toggle should be
one-time (once enabled, you cannot disable it).

I'll help you with the benchmarking.

> Therefore, I support 1st idea. However, 2nd idea is ok if the overhead
> caused by adding new hook isn't acceptable.

I'd prefer option 1 too, I tend to dislike new sysctl toggles, the
specific static_key should remove the concern on the performance
impact for people that do not want to use this new infrastructure.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-07-26 21:29 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-06  5:25 [PATCH] net: Add netfilter hooks to track SRv6-encapsulated flows Ryoga Saito
2021-07-08  1:31 ` Andrea Mayer
2021-07-08 13:38   ` Pablo Neira Ayuso
2021-07-08 16:32     ` Andrea Mayer
     [not found]       ` <CALPAGbJt_rb_r3M2AEJ_6VRsG+zXrEOza0U-6SxFGsERGipT4w@mail.gmail.com>
2021-07-08 20:52         ` Ryoga Saito
2021-07-09 18:48           ` Andrea Mayer
2021-07-11  7:12             ` Ryoga Saito
2021-07-12 23:31               ` Andrea Mayer
2021-07-15 22:13                 ` Pablo Neira Ayuso
2021-07-19 10:12                   ` Ryoga Saito
2021-07-26 21:29                     ` Pablo Neira Ayuso
2021-07-19 11:55                   ` Andrea Mayer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.