All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device
@ 2018-12-27  7:38 wenxu
  2018-12-28 14:42 ` David Ahern
  2019-01-02 21:50 ` David Ahern
  0 siblings, 2 replies; 6+ messages in thread
From: wenxu @ 2018-12-27  7:38 UTC (permalink / raw)
  To: netdev

From: wenxu <wenxu@ucloud.cn>

In the ip_rcv the skb go through the PREROUTING hook first,
Then jump in vrf device go through the same hook again.
When conntrack work with vrf, there will be some conflict for rules.
Because the package go through the hook twice with different nf status

ip link add user1 type vrf table 1
ip link add user2 type vrf table 2
ip l set dev tun1 master user1
ip l set dev tun2 master user2

nft add table firewall
nft add chain firewall zones { type filter hook prerouting  priority - 300 \; }
nft add rule firewall zones counter ct zone set iif map { "tun1" : 1, "tun2" : 2 }
nft add chain firewall rule-1000-ingress
nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept
nft add rule firewall rule-1000-ingress counter drop
nft add chain firewall rule-1000-egress
nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop
nft add rule firewall rule-1000-egress counter accept

nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; }
nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress }
nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress }

nft add rule firewall dnat-all ct zone vmap { 1 : jump dnat-1000 }
nft add rule firewall dnat-1000 ip daddr 2.2.2.11 counter dnat to 10.0.0.7

For a package with ip daddr 2.2.2.11 and tcp dport 22, first time accept in the 
rule-1000-ingress and dnat to 10.0.0.7. Then second time the packet goto the wrong 
chain rule-1000-egress which leads the packet drop

So it proived a flag to control the vrf-device bypass go through hook for 
the second time.

Signed-off-by: wenxu <wenxu@ucloud.cn>
---
 drivers/net/vrf.c            | 20 ++++++++++++++++++--
 include/uapi/linux/if_link.h |  3 +++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 95909e2..f378fa19 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -52,6 +52,7 @@ struct net_vrf {
 	struct fib6_table	*fib6_table;
 #endif
 	u32                     tb_id;
+	u8                      flags;
 };
 
 struct pcpu_dstats {
@@ -898,6 +899,10 @@ static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
 				      struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
+	struct net_vrf *vrf = netdev_priv(dev);
+
+	if (vrf->flags & VRF_F_BYPASS_RCV_NF)
+		return skb;
 
 	if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
 		skb = NULL;    /* kfree_skb(skb) handled by nf code */
@@ -1323,6 +1328,9 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 		return -EINVAL;
 	}
 
+	if (data[IFLA_VRF_FLAGS])
+		vrf->flags = nla_get_u8(data[IFLA_VRF_FLAGS]);
+
 	dev->priv_flags |= IFF_L3MDEV_MASTER;
 
 	err = register_netdevice(dev);
@@ -1346,7 +1354,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 
 static size_t vrf_nl_getsize(const struct net_device *dev)
 {
-	return nla_total_size(sizeof(u32));  /* IFLA_VRF_TABLE */
+	return nla_total_size(sizeof(u32)) +  /* IFLA_VRF_TABLE */
+		nla_total_size(sizeof(u8));   /* IFLA_VRF_FLAGS */
 }
 
 static int vrf_fillinfo(struct sk_buff *skb,
@@ -1354,7 +1363,13 @@ static int vrf_fillinfo(struct sk_buff *skb,
 {
 	struct net_vrf *vrf = netdev_priv(dev);
 
-	return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id);
+	if (nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id))
+		return -EMSGSIZE;
+
+	if (nla_put_u8(skb, IFLA_VRF_FLAGS, vrf->flags))
+		return -EMSGSIZE;
+
+	return 0;
 }
 
 static size_t vrf_get_slave_size(const struct net_device *bond_dev,
@@ -1377,6 +1392,7 @@ static int vrf_fill_slave_info(struct sk_buff *skb,
 
 static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = {
 	[IFLA_VRF_TABLE] = { .type = NLA_U32 },
+	[IFLA_VRF_FLAGS] = { .type = NLA_U8 },
 };
 
 static struct rtnl_link_ops vrf_link_ops __read_mostly = {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d653382..23c489d 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -430,11 +430,14 @@ enum macvlan_macaddr_mode {
 enum {
 	IFLA_VRF_UNSPEC,
 	IFLA_VRF_TABLE,
+	IFLA_VRF_FLAGS,
 	__IFLA_VRF_MAX
 };
 
 #define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
 
+#define VRF_F_BYPASS_RCV_NF     0x01
+
 enum {
 	IFLA_VRF_PORT_UNSPEC,
 	IFLA_VRF_PORT_TABLE,
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device
  2018-12-27  7:38 [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device wenxu
@ 2018-12-28 14:42 ` David Ahern
  2019-01-10 15:21   ` wenxu
  2019-01-02 21:50 ` David Ahern
  1 sibling, 1 reply; 6+ messages in thread
From: David Ahern @ 2018-12-28 14:42 UTC (permalink / raw)
  To: wenxu, netdev, David Miller; +Cc: David Ahern

On 12/27/18 2:38 AM, wenxu@ucloud.cn wrote:
> From: wenxu <wenxu@ucloud.cn>
> 
> In the ip_rcv the skb go through the PREROUTING hook first,
> Then jump in vrf device go through the same hook again.
> When conntrack work with vrf, there will be some conflict for rules.
> Because the package go through the hook twice with different nf status
> 
> ip link add user1 type vrf table 1
> ip link add user2 type vrf table 2
> ip l set dev tun1 master user1
> ip l set dev tun2 master user2
> 
> nft add table firewall
> nft add chain firewall zones { type filter hook prerouting  priority - 300 \; }
> nft add rule firewall zones counter ct zone set iif map { "tun1" : 1, "tun2" : 2 }
> nft add chain firewall rule-1000-ingress
> nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept
> nft add rule firewall rule-1000-ingress counter drop
> nft add chain firewall rule-1000-egress
> nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop
> nft add rule firewall rule-1000-egress counter accept
> 
> nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; }
> nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress }
> nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress }
> 
> nft add rule firewall dnat-all ct zone vmap { 1 : jump dnat-1000 }
> nft add rule firewall dnat-1000 ip daddr 2.2.2.11 counter dnat to 10.0.0.7
> 
> For a package with ip daddr 2.2.2.11 and tcp dport 22, first time accept in the 
> rule-1000-ingress and dnat to 10.0.0.7. Then second time the packet goto the wrong 
> chain rule-1000-egress which leads the packet drop
> 
> So it proived a flag to control the vrf-device bypass go through hook for 
> the second time.
> 
> Signed-off-by: wenxu <wenxu@ucloud.cn>
> ---
>  drivers/net/vrf.c            | 20 ++++++++++++++++++--
>  include/uapi/linux/if_link.h |  3 +++
>  2 files changed, 21 insertions(+), 2 deletions(-)
> 

Thanks for the report with commands to reproduce. I am out of the office
at the moment; I will take a look at this next week.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device
  2018-12-27  7:38 [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device wenxu
  2018-12-28 14:42 ` David Ahern
@ 2019-01-02 21:50 ` David Ahern
  2019-01-02 22:19   ` Florian Westphal
  1 sibling, 1 reply; 6+ messages in thread
From: David Ahern @ 2019-01-02 21:50 UTC (permalink / raw)
  To: wenxu, netdev, NetFilter

On 12/27/18 12:38 AM, wenxu@ucloud.cn wrote:
> nft add table firewall
> nft add chain firewall zones { type filter hook prerouting  priority - 300 \; }
> nft add rule firewall zones counter ct zone set iif map { "eth1" : 1, "eth2" : 2 }
> nft add chain firewall rule-1000-ingress
> nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept
> nft add rule firewall rule-1000-ingress counter drop
> nft add chain firewall rule-1000-egress
> nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop
> nft add rule firewall rule-1000-egress counter accept
> 
> nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; }
> nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress }
> nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress }

Bug report to the netfilter crowd: After this set of commands, 'nft list
tables' goes into a loop over recvmsg. This is debian stretch with
nftables from backports - version 0.9.0-1~bpo9+1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device
  2019-01-02 21:50 ` David Ahern
@ 2019-01-02 22:19   ` Florian Westphal
  0 siblings, 0 replies; 6+ messages in thread
From: Florian Westphal @ 2019-01-02 22:19 UTC (permalink / raw)
  To: David Ahern; +Cc: wenxu, netdev, NetFilter

David Ahern <dsahern@gmail.com> wrote:
> On 12/27/18 12:38 AM, wenxu@ucloud.cn wrote:
> > nft add table firewall
> > nft add chain firewall zones { type filter hook prerouting  priority - 300 \; }
> > nft add rule firewall zones counter ct zone set iif map { "eth1" : 1, "eth2" : 2 }
> > nft add chain firewall rule-1000-ingress
> > nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept
> > nft add rule firewall rule-1000-ingress counter drop
> > nft add chain firewall rule-1000-egress
> > nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop
> > nft add rule firewall rule-1000-egress counter accept
> > 
> > nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; }
> > nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress }
> > nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress }
> 
> Bug report to the netfilter crowd: After this set of commands, 'nft list
> tables' goes into a loop over recvmsg. This is debian stretch with
> nftables from backports - version 0.9.0-1~bpo9+1

Thanks for reporting, I will have a look.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device
  2018-12-28 14:42 ` David Ahern
@ 2019-01-10 15:21   ` wenxu
  2019-01-10 17:00     ` David Ahern
  0 siblings, 1 reply; 6+ messages in thread
From: wenxu @ 2019-01-10 15:21 UTC (permalink / raw)
  To: David Ahern, David Miller; +Cc: netdev

On 2018/12/28 下午10:42, David Ahern wrote:
> On 12/27/18 2:38 AM, wenxu@ucloud.cn wrote:
>> From: wenxu <wenxu@ucloud.cn>
>>
>> In the ip_rcv the skb go through the PREROUTING hook first,
>> Then jump in vrf device go through the same hook again.
>> When conntrack work with vrf, there will be some conflict for rules.
>> Because the package go through the hook twice with different nf status
>>
>> ip link add user1 type vrf table 1
>> ip link add user2 type vrf table 2
>> ip l set dev tun1 master user1
>> ip l set dev tun2 master user2
>>
>> nft add table firewall
>> nft add chain firewall zones { type filter hook prerouting  priority - 300 \; }
>> nft add rule firewall zones counter ct zone set iif map { "tun1" : 1, "tun2" : 2 }
>> nft add chain firewall rule-1000-ingress
>> nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept
>> nft add rule firewall rule-1000-ingress counter drop
>> nft add chain firewall rule-1000-egress
>> nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop
>> nft add rule firewall rule-1000-egress counter accept
>>
>> nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; }
>> nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress }
>> nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress }
>>
>> nft add rule firewall dnat-all ct zone vmap { 1 : jump dnat-1000 }
>> nft add rule firewall dnat-1000 ip daddr 2.2.2.11 counter dnat to 10.0.0.7
>>
>> For a package with ip daddr 2.2.2.11 and tcp dport 22, first time accept in the 
>> rule-1000-ingress and dnat to 10.0.0.7. Then second time the packet goto the wrong 
>> chain rule-1000-egress which leads the packet drop
>>
>> So it proived a flag to control the vrf-device bypass go through hook for 
>> the second time.
>>
>> Signed-off-by: wenxu <wenxu@ucloud.cn>
>> ---
>>  drivers/net/vrf.c            | 20 ++++++++++++++++++--
>>  include/uapi/linux/if_link.h |  3 +++
>>  2 files changed, 21 insertions(+), 2 deletions(-)
>>
> Thanks for the report with commands to reproduce. I am out of the office
> at the moment; I will take a look at this next week.
>
Hi,

How about the status of this patch? Should I resubmit it?

wenxu

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device
  2019-01-10 15:21   ` wenxu
@ 2019-01-10 17:00     ` David Ahern
  0 siblings, 0 replies; 6+ messages in thread
From: David Ahern @ 2019-01-10 17:00 UTC (permalink / raw)
  To: wenxu, David Miller; +Cc: netdev

On 1/10/19 8:21 AM, wenxu wrote:
> 
> How about the status of this patch? Should I resubmit it?
> 

I do not like the need for a flag when the VRF is created. If something
changes with the firewall rules, it means a user has to delete and
re-create the VRF which is really expensive.

It would be better to detect this on the fly - similar to how it detects
the default qdisc and avoids the recirculation on Tx when the qdisc is
the default.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-01-10 17:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-27  7:38 [PATCH net-next] vrf: Add VRF_F_BYPASS_RCV_NF flag to vrf device wenxu
2018-12-28 14:42 ` David Ahern
2019-01-10 15:21   ` wenxu
2019-01-10 17:00     ` David Ahern
2019-01-02 21:50 ` David Ahern
2019-01-02 22:19   ` Florian Westphal

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.