* [PATCH net 01/11] selftests: netfilter: add a vrf+conntrack testcase
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-19 11:10 ` patchwork-bot+netdevbpf
2021-11-18 22:26 ` [PATCH net 02/11] selftests: netfilter: extend nfqueue tests to cover vrf device Pablo Neira Ayuso
` (9 subsequent siblings)
10 siblings, 1 reply; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Florian Westphal <fw@strlen.de>
Rework the reproducer for the vrf+conntrack regression reported
by Eugene into a selftest and also add a test for ip masquerading
that Lahav fixed recently.
With net or net-next tree, the first test fails and the latter
two pass.
With 09e856d54bda5f28 ("vrf: Reset skb conntrack connection on VRF rcv")
reverted, the first test passes but the last two fail.
A proper fix needs more work. For the time being a revert seems to be
the best choice; snat/masquerade did not work before the fix.
Link: https://lore.kernel.org/netdev/378ca299-4474-7e9a-3d36-2350c8c98995@gmail.com/T/#m95358a31810df7392f541f99d187227bc75c9963
Reported-by: Eugene Crosser <crosser@average.org>
Cc: Lahav Schlesinger <lschlesinger@drivenets.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
tools/testing/selftests/netfilter/Makefile | 3 +-
.../selftests/netfilter/conntrack_vrf.sh | 219 ++++++++++++++++++
2 files changed, 221 insertions(+), 1 deletion(-)
create mode 100755 tools/testing/selftests/netfilter/conntrack_vrf.sh
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 8748199ac109..ffca314897c4 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
- ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
+ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755
index 000000000000..91f3ef0f1192
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
@@ -0,0 +1,219 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device. In this script, that's veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment based on the real iif is lost.
+# Instead, the zone is assigned based on the VRF master interface
+# (in case such a rule exists).
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+ ip netns pids $ns0 | xargs kill 2>/dev/null
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+
+ ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns0"
+ exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add veth device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not start iperf3"
+ exit $ksft_skip
+fi
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+ chain rawpre {
+ type filter hook prerouting priority raw;
+
+ iif { veth0, tvrf } counter meta nftrace set 1
+ iif veth0 counter ct zone set 1 counter return
+ iif tvrf counter ct zone set 2 counter return
+ ip protocol icmp counter
+ notrack counter
+ }
+
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif veth0 counter ct zone set 1 counter return
+ oif tvrf counter ct zone set 2 counter return
+ notrack counter
+ }
+}
+EOF
+ ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+ # should be in zone 1, not zone 2
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "PASS: entry found in conntrack zone 1"
+ else
+ echo "FAIL: entry not found in conntrack zone 1"
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "FAIL: entry found in zone 2 instead"
+ else
+ echo "FAIL: entry not in zone 1 or 2, dumping table"
+ ip netns exec $ns0 conntrack -L
+ ip netns exec $ns0 nft list ruleset
+ fi
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+ # lack of this is intentional here, we want to exercise double-snat.
+ ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf
+test_masquerade_veth
+
+exit $ret
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH net 01/11] selftests: netfilter: add a vrf+conntrack testcase
2021-11-18 22:26 ` [PATCH net 01/11] selftests: netfilter: add a vrf+conntrack testcase Pablo Neira Ayuso
@ 2021-11-19 11:10 ` patchwork-bot+netdevbpf
0 siblings, 0 replies; 14+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-11-19 11:10 UTC (permalink / raw)
To: Pablo Neira Ayuso; +Cc: netfilter-devel, davem, netdev, kuba
Hello:
This series was applied to netdev/net.git (master)
by Pablo Neira Ayuso <pablo@netfilter.org>:
On Thu, 18 Nov 2021 23:26:08 +0100 you wrote:
> From: Florian Westphal <fw@strlen.de>
>
> Rework the reproducer for the vrf+conntrack regression reported
> by Eugene into a selftest and also add a test for ip masquerading
> that Lahav fixed recently.
>
> With net or net-next tree, the first test fails and the latter
> two pass.
>
> [...]
Here is the summary with links:
- [net,01/11] selftests: netfilter: add a vrf+conntrack testcase
https://git.kernel.org/netdev/net/c/33b8aad21ac1
- [net,02/11] selftests: netfilter: extend nfqueue tests to cover vrf device
https://git.kernel.org/netdev/net/c/228c3fa054ad
- [net,03/11] netfilter: nft_payload: Remove duplicated include in nft_payload.c
https://git.kernel.org/netdev/net/c/00d8b83725e9
- [net,04/11] selftests: nft_nat: Improve port shadow test stability
https://git.kernel.org/netdev/net/c/e1f8bc06e497
- [net,05/11] selftests: nft_nat: Simplify port shadow notrack test
https://git.kernel.org/netdev/net/c/85c0c8b342e8
- [net,06/11] netfilter: ctnetlink: fix filtering with CTA_TUPLE_REPLY
https://git.kernel.org/netdev/net/c/ad81d4daf6a3
- [net,07/11] netfilter: ctnetlink: do not erase error code with EINVAL
https://git.kernel.org/netdev/net/c/77522ff02f33
- [net,08/11] netfilter: ipvs: Fix reuse connection if RS weight is 0
https://git.kernel.org/netdev/net/c/c95c07836fa4
- [net,09/11] netfilter: xt_IDLETIMER: replace snprintf in show functions with sysfs_emit
https://git.kernel.org/netdev/net/c/c08d3286caf1
- [net,10/11] netfilter: flowtable: fix IPv6 tunnel addr match
https://git.kernel.org/netdev/net/c/39f6eed4cb20
- [net,11/11] selftests: nft_nat: switch port shadow test cases to socat
https://git.kernel.org/netdev/net/c/a2acf0c0e2da
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH net 02/11] selftests: netfilter: extend nfqueue tests to cover vrf device
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 01/11] selftests: netfilter: add a vrf+conntrack testcase Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 03/11] netfilter: nft_payload: Remove duplicated include in nft_payload.c Pablo Neira Ayuso
` (8 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Florian Westphal <fw@strlen.de>
VRF device calls the output/postrouting hooks so packet should be seen
once with oifname tvrf and once with eth0.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
.../testing/selftests/netfilter/nft_queue.sh | 54 +++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 3d202b90b33d..7d27f1f3bc01 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -16,6 +16,10 @@ timeout=4
cleanup()
{
+ ip netns pids ${ns1} | xargs kill 2>/dev/null
+ ip netns pids ${ns2} | xargs kill 2>/dev/null
+ ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
ip netns del ${ns1}
ip netns del ${ns2}
ip netns del ${nsrouter}
@@ -332,6 +336,55 @@ EOF
echo "PASS: tcp via loopback and re-queueing"
}
+test_icmp_vrf() {
+ ip -net $ns1 link add tvrf type vrf table 9876
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net $ns1 li set eth0 master tvrf
+ ip -net $ns1 li set tvrf up
+
+ ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec ${ns1} nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait $nfqpid
+ [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
+}
+
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -372,5 +425,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_requeue
+test_icmp_vrf
exit $ret
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 03/11] netfilter: nft_payload: Remove duplicated include in nft_payload.c
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 01/11] selftests: netfilter: add a vrf+conntrack testcase Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 02/11] selftests: netfilter: extend nfqueue tests to cover vrf device Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 04/11] selftests: nft_nat: Improve port shadow test stability Pablo Neira Ayuso
` (7 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Wan Jiabing <wanjiabing@vivo.com>
Fix following checkincludes.pl warning:
./net/netfilter/nft_payload.c: linux/ip.h is included more than once.
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nft_payload.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index cbfe4e4a4ad7..bd689938a2e0 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -22,7 +22,6 @@
#include <linux/icmpv6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/ip.h>
#include <net/sctp/checksum.h>
static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 04/11] selftests: nft_nat: Improve port shadow test stability
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (2 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 03/11] netfilter: nft_payload: Remove duplicated include in nft_payload.c Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 05/11] selftests: nft_nat: Simplify port shadow notrack test Pablo Neira Ayuso
` (6 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Phil Sutter <phil@nwl.cc>
Setup phase in test_port_shadow() relied upon a race-condition:
Listening nc on port 1405 was started in background before attempting to
create the fake conntrack entry using the same source port. If listening
nc won, fake conntrack entry could not be created causing wrong
behaviour. Reorder nc calls to fix this and introduce a short delay
before testing the setup to wait for listening nc process startup.
Fixes: 465f15a6d1a8f ("selftests: nft_nat: add udp hole punch test case")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
tools/testing/selftests/netfilter/nft_nat.sh | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index da1c1e4b6c86..905c033db74d 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -759,14 +759,16 @@ test_port_shadow()
local result=""
local logmsg=""
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+
echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
nc_r=$!
echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
nc_c=$!
- # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
- echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+ sleep 0.3
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 05/11] selftests: nft_nat: Simplify port shadow notrack test
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (3 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 04/11] selftests: nft_nat: Improve port shadow test stability Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 06/11] netfilter: ctnetlink: fix filtering with CTA_TUPLE_REPLY Pablo Neira Ayuso
` (5 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Phil Sutter <phil@nwl.cc>
The second rule in prerouting chain was probably a leftover: The router
listens on veth0, so not tracking connections via that interface is
sufficient. Likewise, the rule in output chain can be limited to that
interface as well.
Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
tools/testing/selftests/netfilter/nft_nat.sh | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 905c033db74d..c62e4e26252c 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -818,11 +818,10 @@ table $family raw {
chain prerouting {
type filter hook prerouting priority -300; policy accept;
meta iif veth0 udp dport 1405 notrack
- udp dport 1405 notrack
}
chain output {
type filter hook output priority -300; policy accept;
- udp sport 1405 notrack
+ meta oif veth0 udp sport 1405 notrack
}
}
EOF
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 06/11] netfilter: ctnetlink: fix filtering with CTA_TUPLE_REPLY
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (4 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 05/11] selftests: nft_nat: Simplify port shadow notrack test Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 07/11] netfilter: ctnetlink: do not erase error code with EINVAL Pablo Neira Ayuso
` (4 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Florent Fourcot <florent.fourcot@wifirst.fr>
filter->orig_flags was used for a reply context.
Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump")
Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_netlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f1e5443fe7c7..2663764d0b6e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1011,7 +1011,7 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
CTA_TUPLE_REPLY,
filter->family,
&filter->zone,
- filter->orig_flags);
+ filter->reply_flags);
if (err < 0) {
err = -EINVAL;
goto err_filter;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 07/11] netfilter: ctnetlink: do not erase error code with EINVAL
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (5 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 06/11] netfilter: ctnetlink: fix filtering with CTA_TUPLE_REPLY Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 08/11] netfilter: ipvs: Fix reuse connection if RS weight is 0 Pablo Neira Ayuso
` (3 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Florent Fourcot <florent.fourcot@wifirst.fr>
And be consistent in error management for both orig/reply filtering
Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump")
Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_conntrack_netlink.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2663764d0b6e..c7708bde057c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1012,10 +1012,8 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
filter->family,
&filter->zone,
filter->reply_flags);
- if (err < 0) {
- err = -EINVAL;
+ if (err < 0)
goto err_filter;
- }
}
return filter;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 08/11] netfilter: ipvs: Fix reuse connection if RS weight is 0
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (6 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 07/11] netfilter: ctnetlink: do not erase error code with EINVAL Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 09/11] netfilter: xt_IDLETIMER: replace snprintf in show functions with sysfs_emit Pablo Neira Ayuso
` (2 subsequent siblings)
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: yangxingwu <xingwu.yang@gmail.com>
We are changing expire_nodest_conn to work even for reused connections when
conn_reuse_mode=0, just as what was done with commit dc7b3eb900aa ("ipvs:
Fix reuse connection if real server is dead").
For controlled and persistent connections, the new connection will get the
needed real server depending on the rules in ip_vs_check_template().
Fixes: d752c3645717 ("ipvs: allow rescheduling of new connections when port reuse is detected")
Co-developed-by: Chuanqi Liu <legend050709@qq.com>
Signed-off-by: Chuanqi Liu <legend050709@qq.com>
Signed-off-by: yangxingwu <xingwu.yang@gmail.com>
Acked-by: Simon Horman <horms@verge.net.au>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
Documentation/networking/ipvs-sysctl.rst | 3 +--
net/netfilter/ipvs/ip_vs_core.c | 8 ++++----
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index 95ef56d62077..387fda80f05f 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER
0: disable any special handling on port reuse. The new
connection will be delivered to the same real server that was
- servicing the previous connection. This will effectively
- disable expire_nodest_conn.
+ servicing the previous connection.
bit 1: enable rescheduling of new connections when it is safe.
That is, whenever expire_nodest_conn and for TCP sockets, when
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e93c937a8bf0..51ad557a525b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1919,7 +1919,6 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, pkts;
- int conn_reuse_mode;
struct sock *sk;
int af = state->pf;
@@ -1997,15 +1996,16 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
ipvs, af, skb, &iph);
- conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
bool old_ct = false, resched = false;
if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
unlikely(!atomic_read(&cp->dest->weight))) {
resched = true;
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
- } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ } else if (conn_reuse_mode &&
+ is_new_conn_expected(cp, conn_reuse_mode)) {
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
if (!atomic_read(&cp->n_control)) {
resched = true;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 09/11] netfilter: xt_IDLETIMER: replace snprintf in show functions with sysfs_emit
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (7 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 08/11] netfilter: ipvs: Fix reuse connection if RS weight is 0 Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 10/11] netfilter: flowtable: fix IPv6 tunnel addr match Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 11/11] selftests: nft_nat: switch port shadow test cases to socat Pablo Neira Ayuso
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Jing Yao <yao.jing2@zte.com.cn>
coccicheck complains about the use of snprintf() in sysfs show
functions:
WARNING use scnprintf or sprintf
Using sysfs_emit instead of scnprintf, snprintf or sprintf makes more
sense.
Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Jing Yao <yao.jing2@zte.com.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/xt_IDLETIMER.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 2f7cf5ecebf4..0f8bb0bf558f 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -85,9 +85,9 @@ static ssize_t idletimer_tg_show(struct device *dev,
mutex_unlock(&list_mutex);
if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
- return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
+ return sysfs_emit(buf, "%ld\n", time_diff);
- return snprintf(buf, PAGE_SIZE, "0\n");
+ return sysfs_emit(buf, "0\n");
}
static void idletimer_tg_work(struct work_struct *work)
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 10/11] netfilter: flowtable: fix IPv6 tunnel addr match
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (8 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 09/11] netfilter: xt_IDLETIMER: replace snprintf in show functions with sysfs_emit Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
2021-11-18 22:26 ` [PATCH net 11/11] selftests: nft_nat: switch port shadow test cases to socat Pablo Neira Ayuso
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Will Mortensen <willmo@gmail.com>
Previously the IPv6 addresses in the key were clobbered and the mask was
left unset.
I haven't tested this; I noticed it while skimming the code to
understand an unrelated issue.
Fixes: cfab6dbd0ecf ("netfilter: flowtable: add tunnel match offload support")
Cc: wenxu <wenxu@ucloud.cn>
Signed-off-by: Will Mortensen <willmo@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
net/netfilter/nf_flow_table_offload.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index d6bf1b2cd541..b561e0a44a45 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
sizeof(struct in6_addr));
if (memcmp(&key->enc_ipv6.src, &in6addr_any,
sizeof(struct in6_addr)))
- memset(&key->enc_ipv6.src, 0xff,
+ memset(&mask->enc_ipv6.src, 0xff,
sizeof(struct in6_addr));
if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
sizeof(struct in6_addr)))
- memset(&key->enc_ipv6.dst, 0xff,
+ memset(&mask->enc_ipv6.dst, 0xff,
sizeof(struct in6_addr));
enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net 11/11] selftests: nft_nat: switch port shadow test cases to socat
2021-11-18 22:26 [PATCH net 00/11] Netfilter fixes for net Pablo Neira Ayuso
` (9 preceding siblings ...)
2021-11-18 22:26 ` [PATCH net 10/11] netfilter: flowtable: fix IPv6 tunnel addr match Pablo Neira Ayuso
@ 2021-11-18 22:26 ` Pablo Neira Ayuso
10 siblings, 0 replies; 14+ messages in thread
From: Pablo Neira Ayuso @ 2021-11-18 22:26 UTC (permalink / raw)
To: netfilter-devel; +Cc: davem, netdev, kuba
From: Florian Westphal <fw@strlen.de>
There are now at least three distinct flavours of netcat/nc tool:
'original' version, one version ported from openbsd and nmap-ncat.
The script only works with the original because it sets the SO_REUSEPORT option.
Other nc versions return 'port already in use' error and port shadow test fails:
PASS: inet IPv6 redirection for ns2-hMHcaRvx
nc: bind failed: Address already in use
ERROR: portshadow test default: got reply from "ROUTER", not CLIENT as intended
Switch to socat instead.
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
tools/testing/selftests/netfilter/nft_nat.sh | 26 ++++++++++++++------
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index c62e4e26252c..d88867d2fed7 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -760,20 +760,20 @@ test_port_shadow()
local logmsg=""
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
- echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+ echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
- echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
- nc_r=$!
+ echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+ sc_r=$!
- echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
- nc_c=$!
+ echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+ sc_c=$!
sleep 0.3
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
- result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+ result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
if [ "$result" = "$expect" ] ;then
echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -782,7 +782,7 @@ test_port_shadow()
ret=1
fi
- kill $nc_r $nc_c 2>/dev/null
+ kill $sc_r $sc_c 2>/dev/null
# flush udp entries for next test round, if any
ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -852,6 +852,18 @@ test_port_shadowing()
{
local family="ip"
+ conntrack -h >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+ return
+ fi
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without socat tool"
+ return
+ fi
+
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
--
2.30.2
^ permalink raw reply related [flat|nested] 14+ messages in thread