* [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head
@ 2021-04-27 13:55 Jussi Maki
2021-04-27 13:55 ` [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head Jussi Maki
` (4 more replies)
0 siblings, 5 replies; 20+ messages in thread
From: Jussi Maki @ 2021-04-27 13:55 UTC (permalink / raw)
To: bpf; +Cc: daniel, Jussi Maki
The skb_change_head() helper did not set "skb->mac_len", which is problematic
when it's used in combination with skb_redirect_peer(). Without it, redirecting
a packet from a L3 device such as wireguard to the veth peer device will cause
skb->data to point to the middle of the IP header on entry to tcp_v4_rcv() since
the L2 header is not pulled correctly due to mac_len=0.
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
net/core/filter.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..65ab4e21c087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
}
return ret;
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
@ 2021-04-27 13:55 ` Jussi Maki
2021-04-27 21:41 ` Andrii Nakryiko
2021-05-17 10:11 ` [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
` (3 subsequent siblings)
4 siblings, 1 reply; 20+ messages in thread
From: Jussi Maki @ 2021-04-27 13:55 UTC (permalink / raw)
To: bpf; +Cc: daniel, Jussi Maki
Adds test to check that bpf_skb_change_head can be used
in combination with bpf_redirect_peer to redirect a packet
from L3 device to veth.
Fixes: a426d97e970d ("bpf: Set mac_len in bpf_skb_change_head")
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
tools/testing/selftests/bpf/.gitignore | 1 +
tools/testing/selftests/bpf/Makefile | 2 +-
.../selftests/bpf/progs/test_tc_peer.c | 24 ++++
.../testing/selftests/bpf/test_tc_peer_user.c | 127 ++++++++++++++++++
.../testing/selftests/bpf/test_tc_redirect.sh | 95 ++++++++++---
5 files changed, 233 insertions(+), 16 deletions(-)
create mode 100644 tools/testing/selftests/bpf/test_tc_peer_user.c
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4866f6a21901..49f3f050be4d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -27,6 +27,7 @@ test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
test_sysctl
+test_tc_peer_user
xdping
test_cpp
*.skel.h
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 283e5ad8385e..0e05fefe2333 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -84,7 +84,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver
+ xdpxceiver test_tc_peer_user
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fc84a7685aa2..49f0eefd58e7 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,8 +5,11 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
enum {
dev_src,
@@ -42,4 +45,25 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
}
+SEC("src_ingress_l3") int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+}
+
char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_peer_user.c b/tools/testing/selftests/bpf/test_tc_peer_user.c
new file mode 100644
index 000000000000..bdecf2bd1cc1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_peer_user.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Networking across two network namespaces based on TUN/TAP.
+ * Like veth, but slow and L3. Used for testing BPF redirect_peer
+ * from L3 to L2 veth device.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+int tun_alloc(char *name) {
+ struct ifreq ifr;
+ int fd, err;
+ char cmd[512];
+
+ if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ if ((err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0) {
+ close(fd);
+ return err;
+ }
+
+ snprintf(cmd, sizeof(cmd), "ip link set dev %s up", name);
+ system(cmd);
+
+ return fd;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+void setns_by_name(char *name) {
+ int nsfd;
+ char nspath[PATH_MAX];
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd < 0) {
+ fprintf(stderr, "failed to open net namespace %s: %s\n", name, strerror(errno));
+ exit(1);
+ }
+ setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+}
+
+int main(int argc, char **argv) {
+ char *src_ns, *src_tun, *target_ns, *target_tun;
+ int srcfd, targetfd;
+
+ if (argc != 5) {
+ fprintf(stderr, "usage: %s <source namespace> <source tun device name> <target namespace> <target tun device name>\n", argv[0]);
+ return 1;
+ }
+
+ src_ns = argv[1];
+ src_tun = argv[2];
+ target_ns = argv[3];
+ target_tun = argv[4];
+
+ setns_by_name(src_ns);
+ srcfd = tun_alloc(src_tun);
+ if (srcfd < 0) {
+ fprintf(stderr, "failed to allocate tun device\n");
+ return 1;
+ }
+
+ setns_by_name(target_ns);
+ targetfd = tun_alloc(target_tun);
+ if (srcfd < 0) {
+ fprintf(stderr, "failed to allocate tun device\n");
+ return 1;
+ }
+
+ fd_set rfds, wfds;
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+
+ for (;;) {
+ char buf[4096];
+ int direction, nread, nwrite;
+ FD_SET(srcfd, &rfds);
+ FD_SET(targetfd, &rfds);
+
+ if (select(1 + MAX(srcfd, targetfd), &rfds, NULL, NULL, NULL) < 0) {
+ fprintf(stderr, "select failed: %s\n", strerror(errno));
+ return 1;
+ }
+
+ direction = FD_ISSET(srcfd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? srcfd : targetfd, buf, sizeof(buf));
+ if (nread < 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? targetfd : srcfd, buf, nread);
+ if (nwrite != nread) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ return 1;
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
index 8868aa1ca902..469e1dc13f03 100755
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ b/tools/testing/selftests/bpf/test_tc_redirect.sh
@@ -50,15 +50,32 @@ readonly IP4_DST="172.16.2.100"
readonly IP6_SRC="::1:dead:beef:cafe"
readonly IP6_DST="::2:dead:beef:cafe"
+readonly IP4_TUN_SRC="172.17.1.100"
+readonly IP4_TUN_FWD="172.17.1.200"
+
+readonly IP6_TUN_SRC="1::dead:beef:0"
+readonly IP6_TUN_FWD="1::dead:beef:1"
+
readonly IP4_SLL="169.254.0.1"
readonly IP4_DLL="169.254.0.2"
readonly IP4_NET="169.254.0.0"
+readonly MAC_DST_FWD="00:11:22:33:44:55"
+readonly MAC_DST="00:22:33:44:55:66"
+
+TEST_TC_PEER_USER_PID=""
+NC4_PID=""
+NC6_PID=""
+
netns_cleanup()
{
ip netns del ${NS_SRC}
ip netns del ${NS_FWD}
ip netns del ${NS_DST}
+
+ [ -n "$TEST_TC_PEER_USER_PID" ] && kill ${TEST_TC_PEER_USER_PID} || true
+ [ -n "${NC4_PID}" ] && kill ${NC4_PID} || true
+ [ -n "${NC6_PID}" ] && kill ${NC6_PID} || true
}
netns_setup()
@@ -70,6 +87,9 @@ netns_setup()
ip link add veth_src type veth peer name veth_src_fwd
ip link add veth_dst type veth peer name veth_dst_fwd
+ ip link set veth_dst_fwd address ${MAC_DST_FWD}
+ ip link set veth_dst address ${MAC_DST}
+
ip link set veth_src netns ${NS_SRC}
ip link set veth_src_fwd netns ${NS_FWD}
@@ -117,14 +137,20 @@ netns_setup()
ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
+
+ ip -netns ${NS_DST} neigh add ${IP4_TUN_SRC} dev veth_dst lladdr $fmac_dst
+ ip -netns ${NS_DST} neigh add ${IP6_TUN_SRC} dev veth_dst lladdr $fmac_dst
+ ip -netns ${NS_DST} neigh add ${IP6_TUN_FWD} dev veth_dst lladdr $fmac_dst
}
netns_test_connectivity()
{
set +e
- ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
- ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
+ ip netns exec ${NS_DST} nc -4 -l -p 9004 &
+ NC4_PID=$!
+ ip netns exec ${NS_DST} nc -6 -l -p 9006 &
+ NC6_PID=$!
TEST="TCPv4 connectivity test"
ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
@@ -170,14 +196,52 @@ netns_setup_bpf()
{
local obj=$1
local use_forwarding=${2:-0}
+ local use_tuntap=${3:-0}
+
+ if [ "$use_tuntap" -eq "1" ]; then
+ # Set up tuntap based tunnel between src and fwd namespaces.
+ ./test_tc_peer_user ${NS_SRC} tun_src ${NS_FWD} tun_fwd &
+ TEST_TC_PEER_USER_PID=$!
+
+ while ! ip -netns ${NS_SRC} link show tun_src; do echo "Waiting for tun_src to appear..."; sleep 0.5; done
+ while ! ip -netns ${NS_FWD} link show tun_fwd; do echo "Waiting for tun_fwd to appoar..."; sleep 0.5; done
+
+ ip -netns ${NS_SRC} addr add dev tun_src ${IP4_TUN_SRC}/24
+ ip -netns ${NS_FWD} addr add dev tun_fwd ${IP4_TUN_FWD}/24
- ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
+ ip -netns ${NS_SRC} addr add dev tun_src ${IP6_TUN_SRC}/64 nodad
+ ip -netns ${NS_FWD} addr add dev tun_fwd ${IP6_TUN_FWD}/64 nodad
- ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
- ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
+ ip -netns ${NS_SRC} route del ${IP4_DST}/32 dev veth_src scope global
+ ip -netns ${NS_SRC} route add ${IP4_DST}/32 via ${IP4_TUN_FWD} dev tun_src scope global
+ ip -netns ${NS_DST} route add ${IP4_TUN_SRC}/32 dev veth_dst scope global
+
+ ip -netns ${NS_SRC} route del ${IP6_DST}/128 dev veth_src scope global
+ ip -netns ${NS_SRC} route add ${IP6_DST}/128 via ${IP6_TUN_FWD} dev tun_src scope global
+ ip -netns ${NS_DST} route add ${IP6_TUN_SRC}/128 dev veth_dst scope global
+ ip -netns ${NS_DST} route add ${IP6_TUN_FWD}/128 dev veth_dst scope global
+
+ ip netns exec ${NS_FWD} tc qdisc add dev tun_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev tun_fwd ingress bpf da obj $obj sec src_ingress_l3
+
+ # Enable forwarding back towards src, but not for packets coming from the tunnel.
+ ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.all.forwarding=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv4.conf.veth_dst_fwd.forwarding=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv4.conf.veth_src_fwd.forwarding=0
+ ip netns exec ${NS_FWD} sysctl -w net.ipv4.conf.tun_fwd.forwarding=0
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=0
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
+ ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.tun_fwd.forwarding=0
+ else
+ ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
+ ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress
+
+ ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
+ ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
+ ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress
+ fi
if [ "$use_forwarding" -eq "1" ]; then
# bpf_fib_lookup() checks if forwarding is enabled
@@ -190,13 +254,10 @@ netns_setup_bpf()
veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
- progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
- for prog in $progs; do
- map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
- if [ ! -z "$map" ]; then
- bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
- bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
- fi
+ maps=$(bpftool map list --json | jq -r '.[] | select(.name == "ifindex_map") | .id')
+ for map in $maps; do
+ bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
+ bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
done
}
@@ -214,3 +275,7 @@ netns_cleanup
netns_setup
netns_setup_bpf test_tc_peer.o
netns_test_connectivity
+netns_cleanup
+netns_setup
+netns_setup_bpf test_tc_peer.o 0 1
+netns_test_connectivity
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head
2021-04-27 13:55 ` [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head Jussi Maki
@ 2021-04-27 21:41 ` Andrii Nakryiko
2021-04-28 10:39 ` Jussi Maki
0 siblings, 1 reply; 20+ messages in thread
From: Andrii Nakryiko @ 2021-04-27 21:41 UTC (permalink / raw)
To: Jussi Maki; +Cc: bpf, Daniel Borkmann
On Tue, Apr 27, 2021 at 6:56 AM Jussi Maki <joamaki@gmail.com> wrote:
>
> Adds test to check that bpf_skb_change_head can be used
> in combination with bpf_redirect_peer to redirect a packet
> from L3 device to veth.
>
> Fixes: a426d97e970d ("bpf: Set mac_len in bpf_skb_change_head")
> Signed-off-by: Jussi Maki <joamaki@gmail.com>
> ---
> tools/testing/selftests/bpf/.gitignore | 1 +
> tools/testing/selftests/bpf/Makefile | 2 +-
> .../selftests/bpf/progs/test_tc_peer.c | 24 ++++
> .../testing/selftests/bpf/test_tc_peer_user.c | 127 ++++++++++++++++++
> .../testing/selftests/bpf/test_tc_redirect.sh | 95 ++++++++++---
> 5 files changed, 233 insertions(+), 16 deletions(-)
> create mode 100644 tools/testing/selftests/bpf/test_tc_peer_user.c
>
> diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
> index 4866f6a21901..49f3f050be4d 100644
> --- a/tools/testing/selftests/bpf/.gitignore
> +++ b/tools/testing/selftests/bpf/.gitignore
> @@ -27,6 +27,7 @@ test_tcpnotify_user
> test_libbpf
> test_tcp_check_syncookie_user
> test_sysctl
> +test_tc_peer_user
can we make it into a reasonable test inside test_progs? that way it
will be executed regularly
> xdping
> test_cpp
> *.skel.h
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index 283e5ad8385e..0e05fefe2333 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -84,7 +84,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
> TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
> flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
> test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
> - xdpxceiver
> + xdpxceiver test_tc_peer_user
>
> TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
>
> diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
> index fc84a7685aa2..49f0eefd58e7 100644
> --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
> +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
> @@ -5,8 +5,11 @@
> #include <linux/bpf.h>
> #include <linux/stddef.h>
> #include <linux/pkt_cls.h>
> +#include <linux/if_ether.h>
> +#include <linux/ip.h>
>
> #include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
>
> enum {
> dev_src,
> @@ -42,4 +45,25 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
> return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
> }
>
> +SEC("src_ingress_l3") int tc_src_l3(struct __sk_buff *skb)
please keep SEC() on separate line
also, is this a BPF_PROG_TYPE_SCHED_CLS program? Can you use libbpf's
"classifier/src_ingress_l3" naming convention?
> +{
> + __u16 proto = skb->protocol;
> +
> + if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
> + return TC_ACT_SHOT;
> +
> + __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
we try to keep BPF code compliant with C89, so please move all the
variable declaration into a single block at the top of a function
> + if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
> + return TC_ACT_SHOT;
> +
> + __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
> + if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
> + return TC_ACT_SHOT;
> +
> + if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
> + return TC_ACT_SHOT;
> +
> + return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
> +}
> +
[...]
> +#define MAX(a, b) ((a) > (b) ? (a) : (b))
> +
> +enum {
> + SRC_TO_TARGET = 0,
> + TARGET_TO_SRC = 1,
> +};
> +
> +void setns_by_name(char *name) {
{ should be on a new line
please run scripts/checkpatch.pl on these files, it will point out
trivial issues like this
> + int nsfd;
> + char nspath[PATH_MAX];
> +
> + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
> + nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
> + if (nsfd < 0) {
> + fprintf(stderr, "failed to open net namespace %s: %s\n", name, strerror(errno));
> + exit(1);
> + }
here seems to be a mix of tabs and spaces
> + setns(nsfd, CLONE_NEWNET);
> + close(nsfd);
> +}
> +
[...]
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head
2021-04-27 21:41 ` Andrii Nakryiko
@ 2021-04-28 10:39 ` Jussi Maki
2021-04-28 10:49 ` Daniel Borkmann
0 siblings, 1 reply; 20+ messages in thread
From: Jussi Maki @ 2021-04-28 10:39 UTC (permalink / raw)
To: Andrii Nakryiko; +Cc: bpf, Daniel Borkmann
On Tue, Apr 27, 2021 at 11:41 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> > ...
> > +test_tc_peer_user
>
> can we make it into a reasonable test inside test_progs? that way it
> will be executed regularly
There don't seem to be any tests yet for redirect_peer in test_progs and I'd
like this test to live next to them. Would it make sense to rework
test_tc_redirect.sh
into the test_progs framework?
What are your thoughts on this, Daniel?
> >
> > +SEC("src_ingress_l3") int tc_src_l3(struct __sk_buff *skb)
>
> please keep SEC() on separate line
>
> also, is this a BPF_PROG_TYPE_SCHED_CLS program? Can you use libbpf's
> "classifier/src_ingress_l3" naming convention?
I'm following the conventions in that file. Should they all be
changed? Happy to do that.
> > +{
> > + __u16 proto = skb->protocol;
> > +
> > + if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
> > + return TC_ACT_SHOT;
> > +
> > + __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
>
> we try to keep BPF code compliant with C89, so please move all the
> variable declaration into a single block at the top of a function
Will fix, thanks.
> > +void setns_by_name(char *name) {
>
>
> { should be on a new line
>
> please run scripts/checkpatch.pl on these files, it will point out
> trivial issues like this
Sorry about that. I'll make sure to run it on all future submissions.
> > + int nsfd;
> > + char nspath[PATH_MAX];
> > +
> > + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
> > + nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
> > + if (nsfd < 0) {
> > + fprintf(stderr, "failed to open net namespace %s: %s\n", name, strerror(errno));
> > + exit(1);
> > + }
>
> here seems to be a mix of tabs and spaces
Thanks, will fix it and my editor's settings ;-).
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head
2021-04-28 10:39 ` Jussi Maki
@ 2021-04-28 10:49 ` Daniel Borkmann
2021-04-28 13:39 ` Jussi Maki
0 siblings, 1 reply; 20+ messages in thread
From: Daniel Borkmann @ 2021-04-28 10:49 UTC (permalink / raw)
To: Jussi Maki, Andrii Nakryiko; +Cc: bpf
On 4/28/21 12:39 PM, Jussi Maki wrote:
> On Tue, Apr 27, 2021 at 11:41 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
>>> ...
>>> +test_tc_peer_user
>>
>> can we make it into a reasonable test inside test_progs? that way it
>> will be executed regularly
>
> There don't seem to be any tests yet for redirect_peer in test_progs and I'd
> like this test to live next to them. Would it make sense to rework
> test_tc_redirect.sh
> into the test_progs framework?
>
> What are your thoughts on this, Daniel?
Yes, please, that would be awesome to have test_tc_redirect.sh reworked to
run as part of test_progs! :) Then it will be covered by CI [0].
Thanks,
Daniel
[0] https://travis-ci.com/github/kernel-patches/bpf/pull_requests
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head
2021-04-28 10:49 ` Daniel Borkmann
@ 2021-04-28 13:39 ` Jussi Maki
0 siblings, 0 replies; 20+ messages in thread
From: Jussi Maki @ 2021-04-28 13:39 UTC (permalink / raw)
To: Daniel Borkmann; +Cc: Andrii Nakryiko, bpf
On Wed, Apr 28, 2021 at 12:49 PM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> Yes, please, that would be awesome to have test_tc_redirect.sh reworked to
> run as part of test_progs! :) Then it will be covered by CI [0].
Sure thing. I'll rework it and will follow up with a separate patch
for the rework and then v2
of this change.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-04-27 13:55 ` [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head Jussi Maki
@ 2021-05-17 10:11 ` Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 1/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 2/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
` (2 subsequent siblings)
4 siblings, 2 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-17 10:11 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko
This fixes an issue with using bpf_skb_change_head to redirect
a packet from an L3 to an L2 device. See commit messages for details.
v1->v2:
- Port the test case to the newly refactored prog_tests/tc_redirect.c.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf v2 1/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-05-17 10:11 ` [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
@ 2021-05-17 10:11 ` Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 2/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
1 sibling, 0 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-17 10:11 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko, Jussi Maki
Add a failing test to try and use bpf_skb_change_head in combination
with bpf_redirect_peer to redirect a packet from a L3 device to veth.
The test uses a BPF program that adds L2 headers to the packet coming
from a L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. The test fails as skb->mac_len is not set properly and
thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify,
causing tcp_v4_rcv to point the TCP header into the middle of the IP header.
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
.../selftests/bpf/prog_tests/tc_redirect.c | 238 +++++++++++++++++-
.../selftests/bpf/progs/test_tc_peer.c | 26 ++
2 files changed, 251 insertions(+), 13 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..aa844f282e8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,16 +11,18 @@
*/
#define _GNU_SOURCE
-#include <fcntl.h>
+
+#include "test_progs.h"
+
#include <linux/limits.h>
#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
-#include <sys/types.h>
-#include "test_progs.h"
#include "network_helpers.h"
#include "test_tc_neigh_fib.skel.h"
#include "test_tc_neigh.skel.h"
@@ -32,16 +34,23 @@
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
#define IFADDR_STR_LEN 18
#define PING_ARGS "-c 3 -w 10 -q"
@@ -92,7 +101,8 @@ static int modify_proc(const char *path, const char *newval)
strncpy(mod->path, path, PATH_MAX);
- if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
+ mod->oldlen = fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f);
+ if (mod->oldlen < 0) {
log_err("reading from %s failed", path);
goto fail;
}
@@ -238,14 +248,15 @@ static int get_ifindex(const char *name)
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
SYS("ip link add veth_src type veth peer name veth_src_fwd");
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
- if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
- goto fail;
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
if (result->ifindex_veth_src_fwd < 0)
@@ -306,10 +317,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
setns_root();
return 0;
@@ -560,6 +569,206 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
setns_root();
}
+
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0)
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS("ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ fprintf(stderr, "select failed: %s\n", strerror(errno));
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ struct test_tc_peer *skel;
+ int err, tunnel_pid = -1;
+ int src_fd, target_fd;
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ return;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load")) {
+ test_tc_peer__destroy(skel);
+ return;
+ }
+
+ err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) {
+ test_tc_peer__destroy(skel);
+ return;
+ }
+
+ /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ if (!ASSERT_OK(setns_by_name(NS_SRC), "setns " NS_SRC))
+ goto fail;
+
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "alloc tun_src"))
+ goto fail;
+
+ if (!ASSERT_OK(setns_by_name(NS_FWD), "setns " NS_FWD))
+ goto fail;
+
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "alloc tun_fwd"))
+ goto fail;
+
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
+
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
+
+ setns_root();
+
+ /* Load "tc_src_l3" to the tun_fwd interface to redirect packets */
+ if (!ASSERT_OK(setns_by_name(NS_FWD), "setns " NS_FWD))
+ goto fail;
+
+ SYS("tc qdisc add dev tun_fwd clsact");
+ SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+
+ /* Setup route and neigh tables */
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_FWD "/128 dev veth_dst scope global");
+
+ SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_FWD " dev veth_dst lladdr " MAC_DST_FWD);
+
+ /* Enable forwarding back to wards src, but not the other way in order to require the
+ * BPF redirection.
+ */
+ err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward"))
+ goto fail;
+
+ err = modify_proc("/proc/sys/net/ipv4/conf/veth_src_fwd/forwarding", "0");
+ if (!ASSERT_OK(err, "set veth_src_fwd.forwarding"))
+ goto fail;
+
+ err = modify_proc("/proc/sys/net/ipv4/conf/tun_fwd/forwarding", "0");
+ if (!ASSERT_OK(err, "set veth_src_fwd.forwarding"))
+ goto fail;
+
+ err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
+ if (!ASSERT_OK(err, "set ipv6.forwarding"))
+ goto fail;
+
+ err = modify_proc("/proc/sys/net/ipv6/conf/veth_src_fwd/forwarding", "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding"))
+ goto fail;
+
+ err = modify_proc("/proc/sys/net/ipv6/conf/tun_fwd/forwarding", "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding"))
+ goto fail;
+
+ setns_root();
+
+ test_connectivity();
+
+fail:
+ setns_by_name(NS_FWD);
+ restore_proc();
+ setns_root();
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ bpf_program__unpin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ test_tc_peer__destroy(skel);
+}
+
void test_tc_redirect(void)
{
struct netns_setup_result setup_result;
@@ -577,6 +786,9 @@ void test_tc_redirect(void)
if (test__start_subtest("tc_redirect_peer"))
test_tc_redirect_peer(&setup_result);
+ if (test__start_subtest("tc_redirect_peer_l3"))
+ test_tc_redirect_peer_l3(&setup_result);
+
if (test__start_subtest("tc_redirect_neigh"))
test_tc_redirect_neigh(&setup_result);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bb..aea7bec5a1ab 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,18 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
static volatile const __u32 IFINDEX_SRC;
static volatile const __u32 IFINDEX_DST;
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
SEC("classifier/chk_egress")
int tc_chk(struct __sk_buff *skb)
{
@@ -29,4 +35,24 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
char __license[] SEC("license") = "GPL";
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH bpf v2 2/2] bpf: Set mac_len in bpf_skb_change_head
2021-05-17 10:11 ` [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 1/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
@ 2021-05-17 10:11 ` Jussi Maki
1 sibling, 0 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-17 10:11 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko, Jussi Maki
The bpf_skb_change_head() helper did not set "skb->mac_len", which is problematic
when it's used in combination with bpf_redirect_peer(). Without it, redirecting
a packet from an L3 device such as WireGuard to the veth peer device will cause
skb->data to point to the middle of the IP header on entry to tcp_v4_rcv() since
the L2 header is not pulled correctly due to mac_len=0.
Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
net/core/filter.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..65ab4e21c087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
}
return ret;
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-04-27 13:55 ` [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
@ 2021-05-18 14:23 ` Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-19 15:47 ` [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-25 10:29 ` [PATCH bpf v5] " Jussi Maki
4 siblings, 2 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-18 14:23 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko
This fixes an issue with using bpf_skb_change_head to redirect
a packet from an L3 to an L2 device. See commit messages for details.
v2->v3:
- Refactor test_tc_redirect_peer_l3 to use BPF for passing packets
in both directions instead of relying on forwarding on the way back.
- Clean up the tc_redirect test. Set up and tear down namespaces for each
test case and avoid a more complex cleanup when tearing down the
namespace is enough.
v1->v2:
- Port the test case to the newly refactored prog_tests/tc_redirect.c.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf v3 1/2] bpf: Set mac_len in bpf_skb_change_head
2021-05-18 14:23 ` [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
@ 2021-05-18 14:23 ` Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
1 sibling, 0 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-18 14:23 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko, Jussi Maki
The bpf_skb_change_head() helper did not set "skb->mac_len", which is
problematic when it's used in combination with bpf_redirect_peer().
Without it, redirecting a packet from an L3 device such as WireGuard to
the veth peer device will cause skb->data to point to the middle of the
IP header on entry to tcp_v4_rcv() since the L2 header is not pulled
correctly due to mac_len=0.
Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
net/core/filter.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..65ab4e21c087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
}
return ret;
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-05-18 14:23 ` [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
@ 2021-05-18 14:23 ` Jussi Maki
2021-05-19 15:33 ` Daniel Borkmann
1 sibling, 1 reply; 20+ messages in thread
From: Jussi Maki @ 2021-05-18 14:23 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko, Jussi Maki
Add a test case for using bpf_skb_change_head in combination with
bpf_redirect_peer to redirect a packet from an L3 device to veth and back.
The test uses a BPF program that adds L2 headers to the packet coming
from an L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. Without the mac_len fix the test fails, as skb->mac_len
is not set properly and thus the Ethernet headers are not properly
skb_pull'd in cls_bpf_classify, causing tcp_v4_rcv to point the TCP header
into the middle of the IP header.
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
.../selftests/bpf/prog_tests/tc_redirect.c | 538 ++++++++++++------
.../selftests/bpf/progs/test_tc_peer.c | 31 +
2 files changed, 391 insertions(+), 178 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..c5e84b7a9c06 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,14 +11,17 @@
*/
#define _GNU_SOURCE
-#include <fcntl.h>
+
+#include <arpa/inet.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
-#include <sys/types.h>
+#include <sys/mount.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -32,18 +35,25 @@
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
#define IFADDR_STR_LEN 18
-#define PING_ARGS "-c 3 -w 10 -q"
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
@@ -51,120 +61,104 @@
#define TIMEOUT_MILLIS 10000
-#define MAX_PROC_MODS 128
-#define MAX_PROC_VALUE_LEN 16
-
#define log_err(MSG, ...) \
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
-struct proc_mod {
- char path[PATH_MAX];
- char oldval[MAX_PROC_VALUE_LEN];
- int oldlen;
-};
-
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
-static int root_netns_fd = -1;
-static int num_proc_mods;
-static struct proc_mod proc_mods[MAX_PROC_MODS];
-/**
- * modify_proc() - Modify entry in /proc
- *
- * Modifies an entry in /proc and saves the original value for later
- * restoration with restore_proc().
- */
-static int modify_proc(const char *path, const char *newval)
+static int write_file(const char *path, const char *newval)
{
- struct proc_mod *mod;
FILE *f;
- if (num_proc_mods + 1 > MAX_PROC_MODS)
- return -1;
-
f = fopen(path, "r+");
if (!f)
return -1;
-
- mod = &proc_mods[num_proc_mods];
- num_proc_mods++;
-
- strncpy(mod->path, path, PATH_MAX);
-
- if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
- log_err("reading from %s failed", path);
- goto fail;
- }
- rewind(f);
if (fwrite(newval, strlen(newval), 1, f) != 1) {
log_err("writing to %s failed", path);
- goto fail;
+ fclose(f);
+ return -1;
}
-
fclose(f);
return 0;
-
-fail:
- fclose(f);
- num_proc_mods--;
- return -1;
}
-/**
- * restore_proc() - Restore all /proc modifications
- */
-static void restore_proc(void)
+struct nstoken {
+ int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
{
- int i;
+ int err;
- for (i = 0; i < num_proc_mods; i++) {
- struct proc_mod *mod = &proc_mods[i];
- FILE *f;
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
- f = fopen(mod->path, "w");
- if (!f) {
- log_err("fopen of %s failed", mod->path);
- continue;
- }
+ if (!ASSERT_OK(err, "setns"))
+ return err;
- if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
- log_err("fwrite to %s failed", mod->path);
+ /* Switch /sys to the new namespace so that e.g. /sys/class/net
+ * reflects the devices in the new namespace.
+ */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
- fclose(f);
- }
- num_proc_mods = 0;
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+
+ err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys"))
+ return err;
+
+ err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+ return err;
+
+ return 0;
}
/**
- * setns_by_name() - Set networks namespace by name
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
*/
-static int setns_by_name(const char *name)
+static struct nstoken *open_netns(const char *name)
{
int nsfd;
char nspath[PATH_MAX];
int err;
+ struct nstoken *token;
+
+ token = malloc(sizeof(struct nstoken));
+ if (!ASSERT_OK_PTR(token, "malloc token"))
+ return NULL;
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ goto fail;
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
- if (nsfd < 0)
- return nsfd;
+ if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ goto fail;
- err = setns(nsfd, CLONE_NEWNET);
- close(nsfd);
+ err = setns_by_fd(nsfd);
+ if (!ASSERT_OK(err, "setns_by_fd"))
+ goto fail;
- return err;
+ return token;
+fail:
+ free(token);
+ return NULL;
}
-/**
- * setns_root() - Set network namespace to original (root) namespace
- *
- * Not expected to ever fail, so error not returned, but failure logged
- * and test marked as failed.
- */
-static void setns_root(void)
+static void close_netns(struct nstoken *token)
{
- ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+ ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+ free(token);
}
static int netns_setup_namespaces(const char *verb)
@@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
+ struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
SYS("ip link add veth_src type veth peer name veth_src_fwd");
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
- if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
- goto fail;
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
if (result->ifindex_veth_src_fwd < 0)
@@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
SYS("ip addr add " IP4_SRC "/32 dev veth_src");
@@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
+ close_netns(nstoken);
+
/** setup in 'fwd' namespace */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
goto fail;
/* The fwd netns automatically gets a v6 LL address / routes, but also
@@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ close_netns(nstoken);
+
/** setup in 'dst' namespace */
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
SYS("ip addr add " IP4_DST "/32 dev veth_dst");
@@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ close_netns(nstoken);
- setns_root();
return 0;
fail:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
return -1;
}
static int netns_load_bpf(void)
{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- return -1;
-
SYS("tc qdisc add dev veth_src_fwd clsact");
SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
SRC_PROG_PIN_FILE);
@@ -335,42 +335,29 @@ static int netns_load_bpf(void)
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
CHK_PROG_PIN_FILE);
- setns_root();
- return -1;
-fail:
- setns_root();
- return -1;
-}
-
-static int netns_unload_bpf(void)
-{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- goto fail;
- SYS("tc qdisc delete dev veth_src_fwd clsact");
- SYS("tc qdisc delete dev veth_dst_fwd clsact");
-
- setns_root();
return 0;
fail:
- setns_root();
return -1;
}
-
static void test_tcp(int family, const char *addr, __u16 port)
{
int listen_fd = -1, accept_fd = -1, client_fd = -1;
char buf[] = "testing testing";
int n;
+ struct nstoken *nstoken;
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
return;
listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
if (!ASSERT_GE(listen_fd, 0, "listen"))
goto done;
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
@@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
ASSERT_EQ(n, sizeof(buf), "recv from server");
done:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
if (listen_fd >= 0)
close(listen_fd);
if (accept_fd >= 0)
@@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
{
const char *ping = family == AF_INET6 ? "ping6" : "ping";
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
return 0;
fail:
return -1;
@@ -419,19 +407,37 @@ static void test_connectivity(void)
test_ping(AF_INET6, IP6_DST);
}
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh_fib *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh_fib__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
- return;
+ goto done;
- if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
- test_tc_neigh_fib__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
goto done;
/* bpf_fib_lookup() checks if forwarding is enabled */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- goto done;
-
- err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
- if (!ASSERT_OK(err, "set ipv4.ip_forward"))
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
goto done;
- err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
- if (!ASSERT_OK(err, "set ipv6.forwarding"))
- goto done;
- setns_root();
-
test_connectivity();
+
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh_fib__destroy(skel);
- netns_unload_bpf();
- setns_root();
- restore_proc();
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_neigh__load(skel);
- if (!ASSERT_OK(err, "test_tc_neigh__load")) {
- test_tc_neigh__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
{
+ struct nstoken *nstoken;
struct test_tc_peer *skel;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_peer__open();
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_peer__load(skel);
- if (!ASSERT_OK(err, "test_tc_peer__load")) {
- test_tc_peer__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -549,41 +549,223 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_peer__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
}
-void test_tc_redirect(void)
+static int tun_open(char *name)
{
- struct netns_setup_result setup_result;
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS("ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
- root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
- if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd;
+ int ifindex;
+
+ /* Start an L3 TUN/TAP tunnel between the src and fwd namespaces.
+ * This test uses TUN/TAP instead of e.g. an IPIP or GRE tunnel, as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have the skb in a suitable state for this test. An alternative to
+ * TUN/TAP would be e.g. WireGuard, which would appear as a pure L3 device
+ * to BPF, but that requires a much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return;
- if (netns_setup_namespaces("add"))
- goto done;
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
- if (netns_setup_links_and_routes(&setup_result))
- goto done;
+ close_netns(nstoken);
- if (test__start_subtest("tc_redirect_peer"))
- test_tc_redirect_peer(&setup_result);
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh"))
- test_tc_redirect_neigh(&setup_result);
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh_fib"))
- test_tc_redirect_neigh_fib(&setup_result);
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
-done:
- close(root_netns_fd);
- netns_setup_namespaces("delete");
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = get_ifindex("tun_fwd");
+ if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto fail;
+
+ /* Load "tc_src_l3" on the tun_fwd interface to redirect packets
+ * towards dst, "tc_dst_l3" on veth_dst_fwd to redirect packets back
+ * towards src, and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+ */
+ SYS("tc qdisc add dev tun_fwd clsact");
+ SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ /* Setup route and neigh tables */
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+#define RUN_TEST(name) \
+ ({ \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+void test_tc_redirect(void)
+{
+ struct netns_setup_result setup_result;
+
+ RUN_TEST(tc_redirect_peer);
+ RUN_TEST(tc_redirect_peer_l3);
+ RUN_TEST(tc_redirect_neigh);
+ RUN_TEST(tc_redirect_neigh_fib);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bb..ad70f4ce9254 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,17 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
static volatile const __u32 IFINDEX_SRC;
static volatile const __u32 IFINDEX_DST;
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
SEC("classifier/chk_egress")
int tc_chk(struct __sk_buff *skb)
{
@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+ return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
char __license[] SEC("license") = "GPL";
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-05-18 14:23 ` [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
@ 2021-05-19 15:33 ` Daniel Borkmann
0 siblings, 0 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-05-19 15:33 UTC (permalink / raw)
To: Jussi Maki, bpf; +Cc: andrii.nakryiko
On 5/18/21 4:23 PM, Jussi Maki wrote:
> Add a test case for using bpf_skb_change_head in combination with
> bpf_redirect_peer to redirect a packet from a L3 device to veth and back.
>
> The test uses a BPF program that adds L2 headers to the packet coming
> from a L3 device and then calls bpf_redirect_peer to redirect the packet
> to a veth device. The test fails as skb->mac_len is not set properly and
> thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify,
> causing tcp_v4_rcv to point the TCP header into middle of the IP header.
>
> Signed-off-by: Jussi Maki <joamaki@gmail.com>
[...]
>
> /**
> - * setns_by_name() - Set networks namespace by name
> + * open_netns() - Switch to specified network namespace by name.
> + *
> + * Returns token with which to restore the original namespace
> + * using close_netns().
> */
> -static int setns_by_name(const char *name)
> +static struct nstoken *open_netns(const char *name)
> {
> int nsfd;
> char nspath[PATH_MAX];
> int err;
> + struct nstoken *token;
> +
> + token = malloc(sizeof(struct nstoken));
> + if (!ASSERT_OK_PTR(token, "malloc token"))
> + return NULL;
> +
> + token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
> + if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
> + goto fail;
>
> snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
> nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
> - if (nsfd < 0)
> - return nsfd;
> + if (!ASSERT_GE(nsfd, 0, "open netns fd"))
> + goto fail;
>
> - err = setns(nsfd, CLONE_NEWNET);
> - close(nsfd);
> + err = setns_by_fd(nsfd);
> + if (!ASSERT_OK(err, "setns_by_fd"))
> + goto fail;
>
> - return err;
> + return token;
> +fail:
> + free(token);
> + return NULL;
> }
As discussed earlier, the selftest seems to be causing issues in the bpf CI [0] likely
due to the setns() interaction/cleanup. Pls investigate and resubmit once fixed. Thanks
a lot, Jussi!
Cheers,
Daniel
[0] https://travis-ci.com/github/kernel-patches/bpf/builds/226213040
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
` (2 preceding siblings ...)
2021-05-18 14:23 ` [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
@ 2021-05-19 15:47 ` Jussi Maki
2021-05-19 15:47 ` [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-19 15:47 ` [PATCH bpf v4 2/2] " Jussi Maki
2021-05-25 10:29 ` [PATCH bpf v5] " Jussi Maki
4 siblings, 2 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-19 15:47 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko
This fixes an issue with using bpf_skb_change_head to redirect a
packet from an L3 device to an L2 device. See the commit messages for details.
v3->v4:
- Run the tc_redirect test in its own thread to isolate the network and
mount namespace modifications to the test thread. Fixes the failing CI
check in v3.
v2->v3:
- Refactor test_tc_redirect_peer_l3 to use BPF for passing packets
in both directions instead of relying on forwarding on the way back.
- Clean up the tc_redirect test. Set up and tear down namespaces for each
test case, avoiding a more complex cleanup when tearing down the
namespace is enough.
v1->v2:
- Port the test case to the newly refactored prog_tests/tc_redirect.c.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head
2021-05-19 15:47 ` [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
@ 2021-05-19 15:47 ` Jussi Maki
2021-05-20 22:07 ` Daniel Borkmann
2021-05-19 15:47 ` [PATCH bpf v4 2/2] " Jussi Maki
1 sibling, 1 reply; 20+ messages in thread
From: Jussi Maki @ 2021-05-19 15:47 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko, Jussi Maki
The bpf_skb_change_head() helper did not set "skb->mac_len", which is
problematic when it's used in combination with bpf_redirect_peer().
Without it, redirecting a packet from an L3 device such as wireguard to
the veth peer device will cause skb->data to point to the middle of the
IP header on entry to tcp_v4_rcv() since the L2 header is not pulled
correctly due to mac_len=0.
Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
net/core/filter.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a670..65ab4e21c087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
}
return ret;
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH bpf v4 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-05-19 15:47 ` [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-19 15:47 ` [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
@ 2021-05-19 15:47 ` Jussi Maki
1 sibling, 0 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-19 15:47 UTC (permalink / raw)
To: bpf; +Cc: daniel, andrii.nakryiko, Jussi Maki
Add a test case for using bpf_skb_change_head in combination with
bpf_redirect_peer to redirect a packet from an L3 device to veth and back.
The test uses a BPF program that adds L2 headers to the packet coming
from an L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. Without the bpf_skb_change_head mac_len fix, the test
fails because skb->mac_len is not set properly and thus the ethernet
headers are not properly skb_pull'd in cls_bpf_classify, causing
tcp_v4_rcv to point the TCP header into the middle of the IP header.
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
.../selftests/bpf/prog_tests/tc_redirect.c | 553 ++++++++++++------
.../selftests/bpf/progs/test_tc_peer.c | 31 +
2 files changed, 406 insertions(+), 178 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..1b42b15c390e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,14 +11,17 @@
*/
#define _GNU_SOURCE
-#include <fcntl.h>
+
+#include <arpa/inet.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
-#include <sys/types.h>
+#include <sys/mount.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -32,18 +35,25 @@
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
#define IFADDR_STR_LEN 18
-#define PING_ARGS "-c 3 -w 10 -q"
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
@@ -51,120 +61,104 @@
#define TIMEOUT_MILLIS 10000
-#define MAX_PROC_MODS 128
-#define MAX_PROC_VALUE_LEN 16
-
#define log_err(MSG, ...) \
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
-struct proc_mod {
- char path[PATH_MAX];
- char oldval[MAX_PROC_VALUE_LEN];
- int oldlen;
-};
-
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
-static int root_netns_fd = -1;
-static int num_proc_mods;
-static struct proc_mod proc_mods[MAX_PROC_MODS];
-/**
- * modify_proc() - Modify entry in /proc
- *
- * Modifies an entry in /proc and saves the original value for later
- * restoration with restore_proc().
- */
-static int modify_proc(const char *path, const char *newval)
+static int write_file(const char *path, const char *newval)
{
- struct proc_mod *mod;
FILE *f;
- if (num_proc_mods + 1 > MAX_PROC_MODS)
- return -1;
-
f = fopen(path, "r+");
if (!f)
return -1;
-
- mod = &proc_mods[num_proc_mods];
- num_proc_mods++;
-
- strncpy(mod->path, path, PATH_MAX);
-
- if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
- log_err("reading from %s failed", path);
- goto fail;
- }
- rewind(f);
if (fwrite(newval, strlen(newval), 1, f) != 1) {
log_err("writing to %s failed", path);
- goto fail;
+ fclose(f);
+ return -1;
}
-
fclose(f);
return 0;
-
-fail:
- fclose(f);
- num_proc_mods--;
- return -1;
}
-/**
- * restore_proc() - Restore all /proc modifications
- */
-static void restore_proc(void)
+struct nstoken {
+ int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
{
- int i;
+ int err;
- for (i = 0; i < num_proc_mods; i++) {
- struct proc_mod *mod = &proc_mods[i];
- FILE *f;
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
- f = fopen(mod->path, "w");
- if (!f) {
- log_err("fopen of %s failed", mod->path);
- continue;
- }
+ if (!ASSERT_OK(err, "setns"))
+ return err;
- if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
- log_err("fwrite to %s failed", mod->path);
+ /* Switch /sys to the new namespace so that e.g. /sys/class/net
+ * reflects the devices in the new namespace.
+ */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
- fclose(f);
- }
- num_proc_mods = 0;
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+
+ err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys"))
+ return err;
+
+ err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+ return err;
+
+ return 0;
}
/**
- * setns_by_name() - Set networks namespace by name
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
*/
-static int setns_by_name(const char *name)
+static struct nstoken *open_netns(const char *name)
{
int nsfd;
char nspath[PATH_MAX];
int err;
+ struct nstoken *token;
+
+ token = malloc(sizeof(struct nstoken));
+ if (!ASSERT_OK_PTR(token, "malloc token"))
+ return NULL;
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ goto fail;
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
- if (nsfd < 0)
- return nsfd;
+ if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ goto fail;
- err = setns(nsfd, CLONE_NEWNET);
- close(nsfd);
+ err = setns_by_fd(nsfd);
+ if (!ASSERT_OK(err, "setns_by_fd"))
+ goto fail;
- return err;
+ return token;
+fail:
+ free(token);
+ return NULL;
}
-/**
- * setns_root() - Set network namespace to original (root) namespace
- *
- * Not expected to ever fail, so error not returned, but failure logged
- * and test marked as failed.
- */
-static void setns_root(void)
+static void close_netns(struct nstoken *token)
{
- ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+ ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+ free(token);
}
static int netns_setup_namespaces(const char *verb)
@@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
+ struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
SYS("ip link add veth_src type veth peer name veth_src_fwd");
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
- if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
- goto fail;
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
if (result->ifindex_veth_src_fwd < 0)
@@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
SYS("ip addr add " IP4_SRC "/32 dev veth_src");
@@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
+ close_netns(nstoken);
+
/** setup in 'fwd' namespace */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
goto fail;
/* The fwd netns automatically gets a v6 LL address / routes, but also
@@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ close_netns(nstoken);
+
/** setup in 'dst' namespace */
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
SYS("ip addr add " IP4_DST "/32 dev veth_dst");
@@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ close_netns(nstoken);
- setns_root();
return 0;
fail:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
return -1;
}
static int netns_load_bpf(void)
{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- return -1;
-
SYS("tc qdisc add dev veth_src_fwd clsact");
SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
SRC_PROG_PIN_FILE);
@@ -335,42 +335,29 @@ static int netns_load_bpf(void)
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
CHK_PROG_PIN_FILE);
- setns_root();
- return -1;
-fail:
- setns_root();
- return -1;
-}
-
-static int netns_unload_bpf(void)
-{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- goto fail;
- SYS("tc qdisc delete dev veth_src_fwd clsact");
- SYS("tc qdisc delete dev veth_dst_fwd clsact");
-
- setns_root();
return 0;
fail:
- setns_root();
return -1;
}
-
static void test_tcp(int family, const char *addr, __u16 port)
{
int listen_fd = -1, accept_fd = -1, client_fd = -1;
char buf[] = "testing testing";
int n;
+ struct nstoken *nstoken;
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
return;
listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
if (!ASSERT_GE(listen_fd, 0, "listen"))
goto done;
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
@@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
ASSERT_EQ(n, sizeof(buf), "recv from server");
done:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
if (listen_fd >= 0)
close(listen_fd);
if (accept_fd >= 0)
@@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
{
const char *ping = family == AF_INET6 ? "ping6" : "ping";
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
return 0;
fail:
return -1;
@@ -419,19 +407,37 @@ static void test_connectivity(void)
test_ping(AF_INET6, IP6_DST);
}
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh_fib *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh_fib__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
- return;
+ goto done;
- if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
- test_tc_neigh_fib__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
goto done;
/* bpf_fib_lookup() checks if forwarding is enabled */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
goto done;
- err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
- if (!ASSERT_OK(err, "set ipv4.ip_forward"))
- goto done;
-
- err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
- if (!ASSERT_OK(err, "set ipv6.forwarding"))
- goto done;
- setns_root();
-
test_connectivity();
+
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh_fib__destroy(skel);
- netns_unload_bpf();
- setns_root();
- restore_proc();
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_neigh__load(skel);
- if (!ASSERT_OK(err, "test_tc_neigh__load")) {
- test_tc_neigh__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
{
+ struct nstoken *nstoken;
struct test_tc_peer *skel;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_peer__open();
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_peer__load(skel);
- if (!ASSERT_OK(err, "test_tc_peer__load")) {
- test_tc_peer__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -549,41 +549,238 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_peer__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
}
-void test_tc_redirect(void)
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS("ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
{
- struct netns_setup_result setup_result;
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
- root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
- if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd;
+ int ifindex;
+
+ /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return;
- if (netns_setup_namespaces("add"))
- goto done;
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
- if (netns_setup_links_and_routes(&setup_result))
- goto done;
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
- if (test__start_subtest("tc_redirect_peer"))
- test_tc_redirect_peer(&setup_result);
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh"))
- test_tc_redirect_neigh(&setup_result);
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh_fib"))
- test_tc_redirect_neigh_fib(&setup_result);
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
-done:
- close(root_netns_fd);
- netns_setup_namespaces("delete");
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = get_ifindex("tun_fwd");
+ if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto fail;
+
+ /* Load "tc_src_l3" on tun_fwd to redirect packets towards dst,
+ * "tc_dst_l3" on veth_dst_fwd ingress to redirect packets back, and
+ * "tc_chk" on veth_dst_fwd egress to drop non-redirected packets.
+ */
+ SYS("tc qdisc add dev tun_fwd clsact");
+ SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ /* Setup route and neigh tables */
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
}
+
+#define RUN_TEST(name) \
+ ({ \
+ struct netns_setup_result setup_result; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ RUN_TEST(tc_redirect_peer);
+ RUN_TEST(tc_redirect_peer_l3);
+ RUN_TEST(tc_redirect_neigh);
+ RUN_TEST(tc_redirect_neigh_fib);
+ return NULL;
+}
+
+void test_tc_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bb..ad70f4ce9254 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,17 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
static volatile const __u32 IFINDEX_SRC;
static volatile const __u32 IFINDEX_DST;
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
SEC("classifier/chk_egress")
int tc_chk(struct __sk_buff *skb)
{
@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+ return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
char __license[] SEC("license") = "GPL";
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head
2021-05-19 15:47 ` [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
@ 2021-05-20 22:07 ` Daniel Borkmann
2021-05-25 10:22 ` [PATCH bpf v5] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
0 siblings, 1 reply; 20+ messages in thread
From: Daniel Borkmann @ 2021-05-20 22:07 UTC (permalink / raw)
To: Jussi Maki, bpf; +Cc: andrii.nakryiko
On 5/19/21 5:47 PM, Jussi Maki wrote:
> The skb_change_head() helper did not set "skb->mac_len", which is
> problematic when it's used in combination with skb_redirect_peer().
> Without it, redirecting a packet from a L3 device such as wireguard to
> the veth peer device will cause skb->data to point to the middle of the
> IP header on entry to tcp_v4_rcv() since the L2 header is not pulled
> correctly due to mac_len=0.
>
> Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
> Signed-off-by: Jussi Maki <joamaki@gmail.com>
Took this one in, the selftest needs a rebase since it doesn't apply to
bpf. Pls fix and resubmit 2/2, thanks.
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH bpf v5] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-05-20 22:07 ` Daniel Borkmann
@ 2021-05-25 10:22 ` Jussi Maki
0 siblings, 0 replies; 20+ messages in thread
From: Jussi Maki @ 2021-05-25 10:22 UTC (permalink / raw)
To: bpf; +Cc: daniel, Jussi Maki
Add a test case for using bpf_skb_change_head in combination with
bpf_redirect_peer to redirect a packet from an L3 device to veth and back.
The test uses a BPF program that adds L2 headers to the packet coming
from an L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. Without the bpf_skb_change_head mac_len fix, the test
fails because skb->mac_len is not set properly and thus the ethernet
headers are not properly skb_pull'd in cls_bpf_classify, causing
tcp_v4_rcv to point the TCP header into the middle of the IP header.
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
.../selftests/bpf/prog_tests/tc_redirect.c | 553 ++++++++++++------
.../selftests/bpf/progs/test_tc_peer.c | 31 +
2 files changed, 406 insertions(+), 178 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..1b42b15c390e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,14 +11,17 @@
*/
#define _GNU_SOURCE
-#include <fcntl.h>
+
+#include <arpa/inet.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
-#include <sys/types.h>
+#include <sys/mount.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -32,18 +35,25 @@
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
#define IFADDR_STR_LEN 18
-#define PING_ARGS "-c 3 -w 10 -q"
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
@@ -51,120 +61,104 @@
#define TIMEOUT_MILLIS 10000
-#define MAX_PROC_MODS 128
-#define MAX_PROC_VALUE_LEN 16
-
#define log_err(MSG, ...) \
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
-struct proc_mod {
- char path[PATH_MAX];
- char oldval[MAX_PROC_VALUE_LEN];
- int oldlen;
-};
-
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
-static int root_netns_fd = -1;
-static int num_proc_mods;
-static struct proc_mod proc_mods[MAX_PROC_MODS];
-/**
- * modify_proc() - Modify entry in /proc
- *
- * Modifies an entry in /proc and saves the original value for later
- * restoration with restore_proc().
- */
-static int modify_proc(const char *path, const char *newval)
+static int write_file(const char *path, const char *newval)
{
- struct proc_mod *mod;
FILE *f;
- if (num_proc_mods + 1 > MAX_PROC_MODS)
- return -1;
-
f = fopen(path, "r+");
if (!f)
return -1;
-
- mod = &proc_mods[num_proc_mods];
- num_proc_mods++;
-
- strncpy(mod->path, path, PATH_MAX);
-
- if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
- log_err("reading from %s failed", path);
- goto fail;
- }
- rewind(f);
if (fwrite(newval, strlen(newval), 1, f) != 1) {
log_err("writing to %s failed", path);
- goto fail;
+ fclose(f);
+ return -1;
}
-
fclose(f);
return 0;
-
-fail:
- fclose(f);
- num_proc_mods--;
- return -1;
}
-/**
- * restore_proc() - Restore all /proc modifications
- */
-static void restore_proc(void)
+struct nstoken {
+ int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
{
- int i;
+ int err;
- for (i = 0; i < num_proc_mods; i++) {
- struct proc_mod *mod = &proc_mods[i];
- FILE *f;
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
- f = fopen(mod->path, "w");
- if (!f) {
- log_err("fopen of %s failed", mod->path);
- continue;
- }
+ if (!ASSERT_OK(err, "setns"))
+ return err;
- if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
- log_err("fwrite to %s failed", mod->path);
+ /* Switch /sys to the new namespace so that e.g. /sys/class/net
+ * reflects the devices in the new namespace.
+ */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
- fclose(f);
- }
- num_proc_mods = 0;
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+
+ err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys"))
+ return err;
+
+ err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+ return err;
+
+ return 0;
}
/**
- * setns_by_name() - Set networks namespace by name
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
*/
-static int setns_by_name(const char *name)
+static struct nstoken *open_netns(const char *name)
{
int nsfd;
char nspath[PATH_MAX];
int err;
+ struct nstoken *token;
+
+ token = malloc(sizeof(struct nstoken));
+ if (!ASSERT_OK_PTR(token, "malloc token"))
+ return NULL;
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ goto fail;
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
- if (nsfd < 0)
- return nsfd;
+ if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ goto fail;
- err = setns(nsfd, CLONE_NEWNET);
- close(nsfd);
+ err = setns_by_fd(nsfd);
+ if (!ASSERT_OK(err, "setns_by_fd"))
+ goto fail;
- return err;
+ return token;
+fail:
+ free(token);
+ return NULL;
}
-/**
- * setns_root() - Set network namespace to original (root) namespace
- *
- * Not expected to ever fail, so error not returned, but failure logged
- * and test marked as failed.
- */
-static void setns_root(void)
+static void close_netns(struct nstoken *token)
{
- ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+ ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+ free(token);
}
static int netns_setup_namespaces(const char *verb)
@@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
+ struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
SYS("ip link add veth_src type veth peer name veth_src_fwd");
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
- if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
- goto fail;
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
if (result->ifindex_veth_src_fwd < 0)
@@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
SYS("ip addr add " IP4_SRC "/32 dev veth_src");
@@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
+ close_netns(nstoken);
+
/** setup in 'fwd' namespace */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
goto fail;
/* The fwd netns automatically gets a v6 LL address / routes, but also
@@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ close_netns(nstoken);
+
/** setup in 'dst' namespace */
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
SYS("ip addr add " IP4_DST "/32 dev veth_dst");
@@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ close_netns(nstoken);
- setns_root();
return 0;
fail:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
return -1;
}
static int netns_load_bpf(void)
{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- return -1;
-
SYS("tc qdisc add dev veth_src_fwd clsact");
SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
SRC_PROG_PIN_FILE);
@@ -335,42 +335,29 @@ static int netns_load_bpf(void)
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
CHK_PROG_PIN_FILE);
- setns_root();
- return -1;
-fail:
- setns_root();
- return -1;
-}
-
-static int netns_unload_bpf(void)
-{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- goto fail;
- SYS("tc qdisc delete dev veth_src_fwd clsact");
- SYS("tc qdisc delete dev veth_dst_fwd clsact");
-
- setns_root();
return 0;
fail:
- setns_root();
return -1;
}
-
static void test_tcp(int family, const char *addr, __u16 port)
{
int listen_fd = -1, accept_fd = -1, client_fd = -1;
char buf[] = "testing testing";
int n;
+ struct nstoken *nstoken;
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
return;
listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
if (!ASSERT_GE(listen_fd, 0, "listen"))
goto done;
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
@@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
ASSERT_EQ(n, sizeof(buf), "recv from server");
done:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
if (listen_fd >= 0)
close(listen_fd);
if (accept_fd >= 0)
@@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
{
const char *ping = family == AF_INET6 ? "ping6" : "ping";
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
return 0;
fail:
return -1;
@@ -419,19 +407,37 @@ static void test_connectivity(void)
test_ping(AF_INET6, IP6_DST);
}
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh_fib *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh_fib__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
- return;
+ goto done;
- if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
- test_tc_neigh_fib__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
goto done;
/* bpf_fib_lookup() checks if forwarding is enabled */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
goto done;
- err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
- if (!ASSERT_OK(err, "set ipv4.ip_forward"))
- goto done;
-
- err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
- if (!ASSERT_OK(err, "set ipv6.forwarding"))
- goto done;
- setns_root();
-
test_connectivity();
+
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh_fib__destroy(skel);
- netns_unload_bpf();
- setns_root();
- restore_proc();
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_neigh__load(skel);
- if (!ASSERT_OK(err, "test_tc_neigh__load")) {
- test_tc_neigh__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
{
+ struct nstoken *nstoken;
struct test_tc_peer *skel;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_peer__open();
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_peer__load(skel);
- if (!ASSERT_OK(err, "test_tc_peer__load")) {
- test_tc_peer__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -549,41 +549,238 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_peer__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
}
-void test_tc_redirect(void)
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS("ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
{
- struct netns_setup_result setup_result;
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
- root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
- if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd;
+ int ifindex;
+
+ /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return;
- if (netns_setup_namespaces("add"))
- goto done;
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
- if (netns_setup_links_and_routes(&setup_result))
- goto done;
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
- if (test__start_subtest("tc_redirect_peer"))
- test_tc_redirect_peer(&setup_result);
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh"))
- test_tc_redirect_neigh(&setup_result);
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh_fib"))
- test_tc_redirect_neigh_fib(&setup_result);
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
-done:
- close(root_netns_fd);
- netns_setup_namespaces("delete");
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = get_ifindex("tun_fwd");
+ if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto fail;
+
+ /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
+ * towards dst, "tc_dst_l3" on veth_dst_fwd to redirect packets back
+ * to tun_fwd, and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+ */
+ SYS("tc qdisc add dev tun_fwd clsact");
+ SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ /* Setup route and neigh tables */
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
}
+
+#define RUN_TEST(name) \
+ ({ \
+ struct netns_setup_result setup_result; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ RUN_TEST(tc_redirect_peer);
+ RUN_TEST(tc_redirect_peer_l3);
+ RUN_TEST(tc_redirect_neigh);
+ RUN_TEST(tc_redirect_neigh_fib);
+ return NULL;
+}
+
+void test_tc_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index ef264bced0e6..fe818cd5f010 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,17 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
SEC("classifier/chk_egress")
int tc_chk(struct __sk_buff *skb)
{
@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+ return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
char __license[] SEC("license") = "GPL";
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH bpf v5] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
` (3 preceding siblings ...)
2021-05-19 15:47 ` [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
@ 2021-05-25 10:29 ` Jussi Maki
2021-05-25 16:03 ` Daniel Borkmann
4 siblings, 1 reply; 20+ messages in thread
From: Jussi Maki @ 2021-05-25 10:29 UTC (permalink / raw)
To: bpf; +Cc: daniel, Jussi Maki
Add a test case for using bpf_skb_change_head in combination with
bpf_redirect_peer to redirect a packet from an L3 device to veth and back.
The test uses a BPF program that adds L2 headers to the packet coming
from an L3 device and then calls bpf_redirect_peer to redirect the packet
to a veth device. The test fails as skb->mac_len is not set properly and
thus the Ethernet headers are not properly skb_pull'd in cls_bpf_classify,
causing tcp_v4_rcv to point the TCP header into the middle of the IP header.
Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
.../selftests/bpf/prog_tests/tc_redirect.c | 553 ++++++++++++------
.../selftests/bpf/progs/test_tc_peer.c | 31 +
2 files changed, 406 insertions(+), 178 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8..1b42b15c390e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,14 +11,17 @@
*/
#define _GNU_SOURCE
-#include <fcntl.h>
+
+#include <arpa/inet.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>
-#include <sys/types.h>
+#include <sys/mount.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -32,18 +35,25 @@
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
#define IFADDR_STR_LEN 18
-#define PING_ARGS "-c 3 -w 10 -q"
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
@@ -51,120 +61,104 @@
#define TIMEOUT_MILLIS 10000
-#define MAX_PROC_MODS 128
-#define MAX_PROC_VALUE_LEN 16
-
#define log_err(MSG, ...) \
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
-struct proc_mod {
- char path[PATH_MAX];
- char oldval[MAX_PROC_VALUE_LEN];
- int oldlen;
-};
-
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
-static int root_netns_fd = -1;
-static int num_proc_mods;
-static struct proc_mod proc_mods[MAX_PROC_MODS];
-/**
- * modify_proc() - Modify entry in /proc
- *
- * Modifies an entry in /proc and saves the original value for later
- * restoration with restore_proc().
- */
-static int modify_proc(const char *path, const char *newval)
+static int write_file(const char *path, const char *newval)
{
- struct proc_mod *mod;
FILE *f;
- if (num_proc_mods + 1 > MAX_PROC_MODS)
- return -1;
-
f = fopen(path, "r+");
if (!f)
return -1;
-
- mod = &proc_mods[num_proc_mods];
- num_proc_mods++;
-
- strncpy(mod->path, path, PATH_MAX);
-
- if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
- log_err("reading from %s failed", path);
- goto fail;
- }
- rewind(f);
if (fwrite(newval, strlen(newval), 1, f) != 1) {
log_err("writing to %s failed", path);
- goto fail;
+ fclose(f);
+ return -1;
}
-
fclose(f);
return 0;
-
-fail:
- fclose(f);
- num_proc_mods--;
- return -1;
}
-/**
- * restore_proc() - Restore all /proc modifications
- */
-static void restore_proc(void)
+struct nstoken {
+ int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
{
- int i;
+ int err;
- for (i = 0; i < num_proc_mods; i++) {
- struct proc_mod *mod = &proc_mods[i];
- FILE *f;
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
- f = fopen(mod->path, "w");
- if (!f) {
- log_err("fopen of %s failed", mod->path);
- continue;
- }
+ if (!ASSERT_OK(err, "setns"))
+ return err;
- if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
- log_err("fwrite to %s failed", mod->path);
+ /* Switch /sys to the new namespace so that e.g. /sys/class/net
+ * reflects the devices in the new namespace.
+ */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ return err;
- fclose(f);
- }
- num_proc_mods = 0;
+ err = umount2("/sys", MNT_DETACH);
+ if (!ASSERT_OK(err, "umount2 /sys"))
+ return err;
+
+ err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys"))
+ return err;
+
+ err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+ return err;
+
+ return 0;
}
/**
- * setns_by_name() - Set networks namespace by name
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
*/
-static int setns_by_name(const char *name)
+static struct nstoken *open_netns(const char *name)
{
int nsfd;
char nspath[PATH_MAX];
int err;
+ struct nstoken *token;
+
+ token = malloc(sizeof(struct nstoken));
+ if (!ASSERT_OK_PTR(token, "malloc token"))
+ return NULL;
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ goto fail;
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
- if (nsfd < 0)
- return nsfd;
+ if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ goto fail;
- err = setns(nsfd, CLONE_NEWNET);
- close(nsfd);
+ err = setns_by_fd(nsfd);
+ if (!ASSERT_OK(err, "setns_by_fd"))
+ goto fail;
- return err;
+ return token;
+fail:
+ free(token);
+ return NULL;
}
-/**
- * setns_root() - Set network namespace to original (root) namespace
- *
- * Not expected to ever fail, so error not returned, but failure logged
- * and test marked as failed.
- */
-static void setns_root(void)
+static void close_netns(struct nstoken *token)
{
- ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+ ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+ free(token);
}
static int netns_setup_namespaces(const char *verb)
@@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
+ struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
SYS("ip link add veth_src type veth peer name veth_src_fwd");
SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+ SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS("ip link set veth_dst address " MAC_DST);
+
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
- if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
- goto fail;
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
if (result->ifindex_veth_src_fwd < 0)
@@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
SYS("ip addr add " IP4_SRC "/32 dev veth_src");
@@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
+ close_netns(nstoken);
+
/** setup in 'fwd' namespace */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
goto fail;
/* The fwd netns automatically gets a v6 LL address / routes, but also
@@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ close_netns(nstoken);
+
/** setup in 'dst' namespace */
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
SYS("ip addr add " IP4_DST "/32 dev veth_dst");
@@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
- veth_dst_fwd_addr);
+ SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ close_netns(nstoken);
- setns_root();
return 0;
fail:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
return -1;
}
static int netns_load_bpf(void)
{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- return -1;
-
SYS("tc qdisc add dev veth_src_fwd clsact");
SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
SRC_PROG_PIN_FILE);
@@ -335,42 +335,29 @@ static int netns_load_bpf(void)
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
CHK_PROG_PIN_FILE);
- setns_root();
- return -1;
-fail:
- setns_root();
- return -1;
-}
-
-static int netns_unload_bpf(void)
-{
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
- goto fail;
- SYS("tc qdisc delete dev veth_src_fwd clsact");
- SYS("tc qdisc delete dev veth_dst_fwd clsact");
-
- setns_root();
return 0;
fail:
- setns_root();
return -1;
}
-
static void test_tcp(int family, const char *addr, __u16 port)
{
int listen_fd = -1, accept_fd = -1, client_fd = -1;
char buf[] = "testing testing";
int n;
+ struct nstoken *nstoken;
- if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
return;
listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
if (!ASSERT_GE(listen_fd, 0, "listen"))
goto done;
- if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
@@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
ASSERT_EQ(n, sizeof(buf), "recv from server");
done:
- setns_root();
+ if (nstoken)
+ close_netns(nstoken);
if (listen_fd >= 0)
close(listen_fd);
if (accept_fd >= 0)
@@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
{
const char *ping = family == AF_INET6 ? "ping6" : "ping";
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
return 0;
fail:
return -1;
@@ -419,19 +407,37 @@ static void test_connectivity(void)
test_ping(AF_INET6, IP6_DST);
}
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh_fib *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh_fib__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
- return;
+ goto done;
- if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
- test_tc_neigh_fib__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
goto done;
/* bpf_fib_lookup() checks if forwarding is enabled */
- if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
goto done;
- err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
- if (!ASSERT_OK(err, "set ipv4.ip_forward"))
- goto done;
-
- err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
- if (!ASSERT_OK(err, "set ipv6.forwarding"))
- goto done;
- setns_root();
-
test_connectivity();
+
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh_fib__destroy(skel);
- netns_unload_bpf();
- setns_root();
- restore_proc();
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
{
- struct test_tc_neigh *skel;
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_neigh__open();
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_neigh__load(skel);
- if (!ASSERT_OK(err, "test_tc_neigh__load")) {
- test_tc_neigh__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_neigh__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
}
static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
{
+ struct nstoken *nstoken;
struct test_tc_peer *skel;
int err;
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
skel = test_tc_peer__open();
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
- return;
+ goto done;
skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
err = test_tc_peer__load(skel);
- if (!ASSERT_OK(err, "test_tc_peer__load")) {
- test_tc_peer__destroy(skel);
- return;
- }
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -549,41 +549,238 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (netns_load_bpf())
goto done;
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
test_connectivity();
done:
- bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
- bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
- test_tc_peer__destroy(skel);
- netns_unload_bpf();
- setns_root();
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
}
-void test_tc_redirect(void)
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS("ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
{
- struct netns_setup_result setup_result;
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
- root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
- if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd;
+ int ifindex;
+
+ /* Start a L3 TUN/TAP tunnel between the src and fwd namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return;
- if (netns_setup_namespaces("add"))
- goto done;
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
- if (netns_setup_links_and_routes(&setup_result))
- goto done;
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
- if (test__start_subtest("tc_redirect_peer"))
- test_tc_redirect_peer(&setup_result);
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh"))
- test_tc_redirect_neigh(&setup_result);
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
- if (test__start_subtest("tc_redirect_neigh_fib"))
- test_tc_redirect_neigh_fib(&setup_result);
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
-done:
- close(root_netns_fd);
- netns_setup_namespaces("delete");
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = get_ifindex("tun_fwd");
+ if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+ goto fail;
+
+ err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+ if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+ goto fail;
+
+ /* Load "tc_src_l3" on tun_fwd ingress to redirect packets towards dst,
+ * "tc_dst_l3" on veth_dst_fwd ingress to redirect packets back to tun_fwd,
+ * and "tc_chk" on veth_dst_fwd egress to drop non-redirected packets.
+ */
+ SYS("tc qdisc add dev tun_fwd clsact");
+ SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+ SRC_PROG_PIN_FILE);
+
+ SYS("tc qdisc add dev veth_dst_fwd clsact");
+ SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+ DST_PROG_PIN_FILE);
+ SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+ CHK_PROG_PIN_FILE);
+
+ /* Setup route and neigh tables */
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+ SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
}
+
+#define RUN_TEST(name) \
+ ({ \
+ struct netns_setup_result setup_result; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ RUN_TEST(tc_redirect_peer);
+ RUN_TEST(tc_redirect_peer_l3);
+ RUN_TEST(tc_redirect_neigh);
+ RUN_TEST(tc_redirect_neigh_fib);
+ return NULL;
+}
+
+void test_tc_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index ef264bced0e6..fe818cd5f010 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,17 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
SEC("classifier/chk_egress")
int tc_chk(struct __sk_buff *skb)
{
@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+ return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
char __license[] SEC("license") = "GPL";
--
2.30.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH bpf v5] selftests/bpf: Add test for l3 use of bpf_redirect_peer
2021-05-25 10:29 ` [PATCH bpf v5] " Jussi Maki
@ 2021-05-25 16:03 ` Daniel Borkmann
0 siblings, 0 replies; 20+ messages in thread
From: Daniel Borkmann @ 2021-05-25 16:03 UTC (permalink / raw)
To: Jussi Maki, bpf
On 5/25/21 12:29 PM, Jussi Maki wrote:
> Add a test case for using bpf_skb_change_head in combination with
> bpf_redirect_peer to redirect a packet from a L3 device to veth and back.
>
> The test uses a BPF program that adds L2 headers to the packet coming
> from a L3 device and then calls bpf_redirect_peer to redirect the packet
> to a veth device. The test fails as skb->mac_len is not set properly and
> thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify,
> causing tcp_v4_rcv to point the TCP header into the middle of the IP header.
>
> Signed-off-by: Jussi Maki <joamaki@gmail.com>
Applied, thanks!
[...]
> +void test_tc_redirect(void)
> +{
> + pthread_t test_thread;
> + int err;
> +
> + /* Run the tests in their own thread to isolate the namespace changes
> + * so they do not affect the environment of other tests.
> + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
> + */
> + err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
> + if (ASSERT_OK(err, "pthread_create"))
> + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
> +}
> +
Also fixed up the extra newline while applying.
> diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
^ permalink raw reply [flat|nested] 20+ messages in thread
end of thread, other threads:[~2021-05-25 16:03 UTC | newest]
Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-27 13:55 [PATCH bpf 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-04-27 13:55 ` [PATCH bpf 2/2] selftests/bpf: Add test for bpf_skb_change_head Jussi Maki
2021-04-27 21:41 ` Andrii Nakryiko
2021-04-28 10:39 ` Jussi Maki
2021-04-28 10:49 ` Daniel Borkmann
2021-04-28 13:39 ` Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 1/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-17 10:11 ` [PATCH bpf v2 2/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-18 14:23 ` [PATCH bpf v3 2/2] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-19 15:33 ` Daniel Borkmann
2021-05-19 15:47 ` [PATCH bpf v4 0/2] bpf: Fix l3 to l2 use of bpf_skb_change_head Jussi Maki
2021-05-19 15:47 ` [PATCH bpf v4 1/2] bpf: Set mac_len in bpf_skb_change_head Jussi Maki
2021-05-20 22:07 ` Daniel Borkmann
2021-05-25 10:22 ` [PATCH bpf v5] selftests/bpf: Add test for l3 use of bpf_redirect_peer Jussi Maki
2021-05-19 15:47 ` [PATCH bpf v4 2/2] " Jussi Maki
2021-05-25 10:29 ` [PATCH bpf v5] " Jussi Maki
2021-05-25 16:03 ` Daniel Borkmann
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.