[PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks

bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks
@ 2020-11-18  0:17 Stanislav Fomichev
  2020-11-18  0:17 ` [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C Stanislav Fomichev
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Stanislav Fomichev @ 2020-11-18  0:17 UTC (permalink / raw)
  To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev

This might be useful for the listener sockets to pre-populate
some options. Since those helpers require locked sockets,
I'm changing bind hooks to lock/unlock the sockets. This
should not cause any performance overhead because at this
point there shouldn't be any socket lock contention and the
locking/unlocking should be cheap.

Also, as part of the series, I convert test_sock_addr bpf
assembly into C (and preserve the narrow load tests) to
make it easier to extend with the bpf_setsockopt later on.

Stanislav Fomichev (3):
  selftests/bpf: rewrite test_sock_addr bind bpf into C
  bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt

 include/linux/bpf-cgroup.h                    |  12 +-
 net/core/filter.c                             |   4 +
 net/ipv4/af_inet.c                            |   2 +-
 net/ipv6/af_inet6.c                           |   2 +-
 .../testing/selftests/bpf/progs/bind4_prog.c  | 104 ++++++++++
 .../testing/selftests/bpf/progs/bind6_prog.c  | 121 +++++++++++
 tools/testing/selftests/bpf/test_sock_addr.c  | 196 ++----------------
 7 files changed, 249 insertions(+), 192 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bind4_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/bind6_prog.c

-- 
2.29.2.299.gdc1121823c-goog


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C
  2020-11-18  0:17 [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks Stanislav Fomichev
@ 2020-11-18  0:17 ` Stanislav Fomichev
  2020-12-02  0:26   ` Andrii Nakryiko
  2020-11-18  0:17 ` [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks Stanislav Fomichev
  2020-11-18  0:17 ` [PATCH bpf-next 3/3] selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt Stanislav Fomichev
  2 siblings, 1 reply; 15+ messages in thread
From: Stanislav Fomichev @ 2020-11-18  0:17 UTC (permalink / raw)
  To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev

I'm planning to extend it in the next patches. It's much easier to
work with C than BPF assembly.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../testing/selftests/bpf/progs/bind4_prog.c  |  73 +++++++
 .../testing/selftests/bpf/progs/bind6_prog.c  |  90 ++++++++
 tools/testing/selftests/bpf/test_sock_addr.c  | 196 ++----------------
 3 files changed, 175 insertions(+), 184 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bind4_prog.c
 create mode 100644 tools/testing/selftests/bpf/progs/bind6_prog.c

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
new file mode 100644
index 000000000000..ff3def2ee6f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV4_IP		0xc0a801feU /* 192.168.1.254 */
+#define SERV4_PORT		4040
+#define SERV4_REWRITE_IP	0x7f000001U /* 127.0.0.1 */
+#define SERV4_REWRITE_PORT	4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip4;
+	__u16 user_port;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+
+	if (ctx->user_ip4 != bpf_htonl(SERV4_IP) ||
+	    ctx->user_port != bpf_htons(SERV4_PORT))
+		return 0;
+
+	// u8 narrow loads:
+	user_ip4 = 0;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
+	user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+	if (ctx->user_ip4 != user_ip4)
+		return 0;
+
+	user_port = 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+	if (ctx->user_port != user_port)
+		return 0;
+
+	// u16 narrow loads:
+	user_ip4 = 0;
+	user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
+	user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+	if (ctx->user_ip4 != user_ip4)
+		return 0;
+
+	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
+	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
new file mode 100644
index 000000000000..97686baaae65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV6_IP_0		0xfaceb00c /* face:b00c:1234:5678::abcd */
+#define SERV6_IP_1		0x12345678
+#define SERV6_IP_2		0x00000000
+#define SERV6_IP_3		0x0000abcd
+#define SERV6_PORT		6060
+#define SERV6_REWRITE_IP_0	0x00000000
+#define SERV6_REWRITE_IP_1	0x00000000
+#define SERV6_REWRITE_IP_2	0x00000000
+#define SERV6_REWRITE_IP_3	0x00000001
+#define SERV6_REWRITE_PORT	6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/bind6")
+int bind_v6_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip6;
+	__u16 user_port;
+	int i;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET6)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+		return 0;
+
+	if (ctx->user_ip6[0] != bpf_htonl(SERV6_IP_0) ||
+	    ctx->user_ip6[1] != bpf_htonl(SERV6_IP_1) ||
+	    ctx->user_ip6[2] != bpf_htonl(SERV6_IP_2) ||
+	    ctx->user_ip6[3] != bpf_htonl(SERV6_IP_3) ||
+	    ctx->user_port != bpf_htons(SERV6_PORT))
+		return 0;
+
+	// u8 narrow loads:
+	for (i = 0; i < 4; i++) {
+		user_ip6 = 0;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
+		user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+		if (ctx->user_ip6[i] != user_ip6)
+			return 0;
+	}
+
+	user_port = 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+	user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+	if (ctx->user_port != user_port)
+		return 0;
+
+	// u16 narrow loads:
+	for (i = 0; i < 4; i++) {
+		user_ip6 = 0;
+		user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
+		user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+		if (ctx->user_ip6[i] != user_ip6)
+			return 0;
+	}
+
+	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
+	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
+	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
+	ctx->user_ip6[3] = bpf_htonl(SERV6_REWRITE_IP_3);
+	ctx->user_port = bpf_htons(SERV6_REWRITE_PORT);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index b8c72c1d9cf7..dcb83ab02919 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -31,6 +31,8 @@
 #define CONNECT6_PROG_PATH	"./connect6_prog.o"
 #define SENDMSG4_PROG_PATH	"./sendmsg4_prog.o"
 #define SENDMSG6_PROG_PATH	"./sendmsg6_prog.o"
+#define BIND4_PROG_PATH		"./bind4_prog.o"
+#define BIND6_PROG_PATH		"./bind6_prog.o"
 
 #define SERV4_IP		"192.168.1.254"
 #define SERV4_REWRITE_IP	"127.0.0.1"
@@ -660,190 +662,6 @@ static int load_insns(const struct sock_addr_test *test,
 	return ret;
 }
 
-/* [1] These testing programs try to read different context fields, including
- * narrow loads of different sizes from user_ip4 and user_ip6, and write to
- * those allowed to be overridden.
- *
- * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
- * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
- * in such cases since it accepts only _signed_ 32bit integer as IMM
- * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
- * to count jumps properly.
- */
-
-static int bind4_prog_load(const struct sock_addr_test *test)
-{
-	union {
-		uint8_t u4_addr8[4];
-		uint16_t u4_addr16[2];
-		uint32_t u4_addr32;
-	} ip4, port;
-	struct sockaddr_in addr4_rw;
-
-	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
-		log_err("Invalid IPv4: %s", SERV4_IP);
-		return -1;
-	}
-
-	port.u4_addr32 = htons(SERV4_PORT);
-
-	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
-			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
-		return -1;
-
-	/* See [1]. */
-	struct bpf_insn insns[] = {
-		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
-		/* if (sk.family == AF_INET && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
-
-		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, type)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
-		BPF_JMP_A(1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
-
-		/*     1st_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
-
-		/*     2nd_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
-
-		/*     3rd_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
-
-		/*     4th_byte_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 3),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
-
-		/*     1st_half_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
-
-		/*     2nd_half_of_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
-
-		/*     whole_user_ip4 == expected && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
-
-		/*     1st_byte_of_user_port == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
-
-		/*     1st_half_of_user_port == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
-
-		/*     user_port == expected) { */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
-
-		/*      user_ip4 = addr4_rw.sin_addr */
-		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_ip4)),
-
-		/*      user_port = addr4_rw.sin_port */
-		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_port)),
-		/* } */
-
-		/* return 1 */
-		BPF_MOV64_IMM(BPF_REG_0, 1),
-		BPF_EXIT_INSN(),
-	};
-
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
-static int bind6_prog_load(const struct sock_addr_test *test)
-{
-	struct sockaddr_in6 addr6_rw;
-	struct in6_addr ip6;
-
-	if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
-		log_err("Invalid IPv6: %s", SERV6_IP);
-		return -1;
-	}
-
-	if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
-			(struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
-		return -1;
-
-	/* See [1]. */
-	struct bpf_insn insns[] = {
-		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
-		/* if (sk.family == AF_INET6 && */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
-		/*            5th_byte_of_user_ip6 == expected && */
-		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[1])),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
-
-		/*            3rd_half_of_user_ip6 == expected && */
-		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[1])),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
-
-		/*            last_word_of_user_ip6 == expected) { */
-		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
-			    offsetof(struct bpf_sock_addr, user_ip6[3])),
-		BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]),  /* See [2]. */
-		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
-
-
-#define STORE_IPV6_WORD(N)						       \
-		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]),     \
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
-			    offsetof(struct bpf_sock_addr, user_ip6[N]))
-
-		/*      user_ip6 = addr6_rw.sin6_addr */
-		STORE_IPV6_WORD(0),
-		STORE_IPV6_WORD(1),
-		STORE_IPV6_WORD(2),
-		STORE_IPV6_WORD(3),
-
-		/*      user_port = addr6_rw.sin6_port */
-		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
-		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
-			    offsetof(struct bpf_sock_addr, user_port)),
-
-		/* } */
-
-		/* return 1 */
-		BPF_MOV64_IMM(BPF_REG_0, 1),
-		BPF_EXIT_INSN(),
-	};
-
-	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
-}
-
 static int load_path(const struct sock_addr_test *test, const char *path)
 {
 	struct bpf_prog_load_attr attr;
@@ -865,6 +683,16 @@ static int load_path(const struct sock_addr_test *test, const char *path)
 	return prog_fd;
 }
 
+static int bind4_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, BIND4_PROG_PATH);
+}
+
+static int bind6_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, BIND6_PROG_PATH);
+}
+
 static int connect4_prog_load(const struct sock_addr_test *test)
 {
 	return load_path(test, CONNECT4_PROG_PATH);
-- 
2.29.2.299.gdc1121823c-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-18  0:17 [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks Stanislav Fomichev
  2020-11-18  0:17 ` [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C Stanislav Fomichev
@ 2020-11-18  0:17 ` Stanislav Fomichev
  2020-11-18  4:05   ` Alexei Starovoitov
  2020-12-01 19:21   ` Andrey Ignatov
  2020-11-18  0:17 ` [PATCH bpf-next 3/3] selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt Stanislav Fomichev
  2 siblings, 2 replies; 15+ messages in thread
From: Stanislav Fomichev @ 2020-11-18  0:17 UTC (permalink / raw)
  To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev

I have to now lock/unlock socket for the bind hook execution.
That shouldn't cause any overhead because the socket is unbound
and shouldn't receive any traffic.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup.h | 12 ++++++------
 net/core/filter.c          |  4 ++++
 net/ipv4/af_inet.c         |  2 +-
 net/ipv6/af_inet6.c        |  2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ed71bd1a0825..72e69a0e1e8c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
+#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
 
-#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
+#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
 					    sk->sk_prot->pre_connect)
@@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
diff --git a/net/core/filter.c b/net/core/filter.c
index 2ca5eecebacf..21d91dcf0260 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6995,6 +6995,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_storage_delete_proto;
 	case BPF_FUNC_setsockopt:
 		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
 			return &bpf_sock_addr_setsockopt_proto;
@@ -7003,6 +7005,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		}
 	case BPF_FUNC_getsockopt:
 		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
 			return &bpf_sock_addr_getsockopt_proto;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b7260c8cef2e..b94fa8eb831b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -450,7 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
 	if (err)
 		return err;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e648fbebb167..a7e3d170af51 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
 	if (err)
 		return err;
 
-- 
2.29.2.299.gdc1121823c-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH bpf-next 3/3] selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt
  2020-11-18  0:17 [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks Stanislav Fomichev
  2020-11-18  0:17 ` [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C Stanislav Fomichev
  2020-11-18  0:17 ` [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks Stanislav Fomichev
@ 2020-11-18  0:17 ` Stanislav Fomichev
  2020-12-02  0:22   ` Andrii Nakryiko
  2 siblings, 1 reply; 15+ messages in thread
From: Stanislav Fomichev @ 2020-11-18  0:17 UTC (permalink / raw)
  To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev

To make sure it doesn't trigger sock_owned_by_me splat.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../testing/selftests/bpf/progs/bind4_prog.c  | 31 +++++++++++++++++++
 .../testing/selftests/bpf/progs/bind6_prog.c  | 31 +++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index ff3def2ee6f9..9d1d8d642edc 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -19,8 +19,35 @@
 #define SERV4_REWRITE_IP	0x7f000001U /* 127.0.0.1 */
 #define SERV4_REWRITE_PORT	4444
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
 int _version SEC("version") = 1;
 
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/bind4")
 int bind_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -64,6 +91,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
 	if (ctx->user_ip4 != user_ip4)
 		return 0;
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
 	ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
 
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 97686baaae65..a443927dae53 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -25,8 +25,35 @@
 #define SERV6_REWRITE_IP_3	0x00000001
 #define SERV6_REWRITE_PORT	6666
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
 int _version SEC("version") = 1;
 
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/bind6")
 int bind_v6_prog(struct bpf_sock_addr *ctx)
 {
@@ -78,6 +105,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
 			return 0;
 	}
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
 	ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
 	ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
-- 
2.29.2.299.gdc1121823c-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-18  0:17 ` [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks Stanislav Fomichev
@ 2020-11-18  4:05   ` Alexei Starovoitov
  2020-11-30  1:05     ` Andrey Ignatov
  2020-12-01 19:21   ` Andrey Ignatov
  1 sibling, 1 reply; 15+ messages in thread
From: Alexei Starovoitov @ 2020-11-18  4:05 UTC (permalink / raw)
  To: Stanislav Fomichev, Andrey Ignatov
  Cc: Network Development, bpf, David S. Miller, Alexei Starovoitov,
	Daniel Borkmann

On Tue, Nov 17, 2020 at 4:17 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> I have to now lock/unlock socket for the bind hook execution.
> That shouldn't cause any overhead because the socket is unbound
> and shouldn't receive any traffic.
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  include/linux/bpf-cgroup.h | 12 ++++++------
>  net/core/filter.c          |  4 ++++
>  net/ipv4/af_inet.c         |  2 +-
>  net/ipv6/af_inet6.c        |  2 +-
>  4 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index ed71bd1a0825..72e69a0e1e8c 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>         __ret;                                                                 \
>  })
>
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)                             \
> -       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
> +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                        \
> +       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
>
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)                             \
> -       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
> +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                        \
> +       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
>
>  #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
>                                             sk->sk_prot->pre_connect)
> @@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>  #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2ca5eecebacf..21d91dcf0260 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6995,6 +6995,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_sk_storage_delete_proto;
>         case BPF_FUNC_setsockopt:
>                 switch (prog->expected_attach_type) {
> +               case BPF_CGROUP_INET4_BIND:
> +               case BPF_CGROUP_INET6_BIND:
>                 case BPF_CGROUP_INET4_CONNECT:
>                 case BPF_CGROUP_INET6_CONNECT:
>                         return &bpf_sock_addr_setsockopt_proto;
> @@ -7003,6 +7005,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 }
>         case BPF_FUNC_getsockopt:
>                 switch (prog->expected_attach_type) {
> +               case BPF_CGROUP_INET4_BIND:
> +               case BPF_CGROUP_INET6_BIND:
>                 case BPF_CGROUP_INET4_CONNECT:
>                 case BPF_CGROUP_INET6_CONNECT:
>                         return &bpf_sock_addr_getsockopt_proto;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index b7260c8cef2e..b94fa8eb831b 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -450,7 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>         /* BPF prog is run before any checks are done so that if the prog
>          * changes context in a wrong way it will be caught.
>          */
> -       err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
> +       err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);

I think it is ok, but I need to go through the locking paths more.
Andrey,
please take a look as well.

>         if (err)
>                 return err;
>
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index e648fbebb167..a7e3d170af51 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>         /* BPF prog is run before any checks are done so that if the prog
>          * changes context in a wrong way it will be caught.
>          */
> -       err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
> +       err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
>         if (err)
>                 return err;
>
> --
> 2.29.2.299.gdc1121823c-goog
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-18  4:05   ` Alexei Starovoitov
@ 2020-11-30  1:05     ` Andrey Ignatov
  2020-11-30 16:38       ` sdf
  0 siblings, 1 reply; 15+ messages in thread
From: Andrey Ignatov @ 2020-11-30  1:05 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Stanislav Fomichev, Network Development, bpf, David S. Miller,
	Alexei Starovoitov, Daniel Borkmann

Alexei Starovoitov <alexei.starovoitov@gmail.com> [Tue, 2020-11-17 20:05 -0800]:
> On Tue, Nov 17, 2020 at 4:17 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > I have to now lock/unlock socket for the bind hook execution.
> > That shouldn't cause any overhead because the socket is unbound
> > and shouldn't receive any traffic.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  include/linux/bpf-cgroup.h | 12 ++++++------
> >  net/core/filter.c          |  4 ++++
> >  net/ipv4/af_inet.c         |  2 +-
> >  net/ipv6/af_inet6.c        |  2 +-
> >  4 files changed, 12 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> > index ed71bd1a0825..72e69a0e1e8c 100644
> > --- a/include/linux/bpf-cgroup.h
> > +++ b/include/linux/bpf-cgroup.h
> > @@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
> >         __ret;                                                                 \
> >  })
> >
> > -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)                             \
> > -       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
> > +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                        \
> > +       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
> >
> > -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)                             \
> > -       BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
> > +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                        \
> > +       BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
> >
> >  #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
> >                                             sk->sk_prot->pre_connect)
> > @@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
> >  #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
> >  #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
> >  #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
> > -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
> > -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
> > +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
> >  #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
> >  #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
> >  #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 2ca5eecebacf..21d91dcf0260 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -6995,6 +6995,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >                 return &bpf_sk_storage_delete_proto;
> >         case BPF_FUNC_setsockopt:
> >                 switch (prog->expected_attach_type) {
> > +               case BPF_CGROUP_INET4_BIND:
> > +               case BPF_CGROUP_INET6_BIND:
> >                 case BPF_CGROUP_INET4_CONNECT:
> >                 case BPF_CGROUP_INET6_CONNECT:
> >                         return &bpf_sock_addr_setsockopt_proto;
> > @@ -7003,6 +7005,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >                 }
> >         case BPF_FUNC_getsockopt:
> >                 switch (prog->expected_attach_type) {
> > +               case BPF_CGROUP_INET4_BIND:
> > +               case BPF_CGROUP_INET6_BIND:
> >                 case BPF_CGROUP_INET4_CONNECT:
> >                 case BPF_CGROUP_INET6_CONNECT:
> >                         return &bpf_sock_addr_getsockopt_proto;
> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > index b7260c8cef2e..b94fa8eb831b 100644
> > --- a/net/ipv4/af_inet.c
> > +++ b/net/ipv4/af_inet.c
> > @@ -450,7 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> >         /* BPF prog is run before any checks are done so that if the prog
> >          * changes context in a wrong way it will be caught.
> >          */
> > -       err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
> > +       err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
> 
> I think it is ok, but I need to go through the locking paths more.
> Andrey,
> please take a look as well.

Sorry for the delay, I was offline for the last two weeks.

From the correctness perspective it looks fine to me.

From the performance perspective I can think of one relevant scenario.
A quite common use-case in applications is to call bind(2) not before
listen(2) but before connect(2) on client sockets, so that the connection
can be set up from a specific source IP and, optionally, port.

Binding to both IP and port case is not interesting since it's already
slow due to get_port().

But some applications do care about connection setup performance and at
the same time need to set the source IP only (no port). In this case they
use the IP_BIND_ADDRESS_NO_PORT socket option, which makes bind(2) fast
(we've discussed it with Stanislav earlier in [0]).

I can imagine a pathological case where an application sets up tons of
connections with bind(2) before connect(2) on sockets with
IP_BIND_ADDRESS_NO_PORT enabled (which by itself requires setsockopt(2),
i.e. a socket lock/unlock), and where another lock/unlock to run the
bind hook may add some overhead. I do not know how critical that
overhead would be, or whether it's worth benchmarking (maybe this is too
much paranoia).

[0] https://lore.kernel.org/bpf/20200505182010.GB55644@rdna-mbp/

> >         if (err)
> >                 return err;
> >
> > diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> > index e648fbebb167..a7e3d170af51 100644
> > --- a/net/ipv6/af_inet6.c
> > +++ b/net/ipv6/af_inet6.c
> > @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> >         /* BPF prog is run before any checks are done so that if the prog
> >          * changes context in a wrong way it will be caught.
> >          */
> > -       err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
> > +       err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
> >         if (err)
> >                 return err;
> >
> > --
> > 2.29.2.299.gdc1121823c-goog
> >

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-30  1:05     ` Andrey Ignatov
@ 2020-11-30 16:38       ` sdf
  2020-11-30 23:02         ` Andrey Ignatov
  0 siblings, 1 reply; 15+ messages in thread
From: sdf @ 2020-11-30 16:38 UTC (permalink / raw)
  To: Andrey Ignatov
  Cc: Alexei Starovoitov, Network Development, bpf, David S. Miller,
	Alexei Starovoitov, Daniel Borkmann

On 11/29, Andrey Ignatov wrote:
> Alexei Starovoitov <alexei.starovoitov@gmail.com> [Tue, 2020-11-17 20:05  
> -0800]:
> > On Tue, Nov 17, 2020 at 4:17 PM Stanislav Fomichev <sdf@google.com>  
> wrote:
[..]
> >
> > I think it is ok, but I need to go through the locking paths more.
> > Andrey,
> > please take a look as well.

> Sorry for delay, I was offline for the last two weeks.
No worries, I was OOO myself last week, thanks for the feedback!

>  From the correctness perspective it looks fine to me.

>  From the performance perspective I can think of one relevant scenario.
> Quite common use-case in applications is to use bind(2) not before
> listen(2) but before connect(2) for client sockets so that connection
> can be set up from specific source IP and, optionally, port.

> Binding to both IP and port case is not interesting since it's already
> slow due to get_port().

> But some applications do care about connection setup performance and at
> the same time need to set source IP only (no port). In this case they
> use IP_BIND_ADDRESS_NO_PORT socket option, what makes bind(2) fast
> (we've discussed it with Stanislav earlier in [0]).

> I can imagine some pathological case when an application sets up tons of
> connections with bind(2) before connect(2) for sockets with
> IP_BIND_ADDRESS_NO_PORT enabled (that by itself requires setsockopt(2)
> though, i.e. socket lock/unlock) and that another lock/unlock to run
> bind hook may add some overhead. Though I do not know how critical that
> overhead may be and whether it's worth to benchmark or not (maybe too
> much paranoia).

> [0] https://lore.kernel.org/bpf/20200505182010.GB55644@rdna-mbp/
Even in case of IP_BIND_ADDRESS_NO_PORT, inet[6]_bind() does
lock_sock down the line, so it's not like we are switching
a lockless path to the one with the lock, right?

And in this case, similar to listen, the socket is still uncontended and
owned by the userspace. So that extra lock/unlock should be cheap
enough to be ignored (spin_lock_bh on the warm cache line).

Am I missing something?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-30 16:38       ` sdf
@ 2020-11-30 23:02         ` Andrey Ignatov
  2020-12-01 18:43           ` sdf
  0 siblings, 1 reply; 15+ messages in thread
From: Andrey Ignatov @ 2020-11-30 23:02 UTC (permalink / raw)
  To: sdf
  Cc: Alexei Starovoitov, Network Development, bpf, David S. Miller,
	Alexei Starovoitov, Daniel Borkmann

sdf@google.com <sdf@google.com> [Mon, 2020-11-30 08:38 -0800]:
> On 11/29, Andrey Ignatov wrote:
> > Alexei Starovoitov <alexei.starovoitov@gmail.com> [Tue, 2020-11-17 20:05
> > -0800]:
> > > On Tue, Nov 17, 2020 at 4:17 PM Stanislav Fomichev <sdf@google.com>
> > wrote:
> [..]
> > >
> > > I think it is ok, but I need to go through the locking paths more.
> > > Andrey,
> > > please take a look as well.
> 
> > Sorry for delay, I was offline for the last two weeks.
> No worries, I was OOO myself last week, thanks for the feedback!
> 
> >  From the correctness perspective it looks fine to me.
> 
> >  From the performance perspective I can think of one relevant scenario.
> > Quite common use-case in applications is to use bind(2) not before
> > listen(2) but before connect(2) for client sockets so that connection
> > can be set up from specific source IP and, optionally, port.
> 
> > Binding to both IP and port case is not interesting since it's already
> > slow due to get_port().
> 
> > But some applications do care about connection setup performance and at
> > the same time need to set source IP only (no port). In this case they
> > use IP_BIND_ADDRESS_NO_PORT socket option, what makes bind(2) fast
> > (we've discussed it with Stanislav earlier in [0]).
> 
> > I can imagine some pathological case when an application sets up tons of
> > connections with bind(2) before connect(2) for sockets with
> > IP_BIND_ADDRESS_NO_PORT enabled (that by itself requires setsockopt(2)
> > though, i.e. socket lock/unlock) and that another lock/unlock to run
> > bind hook may add some overhead. Though I do not know how critical that
> > overhead may be and whether it's worth to benchmark or not (maybe too
> > much paranoia).
> 
> > [0] https://lore.kernel.org/bpf/20200505182010.GB55644@rdna-mbp/
> Even in case of IP_BIND_ADDRESS_NO_PORT, inet[6]_bind() does
> lock_sock down the line, so it's not like we are switching
> a lockless path to the one with the lock, right?

Right, I understand that it's going from one lock/unlock to two (not
from zero to one), that's what I meant by "another". My point was about
this one more lock.

> And in this case, similar to listen, the socket is still uncontended and
> owned by the userspace. So that extra lock/unlock should be cheap
> enough to be ignored (spin_lock_bh on the warm cache line).
> 
> Am I missing something?

As I mentioned, it may come up only in a "pathological case", which is
probably fine to ignore, i.e. I'd rather agree with "cheap enough to be
ignored" and a benchmark would likely confirm it; I just couldn't say that
for sure w/o numbers, so I brought this point up.

Given that we both agree that it should be fine to ignore this +1 lock,
IMO it should be good to go unless someone else has objections.

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-30 23:02         ` Andrey Ignatov
@ 2020-12-01 18:43           ` sdf
  2020-12-01 19:22             ` Andrey Ignatov
  0 siblings, 1 reply; 15+ messages in thread
From: sdf @ 2020-12-01 18:43 UTC (permalink / raw)
  To: Andrey Ignatov
  Cc: Alexei Starovoitov, Network Development, bpf, David S. Miller,
	Alexei Starovoitov, Daniel Borkmann

On 11/30, Andrey Ignatov wrote:
> sdf@google.com <sdf@google.com> [Mon, 2020-11-30 08:38 -0800]:
> > On 11/29, Andrey Ignatov wrote:
> > > Alexei Starovoitov <alexei.starovoitov@gmail.com> [Tue, 2020-11-17  
> 20:05
> > > -0800]:
> > > > On Tue, Nov 17, 2020 at 4:17 PM Stanislav Fomichev <sdf@google.com>
> > > wrote:
> > [..]
> > > >
> > > > I think it is ok, but I need to go through the locking paths more.
> > > > Andrey,
> > > > please take a look as well.
> >
> > > Sorry for delay, I was offline for the last two weeks.
> > No worries, I was OOO myself last week, thanks for the feedback!
> >
> > >  From the correctness perspective it looks fine to me.
> >
> > >  From the performance perspective I can think of one relevant  
> scenario.
> > > Quite common use-case in applications is to use bind(2) not before
> > > listen(2) but before connect(2) for client sockets so that connection
> > > can be set up from specific source IP and, optionally, port.
> >
> > > Binding to both IP and port case is not interesting since it's already
> > > slow due to get_port().
> >
> > > But some applications do care about connection setup performance and  
> at
> > > the same time need to set source IP only (no port). In this case they
> > > use IP_BIND_ADDRESS_NO_PORT socket option, what makes bind(2) fast
> > > (we've discussed it with Stanislav earlier in [0]).
> >
> > > I can imagine some pathological case when an application sets up tons  
> of
> > > connections with bind(2) before connect(2) for sockets with
> > > IP_BIND_ADDRESS_NO_PORT enabled (that by itself requires setsockopt(2)
> > > though, i.e. socket lock/unlock) and that another lock/unlock to run
> > > bind hook may add some overhead. Though I do not know how critical  
> that
> > > overhead may be and whether it's worth to benchmark or not (maybe too
> > > much paranoia).
> >
> > > [0] https://lore.kernel.org/bpf/20200505182010.GB55644@rdna-mbp/
> > Even in case of IP_BIND_ADDRESS_NO_PORT, inet[6]_bind() does
> > lock_sock down the line, so it's not like we are switching
> > a lockless path to the one with the lock, right?

> Right, I understand that it's going from one lock/unlock to two (not
> from zero to one), that's what I meant by "another". My point was about
> this one more lock.

> > And in this case, similar to listen, the socket is still uncontended and
> > owned by the userspace. So that extra lock/unlock should be cheap
> > enough to be ignored (spin_lock_bh on the warm cache line).
> >
> > Am I missing something?

> As I mentioned it may come up only in "pathological case" what is
> probably fine to ignore, i.e. I'd rather agree with "cheap enough to be
> ignored" and benchmark would likely confirm it, I just couldn't say that
> for sure w/o numbers so brought this point.

> Given that we both agree that it should be fine to ignore this +1 lock,
> IMO it should be good to go unless someone else has objections.
Thanks, agreed. Do you mind giving it an acked-by so it gets some
attention in the patchwork? ;-)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-11-18  0:17 ` [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks Stanislav Fomichev
  2020-11-18  4:05   ` Alexei Starovoitov
@ 2020-12-01 19:21   ` Andrey Ignatov
  1 sibling, 0 replies; 15+ messages in thread
From: Andrey Ignatov @ 2020-12-01 19:21 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, bpf, davem, ast, daniel

Stanislav Fomichev <sdf@google.com> [Tue, 2020-11-17 16:18 -0800]:
> I have to now lock/unlock socket for the bind hook execution.
> That shouldn't cause any overhead because the socket is unbound
> and shouldn't receive any traffic.
> 
> Signed-off-by: Stanislav Fomichev <sdf@google.com>

Acked-by: Andrey Ignatov <rdna@fb.com>

> ---
>  include/linux/bpf-cgroup.h | 12 ++++++------
>  net/core/filter.c          |  4 ++++
>  net/ipv4/af_inet.c         |  2 +-
>  net/ipv6/af_inet6.c        |  2 +-
>  4 files changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index ed71bd1a0825..72e69a0e1e8c 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  	__ret;								       \
>  })
>  
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)			       \
> -	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
> +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
>  
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)			       \
> -	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
> +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
> +	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
>  
>  #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
>  					    sk->sk_prot->pre_connect)
> @@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>  #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2ca5eecebacf..21d91dcf0260 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6995,6 +6995,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  		return &bpf_sk_storage_delete_proto;
>  	case BPF_FUNC_setsockopt:
>  		switch (prog->expected_attach_type) {
> +		case BPF_CGROUP_INET4_BIND:
> +		case BPF_CGROUP_INET6_BIND:
>  		case BPF_CGROUP_INET4_CONNECT:
>  		case BPF_CGROUP_INET6_CONNECT:
>  			return &bpf_sock_addr_setsockopt_proto;
> @@ -7003,6 +7005,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  		}
>  	case BPF_FUNC_getsockopt:
>  		switch (prog->expected_attach_type) {
> +		case BPF_CGROUP_INET4_BIND:
> +		case BPF_CGROUP_INET6_BIND:
>  		case BPF_CGROUP_INET4_CONNECT:
>  		case BPF_CGROUP_INET6_CONNECT:
>  			return &bpf_sock_addr_getsockopt_proto;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index b7260c8cef2e..b94fa8eb831b 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -450,7 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>  	/* BPF prog is run before any checks are done so that if the prog
>  	 * changes context in a wrong way it will be caught.
>  	 */
> -	err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
> +	err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
>  	if (err)
>  		return err;
>  
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index e648fbebb167..a7e3d170af51 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>  	/* BPF prog is run before any checks are done so that if the prog
>  	 * changes context in a wrong way it will be caught.
>  	 */
> -	err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
> +	err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
>  	if (err)
>  		return err;
>  
> -- 
> 2.29.2.299.gdc1121823c-goog
> 

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-12-01 18:43           ` sdf
@ 2020-12-01 19:22             ` Andrey Ignatov
  0 siblings, 0 replies; 15+ messages in thread
From: Andrey Ignatov @ 2020-12-01 19:22 UTC (permalink / raw)
  To: sdf
  Cc: Alexei Starovoitov, Network Development, bpf, David S. Miller,
	Alexei Starovoitov, Daniel Borkmann

sdf@google.com <sdf@google.com> [Tue, 2020-12-01 10:43 -0800]:
> On 11/30, Andrey Ignatov wrote:
> > sdf@google.com <sdf@google.com> [Mon, 2020-11-30 08:38 -0800]:
> > > On 11/29, Andrey Ignatov wrote:
> > > > Alexei Starovoitov <alexei.starovoitov@gmail.com> [Tue, 2020-11-17
> > 20:05
> > > > -0800]:
> > > > > On Tue, Nov 17, 2020 at 4:17 PM Stanislav Fomichev <sdf@google.com>
> > > > wrote:
> > > [..]
> > > > >
> > > > > I think it is ok, but I need to go through the locking paths more.
> > > > > Andrey,
> > > > > please take a look as well.
> > >
> > > > Sorry for delay, I was offline for the last two weeks.
> > > No worries, I was OOO myself last week, thanks for the feedback!
> > >
> > > >  From the correctness perspective it looks fine to me.
> > >
> > > >  From the performance perspective I can think of one relevant
> > scenario.
> > > > Quite common use-case in applications is to use bind(2) not before
> > > > listen(2) but before connect(2) for client sockets so that connection
> > > > can be set up from specific source IP and, optionally, port.
> > >
> > > > Binding to both IP and port case is not interesting since it's already
> > > > slow due to get_port().
> > >
> > > > But some applications do care about connection setup performance and
> > at
> > > > the same time need to set source IP only (no port). In this case they
> > > > use IP_BIND_ADDRESS_NO_PORT socket option, what makes bind(2) fast
> > > > (we've discussed it with Stanislav earlier in [0]).
> > >
> > > > I can imagine some pathological case when an application sets up
> > tons of
> > > > connections with bind(2) before connect(2) for sockets with
> > > > IP_BIND_ADDRESS_NO_PORT enabled (that by itself requires setsockopt(2)
> > > > though, i.e. socket lock/unlock) and that another lock/unlock to run
> > > > bind hook may add some overhead. Though I do not know how critical
> > that
> > > > overhead may be and whether it's worth to benchmark or not (maybe too
> > > > much paranoia).
> > >
> > > > [0] https://lore.kernel.org/bpf/20200505182010.GB55644@rdna-mbp/
> > > Even in case of IP_BIND_ADDRESS_NO_PORT, inet[6]_bind() does
> > > lock_sock down the line, so it's not like we are switching
> > > a lockless path to the one with the lock, right?
> 
> > Right, I understand that it's going from one lock/unlock to two (not
> > from zero to one), that's what I meant by "another". My point was about
> > this one more lock.
> 
> > > And in this case, similar to listen, the socket is still uncontended and
> > > owned by the userspace. So that extra lock/unlock should be cheap
> > > enough to be ignored (spin_lock_bh on the warm cache line).
> > >
> > > Am I missing something?
> 
> > As I mentioned it may come up only in "pathological case" what is
> > probably fine to ignore, i.e. I'd rather agree with "cheap enough to be
> > ignored" and benchmark would likely confirm it, I just couldn't say that
> > for sure w/o numbers so brought this point.
> 
> > Given that we both agree that it should be fine to ignore this +1 lock,
> > IMO it should be good to go unless someone else has objections.
> Thanks, agreed. Do you mind giving it an acked-by so it gets some
> attention in the patchwork? ;-)

Sure. Acked this one.

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 3/3] selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt
  2020-11-18  0:17 ` [PATCH bpf-next 3/3] selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt Stanislav Fomichev
@ 2020-12-02  0:22   ` Andrii Nakryiko
  0 siblings, 0 replies; 15+ messages in thread
From: Andrii Nakryiko @ 2020-12-02  0:22 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Networking, bpf, David S. Miller, Alexei Starovoitov, Daniel Borkmann

On Tue, Nov 17, 2020 at 4:20 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> To make sure it doesn't trigger sock_owned_by_me splat.
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  .../testing/selftests/bpf/progs/bind4_prog.c  | 31 +++++++++++++++++++
>  .../testing/selftests/bpf/progs/bind6_prog.c  | 31 +++++++++++++++++++
>  2 files changed, 62 insertions(+)
>
> diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
> index ff3def2ee6f9..9d1d8d642edc 100644
> --- a/tools/testing/selftests/bpf/progs/bind4_prog.c
> +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
> @@ -19,8 +19,35 @@
>  #define SERV4_REWRITE_IP       0x7f000001U /* 127.0.0.1 */
>  #define SERV4_REWRITE_PORT     4444
>
> +#ifndef IFNAMSIZ
> +#define IFNAMSIZ 16
> +#endif
> +
>  int _version SEC("version") = 1;

nit: would be nice to drop this anachronism

>

[...]

> diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
> index 97686baaae65..a443927dae53 100644
> --- a/tools/testing/selftests/bpf/progs/bind6_prog.c
> +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
> @@ -25,8 +25,35 @@
>  #define SERV6_REWRITE_IP_3     0x00000001
>  #define SERV6_REWRITE_PORT     6666
>
> +#ifndef IFNAMSIZ
> +#define IFNAMSIZ 16
> +#endif
> +
>  int _version SEC("version") = 1;

nit: same

>

[...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C
  2020-11-18  0:17 ` [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C Stanislav Fomichev
@ 2020-12-02  0:26   ` Andrii Nakryiko
  2020-12-02 17:04     ` sdf
  0 siblings, 1 reply; 15+ messages in thread
From: Andrii Nakryiko @ 2020-12-02  0:26 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Networking, bpf, David S. Miller, Alexei Starovoitov, Daniel Borkmann

On Tue, Nov 17, 2020 at 4:20 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> I'm planning to extend it in the next patches. It's much easier to
> work with C than BPF assembly.
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---

With nits below:

Acked-by: Andrii Nakryiko <andrii@kernel.org>


>  .../testing/selftests/bpf/progs/bind4_prog.c  |  73 +++++++
>  .../testing/selftests/bpf/progs/bind6_prog.c  |  90 ++++++++
>  tools/testing/selftests/bpf/test_sock_addr.c  | 196 ++----------------
>  3 files changed, 175 insertions(+), 184 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/progs/bind4_prog.c
>  create mode 100644 tools/testing/selftests/bpf/progs/bind6_prog.c
>
> diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
> new file mode 100644
> index 000000000000..ff3def2ee6f9
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
> @@ -0,0 +1,73 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <string.h>
> +
> +#include <linux/stddef.h>
> +#include <linux/bpf.h>
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <sys/socket.h>
> +#include <netinet/tcp.h>
> +#include <linux/if.h>
> +#include <errno.h>
> +
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +#define SERV4_IP               0xc0a801feU /* 192.168.1.254 */
> +#define SERV4_PORT             4040
> +#define SERV4_REWRITE_IP       0x7f000001U /* 127.0.0.1 */
> +#define SERV4_REWRITE_PORT     4444
> +
> +int _version SEC("version") = 1;

not needed, let's not add it to a new test prog

> +

[...]

> diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
> new file mode 100644
> index 000000000000..97686baaae65
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
> @@ -0,0 +1,90 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <string.h>
> +
> +#include <linux/stddef.h>
> +#include <linux/bpf.h>
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <sys/socket.h>
> +#include <netinet/tcp.h>
> +#include <linux/if.h>
> +#include <errno.h>
> +
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +#define SERV6_IP_0             0xfaceb00c /* face:b00c:1234:5678::abcd */
> +#define SERV6_IP_1             0x12345678
> +#define SERV6_IP_2             0x00000000
> +#define SERV6_IP_3             0x0000abcd
> +#define SERV6_PORT             6060
> +#define SERV6_REWRITE_IP_0     0x00000000
> +#define SERV6_REWRITE_IP_1     0x00000000
> +#define SERV6_REWRITE_IP_2     0x00000000
> +#define SERV6_REWRITE_IP_3     0x00000001
> +#define SERV6_REWRITE_PORT     6666
> +
> +int _version SEC("version") = 1;

same

> +
> +SEC("cgroup/bind6")
> +int bind_v6_prog(struct bpf_sock_addr *ctx)
> +{

[...]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C
  2020-12-02  0:26   ` Andrii Nakryiko
@ 2020-12-02 17:04     ` sdf
  0 siblings, 0 replies; 15+ messages in thread
From: sdf @ 2020-12-02 17:04 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Networking, bpf, David S. Miller, Alexei Starovoitov, Daniel Borkmann

On 12/01, Andrii Nakryiko wrote:
> On Tue, Nov 17, 2020 at 4:20 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > I'm planning to extend it in the next patches. It's much easier to
> > work with C than BPF assembly.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---

> With nits below:

> Acked-by: Andrii Nakryiko <andrii@kernel.org>
Thank you for the review! Will respin shortly with the nits addressed.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks
  2020-12-02 17:25 [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks Stanislav Fomichev
@ 2020-12-02 17:25 ` Stanislav Fomichev
  0 siblings, 0 replies; 15+ messages in thread
From: Stanislav Fomichev @ 2020-12-02 17:25 UTC (permalink / raw)
  To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev, Andrey Ignatov

The bpf_{s,g}etsockopt helpers require a locked socket, so the bind
hooks now have to lock/unlock the socket around program execution.
That shouldn't cause any overhead because the socket is unbound
and shouldn't receive any traffic.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
---
 include/linux/bpf-cgroup.h | 12 ++++++------
 net/core/filter.c          |  4 ++++
 net/ipv4/af_inet.c         |  2 +-
 net/ipv6/af_inet6.c        |  2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ed71bd1a0825..72e69a0e1e8c 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -246,11 +246,11 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_BIND)
+#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
 
-#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_BIND)
+#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
+	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
 					    sk->sk_prot->pre_connect)
@@ -434,8 +434,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
diff --git a/net/core/filter.c b/net/core/filter.c
index 2ca5eecebacf..21d91dcf0260 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6995,6 +6995,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_storage_delete_proto;
 	case BPF_FUNC_setsockopt:
 		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
 			return &bpf_sock_addr_setsockopt_proto;
@@ -7003,6 +7005,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		}
 	case BPF_FUNC_getsockopt:
 		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_BIND:
+		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
 			return &bpf_sock_addr_getsockopt_proto;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b7260c8cef2e..b94fa8eb831b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -450,7 +450,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
 	if (err)
 		return err;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e648fbebb167..a7e3d170af51 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
 	if (err)
 		return err;
 
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-12-02 17:26 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-18  0:17 [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks Stanislav Fomichev
2020-11-18  0:17 ` [PATCH bpf-next 1/3] selftests/bpf: rewrite test_sock_addr bind bpf into C Stanislav Fomichev
2020-12-02  0:26   ` Andrii Nakryiko
2020-12-02 17:04     ` sdf
2020-11-18  0:17 ` [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks Stanislav Fomichev
2020-11-18  4:05   ` Alexei Starovoitov
2020-11-30  1:05     ` Andrey Ignatov
2020-11-30 16:38       ` sdf
2020-11-30 23:02         ` Andrey Ignatov
2020-12-01 18:43           ` sdf
2020-12-01 19:22             ` Andrey Ignatov
2020-12-01 19:21   ` Andrey Ignatov
2020-11-18  0:17 ` [PATCH bpf-next 3/3] selftests/bpf: extend bind{4,6} programs with a call to bpf_setsockopt Stanislav Fomichev
2020-12-02  0:22   ` Andrii Nakryiko
2020-12-02 17:25 [PATCH bpf-next 0/3] bpf: expose bpf_{s,g}etsockopt helpers to bind{4,6} hooks Stanislav Fomichev
2020-12-02 17:25 ` [PATCH bpf-next 2/3] bpf: allow bpf_{s,g}etsockopt from cgroup " Stanislav Fomichev

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).