All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
@ 2021-01-21  1:22 Stanislav Fomichev
  2021-01-21  1:22 ` [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
  2021-01-22 19:37 ` [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Andrey Ignatov
  0 siblings, 2 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2021-01-21  1:22 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, Stanislav Fomichev

At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
to a privileged one (< ip_unprivileged_port_start), but the bind will
then be rejected later on in __inet_bind or __inet6_bind when the
caller lacks CAP_NET_BIND_SERVICE.

Let's export a 'port_changed' event from the BPF program and bypass
the ip_unprivileged_port_start range check when we've seen that
the program explicitly overrode the port. This is accomplished
by generating instructions that set ctx->port_changed along with
updating ctx->user_port.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup.h | 30 ++++++++++++++++++++----------
 include/linux/filter.h     |  1 +
 include/net/inet_common.h  |  3 +++
 kernel/bpf/cgroup.c        |  8 +++++++-
 net/core/filter.c          | 13 +++++++++++++
 net/ipv4/af_inet.c         |  9 ++++++---
 net/ipv6/af_inet6.c        |  6 ++++--
 7 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0748fd87969e..874ed865bea1 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 				      struct sockaddr *uaddr,
 				      enum bpf_attach_type type,
-				      void *t_ctx);
+				      void *t_ctx,
+				      bool *port_changed);
 
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 				     struct bpf_sock_ops_kern *sock_ops,
@@ -234,7 +235,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(type))					       \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-							  NULL);	       \
+							  NULL, NULL);	       \
 	__ret;								       \
 })
 
@@ -244,17 +245,27 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 	if (cgroup_bpf_enabled(type))	{				       \
 		lock_sock(sk);						       \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-							  t_ctx);	       \
+							  t_ctx, NULL);	       \
 		release_sock(sk);					       \
 	}								       \
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)	       \
+({									       \
+	bool port_changed = false;					       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled(type))	{				       \
+		lock_sock(sk);						       \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+							  NULL,		       \
+							  &port_changed);      \
+		release_sock(sk);					       \
+		if (port_changed)					       \
+			*flags |= BIND_NO_CAP_NET_BIND_SERVICE;		       \
+	}								       \
+	__ret;								       \
+})
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				       \
 	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||		       \
@@ -453,8 +464,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5b3137d7b690..9bee8c057dd2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1258,6 +1258,7 @@ struct bpf_sock_addr_kern {
 	 */
 	u64 tmp_reg;
 	void *t_ctx;	/* Attach type specific context. */
+	u32 port_changed;
 };
 
 struct bpf_sock_ops_kern {
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index cb2818862919..9ba935c15869 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -41,6 +41,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 #define BIND_WITH_LOCK			(1 << 1)
 /* Called from BPF program. */
 #define BIND_FROM_BPF			(1 << 2)
+/* Skip CAP_NET_BIND_SERVICE check. */
+#define BIND_NO_CAP_NET_BIND_SERVICE	(1 << 3)
+
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index da649f20d6b2..f5d6205f1717 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1055,6 +1055,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  * @uaddr: sockaddr struct provided by user
  * @type: The type of program to be exectuted
  * @t_ctx: Pointer to attach type specific context
+ * @port_changed: Pointer to bool which will be set to 'true' when BPF
+ *                program updates user_port
  *
  * socket is expected to be of type INET or INET6.
  *
@@ -1064,7 +1066,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 				      struct sockaddr *uaddr,
 				      enum bpf_attach_type type,
-				      void *t_ctx)
+				      void *t_ctx,
+				      bool *port_changed)
 {
 	struct bpf_sock_addr_kern ctx = {
 		.sk = sk,
@@ -1089,6 +1092,9 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
 
+	if (port_changed)
+		*port_changed = ctx.port_changed;
+
 	return ret == 1 ? 0 : -EPERM;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
diff --git a/net/core/filter.c b/net/core/filter.c
index 9ab94e90d660..b3dd02eb9551 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9028,6 +9028,19 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 			     offsetof(struct sockaddr_in6, sin6_port));
 		BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
 			     sizeof_field(struct sockaddr_in6, sin6_port));
+
+		/* Set bpf_sock_addr_kern->port_changed=1 whenever
+		 * the port is updated from the BPF program.
+		 */
+		if (type == BPF_WRITE) {
+			*insn++ = BPF_ST_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern,
+							      port_changed),
+					     si->dst_reg,
+					     offsetof(struct bpf_sock_addr_kern,
+						      port_changed),
+					     1);
+		}
+
 		/* Account for sin6_port being smaller than user_port. */
 		port_size = min(port_size, BPF_LDST_BYTES(si));
 		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6ba2930ff49b..aaa94bea19c3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
+	u32 flags = BIND_WITH_LOCK;
 	int err;
 
 	/* If the socket has its own bind function then use it. (RAW) */
@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+						 BPF_CGROUP_INET4_BIND, &flags);
 	if (err)
 		return err;
 
-	return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+	return __inet_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet_bind);
 
@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
 	snum = ntohs(addr->sin_port);
 	err = -EACCES;
-	if (snum && inet_port_requires_bind_service(net, snum) &&
+	if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+	    snum && inet_port_requires_bind_service(net, snum) &&
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		goto out;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b9c654836b72..3e523c4f5226 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -439,6 +439,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
+	u32 flags = BIND_WITH_LOCK;
 	int err = 0;
 
 	/* If the socket has its own bind function then use it. */
@@ -451,11 +452,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+						 BPF_CGROUP_INET6_BIND, &flags);
 	if (err)
 		return err;
 
-	return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+	return __inet6_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet6_bind);
 
-- 
2.30.0.284.gd98b1dd5eaa7-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21  1:22 [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
@ 2021-01-21  1:22 ` Stanislav Fomichev
  2021-01-21 22:33   ` Martin KaFai Lau
  2021-01-21 23:53   ` Andrii Nakryiko
  2021-01-22 19:37 ` [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Andrey Ignatov
  1 sibling, 2 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2021-01-21  1:22 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, Stanislav Fomichev

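The BPF program rewrites port 111 to 111; even though the value is
unchanged, the port should still be marked as "changed" and the bind
should succeed. We also verify that binding to a privileged port that
isn't touched by BPF (110) is still prohibited.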

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
 tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
 2 files changed, 124 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
 create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
new file mode 100644
index 000000000000..840a04ac9042
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "bind_perm.skel.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/capability.h>
+
+static int duration;
+
+void try_bind(int port, int expected_errno)
+{
+	struct sockaddr_in sin = {};
+	int fd = -1;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (CHECK(fd < 0, "fd", "errno %d", errno))
+		goto close_socket;
+
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(port);
+
+	errno = 0;
+	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
+	CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
+	      errno, expected_errno);
+
+close_socket:
+	if (fd >= 0)
+		close(fd);
+}
+
+void cap_net_bind_service(cap_flag_value_t flag)
+{
+	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
+	cap_t caps;
+
+	caps = cap_get_proc();
+	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
+		goto free_caps;
+
+	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
+			       CAP_CLEAR),
+		  "cap_set_flag", "errno %d", errno))
+		goto free_caps;
+
+	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
+			       CAP_CLEAR),
+		  "cap_set_flag", "errno %d", errno))
+		goto free_caps;
+
+	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
+		goto free_caps;
+
+free_caps:
+	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
+		goto free_caps;
+}
+
+void test_bind_perm(void)
+{
+	struct bind_perm *skel;
+	int cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/bind_perm");
+	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+		return;
+
+	skel = bind_perm__open_and_load();
+	if (CHECK(!skel, "skel-load", "errno %d", errno))
+		goto close_cgroup_fd;
+
+	skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
+	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
+		  "cg-attach", "bind4 %ld",
+		  PTR_ERR(skel->links.bind_v4_prog)))
+		goto close_skeleton;
+
+	cap_net_bind_service(CAP_CLEAR);
+	try_bind(110, EACCES);
+	try_bind(111, 0);
+	cap_net_bind_service(CAP_SET);
+
+close_skeleton:
+	bind_perm__destroy(skel);
+close_cgroup_fd:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c b/tools/testing/selftests/bpf/progs/bind_perm.c
new file mode 100644
index 000000000000..2194587ec806
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind_perm.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct bpf_sock *sk;
+	__u32 user_ip4;
+	__u16 user_port;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != AF_INET)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM)
+		return 0;
+
+	/* Rewriting to the same value should still cause
+	 * permission check to be bypassed.
+	 */
+	if (ctx->user_port == bpf_htons(111))
+		ctx->user_port = bpf_htons(111);
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.30.0.284.gd98b1dd5eaa7-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21  1:22 ` [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
@ 2021-01-21 22:33   ` Martin KaFai Lau
  2021-01-21 22:57     ` sdf
  2021-01-21 23:53   ` Andrii Nakryiko
  1 sibling, 1 reply; 16+ messages in thread
From: Martin KaFai Lau @ 2021-01-21 22:33 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, bpf, ast, daniel

On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev wrote:
> BPF rewrites from 111 to 111, but it still should mark the port as
> "changed".
> We also verify that if port isn't touched by BPF, it's still prohibited.
> 
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
>  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
>  2 files changed, 124 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
>  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> new file mode 100644
> index 000000000000..840a04ac9042
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +#include "bind_perm.skel.h"
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/capability.h>
> +
> +static int duration;
> +
> +void try_bind(int port, int expected_errno)
> +{
> +	struct sockaddr_in sin = {};
> +	int fd = -1;
> +
> +	fd = socket(AF_INET, SOCK_STREAM, 0);
> +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> +		goto close_socket;
> +
> +	sin.sin_family = AF_INET;
> +	sin.sin_port = htons(port);
> +
> +	errno = 0;
> +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> +	CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> +	      errno, expected_errno);
> +
> +close_socket:
> +	if (fd >= 0)
> +		close(fd);
> +}
> +
> +void cap_net_bind_service(cap_flag_value_t flag)
> +{
> +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> +	cap_t caps;
> +
> +	caps = cap_get_proc();
> +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> +		goto free_caps;
> +
> +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> +			       CAP_CLEAR),
> +		  "cap_set_flag", "errno %d", errno))
> +		goto free_caps;
> +
> +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> +			       CAP_CLEAR),
> +		  "cap_set_flag", "errno %d", errno))
> +		goto free_caps;
> +
> +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
> +		goto free_caps;
> +
> +free_caps:
> +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> +		goto free_caps;
> +}
> +
> +void test_bind_perm(void)
> +{
> +	struct bind_perm *skel;
> +	int cgroup_fd;
> +
> +	cgroup_fd = test__join_cgroup("/bind_perm");
> +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> +		return;
> +
> +	skel = bind_perm__open_and_load();
> +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> +		goto close_cgroup_fd;
> +
> +	skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> +		  "cg-attach", "bind4 %ld",
> +		  PTR_ERR(skel->links.bind_v4_prog)))
> +		goto close_skeleton;
> +
> +	cap_net_bind_service(CAP_CLEAR);
> +	try_bind(110, EACCES);
> +	try_bind(111, 0);
> +	cap_net_bind_service(CAP_SET);
> +
> +close_skeleton:
> +	bind_perm__destroy(skel);
> +close_cgroup_fd:
> +	close(cgroup_fd);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c b/tools/testing/selftests/bpf/progs/bind_perm.c
> new file mode 100644
> index 000000000000..2194587ec806
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> @@ -0,0 +1,36 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/stddef.h>
> +#include <linux/bpf.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +SEC("cgroup/bind4")
> +int bind_v4_prog(struct bpf_sock_addr *ctx)
> +{
> +	struct bpf_sock *sk;
> +	__u32 user_ip4;
> +	__u16 user_port;
> +
> +	sk = ctx->sk;
> +	if (!sk)
> +		return 0;
> +
> +	if (sk->family != AF_INET)
> +		return 0;
> +
> +	if (ctx->type != SOCK_STREAM)
> +		return 0;
> +
> +	/* Rewriting to the same value should still cause
> +	 * permission check to be bypassed.
> +	 */
> +	if (ctx->user_port == bpf_htons(111))
> +		ctx->user_port = bpf_htons(111);
iiuc, this overwrite is essentially the way to ensure the bind
will succeed (override CAP_NET_BIND_SERVICE in this particular case?).

It seems to be okay if we consider that most of the use cases are
rewriting to a different port.

However, it is quite unintuitive for the bpf prog to have to overwrite
user_port with the same value just to ensure this port can be bound
successfully later.

Is user_port the only case? How about other fields in bpf_sock_addr?

> +
> +	return 1;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> -- 
> 2.30.0.284.gd98b1dd5eaa7-goog
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21 22:33   ` Martin KaFai Lau
@ 2021-01-21 22:57     ` sdf
  2021-01-21 23:50       ` Martin KaFai Lau
  0 siblings, 1 reply; 16+ messages in thread
From: sdf @ 2021-01-21 22:57 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: netdev, bpf, ast, daniel

On 01/21, Martin KaFai Lau wrote:
> On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev wrote:
> > BPF rewrites from 111 to 111, but it still should mark the port as
> > "changed".
> > We also verify that if port isn't touched by BPF, it's still prohibited.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
> >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> >  2 files changed, 124 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c  
> b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > new file mode 100644
> > index 000000000000..840a04ac9042
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > @@ -0,0 +1,88 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <test_progs.h>
> > +#include "bind_perm.skel.h"
> > +
> > +#include <sys/types.h>
> > +#include <sys/socket.h>
> > +#include <sys/capability.h>
> > +
> > +static int duration;
> > +
> > +void try_bind(int port, int expected_errno)
> > +{
> > +	struct sockaddr_in sin = {};
> > +	int fd = -1;
> > +
> > +	fd = socket(AF_INET, SOCK_STREAM, 0);
> > +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> > +		goto close_socket;
> > +
> > +	sin.sin_family = AF_INET;
> > +	sin.sin_port = htons(port);
> > +
> > +	errno = 0;
> > +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > +	CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> > +	      errno, expected_errno);
> > +
> > +close_socket:
> > +	if (fd >= 0)
> > +		close(fd);
> > +}
> > +
> > +void cap_net_bind_service(cap_flag_value_t flag)
> > +{
> > +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > +	cap_t caps;
> > +
> > +	caps = cap_get_proc();
> > +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > +		goto free_caps;
> > +
> > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> > +			       CAP_CLEAR),
> > +		  "cap_set_flag", "errno %d", errno))
> > +		goto free_caps;
> > +
> > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> > +			       CAP_CLEAR),
> > +		  "cap_set_flag", "errno %d", errno))
> > +		goto free_caps;
> > +
> > +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
> > +		goto free_caps;
> > +
> > +free_caps:
> > +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > +		goto free_caps;
> > +}
> > +
> > +void test_bind_perm(void)
> > +{
> > +	struct bind_perm *skel;
> > +	int cgroup_fd;
> > +
> > +	cgroup_fd = test__join_cgroup("/bind_perm");
> > +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > +		return;
> > +
> > +	skel = bind_perm__open_and_load();
> > +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> > +		goto close_cgroup_fd;
> > +
> > +	skel->links.bind_v4_prog =  
> bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > +		  "cg-attach", "bind4 %ld",
> > +		  PTR_ERR(skel->links.bind_v4_prog)))
> > +		goto close_skeleton;
> > +
> > +	cap_net_bind_service(CAP_CLEAR);
> > +	try_bind(110, EACCES);
> > +	try_bind(111, 0);
> > +	cap_net_bind_service(CAP_SET);
> > +
> > +close_skeleton:
> > +	bind_perm__destroy(skel);
> > +close_cgroup_fd:
> > +	close(cgroup_fd);
> > +}
> > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c  
> b/tools/testing/selftests/bpf/progs/bind_perm.c
> > new file mode 100644
> > index 000000000000..2194587ec806
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > @@ -0,0 +1,36 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include <linux/stddef.h>
> > +#include <linux/bpf.h>
> > +#include <sys/types.h>
> > +#include <sys/socket.h>
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_endian.h>
> > +
> > +SEC("cgroup/bind4")
> > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > +{
> > +	struct bpf_sock *sk;
> > +	__u32 user_ip4;
> > +	__u16 user_port;
> > +
> > +	sk = ctx->sk;
> > +	if (!sk)
> > +		return 0;
> > +
> > +	if (sk->family != AF_INET)
> > +		return 0;
> > +
> > +	if (ctx->type != SOCK_STREAM)
> > +		return 0;
> > +
> > +	/* Rewriting to the same value should still cause
> > +	 * permission check to be bypassed.
> > +	 */
> > +	if (ctx->user_port == bpf_htons(111))
> > +		ctx->user_port = bpf_htons(111);
> iiuc, this overwrite is essentially the way to ensure the bind
> will succeed (override CAP_NET_BIND_SERVICE in this particular case?).
Correct. The alternative might be to export ignore_perm_check
via bpf_sock_addr and make it explicit.

> It seems to be okay if we consider most of the use cases is rewriting
> to a different port.

> However, it is quite un-intuitive to the bpf prog to overwrite with
> the same user_port just to ensure this port can be binded successfully
> later.
I'm testing a corner case here where the port is rewritten to the same
value, but the intended use is to rewrite X to Y < 1024.

> Is user_port the only case? How about other fields in bpf_sock_addr?
Good question. For our use case only the port matters because
we rewrite both port and address (and never only address).

It does feel like it should also work when BPF rewrites address only
(and port happens to be in the privileged range). I guess I can
apply the same logic to the user_ip4 and user_ip6?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21 22:57     ` sdf
@ 2021-01-21 23:50       ` Martin KaFai Lau
  2021-01-22  0:30         ` sdf
  0 siblings, 1 reply; 16+ messages in thread
From: Martin KaFai Lau @ 2021-01-21 23:50 UTC (permalink / raw)
  To: sdf; +Cc: netdev, bpf, ast, daniel

On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@google.com wrote:
> On 01/21, Martin KaFai Lau wrote:
> > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev wrote:
> > > BPF rewrites from 111 to 111, but it still should mark the port as
> > > "changed".
> > > We also verify that if port isn't touched by BPF, it's still prohibited.
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
> > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > >  2 files changed, 124 insertions(+)
> > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > new file mode 100644
> > > index 000000000000..840a04ac9042
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > @@ -0,0 +1,88 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#include <test_progs.h>
> > > +#include "bind_perm.skel.h"
> > > +
> > > +#include <sys/types.h>
> > > +#include <sys/socket.h>
> > > +#include <sys/capability.h>
> > > +
> > > +static int duration;
> > > +
> > > +void try_bind(int port, int expected_errno)
> > > +{
> > > +	struct sockaddr_in sin = {};
> > > +	int fd = -1;
> > > +
> > > +	fd = socket(AF_INET, SOCK_STREAM, 0);
> > > +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > +		goto close_socket;
> > > +
> > > +	sin.sin_family = AF_INET;
> > > +	sin.sin_port = htons(port);
> > > +
> > > +	errno = 0;
> > > +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > +	CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> > > +	      errno, expected_errno);
> > > +
> > > +close_socket:
> > > +	if (fd >= 0)
> > > +		close(fd);
> > > +}
> > > +
> > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > +{
> > > +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > +	cap_t caps;
> > > +
> > > +	caps = cap_get_proc();
> > > +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > +		goto free_caps;
> > > +
> > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> > > +			       CAP_CLEAR),
> > > +		  "cap_set_flag", "errno %d", errno))
> > > +		goto free_caps;
> > > +
> > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> > > +			       CAP_CLEAR),
> > > +		  "cap_set_flag", "errno %d", errno))
> > > +		goto free_caps;
> > > +
> > > +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
> > > +		goto free_caps;
> > > +
> > > +free_caps:
> > > +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > +		goto free_caps;
> > > +}
> > > +
> > > +void test_bind_perm(void)
> > > +{
> > > +	struct bind_perm *skel;
> > > +	int cgroup_fd;
> > > +
> > > +	cgroup_fd = test__join_cgroup("/bind_perm");
> > > +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > +		return;
> > > +
> > > +	skel = bind_perm__open_and_load();
> > > +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > +		goto close_cgroup_fd;
> > > +
> > > +	skel->links.bind_v4_prog =
> > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > +		  "cg-attach", "bind4 %ld",
> > > +		  PTR_ERR(skel->links.bind_v4_prog)))
> > > +		goto close_skeleton;
> > > +
> > > +	cap_net_bind_service(CAP_CLEAR);
> > > +	try_bind(110, EACCES);
> > > +	try_bind(111, 0);
> > > +	cap_net_bind_service(CAP_SET);
> > > +
> > > +close_skeleton:
> > > +	bind_perm__destroy(skel);
> > > +close_cgroup_fd:
> > > +	close(cgroup_fd);
> > > +}
> > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c
> > b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > new file mode 100644
> > > index 000000000000..2194587ec806
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > @@ -0,0 +1,36 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +
> > > +#include <linux/stddef.h>
> > > +#include <linux/bpf.h>
> > > +#include <sys/types.h>
> > > +#include <sys/socket.h>
> > > +#include <bpf/bpf_helpers.h>
> > > +#include <bpf/bpf_endian.h>
> > > +
> > > +SEC("cgroup/bind4")
> > > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > > +{
> > > +	struct bpf_sock *sk;
> > > +	__u32 user_ip4;
> > > +	__u16 user_port;
> > > +
> > > +	sk = ctx->sk;
> > > +	if (!sk)
> > > +		return 0;
> > > +
> > > +	if (sk->family != AF_INET)
> > > +		return 0;
> > > +
> > > +	if (ctx->type != SOCK_STREAM)
> > > +		return 0;
> > > +
> > > +	/* Rewriting to the same value should still cause
> > > +	 * permission check to be bypassed.
> > > +	 */
> > > +	if (ctx->user_port == bpf_htons(111))
> > > +		ctx->user_port = bpf_htons(111);
> > iiuc, this overwrite is essentially the way to ensure the bind
> > will succeed (override CAP_NET_BIND_SERVICE in this particular case?).
> Correct. The alternative might be to export ignore_perm_check
> via bpf_sock_addr and make it explicit.
An explicit field is one option.

Another option is a different return value (e.g. BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY).

Not sure which one (including the one in the current patch) is better
at this point.

Also, from patch 1, if one cgrp bpf prog says no-perm-check,
it does not matter what the later cgrp bpf progs have to say?

> 
> > It seems to be okay if we consider most of the use cases is rewriting
> > to a different port.
> 
> > However, it is quite un-intuitive to the bpf prog to overwrite with
> > the same user_port just to ensure this port can be binded successfully
> > later.
> I'm testing a corner case here when the address is rewritten to the same
> value, but the intention is to rewrite X to Y < 1024.
It is a legit corner case though.

Also, is it possible that the compiler may optimize this
same-value-assignment out?

> 
> > Is user_port the only case? How about other fields in bpf_sock_addr?
> Good question. For our use case only the port matters because
> we rewrite both port and address (and never only address).
> 
> It does feel like it should also work when BPF rewrites address only
> (and port happens to be in the privileged range). I guess I can
> apply the same logic to the user_ip4 and user_ip6?
My concern is having more cases that need to overwrite with the same value.
That would make a stronger case for using a return value or an explicit field.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21  1:22 ` [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
  2021-01-21 22:33   ` Martin KaFai Lau
@ 2021-01-21 23:53   ` Andrii Nakryiko
  2021-01-22  0:09     ` sdf
  1 sibling, 1 reply; 16+ messages in thread
From: Andrii Nakryiko @ 2021-01-21 23:53 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann

On Wed, Jan 20, 2021 at 7:16 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> BPF rewrites from 111 to 111, but it still should mark the port as
> "changed".
> We also verify that if port isn't touched by BPF, it's still prohibited.
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
>  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
>  2 files changed, 124 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
>  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> new file mode 100644
> index 000000000000..840a04ac9042
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> @@ -0,0 +1,88 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <test_progs.h>
> +#include "bind_perm.skel.h"
> +
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/capability.h>
> +
> +static int duration;
> +
> +void try_bind(int port, int expected_errno)
> +{
> +       struct sockaddr_in sin = {};
> +       int fd = -1;
> +
> +       fd = socket(AF_INET, SOCK_STREAM, 0);
> +       if (CHECK(fd < 0, "fd", "errno %d", errno))
> +               goto close_socket;
> +
> +       sin.sin_family = AF_INET;
> +       sin.sin_port = htons(port);
> +
> +       errno = 0;
> +       bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> +       CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> +             errno, expected_errno);

ASSERT_EQ() is nicer (the CHECK above fails when errno != expected_errno)

> +
> +close_socket:
> +       if (fd >= 0)
> +               close(fd);
> +}
> +
> +void cap_net_bind_service(cap_flag_value_t flag)
> +{
> +       const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> +       cap_t caps;
> +
> +       caps = cap_get_proc();
> +       if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> +               goto free_caps;
> +
> +       if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> +                              CAP_CLEAR),
> +                 "cap_set_flag", "errno %d", errno))
> +               goto free_caps;
> +
> +       if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
> +                              CAP_CLEAR),
> +                 "cap_set_flag", "errno %d", errno))
> +               goto free_caps;
> +
> +       if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
> +               goto free_caps;
> +
> +free_caps:
> +       if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> +               goto free_caps;
> +}
> +
> +void test_bind_perm(void)
> +{
> +       struct bind_perm *skel;
> +       int cgroup_fd;
> +
> +       cgroup_fd = test__join_cgroup("/bind_perm");
> +       if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> +               return;
> +
> +       skel = bind_perm__open_and_load();
> +       if (CHECK(!skel, "skel-load", "errno %d", errno))
> +               goto close_cgroup_fd;

errno is irrelevant here; also use ASSERT_OK_PTR() instead

> +
> +       skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> +       if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> +                 "cg-attach", "bind4 %ld",
> +                 PTR_ERR(skel->links.bind_v4_prog)))

try using ASSERT_OK_PTR() instead

> +               goto close_skeleton;
> +
> +       cap_net_bind_service(CAP_CLEAR);
> +       try_bind(110, EACCES);
> +       try_bind(111, 0);
> +       cap_net_bind_service(CAP_SET);
> +
> +close_skeleton:
> +       bind_perm__destroy(skel);
> +close_cgroup_fd:
> +       close(cgroup_fd);
> +}

[...]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21 23:53   ` Andrii Nakryiko
@ 2021-01-22  0:09     ` sdf
  2021-01-22  0:24       ` Andrii Nakryiko
  0 siblings, 1 reply; 16+ messages in thread
From: sdf @ 2021-01-22  0:09 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann

On 01/21, Andrii Nakryiko wrote:
> On Wed, Jan 20, 2021 at 7:16 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > BPF rewrites from 111 to 111, but it still should mark the port as
> > "changed".
> > We also verify that if port isn't touched by BPF, it's still prohibited.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >  .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
> >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> >  2 files changed, 124 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
> >  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c  
> b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > new file mode 100644
> > index 000000000000..840a04ac9042
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > @@ -0,0 +1,88 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <test_progs.h>
> > +#include "bind_perm.skel.h"
> > +
> > +#include <sys/types.h>
> > +#include <sys/socket.h>
> > +#include <sys/capability.h>
> > +
> > +static int duration;
> > +
> > +void try_bind(int port, int expected_errno)
> > +{
> > +       struct sockaddr_in sin = {};
> > +       int fd = -1;
> > +
> > +       fd = socket(AF_INET, SOCK_STREAM, 0);
> > +       if (CHECK(fd < 0, "fd", "errno %d", errno))
> > +               goto close_socket;
> > +
> > +       sin.sin_family = AF_INET;
> > +       sin.sin_port = htons(port);
> > +
> > +       errno = 0;
> > +       bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > +       CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> > +             errno, expected_errno);

> ASSERT_NEQ() is nicer
Nice, didn't know these existed. Now we need ASSERT_GT/LT/GE/LE to also
get rid of those other CHECKs :-)

> > +
> > +close_socket:
> > +       if (fd >= 0)
> > +               close(fd);
> > +}
> > +
> > +void cap_net_bind_service(cap_flag_value_t flag)
> > +{
> > +       const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > +       cap_t caps;
> > +
> > +       caps = cap_get_proc();
> > +       if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > +               goto free_caps;
> > +
> > +       if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,  
> &cap_net_bind_service,
> > +                              CAP_CLEAR),
> > +                 "cap_set_flag", "errno %d", errno))
> > +               goto free_caps;
> > +
> > +       if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,  
> &cap_net_bind_service,
> > +                              CAP_CLEAR),
> > +                 "cap_set_flag", "errno %d", errno))
> > +               goto free_caps;
> > +
> > +       if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d",  
> errno))
> > +               goto free_caps;
> > +
> > +free_caps:
> > +       if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > +               goto free_caps;
> > +}
> > +
> > +void test_bind_perm(void)
> > +{
> > +       struct bind_perm *skel;
> > +       int cgroup_fd;
> > +
> > +       cgroup_fd = test__join_cgroup("/bind_perm");
> > +       if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > +               return;
> > +
> > +       skel = bind_perm__open_and_load();
> > +       if (CHECK(!skel, "skel-load", "errno %d", errno))
> > +               goto close_cgroup_fd;

> errno is irrelevant; also use ASSERT_PTR_OK() instead
Ack, it might be worth unconditionally printing errno in your ASSERT_XXX
macros. Worst case, it's not used, but in the general case it avoids
all this "errno %d" boilerplate.

> > +
> > +       skel->links.bind_v4_prog =  
> bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > +       if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > +                 "cg-attach", "bind4 %ld",
> > +                 PTR_ERR(skel->links.bind_v4_prog)))

> try using ASSERT_PTR_OK instead
Sure, thanks!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-22  0:09     ` sdf
@ 2021-01-22  0:24       ` Andrii Nakryiko
  0 siblings, 0 replies; 16+ messages in thread
From: Andrii Nakryiko @ 2021-01-22  0:24 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann

On Thu, Jan 21, 2021 at 4:09 PM <sdf@google.com> wrote:
>
> On 01/21, Andrii Nakryiko wrote:
> > On Wed, Jan 20, 2021 at 7:16 PM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > BPF rewrites from 111 to 111, but it still should mark the port as
> > > "changed".
> > > We also verify that if port isn't touched by BPF, it's still prohibited.
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88 +++++++++++++++++++
> > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > >  2 files changed, 124 insertions(+)
> > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > new file mode 100644
> > > index 000000000000..840a04ac9042
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > @@ -0,0 +1,88 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#include <test_progs.h>
> > > +#include "bind_perm.skel.h"
> > > +
> > > +#include <sys/types.h>
> > > +#include <sys/socket.h>
> > > +#include <sys/capability.h>
> > > +
> > > +static int duration;
> > > +
> > > +void try_bind(int port, int expected_errno)
> > > +{
> > > +       struct sockaddr_in sin = {};
> > > +       int fd = -1;
> > > +
> > > +       fd = socket(AF_INET, SOCK_STREAM, 0);
> > > +       if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > +               goto close_socket;
> > > +
> > > +       sin.sin_family = AF_INET;
> > > +       sin.sin_port = htons(port);
> > > +
> > > +       errno = 0;
> > > +       bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > +       CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> > > +             errno, expected_errno);
>
> > ASSERT_NEQ() is nicer
> Nice, didn't know these existed. Now we need ASSERT_GT/LE/GE/LE to also
> get rid of those other CHECKs :-)

When I was adding the initial set of ASSERT_XXX() macros, I didn't think
we'd need all those variants, but it turns out they come up pretty
frequently. So while you might be joking, I think it's a good idea to
add them and start using them consistently.

>
> > > +
> > > +close_socket:
> > > +       if (fd >= 0)
> > > +               close(fd);
> > > +}
> > > +
> > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > +{
> > > +       const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > +       cap_t caps;
> > > +
> > > +       caps = cap_get_proc();
> > > +       if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > +               goto free_caps;
> > > +
> > > +       if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > &cap_net_bind_service,
> > > +                              CAP_CLEAR),
> > > +                 "cap_set_flag", "errno %d", errno))
> > > +               goto free_caps;
> > > +
> > > +       if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > &cap_net_bind_service,
> > > +                              CAP_CLEAR),
> > > +                 "cap_set_flag", "errno %d", errno))
> > > +               goto free_caps;
> > > +
> > > +       if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d",
> > errno))
> > > +               goto free_caps;
> > > +
> > > +free_caps:
> > > +       if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > +               goto free_caps;
> > > +}
> > > +
> > > +void test_bind_perm(void)
> > > +{
> > > +       struct bind_perm *skel;
> > > +       int cgroup_fd;
> > > +
> > > +       cgroup_fd = test__join_cgroup("/bind_perm");
> > > +       if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > +               return;
> > > +
> > > +       skel = bind_perm__open_and_load();
> > > +       if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > +               goto close_cgroup_fd;
>
> > errno is irrelevant; also use ASSERT_PTR_OK() instead
> Ack, it might be worth unconditionally printing it in your ASSERT_XXX
> macros. Worst case - it's not used, but in general case avoids
> all this "errno %d" boilerplate.

Don't know about that, having unrelated errno everywhere is annoying
and misleading. I'd rather move away from relying on errno so much :)

>
> > > +
> > > +       skel->links.bind_v4_prog =
> > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > +       if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > +                 "cg-attach", "bind4 %ld",
> > > +                 PTR_ERR(skel->links.bind_v4_prog)))
>
> > try using ASSERT_PTR_OK instead
> Sure, thanks!

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-21 23:50       ` Martin KaFai Lau
@ 2021-01-22  0:30         ` sdf
  2021-01-22  1:27           ` Martin KaFai Lau
  0 siblings, 1 reply; 16+ messages in thread
From: sdf @ 2021-01-22  0:30 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: netdev, bpf, ast, daniel

On 01/21, Martin KaFai Lau wrote:
> On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@google.com wrote:
> > On 01/21, Martin KaFai Lau wrote:
> > > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev wrote:
> > > > BPF rewrites from 111 to 111, but it still should mark the port as
> > > > "changed".
> > > > We also verify that if port isn't touched by BPF, it's still  
> prohibited.
> > > >
> > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > ---
> > > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88  
> +++++++++++++++++++
> > > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > > >  2 files changed, 124 insertions(+)
> > > >  create mode 100644  
> tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > >  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> > > >
> > > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > new file mode 100644
> > > > index 000000000000..840a04ac9042
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > @@ -0,0 +1,88 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +#include <test_progs.h>
> > > > +#include "bind_perm.skel.h"
> > > > +
> > > > +#include <sys/types.h>
> > > > +#include <sys/socket.h>
> > > > +#include <sys/capability.h>
> > > > +
> > > > +static int duration;
> > > > +
> > > > +void try_bind(int port, int expected_errno)
> > > > +{
> > > > +	struct sockaddr_in sin = {};
> > > > +	int fd = -1;
> > > > +
> > > > +	fd = socket(AF_INET, SOCK_STREAM, 0);
> > > > +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > > +		goto close_socket;
> > > > +
> > > > +	sin.sin_family = AF_INET;
> > > > +	sin.sin_port = htons(port);
> > > > +
> > > > +	errno = 0;
> > > > +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > > +	CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> > > > +	      errno, expected_errno);
> > > > +
> > > > +close_socket:
> > > > +	if (fd >= 0)
> > > > +		close(fd);
> > > > +}
> > > > +
> > > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > > +{
> > > > +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > > +	cap_t caps;
> > > > +
> > > > +	caps = cap_get_proc();
> > > > +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > > +		goto free_caps;
> > > > +
> > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,  
> &cap_net_bind_service,
> > > > +			       CAP_CLEAR),
> > > > +		  "cap_set_flag", "errno %d", errno))
> > > > +		goto free_caps;
> > > > +
> > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,  
> &cap_net_bind_service,
> > > > +			       CAP_CLEAR),
> > > > +		  "cap_set_flag", "errno %d", errno))
> > > > +		goto free_caps;
> > > > +
> > > > +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
> > > > +		goto free_caps;
> > > > +
> > > > +free_caps:
> > > > +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > > +		goto free_caps;
> > > > +}
> > > > +
> > > > +void test_bind_perm(void)
> > > > +{
> > > > +	struct bind_perm *skel;
> > > > +	int cgroup_fd;
> > > > +
> > > > +	cgroup_fd = test__join_cgroup("/bind_perm");
> > > > +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > > +		return;
> > > > +
> > > > +	skel = bind_perm__open_and_load();
> > > > +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > > +		goto close_cgroup_fd;
> > > > +
> > > > +	skel->links.bind_v4_prog =
> > > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > > +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > > +		  "cg-attach", "bind4 %ld",
> > > > +		  PTR_ERR(skel->links.bind_v4_prog)))
> > > > +		goto close_skeleton;
> > > > +
> > > > +	cap_net_bind_service(CAP_CLEAR);
> > > > +	try_bind(110, EACCES);
> > > > +	try_bind(111, 0);
> > > > +	cap_net_bind_service(CAP_SET);
> > > > +
> > > > +close_skeleton:
> > > > +	bind_perm__destroy(skel);
> > > > +close_cgroup_fd:
> > > > +	close(cgroup_fd);
> > > > +}
> > > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c
> > > b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > new file mode 100644
> > > > index 000000000000..2194587ec806
> > > > --- /dev/null
> > > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > @@ -0,0 +1,36 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +
> > > > +#include <linux/stddef.h>
> > > > +#include <linux/bpf.h>
> > > > +#include <sys/types.h>
> > > > +#include <sys/socket.h>
> > > > +#include <bpf/bpf_helpers.h>
> > > > +#include <bpf/bpf_endian.h>
> > > > +
> > > > +SEC("cgroup/bind4")
> > > > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > > > +{
> > > > +	struct bpf_sock *sk;
> > > > +	__u32 user_ip4;
> > > > +	__u16 user_port;
> > > > +
> > > > +	sk = ctx->sk;
> > > > +	if (!sk)
> > > > +		return 0;
> > > > +
> > > > +	if (sk->family != AF_INET)
> > > > +		return 0;
> > > > +
> > > > +	if (ctx->type != SOCK_STREAM)
> > > > +		return 0;
> > > > +
> > > > +	/* Rewriting to the same value should still cause
> > > > +	 * permission check to be bypassed.
> > > > +	 */
> > > > +	if (ctx->user_port == bpf_htons(111))
> > > > +		ctx->user_port = bpf_htons(111);
> > > iiuc, this overwrite is essentially the way to ensure the bind
> > > will succeed (override CAP_NET_BIND_SERVICE in this particular case?).
> > Correct. The alternative might be to export ignore_perm_check
> > via bpf_sock_addr and make it explicit.
> An explicit field is one option.

> or a different return value (e.g. BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY).

> Not sure which one (including the one in the current patch) is better
> at this point.
Same. My reasoning was: if the BPF program rewrites the port, it knows
what it's doing, so it doesn't seem like adding another explicit
signal makes sense. So I decided to go without external api change.

> Also, from patch 1, if one cgrp bpf prog says no-perm-check,
> it does not matter what the latter cgrp bpf progs have to say?
Right, it doesn't matter. But I think it's fine: if a later
one rewrites the (previously rewritten) address to something
new, it still wants the socket to be bound to that address, right?

If some program returns EPERM, it also doesn't matter.

> > > It seems to be okay if we consider most of the use cases is rewriting
> > > to a different port.
> >
> > > However, it is quite un-intuitive to the bpf prog to overwrite with
> > > the same user_port just to ensure this port can be binded successfully
> > > later.
> > I'm testing a corner case here when the address is rewritten to the same
> > value, but the intention is to rewrite X to Y < 1024.
> It is a legit corner case though.

> Also, is it possible that the compiler may optimize this
> same-value-assignment out?
Yeah, it's a legit case, that's why I tested it. Good point on
optimizing (can be "healed" with volatile?), but it should only matter if
the program is installed to bypass the permission checks for some ports
(as it does in this selftest). As you mention below, it's not clear what
the 'default' use-case is. Is it rewriting to a different port or just
bypassing the cap_net_bind_service for some ports? Feels like rewriting
to a different address/port was the reason the hooks were added,
so I was targeting this one.

> > > Is user_port the only case? How about other fields in bpf_sock_addr?
> > Good question. For our use case only the port matters because
> > we rewrite both port and address (and never only address).
> >
> > It does feel like it should also work when BPF rewrites address only
> > (and port happens to be in the privileged range). I guess I can
> > apply the same logic to the user_ip4 and user_ip6?
> My concern is having more cases that need to overwrite with the same  
> value.
> Then it may make a stronger case to use return value or an explicit field.
Tried to add some reasoning in the comment above. Let me know what
your preference is.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-22  0:30         ` sdf
@ 2021-01-22  1:27           ` Martin KaFai Lau
  2021-01-22 16:16             ` sdf
  0 siblings, 1 reply; 16+ messages in thread
From: Martin KaFai Lau @ 2021-01-22  1:27 UTC (permalink / raw)
  To: sdf; +Cc: netdev, bpf, ast, daniel

On Thu, Jan 21, 2021 at 04:30:08PM -0800, sdf@google.com wrote:
> On 01/21, Martin KaFai Lau wrote:
> > On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@google.com wrote:
> > > On 01/21, Martin KaFai Lau wrote:
> > > > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev wrote:
> > > > > BPF rewrites from 111 to 111, but it still should mark the port as
> > > > > "changed".
> > > > > We also verify that if port isn't touched by BPF, it's still
> > prohibited.
> > > > >
> > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > ---
> > > > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88
> > +++++++++++++++++++
> > > > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > > > >  2 files changed, 124 insertions(+)
> > > > >  create mode 100644
> > tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > >  create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c
> > > > >
> > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > new file mode 100644
> > > > > index 000000000000..840a04ac9042
> > > > > --- /dev/null
> > > > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > @@ -0,0 +1,88 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > +#include <test_progs.h>
> > > > > +#include "bind_perm.skel.h"
> > > > > +
> > > > > +#include <sys/types.h>
> > > > > +#include <sys/socket.h>
> > > > > +#include <sys/capability.h>
> > > > > +
> > > > > +static int duration;
> > > > > +
> > > > > +void try_bind(int port, int expected_errno)
> > > > > +{
> > > > > +	struct sockaddr_in sin = {};
> > > > > +	int fd = -1;
> > > > > +
> > > > > +	fd = socket(AF_INET, SOCK_STREAM, 0);
> > > > > +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > > > +		goto close_socket;
> > > > > +
> > > > > +	sin.sin_family = AF_INET;
> > > > > +	sin.sin_port = htons(port);
> > > > > +
> > > > > +	errno = 0;
> > > > > +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > > > +	CHECK(errno != expected_errno, "bind", "errno %d, expected %d",
> > > > > +	      errno, expected_errno);
> > > > > +
> > > > > +close_socket:
> > > > > +	if (fd >= 0)
> > > > > +		close(fd);
> > > > > +}
> > > > > +
> > > > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > > > +{
> > > > > +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > > > +	cap_t caps;
> > > > > +
> > > > > +	caps = cap_get_proc();
> > > > > +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > > > +		goto free_caps;
> > > > > +
> > > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > &cap_net_bind_service,
> > > > > +			       CAP_CLEAR),
> > > > > +		  "cap_set_flag", "errno %d", errno))
> > > > > +		goto free_caps;
> > > > > +
> > > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > &cap_net_bind_service,
> > > > > +			       CAP_CLEAR),
> > > > > +		  "cap_set_flag", "errno %d", errno))
> > > > > +		goto free_caps;
> > > > > +
> > > > > +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
> > > > > +		goto free_caps;
> > > > > +
> > > > > +free_caps:
> > > > > +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > > > +		goto free_caps;
> > > > > +}
> > > > > +
> > > > > +void test_bind_perm(void)
> > > > > +{
> > > > > +	struct bind_perm *skel;
> > > > > +	int cgroup_fd;
> > > > > +
> > > > > +	cgroup_fd = test__join_cgroup("/bind_perm");
> > > > > +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > > > +		return;
> > > > > +
> > > > > +	skel = bind_perm__open_and_load();
> > > > > +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > > > +		goto close_cgroup_fd;
> > > > > +
> > > > > +	skel->links.bind_v4_prog =
> > > > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > > > +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > > > +		  "cg-attach", "bind4 %ld",
> > > > > +		  PTR_ERR(skel->links.bind_v4_prog)))
> > > > > +		goto close_skeleton;
> > > > > +
> > > > > +	cap_net_bind_service(CAP_CLEAR);
> > > > > +	try_bind(110, EACCES);
> > > > > +	try_bind(111, 0);
> > > > > +	cap_net_bind_service(CAP_SET);
> > > > > +
> > > > > +close_skeleton:
> > > > > +	bind_perm__destroy(skel);
> > > > > +close_cgroup_fd:
> > > > > +	close(cgroup_fd);
> > > > > +}
> > > > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > new file mode 100644
> > > > > index 000000000000..2194587ec806
> > > > > --- /dev/null
> > > > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > @@ -0,0 +1,36 @@
> > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > +
> > > > > +#include <linux/stddef.h>
> > > > > +#include <linux/bpf.h>
> > > > > +#include <sys/types.h>
> > > > > +#include <sys/socket.h>
> > > > > +#include <bpf/bpf_helpers.h>
> > > > > +#include <bpf/bpf_endian.h>
> > > > > +
> > > > > +SEC("cgroup/bind4")
> > > > > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > > > > +{
> > > > > +	struct bpf_sock *sk;
> > > > > +	__u32 user_ip4;
> > > > > +	__u16 user_port;
> > > > > +
> > > > > +	sk = ctx->sk;
> > > > > +	if (!sk)
> > > > > +		return 0;
> > > > > +
> > > > > +	if (sk->family != AF_INET)
> > > > > +		return 0;
> > > > > +
> > > > > +	if (ctx->type != SOCK_STREAM)
> > > > > +		return 0;
> > > > > +
> > > > > +	/* Rewriting to the same value should still cause
> > > > > +	 * permission check to be bypassed.
> > > > > +	 */
> > > > > +	if (ctx->user_port == bpf_htons(111))
> > > > > +		ctx->user_port = bpf_htons(111);
> > > > iiuc, this overwrite is essentially the way to ensure the bind
> > > > will succeed (override CAP_NET_BIND_SERVICE in this particular case?).
> > > Correct. The alternative might be to export ignore_perm_check
> > > via bpf_sock_addr and make it explicit.
> > An explicit field is one option.
> 
> > or a different return value (e.g. BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY).
> 
> > Not sure which one (including the one in the current patch) is better
> > at this point.
> Same. My reasoning was: if the BPF program rewrites the port, it knows
> what it's doing, so it doesn't seem like adding another explicit
> signal makes sense. So I decided to go without external api change.
> 
> > Also, from patch 1, if one cgrp bpf prog says no-perm-check,
> > it does not matter what the latter cgrp bpf progs have to say?
> Right, it doesn't matter. But I think it's fine: if the latter
> one rewrites the (previously rewritten) address to something
> new, it still wants that address to be bound to, right?
> 
> If some program returns EPERM, it also doesn't matter.
> 
> > > > It seems to be okay if we consider most of the use cases is rewriting
> > > > to a different port.
> > >
> > > > However, it is quite un-intuitive to the bpf prog to overwrite with
> > > > the same user_port just to ensure this port can be binded successfully
> > > > later.
> > > I'm testing a corner case here when the address is rewritten to the same
> > > value, but the intention is to rewrite X to Y < 1024.
> > It is a legit corner case though.
> 
> > Also, is it possible that the compiler may optimize this
> > same-value-assignment out?
> Yeah, it's a legit case, that's why I tested it. Good point on
> optimizing (can be "healed" with volatile?),
hmm... It is too fragile.

> but it should only matter if
> the program is installed to bypass the permission checks for some ports
> (as it does in this selftest). As you mention below, it's not clear what's
> the 'default' use-case is. Is it rewriting to a different port or just
> bypassing the cap_net_bind_service for some ports? Feels like rewriting
> to a different address/port was the reason the hooks were added,
> so I was targeting this one.
It sounds like having a bpf prog that only bypasses the permission check,
without changing the port, is not the target but more of a by-product of
this change.

How about only bypassing the cap_net_bind_service check when bpf actually
changed the address/port?  Would that make bind too slow?

> 
> > > > Is user_port the only case? How about other fields in bpf_sock_addr?
> > > Good question. For our use case only the port matters because
> > > we rewrite both port and address (and never only address).
> > >
> > > It does feel like it should also work when BPF rewrites address only
> > > (and port happens to be in the privileged range). I guess I can
> > > apply the same logic to the user_ip4 and user_ip6?
> > My concern is having more cases that need to overwrite with the same
> > value.
> > Then it may make a stronger case to use return value or an explicit field.
> Tried to add some reasoning in the comment above. Let me know what's
> your preference is.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-22  1:27           ` Martin KaFai Lau
@ 2021-01-22 16:16             ` sdf
  2021-01-22 19:38               ` Martin KaFai Lau
  0 siblings, 1 reply; 16+ messages in thread
From: sdf @ 2021-01-22 16:16 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: netdev, bpf, ast, daniel

On 01/21, Martin KaFai Lau wrote:
> On Thu, Jan 21, 2021 at 04:30:08PM -0800, sdf@google.com wrote:
> > On 01/21, Martin KaFai Lau wrote:
> > > On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@google.com wrote:
> > > > On 01/21, Martin KaFai Lau wrote:
> > > > > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev  
> wrote:
> > > > > > BPF rewrites from 111 to 111, but it still should mark the port  
> as
> > > > > > "changed".
> > > > > > We also verify that if port isn't touched by BPF, it's still
> > > prohibited.
> > > > > >
> > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > ---
> > > > > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88
> > > +++++++++++++++++++
> > > > > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > > > > >  2 files changed, 124 insertions(+)
> > > > > >  create mode 100644
> > > tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > >  create mode 100644  
> tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > >
> > > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > new file mode 100644
> > > > > > index 000000000000..840a04ac9042
> > > > > > --- /dev/null
> > > > > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > @@ -0,0 +1,88 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > > +#include <test_progs.h>
> > > > > > +#include "bind_perm.skel.h"
> > > > > > +
> > > > > > +#include <sys/types.h>
> > > > > > +#include <sys/socket.h>
> > > > > > +#include <sys/capability.h>
> > > > > > +
> > > > > > +static int duration;
> > > > > > +
> > > > > > +void try_bind(int port, int expected_errno)
> > > > > > +{
> > > > > > +	struct sockaddr_in sin = {};
> > > > > > +	int fd = -1;
> > > > > > +
> > > > > > +	fd = socket(AF_INET, SOCK_STREAM, 0);
> > > > > > +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > > > > +		goto close_socket;
> > > > > > +
> > > > > > +	sin.sin_family = AF_INET;
> > > > > > +	sin.sin_port = htons(port);
> > > > > > +
> > > > > > +	errno = 0;
> > > > > > +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > > > > +	CHECK(errno != expected_errno, "bind", "errno %d,  
> expected %d",
> > > > > > +	      errno, expected_errno);
> > > > > > +
> > > > > > +close_socket:
> > > > > > +	if (fd >= 0)
> > > > > > +		close(fd);
> > > > > > +}
> > > > > > +
> > > > > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > > > > +{
> > > > > > +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > > > > +	cap_t caps;
> > > > > > +
> > > > > > +	caps = cap_get_proc();
> > > > > > +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > > > > +		goto free_caps;
> > > > > > +
> > > > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > > &cap_net_bind_service,
> > > > > > +			       CAP_CLEAR),
> > > > > > +		  "cap_set_flag", "errno %d", errno))
> > > > > > +		goto free_caps;
> > > > > > +
> > > > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > > &cap_net_bind_service,
> > > > > > +			       CAP_CLEAR),
> > > > > > +		  "cap_set_flag", "errno %d", errno))
> > > > > > +		goto free_caps;
> > > > > > +
> > > > > > +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d",  
> errno))
> > > > > > +		goto free_caps;
> > > > > > +
> > > > > > +free_caps:
> > > > > > +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > > > > +		goto free_caps;
> > > > > > +}
> > > > > > +
> > > > > > +void test_bind_perm(void)
> > > > > > +{
> > > > > > +	struct bind_perm *skel;
> > > > > > +	int cgroup_fd;
> > > > > > +
> > > > > > +	cgroup_fd = test__join_cgroup("/bind_perm");
> > > > > > +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > > > > +		return;
> > > > > > +
> > > > > > +	skel = bind_perm__open_and_load();
> > > > > > +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > > > > +		goto close_cgroup_fd;
> > > > > > +
> > > > > > +	skel->links.bind_v4_prog =
> > > > > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > > > > +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > > > > +		  "cg-attach", "bind4 %ld",
> > > > > > +		  PTR_ERR(skel->links.bind_v4_prog)))
> > > > > > +		goto close_skeleton;
> > > > > > +
> > > > > > +	cap_net_bind_service(CAP_CLEAR);
> > > > > > +	try_bind(110, EACCES);
> > > > > > +	try_bind(111, 0);
> > > > > > +	cap_net_bind_service(CAP_SET);
> > > > > > +
> > > > > > +close_skeleton:
> > > > > > +	bind_perm__destroy(skel);
> > > > > > +close_cgroup_fd:
> > > > > > +	close(cgroup_fd);
> > > > > > +}
> > > > > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > new file mode 100644
> > > > > > index 000000000000..2194587ec806
> > > > > > --- /dev/null
> > > > > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > @@ -0,0 +1,36 @@
> > > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > > +
> > > > > > +#include <linux/stddef.h>
> > > > > > +#include <linux/bpf.h>
> > > > > > +#include <sys/types.h>
> > > > > > +#include <sys/socket.h>
> > > > > > +#include <bpf/bpf_helpers.h>
> > > > > > +#include <bpf/bpf_endian.h>
> > > > > > +
> > > > > > +SEC("cgroup/bind4")
> > > > > > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > > > > > +{
> > > > > > +	struct bpf_sock *sk;
> > > > > > +	__u32 user_ip4;
> > > > > > +	__u16 user_port;
> > > > > > +
> > > > > > +	sk = ctx->sk;
> > > > > > +	if (!sk)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	if (sk->family != AF_INET)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	if (ctx->type != SOCK_STREAM)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	/* Rewriting to the same value should still cause
> > > > > > +	 * permission check to be bypassed.
> > > > > > +	 */
> > > > > > +	if (ctx->user_port == bpf_htons(111))
> > > > > > +		ctx->user_port = bpf_htons(111);
> > > > > iiuc, this overwrite is essentially the way to ensure the bind
> > > > > will succeed (override CAP_NET_BIND_SERVICE in this particular  
> case?).
> > > > Correct. The alternative might be to export ignore_perm_check
> > > > via bpf_sock_addr and make it explicit.
> > > An explicit field is one option.
> >
> > > or a different return value (e.g.  
> BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY).
> >
> > > Not sure which one (including the one in the current patch) is better
> > > at this point.
> > Same. My reasoning was: if the BPF program rewrites the port, it knows
> > what it's doing, so it doesn't seem like adding another explicit
> > signal makes sense. So I decided to go without external api change.
> >
> > > Also, from patch 1, if one cgrp bpf prog says no-perm-check,
> > > it does not matter what the latter cgrp bpf progs have to say?
> > Right, it doesn't matter. But I think it's fine: if the latter
> > one rewrites the (previously rewritten) address to something
> > new, it still wants that address to be bound to, right?
> >
> > If some program returns EPERM, it also doesn't matter.
> >
> > > > > It seems to be okay if we consider most of the use cases is  
> rewriting
> > > > > to a different port.
> > > >
> > > > > However, it is quite un-intuitive to the bpf prog to overwrite  
> with
> > > > > the same user_port just to ensure this port can be binded  
> successfully
> > > > > later.
> > > > I'm testing a corner case here when the address is rewritten to the  
> same
> > > > value, but the intention is to rewrite X to Y < 1024.
> > > It is a legit corner case though.
> >
> > > Also, is it possible that the compiler may optimize this
> > > same-value-assignment out?
> > Yeah, it's a legit case, that's why I tested it. Good point on
> > optimizing (can be "healed" with volatile?),
> hmm... It is too fragile.

> > but it should only matter if
> > the program is installed to bypass the permission checks for some ports
> > (as it does in this selftest). As you mention below, it's not clear  
> what's
> > the 'default' use-case is. Is it rewriting to a different port or just
> > bypassing the cap_net_bind_service for some ports? Feels like rewriting
> > to a different address/port was the reason the hooks were added,
> > so I was targeting this one.
> It sounds like having a bpf to bypass permission only without changing
> the port is not the target but more like a by-product of this change.
Right, we might have a use-case for that as well, but it's not
strictly required. We can convert it to be something like
'rewrite this magic addr+port to this real addr+port'.

> How about only bypass cap_net_bind_service when bpf did change the
> address/port.  Will it become too slow for bind?
But this is what I'm doing already, isn't it? There is just a by-product
of triggering it for the same port = port address. Tracking
the real change will require extra space to keep the original
address and then memcmp to figure out if the change was made.
Assuming the majority of rewrites don't happen for <1024 ports
this seems like a bunch of wasted work (vs setting that ctx->port_changed).

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
  2021-01-21  1:22 [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
  2021-01-21  1:22 ` [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
@ 2021-01-22 19:37 ` Andrey Ignatov
  2021-01-22 19:53   ` Stanislav Fomichev
  1 sibling, 1 reply; 16+ messages in thread
From: Andrey Ignatov @ 2021-01-22 19:37 UTC (permalink / raw)
  To: Stanislav Fomichev, Martin KaFai Lau; +Cc: netdev, bpf, ast, daniel

Stanislav Fomichev <sdf@google.com> [Wed, 2021-01-20 18:09 -0800]:
> At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
> to the privileged ones (< ip_unprivileged_port_start), but it will
> be rejected later on in the __inet_bind or __inet6_bind.
>
> Let's export 'port_changed' event from the BPF program and bypass
> ip_unprivileged_port_start range check when we've seen that
> the program explicitly overrode the port. This is accomplished
> by generating instructions to set ctx->port_changed along with
> updating ctx->user_port.
> 
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
...
> @@ -244,17 +245,27 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  	if (cgroup_bpf_enabled(type))	{				       \
>  		lock_sock(sk);						       \
>  		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> -							  t_ctx);	       \
> +							  t_ctx, NULL);	       \
>  		release_sock(sk);					       \
>  	}								       \
>  	__ret;								       \
>  })
>  
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
> -	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
> -
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
> -	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
> +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)	       \
> +({									       \
> +	bool port_changed = false;					       \

I see the discussion with Martin in [0] on the program overriding the
port but setting exactly same value as it already contains. Commenting
on this patch since the code is here.

From what I understand there is no use-case to support overriding the
port w/o changing the value to just bypass the capability. In this case
the code can be simplified.

Here instead of introducing port_changed you can just remember the
original ((struct sockaddr_in *)uaddr)->sin_port or
((struct sockaddr_in6 *)uaddr)->sin6_port (they have same offset/size so
it can be simplified same way as in sock_addr_convert_ctx_access() for
user_port) ...

> +	int __ret = 0;							       \
> +	if (cgroup_bpf_enabled(type))	{				       \
> +		lock_sock(sk);						       \
> +		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> +							  NULL,		       \
> +							  &port_changed);      \
> +		release_sock(sk);					       \
> +		if (port_changed)					       \

... and then just compare the original and the new ports here.

The benefits will be:
* no need to introduce port_changed field in struct bpf_sock_addr_kern;
* no need to change program instructions;
* no need to think about compiler optimizing out those instructions;
* no need to think about coordination between multiple programs: the flag
  will be set only if the port has actually changed, which is easy to
  reason about from the user's perspective.

wdyt?

> +			*flags |= BIND_NO_CAP_NET_BIND_SERVICE;		       \
> +	}								       \
> +	__ret;								       \
> +})
>  
>  #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				       \
>  	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||		       \
> @@ -453,8 +464,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
>  #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
> +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
>  #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
...

[0] https://lore.kernel.org/bpf/20210121223330.pyk4ljtjirm2zlay@kafai-mbp/

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-22 16:16             ` sdf
@ 2021-01-22 19:38               ` Martin KaFai Lau
  2021-01-22 19:56                 ` Stanislav Fomichev
  0 siblings, 1 reply; 16+ messages in thread
From: Martin KaFai Lau @ 2021-01-22 19:38 UTC (permalink / raw)
  To: sdf; +Cc: netdev, bpf, ast, daniel

On Fri, Jan 22, 2021 at 08:16:40AM -0800, sdf@google.com wrote:
> On 01/21, Martin KaFai Lau wrote:
> > On Thu, Jan 21, 2021 at 04:30:08PM -0800, sdf@google.com wrote:
> > > On 01/21, Martin KaFai Lau wrote:
> > > > On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@google.com wrote:
> > > > > On 01/21, Martin KaFai Lau wrote:
> > > > > > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev
> > wrote:
> > > > > > > BPF rewrites from 111 to 111, but it still should mark the
> > port as
> > > > > > > "changed".
> > > > > > > We also verify that if port isn't touched by BPF, it's still
> > > > prohibited.
> > > > > > >
> > > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > > ---
> > > > > > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88
> > > > +++++++++++++++++++
> > > > > > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > > > > > >  2 files changed, 124 insertions(+)
> > > > > > >  create mode 100644
> > > > tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > >  create mode 100644
> > tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > >
> > > > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..840a04ac9042
> > > > > > > --- /dev/null
> > > > > > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > > @@ -0,0 +1,88 @@
> > > > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > > > +#include <test_progs.h>
> > > > > > > +#include "bind_perm.skel.h"
> > > > > > > +
> > > > > > > +#include <sys/types.h>
> > > > > > > +#include <sys/socket.h>
> > > > > > > +#include <sys/capability.h>
> > > > > > > +
> > > > > > > +static int duration;
> > > > > > > +
> > > > > > > +void try_bind(int port, int expected_errno)
> > > > > > > +{
> > > > > > > +	struct sockaddr_in sin = {};
> > > > > > > +	int fd = -1;
> > > > > > > +
> > > > > > > +	fd = socket(AF_INET, SOCK_STREAM, 0);
> > > > > > > +	if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > > > > > +		goto close_socket;
> > > > > > > +
> > > > > > > +	sin.sin_family = AF_INET;
> > > > > > > +	sin.sin_port = htons(port);
> > > > > > > +
> > > > > > > +	errno = 0;
> > > > > > > +	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > > > > > +	CHECK(errno != expected_errno, "bind", "errno %d, expected
> > %d",
> > > > > > > +	      errno, expected_errno);
> > > > > > > +
> > > > > > > +close_socket:
> > > > > > > +	if (fd >= 0)
> > > > > > > +		close(fd);
> > > > > > > +}
> > > > > > > +
> > > > > > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > > > > > +{
> > > > > > > +	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > > > > > +	cap_t caps;
> > > > > > > +
> > > > > > > +	caps = cap_get_proc();
> > > > > > > +	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > > > > > +		goto free_caps;
> > > > > > > +
> > > > > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > > > &cap_net_bind_service,
> > > > > > > +			       CAP_CLEAR),
> > > > > > > +		  "cap_set_flag", "errno %d", errno))
> > > > > > > +		goto free_caps;
> > > > > > > +
> > > > > > > +	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > > > &cap_net_bind_service,
> > > > > > > +			       CAP_CLEAR),
> > > > > > > +		  "cap_set_flag", "errno %d", errno))
> > > > > > > +		goto free_caps;
> > > > > > > +
> > > > > > > +	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d",
> > errno))
> > > > > > > +		goto free_caps;
> > > > > > > +
> > > > > > > +free_caps:
> > > > > > > +	if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > > > > > +		goto free_caps;
> > > > > > > +}
> > > > > > > +
> > > > > > > +void test_bind_perm(void)
> > > > > > > +{
> > > > > > > +	struct bind_perm *skel;
> > > > > > > +	int cgroup_fd;
> > > > > > > +
> > > > > > > +	cgroup_fd = test__join_cgroup("/bind_perm");
> > > > > > > +	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > > > > > +		return;
> > > > > > > +
> > > > > > > +	skel = bind_perm__open_and_load();
> > > > > > > +	if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > > > > > +		goto close_cgroup_fd;
> > > > > > > +
> > > > > > > +	skel->links.bind_v4_prog =
> > > > > > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > > > > > +	if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > > > > > +		  "cg-attach", "bind4 %ld",
> > > > > > > +		  PTR_ERR(skel->links.bind_v4_prog)))
> > > > > > > +		goto close_skeleton;
> > > > > > > +
> > > > > > > +	cap_net_bind_service(CAP_CLEAR);
> > > > > > > +	try_bind(110, EACCES);
> > > > > > > +	try_bind(111, 0);
> > > > > > > +	cap_net_bind_service(CAP_SET);
> > > > > > > +
> > > > > > > +close_skeleton:
> > > > > > > +	bind_perm__destroy(skel);
> > > > > > > +close_cgroup_fd:
> > > > > > > +	close(cgroup_fd);
> > > > > > > +}
> > > > > > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > > new file mode 100644
> > > > > > > index 000000000000..2194587ec806
> > > > > > > --- /dev/null
> > > > > > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > > @@ -0,0 +1,36 @@
> > > > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > > > +
> > > > > > > +#include <linux/stddef.h>
> > > > > > > +#include <linux/bpf.h>
> > > > > > > +#include <sys/types.h>
> > > > > > > +#include <sys/socket.h>
> > > > > > > +#include <bpf/bpf_helpers.h>
> > > > > > > +#include <bpf/bpf_endian.h>
> > > > > > > +
> > > > > > > +SEC("cgroup/bind4")
> > > > > > > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > > > > > > +{
> > > > > > > +	struct bpf_sock *sk;
> > > > > > > +	__u32 user_ip4;
> > > > > > > +	__u16 user_port;
> > > > > > > +
> > > > > > > +	sk = ctx->sk;
> > > > > > > +	if (!sk)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	if (sk->family != AF_INET)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	if (ctx->type != SOCK_STREAM)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	/* Rewriting to the same value should still cause
> > > > > > > +	 * permission check to be bypassed.
> > > > > > > +	 */
> > > > > > > +	if (ctx->user_port == bpf_htons(111))
> > > > > > > +		ctx->user_port = bpf_htons(111);
> > > > > > iiuc, this overwrite is essentially the way to ensure the bind
> > > > > > will succeed (override CAP_NET_BIND_SERVICE in this particular
> > case?).
> > > > > Correct. The alternative might be to export ignore_perm_check
> > > > > via bpf_sock_addr and make it explicit.
> > > > An explicit field is one option.
> > >
> > > > or a different return value (e.g.
> > BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY).
> > >
> > > > Not sure which one (including the one in the current patch) is better
> > > > at this point.
> > > Same. My reasoning was: if the BPF program rewrites the port, it knows
> > > what it's doing, so it doesn't seem like adding another explicit
> > > signal makes sense. So I decided to go without external api change.
> > >
> > > > Also, from patch 1, if one cgrp bpf prog says no-perm-check,
> > > > it does not matter what the latter cgrp bpf progs have to say?
> > > Right, it doesn't matter. But I think it's fine: if the latter
> > > one rewrites the (previously rewritten) address to something
> > > new, it still wants that address to be bound to, right?
> > >
> > > If some program returns EPERM, it also doesn't matter.
> > >
> > > > > > It seems to be okay if we consider most of the use cases is
> > rewriting
> > > > > > to a different port.
> > > > >
> > > > > > However, it is quite un-intuitive to the bpf prog to overwrite
> > with
> > > > > > the same user_port just to ensure this port can be binded
> > successfully
> > > > > > later.
> > > > > I'm testing a corner case here when the address is rewritten to
> > the same
> > > > > value, but the intention is to rewrite X to Y < 1024.
> > > > It is a legit corner case though.
> > >
> > > > Also, is it possible that the compiler may optimize this
> > > > same-value-assignment out?
> > > Yeah, it's a legit case, that's why I tested it. Good point on
> > > optimizing (can be "healed" with volatile?),
> > hmm... It is too fragile.
> 
> > > but it should only matter if
> > > the program is installed to bypass the permission checks for some ports
> > > (as it does in this selftest). As you mention below, it's not clear
> > what's
> > > the 'default' use-case is. Is it rewriting to a different port or just
> > > bypassing the cap_net_bind_service for some ports? Feels like rewriting
> > > to a different address/port was the reason the hooks were added,
> > > so I was targeting this one.
> > It sounds like having a bpf to bypass permission only without changing
> > the port is not the target but more like a by-product of this change.
> Right, we might have a use-case for that as well, but it's not
> strictly required. We can convert it to be something like
> 'rewrite this magic addr+port to this real addr+port'.
> 
> > How about only bypass cap_net_bind_service when bpf did change the
> > address/port.  Will it become too slow for bind?
> But this is what I'm doing already, isn't it? There is just a by-product
> of triggering it for the same port = port address.
My concern is that the way to trigger this legit by-product is too fragile
(and unintuitive) to be usable.  Either avoid this by-product completely or
have a better way to specify the need for a bypass.

Let's say we do the latter.  After more thought, I think doing it in the
return value is more natural since it is already saying the port/addr
should be EPERM or not.  It makes sense to add BYPASS or not to the
return value.  When one bpf prog says bypass, then it will bypass.
The second bit of the return value can be used to do that.
Thoughts?

> Tracking the real change will require extra space to keep the original
> address and then memcmp to figure out if the change was made.
> Assuming the majority of rewrites don't happen for <1024 ports
> this seems like a bunch of wasted work (vs setting that ctx->port_changed).
Right, hence the earlier question about whether other fields will need
a similar bypass.  If it is only the port, it is pretty cheap to do.
However, it seems other fields will eventually need this in the
future, if not now.

The check "if (old != new)" may be doable within convert_ctx_access()
itself, which would at least help on the space side.
However, I think the return value is an easier and cleaner way.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
  2021-01-22 19:37 ` [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Andrey Ignatov
@ 2021-01-22 19:53   ` Stanislav Fomichev
  2021-01-22 20:08     ` Andrey Ignatov
  0 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2021-01-22 19:53 UTC (permalink / raw)
  To: Andrey Ignatov
  Cc: Martin KaFai Lau, Netdev, bpf, Alexei Starovoitov, Daniel Borkmann

On Fri, Jan 22, 2021 at 11:37 AM Andrey Ignatov <rdna@fb.com> wrote:
>
> Stanislav Fomichev <sdf@google.com> [Wed, 2021-01-20 18:09 -0800]:
> > At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
> > to the privileged ones (< ip_unprivileged_port_start), but it will
> > be rejected later on in the __inet_bind or __inet6_bind.
> >
> > Let's export 'port_changed' event from the BPF program and bypass
> > ip_unprivileged_port_start range check when we've seen that
> > the program explicitly overrode the port. This is accomplished
> > by generating instructions to set ctx->port_changed along with
> > updating ctx->user_port.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> ...
> > @@ -244,17 +245,27 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
> >       if (cgroup_bpf_enabled(type))   {                                      \
> >               lock_sock(sk);                                                 \
> >               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > -                                                       t_ctx);              \
> > +                                                       t_ctx, NULL);        \
> >               release_sock(sk);                                              \
> >       }                                                                      \
> >       __ret;                                                                 \
> >  })
> >
> > -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                              \
> > -     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
> > -
> > -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                              \
> > -     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
> > +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)          \
> > +({                                                                          \
> > +     bool port_changed = false;                                             \
>
> I see the discussion with Martin in [0] on the program overriding the
> port but setting exactly same value as it already contains. Commenting
> on this patch since the code is here.
>
> From what I understand there is no use-case to support overriding the
> port w/o changing the value to just bypass the capability. In this case
> the code can be simplified.
>
> Here instead of introducing port_changed you can just remember the
> original ((struct sockaddr_in *)uaddr)->sin_port or
> ((struct sockaddr_in6 *)uaddr)->sin6_port (they have same offset/size so
> it can be simplified same way as in sock_addr_convert_ctx_access() for
> user_port) ...
>
> > +     int __ret = 0;                                                         \
> > +     if (cgroup_bpf_enabled(type))   {                                      \
> > +             lock_sock(sk);                                                 \
> > +             __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > +                                                       NULL,                \
> > +                                                       &port_changed);      \
> > +             release_sock(sk);                                              \
> > +             if (port_changed)                                              \
>
> ... and then just compare the original and the new ports here.
>
> The benefits will be:
> * no need to introduce port_changed field in struct bpf_sock_addr_kern;
> * no need to do change program instructions;
> * no need to think about compiler optimizing out those instructions;
> * no need to think about multiple programs coordination, the flag will
>   be set only if port has actually changed what is easy to reason about
>   from user perspective.
>
> wdyt?
Martin mentioned in another email that we might want to do that when
we rewrite only the address portion of it.
I think it makes sense. Imagine doing 1.1.1.1:50 -> 2.2.2.2:50; it
seems like that should also work, right?
And in this case, we need to store and compare addresses as well and
it becomes messy :-/
It also seems like it would be nice to have this 'bypass
cap_net_bind_service' without changing the address while we are at it.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-22 19:38               ` Martin KaFai Lau
@ 2021-01-22 19:56                 ` Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2021-01-22 19:56 UTC (permalink / raw)
  To: Martin KaFai Lau; +Cc: Netdev, bpf, Alexei Starovoitov, Daniel Borkmann

On Fri, Jan 22, 2021 at 11:38 AM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Fri, Jan 22, 2021 at 08:16:40AM -0800, sdf@google.com wrote:
> > On 01/21, Martin KaFai Lau wrote:
> > > On Thu, Jan 21, 2021 at 04:30:08PM -0800, sdf@google.com wrote:
> > > > On 01/21, Martin KaFai Lau wrote:
> > > > > On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@google.com wrote:
> > > > > > On 01/21, Martin KaFai Lau wrote:
> > > > > > > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev
> > > wrote:
> > > > > > > > BPF rewrites from 111 to 111, but it still should mark the
> > > port as
> > > > > > > > "changed".
> > > > > > > > We also verify that if port isn't touched by BPF, it's still
> > > > > prohibited.
> > > > > > > >
> > > > > > > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > > > > > > ---
> > > > > > > >  .../selftests/bpf/prog_tests/bind_perm.c      | 88
> > > > > +++++++++++++++++++
> > > > > > > >  tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++
> > > > > > > >  2 files changed, 124 insertions(+)
> > > > > > > >  create mode 100644
> > > > > tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > > >  create mode 100644
> > > tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > > >
> > > > > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > > > new file mode 100644
> > > > > > > > index 000000000000..840a04ac9042
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
> > > > > > > > @@ -0,0 +1,88 @@
> > > > > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > > > > +#include <test_progs.h>
> > > > > > > > +#include "bind_perm.skel.h"
> > > > > > > > +
> > > > > > > > +#include <sys/types.h>
> > > > > > > > +#include <sys/socket.h>
> > > > > > > > +#include <sys/capability.h>
> > > > > > > > +
> > > > > > > > +static int duration;
> > > > > > > > +
> > > > > > > > +void try_bind(int port, int expected_errno)
> > > > > > > > +{
> > > > > > > > + struct sockaddr_in sin = {};
> > > > > > > > + int fd = -1;
> > > > > > > > +
> > > > > > > > + fd = socket(AF_INET, SOCK_STREAM, 0);
> > > > > > > > + if (CHECK(fd < 0, "fd", "errno %d", errno))
> > > > > > > > +         goto close_socket;
> > > > > > > > +
> > > > > > > > + sin.sin_family = AF_INET;
> > > > > > > > + sin.sin_port = htons(port);
> > > > > > > > +
> > > > > > > > + errno = 0;
> > > > > > > > + bind(fd, (struct sockaddr *)&sin, sizeof(sin));
> > > > > > > > + CHECK(errno != expected_errno, "bind", "errno %d, expected
> > > %d",
> > > > > > > > +       errno, expected_errno);
> > > > > > > > +
> > > > > > > > +close_socket:
> > > > > > > > + if (fd >= 0)
> > > > > > > > +         close(fd);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +void cap_net_bind_service(cap_flag_value_t flag)
> > > > > > > > +{
> > > > > > > > + const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
> > > > > > > > + cap_t caps;
> > > > > > > > +
> > > > > > > > + caps = cap_get_proc();
> > > > > > > > + if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
> > > > > > > > +         goto free_caps;
> > > > > > > > +
> > > > > > > > + if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > > > > &cap_net_bind_service,
> > > > > > > > +                        CAP_CLEAR),
> > > > > > > > +           "cap_set_flag", "errno %d", errno))
> > > > > > > > +         goto free_caps;
> > > > > > > > +
> > > > > > > > + if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1,
> > > > > &cap_net_bind_service,
> > > > > > > > +                        CAP_CLEAR),
> > > > > > > > +           "cap_set_flag", "errno %d", errno))
> > > > > > > > +         goto free_caps;
> > > > > > > > +
> > > > > > > > + if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d",
> > > errno))
> > > > > > > > +         goto free_caps;
> > > > > > > > +
> > > > > > > > +free_caps:
> > > > > > > > + if (CHECK(cap_free(caps), "cap_free", "errno %d", errno))
> > > > > > > > +         goto free_caps;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +void test_bind_perm(void)
> > > > > > > > +{
> > > > > > > > + struct bind_perm *skel;
> > > > > > > > + int cgroup_fd;
> > > > > > > > +
> > > > > > > > + cgroup_fd = test__join_cgroup("/bind_perm");
> > > > > > > > + if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
> > > > > > > > +         return;
> > > > > > > > +
> > > > > > > > + skel = bind_perm__open_and_load();
> > > > > > > > + if (CHECK(!skel, "skel-load", "errno %d", errno))
> > > > > > > > +         goto close_cgroup_fd;
> > > > > > > > +
> > > > > > > > + skel->links.bind_v4_prog =
> > > > > > > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
> > > > > > > > + if (CHECK(IS_ERR(skel->links.bind_v4_prog),
> > > > > > > > +           "cg-attach", "bind4 %ld",
> > > > > > > > +           PTR_ERR(skel->links.bind_v4_prog)))
> > > > > > > > +         goto close_skeleton;
> > > > > > > > +
> > > > > > > > + cap_net_bind_service(CAP_CLEAR);
> > > > > > > > + try_bind(110, EACCES);
> > > > > > > > + try_bind(111, 0);
> > > > > > > > + cap_net_bind_service(CAP_SET);
> > > > > > > > +
> > > > > > > > +close_skeleton:
> > > > > > > > + bind_perm__destroy(skel);
> > > > > > > > +close_cgroup_fd:
> > > > > > > > + close(cgroup_fd);
> > > > > > > > +}
> > > > > > > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > > b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > > > new file mode 100644
> > > > > > > > index 000000000000..2194587ec806
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c
> > > > > > > > @@ -0,0 +1,36 @@
> > > > > > > > +// SPDX-License-Identifier: GPL-2.0
> > > > > > > > +
> > > > > > > > +#include <linux/stddef.h>
> > > > > > > > +#include <linux/bpf.h>
> > > > > > > > +#include <sys/types.h>
> > > > > > > > +#include <sys/socket.h>
> > > > > > > > +#include <bpf/bpf_helpers.h>
> > > > > > > > +#include <bpf/bpf_endian.h>
> > > > > > > > +
> > > > > > > > +SEC("cgroup/bind4")
> > > > > > > > +int bind_v4_prog(struct bpf_sock_addr *ctx)
> > > > > > > > +{
> > > > > > > > + struct bpf_sock *sk;
> > > > > > > > + __u32 user_ip4;
> > > > > > > > + __u16 user_port;
> > > > > > > > +
> > > > > > > > + sk = ctx->sk;
> > > > > > > > + if (!sk)
> > > > > > > > +         return 0;
> > > > > > > > +
> > > > > > > > + if (sk->family != AF_INET)
> > > > > > > > +         return 0;
> > > > > > > > +
> > > > > > > > + if (ctx->type != SOCK_STREAM)
> > > > > > > > +         return 0;
> > > > > > > > +
> > > > > > > > + /* Rewriting to the same value should still cause
> > > > > > > > +  * permission check to be bypassed.
> > > > > > > > +  */
> > > > > > > > + if (ctx->user_port == bpf_htons(111))
> > > > > > > > +         ctx->user_port = bpf_htons(111);
> > > > > > > iiuc, this overwrite is essentially the way to ensure the bind
> > > > > > > will succeed (override CAP_NET_BIND_SERVICE in this particular
> > > case?).
> > > > > > Correct. The alternative might be to export ignore_perm_check
> > > > > > via bpf_sock_addr and make it explicit.
> > > > > An explicit field is one option.
> > > >
> > > > > or a different return value (e.g.
> > > BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY).
> > > >
> > > > > Not sure which one (including the one in the current patch) is better
> > > > > at this point.
> > > > Same. My reasoning was: if the BPF program rewrites the port, it knows
> > > > what it's doing, so it doesn't seem like adding another explicit
> > > > signal makes sense. So I decided to go without external api change.
> > > >
> > > > > Also, from patch 1, if one cgrp bpf prog says no-perm-check,
> > > > > it does not matter what the latter cgrp bpf progs have to say?
> > > > Right, it doesn't matter. But I think it's fine: if the latter
> > > > one rewrites the (previously rewritten) address to something
> > > > new, it still wants that address to be bound to, right?
> > > >
> > > > If some program returns EPERM, it also doesn't matter.
> > > >
> > > > > > > It seems to be okay if we consider most of the use cases is
> > > rewriting
> > > > > > > to a different port.
> > > > > >
> > > > > > > However, it is quite un-intuitive to the bpf prog to overwrite
> > > with
> > > > > > > the same user_port just to ensure this port can be binded
> > > successfully
> > > > > > > later.
> > > > > > I'm testing a corner case here when the address is rewritten to
> > > the same
> > > > > > value, but the intention is to rewrite X to Y < 1024.
> > > > > It is a legit corner case though.
> > > >
> > > > > Also, is it possible that the compiler may optimize this
> > > > > same-value-assignment out?
> > > > Yeah, it's a legit case, that's why I tested it. Good point on
> > > > optimizing (can be "healed" with volatile?),
> > > hmm... It is too fragile.
> >
> > > > but it should only matter if
> > > > the program is installed to bypass the permission checks for some ports
> > > > (as it does in this selftest). As you mention below, it's not clear
> > > what's
> > > > the 'default' use-case is. Is it rewriting to a different port or just
> > > > bypassing the cap_net_bind_service for some ports? Feels like rewriting
> > > > to a different address/port was the reason the hooks were added,
> > > > so I was targeting this one.
> > > It sounds like having a bpf to bypass permission only without changing
> > > the port is not the target but more like a by-product of this change.
> > Right, we might have a use-case for that as well, but it's not
> > strictly required. We can convert it to be something like
> > 'rewrite this magic addr+port to this real addr+port'.
> >
> > > How about only bypass cap_net_bind_service when bpf did change the
> > > address/port.  Will it become too slow for bind?
> > But this is what I'm doing already, isn't it? There is just a by-product
> > of triggering it for the same port = port address.
> My concern is that the way to trigger this legit by-product is too
> fragile (and unintuitive) to be usable.  Either avoid this by-product
> completely or have a better way to specify the need for a bypass.
>
> Let's say we do the latter.  After more thought, I think doing it in the
> return value is more natural since it is already saying the port/addr
> should be EPERM or not.  It makes sense to add BYPASS or not to the
> return value.  When one bpf prog says bypass, then it will bypass.
> The second bit of the return value can be used to do that.
> Thoughts?
This sounds like a workable solution as well. It's more explicit, but
it's clearer for the 'bypass' case, I agree.
Let me try to implement it and see whether I hit some problem.

> > Tracking the real change will require extra space to keep the original
> > address and then memcmp to figure out if the change was made.
> > Assuming the majority of rewrites don't happen for <1024 ports
> > this seems like a bunch of wasted work (vs setting that ctx->port_changed).
> Right, hence the earlier question about whether other fields will need
> a similar bypass.  If it is only the port, it is pretty cheap to do.
> However, it seems other fields will eventually need this in the
> future, if not now.
At least user_ip4/user_ip6 should trigger that as well, I agree.

> The check "if (old != new)" may be doable within
> convert_ctx_access() itself, which would at least help on the space side.
> However, I think the return value is an easier and cleaner way.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
  2021-01-22 19:53   ` Stanislav Fomichev
@ 2021-01-22 20:08     ` Andrey Ignatov
  0 siblings, 0 replies; 16+ messages in thread
From: Andrey Ignatov @ 2021-01-22 20:08 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Martin KaFai Lau, Netdev, bpf, Alexei Starovoitov, Daniel Borkmann

Stanislav Fomichev <sdf@google.com> [Fri, 2021-01-22 11:54 -0800]:
> On Fri, Jan 22, 2021 at 11:37 AM Andrey Ignatov <rdna@fb.com> wrote:
> >
> > Stanislav Fomichev <sdf@google.com> [Wed, 2021-01-20 18:09 -0800]:
> > > At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
> > > to the privileged ones (< ip_unprivileged_port_start), but it will
> > > be rejected later on in the __inet_bind or __inet6_bind.
> > >
> > > Let's export 'port_changed' event from the BPF program and bypass
> > > ip_unprivileged_port_start range check when we've seen that
> > > the program explicitly overrode the port. This is accomplished
> > > by generating instructions to set ctx->port_changed along with
> > > updating ctx->user_port.
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > ...
> > > @@ -244,17 +245,27 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
> > >       if (cgroup_bpf_enabled(type))   {                                      \
> > >               lock_sock(sk);                                                 \
> > >               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > > -                                                       t_ctx);              \
> > > +                                                       t_ctx, NULL);        \
> > >               release_sock(sk);                                              \
> > >       }                                                                      \
> > >       __ret;                                                                 \
> > >  })
> > >
> > > -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                              \
> > > -     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
> > > -
> > > -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                              \
> > > -     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
> > > +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)          \
> > > +({                                                                          \
> > > +     bool port_changed = false;                                             \
> >
> > I see the discussion with Martin in [0] on the program overriding the
> > port but setting exactly same value as it already contains. Commenting
> > on this patch since the code is here.
> >
> > From what I understand there is no use-case to support overriding the
> > port w/o changing the value to just bypass the capability. In this case
> > the code can be simplified.
> >
> > Here instead of introducing port_changed you can just remember the
> > original ((struct sockaddr_in *)uaddr)->sin_port or
> > ((struct sockaddr_in6 *)uaddr)->sin6_port (they have same offset/size so
> > it can be simplified same way as in sock_addr_convert_ctx_access() for
> > user_port) ...
> >
> > > +     int __ret = 0;                                                         \
> > > +     if (cgroup_bpf_enabled(type))   {                                      \
> > > +             lock_sock(sk);                                                 \
> > > +             __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > > +                                                       NULL,                \
> > > +                                                       &port_changed);      \
> > > +             release_sock(sk);                                              \
> > > +             if (port_changed)                                              \
> >
> > ... and then just compare the original and the new ports here.
> >
> > The benefits will be:
> > * no need to introduce port_changed field in struct bpf_sock_addr_kern;
> > * no need to do change program instructions;
> > * no need to think about compiler optimizing out those instructions;
> > * no need to think about multiple programs coordination, the flag will
> >   be set only if port has actually changed what is easy to reason about
> >   from user perspective.
> >
> > wdyt?
> Martin mentioned in another email that we might want to do that when
> we rewrite only the address portion of it.
> I think it makes sense. Imagine doing 1.1.1.1:50 -> 2.2.2.2:50 it
> seems like it should also work, right?
> And in this case, we need to store and compare addresses as well and
> it becomes messy :-/

Why does address matter? CAP_NET_BIND_SERVICE is only about ports, not
addresses.

IMO an address change should not matter for bypassing CAP_NET_BIND_SERVICE
in this case, and correspondingly there should be no need to compare
addresses; comparing only the port should be enough.

> It also seems like it would be nice to have this 'bypass
> cap_net_bind_service' without changing the address while we are at it.

Yeah, this part determines the behaviour. I guess it should be use-case
driven. So far it seems to be more of a "nice to have" than something
backed by a real use case, but I could be missing it; please correct me
if that's the case.

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-01-22 23:10 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-21  1:22 [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
2021-01-21  1:22 ` [PATCH bpf-next 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
2021-01-21 22:33   ` Martin KaFai Lau
2021-01-21 22:57     ` sdf
2021-01-21 23:50       ` Martin KaFai Lau
2021-01-22  0:30         ` sdf
2021-01-22  1:27           ` Martin KaFai Lau
2021-01-22 16:16             ` sdf
2021-01-22 19:38               ` Martin KaFai Lau
2021-01-22 19:56                 ` Stanislav Fomichev
2021-01-21 23:53   ` Andrii Nakryiko
2021-01-22  0:09     ` sdf
2021-01-22  0:24       ` Andrii Nakryiko
2021-01-22 19:37 ` [PATCH bpf-next 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Andrey Ignatov
2021-01-22 19:53   ` Stanislav Fomichev
2021-01-22 20:08     ` Andrey Ignatov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.