All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
@ 2021-01-26 19:35 Stanislav Fomichev
  2021-01-26 19:35 ` [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stanislav Fomichev @ 2021-01-26 19:35 UTC (permalink / raw)
  To: netdev, bpf
  Cc: ast, daniel, Stanislav Fomichev, Andrey Ignatov, Martin KaFai Lau

At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
to the privileged ones (< ip_unprivileged_port_start), but it will
be rejected later on in the __inet_bind or __inet6_bind.

Let's add another return value to indicate that CAP_NET_BIND_SERVICE
check should be ignored. Use the same idea as we currently use
in cgroup/egress where bit #1 indicates CN. Instead, for
cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should
be bypassed.

v4:
- Add missing IPv6 support (Martin KaFai Lau)

v3:
- Update description (Martin KaFai Lau)
- Fix capability restore in selftest (Martin KaFai Lau)

v2:
- Switch to explicit return code (Martin KaFai Lau)

Cc: Andrey Ignatov <rdna@fb.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup.h | 38 ++++++++++++++++++++++++---------
 include/linux/bpf.h        | 43 ++++++++++++++++++++++----------------
 include/net/inet_common.h  |  3 +++
 kernel/bpf/cgroup.c        |  8 +++++--
 kernel/bpf/verifier.c      |  3 +++
 net/ipv4/af_inet.c         |  9 +++++---
 net/ipv6/af_inet6.c        |  9 +++++---
 7 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0748fd87969e..6232745bae9b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 				      struct sockaddr *uaddr,
 				      enum bpf_attach_type type,
-				      void *t_ctx);
+				      void *t_ctx,
+				      u32 *flags);
 
 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 				     struct bpf_sock_ops_kern *sock_ops,
@@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 
 #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)				       \
 ({									       \
+	u32 __unused_flags;						       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(type))					       \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-							  NULL);	       \
+							  NULL,		       \
+							  &__unused_flags);    \
 	__ret;								       \
 })
 
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)		       \
 ({									       \
+	u32 __unused_flags;						       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(type))	{				       \
 		lock_sock(sk);						       \
 		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
-							  t_ctx);	       \
+							  t_ctx,	       \
+							  &__unused_flags);    \
 		release_sock(sk);					       \
 	}								       \
 	__ret;								       \
 })
 
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
-	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
+ * via upper bits of return code. The only flag that is supported
+ * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
+ * should be bypassed.
+ */
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)	       \
+({									       \
+	u32 __flags = 0;						       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled(type))	{				       \
+		lock_sock(sk);						       \
+		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
+							  NULL, &__flags);     \
+		release_sock(sk);					       \
+		if (__flags & 1)					       \
+			*flags |= BIND_NO_CAP_NET_BIND_SERVICE;		       \
+	}								       \
+	__ret;								       \
+})
 
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				       \
 	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||		       \
@@ -453,8 +472,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af12fed..08eee284d251 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1073,6 +1073,29 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 			struct bpf_prog *include_prog,
 			struct bpf_prog_array **new_array);
 
+#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, flags)		\
+	({								\
+		struct bpf_prog_array_item *_item;			\
+		struct bpf_prog *_prog;					\
+		struct bpf_prog_array *_array;				\
+		u32 _ret = 1;						\
+		u32 ret;						\
+		migrate_disable();					\
+		rcu_read_lock();					\
+		_array = rcu_dereference(array);			\
+		_item = &_array->items[0];				\
+		while ((_prog = READ_ONCE(_item->prog))) {		\
+			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			ret = func(_prog, ctx);				\
+			_ret &= (ret & 1);				\
+			*(flags) |= (ret >> 1);				\
+			_item++;					\
+		}							\
+		rcu_read_unlock();					\
+		migrate_enable();					\
+		_ret;							\
+	 })
+
 #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
 	({						\
 		struct bpf_prog_array_item *_item;	\
@@ -1120,25 +1143,9 @@ _out:							\
  */
 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)		\
 	({						\
-		struct bpf_prog_array_item *_item;	\
-		struct bpf_prog *_prog;			\
-		struct bpf_prog_array *_array;		\
-		u32 ret;				\
-		u32 _ret = 1;				\
 		u32 _cn = 0;				\
-		migrate_disable();			\
-		rcu_read_lock();			\
-		_array = rcu_dereference(array);	\
-		_item = &_array->items[0];		\
-		while ((_prog = READ_ONCE(_item->prog))) {		\
-			bpf_cgroup_storage_set(_item->cgroup_storage);	\
-			ret = func(_prog, ctx);		\
-			_ret &= (ret & 1);		\
-			_cn |= (ret & 2);		\
-			_item++;			\
-		}					\
-		rcu_read_unlock();			\
-		migrate_enable();			\
+		u32 _ret;				\
+		_ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_cn); \
 		if (_ret)				\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
 		else					\
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index cb2818862919..9ba935c15869 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -41,6 +41,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 #define BIND_WITH_LOCK			(1 << 1)
 /* Called from BPF program. */
 #define BIND_FROM_BPF			(1 << 2)
+/* Skip CAP_NET_BIND_SERVICE check. */
+#define BIND_NO_CAP_NET_BIND_SERVICE	(1 << 3)
+
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index da649f20d6b2..cdf3c7e611d9 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1055,6 +1055,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  * @uaddr: sockaddr struct provided by user
  * @type: The type of program to be exectuted
  * @t_ctx: Pointer to attach type specific context
+ * @flags: Pointer to u32 which contains higher bits of BPF program
+ *         return value (OR'ed together).
  *
  * socket is expected to be of type INET or INET6.
  *
@@ -1064,7 +1066,8 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 				      struct sockaddr *uaddr,
 				      enum bpf_attach_type type,
-				      void *t_ctx)
+				      void *t_ctx,
+				      u32 *flags)
 {
 	struct bpf_sock_addr_kern ctx = {
 		.sk = sk,
@@ -1087,7 +1090,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	}
 
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+	ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
+				       BPF_PROG_RUN, flags);
 
 	return ret == 1 ? 0 : -EPERM;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d0eae51b31e4..972fc38eb62d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7986,6 +7986,9 @@ static int check_return_code(struct bpf_verifier_env *env)
 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
 			range = tnum_range(1, 1);
+		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
+			range = tnum_range(0, 3);
 		break;
 	case BPF_PROG_TYPE_CGROUP_SKB:
 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6ba2930ff49b..aaa94bea19c3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -438,6 +438,7 @@ EXPORT_SYMBOL(inet_release);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
+	u32 flags = BIND_WITH_LOCK;
 	int err;
 
 	/* If the socket has its own bind function then use it. (RAW) */
@@ -450,11 +451,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+						 BPF_CGROUP_INET4_BIND, &flags);
 	if (err)
 		return err;
 
-	return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+	return __inet_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet_bind);
 
@@ -499,7 +501,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
 	snum = ntohs(addr->sin_port);
 	err = -EACCES;
-	if (snum && inet_port_requires_bind_service(net, snum) &&
+	if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+	    snum && inet_port_requires_bind_service(net, snum) &&
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		goto out;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b9c654836b72..f091fe9b4da5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -295,7 +295,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		return -EINVAL;
 
 	snum = ntohs(addr->sin6_port);
-	if (snum && inet_port_requires_bind_service(net, snum) &&
+	if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
+	    snum && inet_port_requires_bind_service(net, snum) &&
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
@@ -439,6 +440,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sock *sk = sock->sk;
+	u32 flags = BIND_WITH_LOCK;
 	int err = 0;
 
 	/* If the socket has its own bind function then use it. */
@@ -451,11 +453,12 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	/* BPF prog is run before any checks are done so that if the prog
 	 * changes context in a wrong way it will be caught.
 	 */
-	err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
+	err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+						 BPF_CGROUP_INET6_BIND, &flags);
 	if (err)
 		return err;
 
-	return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
+	return __inet6_bind(sk, uaddr, addr_len, flags);
 }
 EXPORT_SYMBOL(inet6_bind);
 
-- 
2.30.0.280.ga3ce27912f-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-26 19:35 [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
@ 2021-01-26 19:35 ` Stanislav Fomichev
  2021-01-26 20:47   ` Martin KaFai Lau
  2021-01-26 20:44 ` [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Martin KaFai Lau
  2021-01-27 18:24 ` Andrey Ignatov
  2 siblings, 1 reply; 6+ messages in thread
From: Stanislav Fomichev @ 2021-01-26 19:35 UTC (permalink / raw)
  To: netdev, bpf
  Cc: ast, daniel, Stanislav Fomichev, Andrey Ignatov, Martin KaFai Lau

Return 3 to indicate that permission check for port 111
should be skipped.

Cc: Andrey Ignatov <rdna@fb.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 .../selftests/bpf/prog_tests/bind_perm.c      | 109 ++++++++++++++++++
 tools/testing/selftests/bpf/progs/bind_perm.c |  45 ++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c
 create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
new file mode 100644
index 000000000000..d0f06e40c16d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "bind_perm.skel.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/capability.h>
+
+static int duration;
+
+void try_bind(int family, int port, int expected_errno)
+{
+	struct sockaddr_storage addr = {};
+	struct sockaddr_in6 *sin6;
+	struct sockaddr_in *sin;
+	int fd = -1;
+
+	fd = socket(family, SOCK_STREAM, 0);
+	if (CHECK(fd < 0, "fd", "errno %d", errno))
+		goto close_socket;
+
+	if (family == AF_INET) {
+		sin = (struct sockaddr_in *)&addr;
+		sin->sin_family = family;
+		sin->sin_port = htons(port);
+	} else {
+		sin6 = (struct sockaddr_in6 *)&addr;
+		sin6->sin6_family = family;
+		sin6->sin6_port = htons(port);
+	}
+
+	errno = 0;
+	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(errno, expected_errno, "bind");
+
+close_socket:
+	if (fd >= 0)
+		close(fd);
+}
+
+bool cap_net_bind_service(cap_flag_value_t flag)
+{
+	const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE;
+	cap_flag_value_t original_value;
+	bool was_effective = false;
+	cap_t caps;
+
+	caps = cap_get_proc();
+	if (CHECK(!caps, "cap_get_proc", "errno %d", errno))
+		goto free_caps;
+
+	if (CHECK(cap_get_flag(caps, CAP_NET_BIND_SERVICE, CAP_EFFECTIVE,
+			       &original_value),
+		  "cap_get_flag", "errno %d", errno))
+		goto free_caps;
+
+	was_effective = (original_value == CAP_SET);
+
+	if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service,
+			       flag),
+		  "cap_set_flag", "errno %d", errno))
+		goto free_caps;
+
+	if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno))
+		goto free_caps;
+
+free_caps:
+	CHECK(cap_free(caps), "cap_free", "errno %d", errno);
+	return was_effective;
+}
+
+void test_bind_perm(void)
+{
+	bool cap_was_effective;
+	struct bind_perm *skel;
+	int cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/bind_perm");
+	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+		return;
+
+	skel = bind_perm__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		goto close_cgroup_fd;
+
+	skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel, "bind_v4_prog"))
+		goto close_skeleton;
+
+	skel->links.bind_v6_prog = bpf_program__attach_cgroup(skel->progs.bind_v6_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel, "bind_v6_prog"))
+		goto close_skeleton;
+
+	cap_was_effective = cap_net_bind_service(CAP_CLEAR);
+
+	try_bind(AF_INET, 110, EACCES);
+	try_bind(AF_INET6, 110, EACCES);
+
+	try_bind(AF_INET, 111, 0);
+	try_bind(AF_INET6, 111, 0);
+
+	if (cap_was_effective)
+		cap_net_bind_service(CAP_SET);
+
+close_skeleton:
+	bind_perm__destroy(skel);
+close_cgroup_fd:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c b/tools/testing/selftests/bpf/progs/bind_perm.c
new file mode 100644
index 000000000000..7bd2a027025d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind_perm.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+static __always_inline int bind_prog(struct bpf_sock_addr *ctx, int family)
+{
+	struct bpf_sock *sk;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 0;
+
+	if (sk->family != family)
+		return 0;
+
+	if (ctx->type != SOCK_STREAM)
+		return 0;
+
+	/* Return 1 OR'ed with the first bit set to indicate
+	 * that CAP_NET_BIND_SERVICE should be bypassed.
+	 */
+	if (ctx->user_port == bpf_htons(111))
+		return (1 | 2);
+
+	return 1;
+}
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+	return bind_prog(ctx, AF_INET);
+}
+
+SEC("cgroup/bind6")
+int bind_v6_prog(struct bpf_sock_addr *ctx)
+{
+	return bind_prog(ctx, AF_INET6);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.30.0.280.ga3ce27912f-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
  2021-01-26 19:35 [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
  2021-01-26 19:35 ` [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
@ 2021-01-26 20:44 ` Martin KaFai Lau
  2021-01-27 18:24 ` Andrey Ignatov
  2 siblings, 0 replies; 6+ messages in thread
From: Martin KaFai Lau @ 2021-01-26 20:44 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, bpf, ast, daniel, Andrey Ignatov

On Tue, Jan 26, 2021 at 11:35:43AM -0800, Stanislav Fomichev wrote:
> At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
> to the privileged ones (< ip_unprivileged_port_start), but it will
> be rejected later on in the __inet_bind or __inet6_bind.
> 
> Let's add another return value to indicate that CAP_NET_BIND_SERVICE
> check should be ignored. Use the same idea as we currently use
> in cgroup/egress where bit #1 indicates CN. Instead, for
> cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should
> be bypassed.
> 
> v4:
> - Add missing IPv6 support (Martin KaFai Lau)
> 
> v3:
> - Update description (Martin KaFai Lau)
> - Fix capability restore in selftest (Martin KaFai Lau)
> 
> v2:
> - Switch to explicit return code (Martin KaFai Lau)
Reviewed-by: Martin KaFai Lau <kafai@fb.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works
  2021-01-26 19:35 ` [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
@ 2021-01-26 20:47   ` Martin KaFai Lau
  0 siblings, 0 replies; 6+ messages in thread
From: Martin KaFai Lau @ 2021-01-26 20:47 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, bpf, ast, daniel, Andrey Ignatov

On Tue, Jan 26, 2021 at 11:35:44AM -0800, Stanislav Fomichev wrote:
> Return 3 to indicate that permission check for port 111
> should be skipped.
Acked-by: Martin KaFai Lau <kafai@fb.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
  2021-01-26 19:35 [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
  2021-01-26 19:35 ` [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
  2021-01-26 20:44 ` [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Martin KaFai Lau
@ 2021-01-27 18:24 ` Andrey Ignatov
  2021-01-27 18:28   ` Stanislav Fomichev
  2 siblings, 1 reply; 6+ messages in thread
From: Andrey Ignatov @ 2021-01-27 18:24 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, bpf, ast, daniel, Martin KaFai Lau

Stanislav Fomichev <sdf@google.com> [Tue, 2021-01-26 11:36 -0800]:
> At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
> to the privileged ones (< ip_unprivileged_port_start), but it will
> be rejected later on in the __inet_bind or __inet6_bind.
> 
> Let's add another return value to indicate that CAP_NET_BIND_SERVICE
> check should be ignored. Use the same idea as we currently use
> in cgroup/egress where bit #1 indicates CN. Instead, for
> cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should
> be bypassed.
> 
> v4:
> - Add missing IPv6 support (Martin KaFai Lau)
> 
> v3:
> - Update description (Martin KaFai Lau)
> - Fix capability restore in selftest (Martin KaFai Lau)
> 
> v2:
> - Switch to explicit return code (Martin KaFai Lau)
> 
> Cc: Andrey Ignatov <rdna@fb.com>
> Cc: Martin KaFai Lau <kafai@fb.com>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>

Explicit return code looks much cleaner than both what v1 did and what I
proposed earlier (compare port before/after).

Just one nit from me but otherwide looks good.

Acked-by: Andrey Ignatov <rdna@fb.com>

...
> @@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  
>  #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)				       \
>  ({									       \
> +	u32 __unused_flags;						       \
>  	int __ret = 0;							       \
>  	if (cgroup_bpf_enabled(type))					       \
>  		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> -							  NULL);	       \
> +							  NULL,		       \
> +							  &__unused_flags);    \
>  	__ret;								       \
>  })
>  
>  #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)		       \
>  ({									       \
> +	u32 __unused_flags;						       \
>  	int __ret = 0;							       \
>  	if (cgroup_bpf_enabled(type))	{				       \
>  		lock_sock(sk);						       \
>  		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> -							  t_ctx);	       \
> +							  t_ctx,	       \
> +							  &__unused_flags);    \
>  		release_sock(sk);					       \
>  	}								       \
>  	__ret;								       \
>  })
>  
> -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)			       \
> -	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
> -
> -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)			       \
> -	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
> +/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
> + * via upper bits of return code. The only flag that is supported
> + * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
> + * should be bypassed.
> + */
> +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)	       \
> +({									       \
> +	u32 __flags = 0;						       \
> +	int __ret = 0;							       \
> +	if (cgroup_bpf_enabled(type))	{				       \
> +		lock_sock(sk);						       \
> +		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> +							  NULL, &__flags);     \
> +		release_sock(sk);					       \
> +		if (__flags & 1)					       \
> +			*flags |= BIND_NO_CAP_NET_BIND_SERVICE;		       \

Nit: It took me some time to realize that there are two different
"flags": one to pass to __cgroup_bpf_run_filter_sock_addr() and another
to pass to __inet{,6}_bind/BPF_CGROUP_RUN_PROG_INET_BIND_LOCK that both carry
"BIND_NO_CAP_NET_BIND_SERVICE" flag but do it differently:
* hard-coded 0x1 in the former case;
* and BIND_NO_CAP_NET_BIND_SERVICE == (1 << 3) in the latter.

I'm not sure how to make it more readable: maybe name `flags` and
`__flags` differently to highlight the difference (`bind_flags` and
`__flags`?) and add a #define for the "1" here?

In anycase IMO it's not worth a respin and can be addressed by a
follow-up if you agree.

> +	}								       \
> +	__ret;								       \
> +})
>  
>  #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk)				       \
>  	((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) ||		       \

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start
  2021-01-27 18:24 ` Andrey Ignatov
@ 2021-01-27 18:28   ` Stanislav Fomichev
  0 siblings, 0 replies; 6+ messages in thread
From: Stanislav Fomichev @ 2021-01-27 18:28 UTC (permalink / raw)
  To: Andrey Ignatov
  Cc: Netdev, bpf, Alexei Starovoitov, Daniel Borkmann, Martin KaFai Lau

On Wed, Jan 27, 2021 at 10:24 AM Andrey Ignatov <rdna@fb.com> wrote:
>
> Stanislav Fomichev <sdf@google.com> [Tue, 2021-01-26 11:36 -0800]:
> > At the moment, BPF_CGROUP_INET{4,6}_BIND hooks can rewrite user_port
> > to the privileged ones (< ip_unprivileged_port_start), but it will
> > be rejected later on in the __inet_bind or __inet6_bind.
> >
> > Let's add another return value to indicate that CAP_NET_BIND_SERVICE
> > check should be ignored. Use the same idea as we currently use
> > in cgroup/egress where bit #1 indicates CN. Instead, for
> > cgroup/bind{4,6}, bit #1 indicates that CAP_NET_BIND_SERVICE should
> > be bypassed.
> >
> > v4:
> > - Add missing IPv6 support (Martin KaFai Lau)
> >
> > v3:
> > - Update description (Martin KaFai Lau)
> > - Fix capability restore in selftest (Martin KaFai Lau)
> >
> > v2:
> > - Switch to explicit return code (Martin KaFai Lau)
> >
> > Cc: Andrey Ignatov <rdna@fb.com>
> > Cc: Martin KaFai Lau <kafai@fb.com>
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
>
> Explicit return code looks much cleaner than both what v1 did and what I
> proposed earlier (compare port before/after).
>
> Just one nit from me but otherwide looks good.
>
> Acked-by: Andrey Ignatov <rdna@fb.com>
>
> ...
> > @@ -231,30 +232,48 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
> >
> >  #define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type)                                     \
> >  ({                                                                          \
> > +     u32 __unused_flags;                                                    \
> >       int __ret = 0;                                                         \
> >       if (cgroup_bpf_enabled(type))                                          \
> >               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > -                                                       NULL);               \
> > +                                                       NULL,                \
> > +                                                       &__unused_flags);    \
> >       __ret;                                                                 \
> >  })
> >
> >  #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx)                 \
> >  ({                                                                          \
> > +     u32 __unused_flags;                                                    \
> >       int __ret = 0;                                                         \
> >       if (cgroup_bpf_enabled(type))   {                                      \
> >               lock_sock(sk);                                                 \
> >               __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > -                                                       t_ctx);              \
> > +                                                       t_ctx,               \
> > +                                                       &__unused_flags);    \
> >               release_sock(sk);                                              \
> >       }                                                                      \
> >       __ret;                                                                 \
> >  })
> >
> > -#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr)                              \
> > -     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
> > -
> > -#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr)                              \
> > -     BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
> > +/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
> > + * via upper bits of return code. The only flag that is supported
> > + * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
> > + * should be bypassed.
> > + */
> > +#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags)          \
> > +({                                                                          \
> > +     u32 __flags = 0;                                                       \
> > +     int __ret = 0;                                                         \
> > +     if (cgroup_bpf_enabled(type))   {                                      \
> > +             lock_sock(sk);                                                 \
> > +             __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type,     \
> > +                                                       NULL, &__flags);     \
> > +             release_sock(sk);                                              \
> > +             if (__flags & 1)                                               \
> > +                     *flags |= BIND_NO_CAP_NET_BIND_SERVICE;                \
>
> Nit: It took me some time to realize that there are two different
> "flags": one to pass to __cgroup_bpf_run_filter_sock_addr() and another
> to pass to __inet{,6}_bind/BPF_CGROUP_RUN_PROG_INET_BIND_LOCK that both carry
> "BIND_NO_CAP_NET_BIND_SERVICE" flag but do it differently:
> * hard-coded 0x1 in the former case;
> * and BIND_NO_CAP_NET_BIND_SERVICE == (1 << 3) in the latter.
>
> I'm not sure how to make it more readable: maybe name `flags` and
> `__flags` differently to highlight the difference (`bind_flags` and
> `__flags`?) and add a #define for the "1" here?
>
> In anycase IMO it's not worth a respin and can be addressed by a
> follow-up if you agree.
Yeah, I agree, I didn't stress too much about it because we also
have ret and _ret in BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY
(and now BPF_PROG_RUN_ARRAY_FLAGS), but it looks confusing.
Let me respin with bind_flags, shouldn't be too much work and
can help with the readability in the future. Thanks for the review!

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-01-27 18:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-26 19:35 [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Stanislav Fomichev
2021-01-26 19:35 ` [PATCH bpf-next v4 2/2] selftests/bpf: verify that rebinding to port < 1024 from BPF works Stanislav Fomichev
2021-01-26 20:47   ` Martin KaFai Lau
2021-01-26 20:44 ` [PATCH bpf-next v4 1/2] bpf: allow rewriting to ports under ip_unprivileged_port_start Martin KaFai Lau
2021-01-27 18:24 ` Andrey Ignatov
2021-01-27 18:28   ` Stanislav Fomichev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.