[PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
@ 2019-01-18  0:41 Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
                   ` (6 more replies)
  0 siblings, 7 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  0:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
socket creation and there is no way to know when the socket is being
closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
that trigger when the socket is closed.

Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
Hooks have read-only access to all fields of struct bpf_sock.

First patch adds hooks, the rest of the patches add uapi and tests to make
sure these hooks work.

Stanislav Fomichev (5):
  bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
  tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in
    libbpf/bpftool
  selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
    test_section_names.c
  selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c
  selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
    test_sock_addr.c

 include/linux/bpf-cgroup.h                    |   6 +
 include/net/inet_common.h                     |   1 +
 include/uapi/linux/bpf.h                      |   2 +
 kernel/bpf/syscall.c                          |   8 ++
 net/core/filter.c                             |   7 +
 net/ipv4/af_inet.c                            |  13 +-
 net/ipv6/af_inet6.c                           |   5 +-
 tools/bpf/bpftool/cgroup.c                    |   2 +
 tools/include/uapi/linux/bpf.h                |   2 +
 tools/lib/bpf/libbpf.c                        |   4 +
 .../selftests/bpf/test_section_names.c        |  10 ++
 tools/testing/selftests/bpf/test_sock.c       | 119 ++++++++++++++++
 tools/testing/selftests/bpf/test_sock_addr.c  | 131 +++++++++++++++++-
 13 files changed, 307 insertions(+), 3 deletions(-)

-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
@ 2019-01-18  0:41 ` Stanislav Fomichev
  2019-01-18  2:22   ` Andrey Ignatov
                     ` (2 more replies)
  2019-01-18  0:41 ` [PATCH bpf-next 2/5] tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in libbpf/bpftool Stanislav Fomichev
                   ` (5 subsequent siblings)
  6 siblings, 3 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  0:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

These hooks mirror existing BPF_CGROUP_INET{4,6}_SOCK hooks (which
trigger upon socket creation), but trigger when the socket is closed.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpf-cgroup.h |  6 ++++++
 include/net/inet_common.h  |  1 +
 include/uapi/linux/bpf.h   |  2 ++
 kernel/bpf/syscall.c       |  8 ++++++++
 net/core/filter.c          |  7 +++++++
 net/ipv4/af_inet.c         | 13 ++++++++++++-
 net/ipv6/af_inet6.c        |  5 ++++-
 7 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 588dd5f0bd85..31fcfe215d80 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -176,6 +176,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
 
+#define BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE(sk)			       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_SOCK_RELEASE)
+
+#define BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sk)			       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_SOCK_RELEASE)
+
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)				       \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
 
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 975901a95c0f..0e64046afe30 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -17,6 +17,7 @@ struct sockaddr;
 struct socket;
 
 int inet_release(struct socket *sock);
+int __inet_release(struct socket *sock);
 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 			int addr_len, int flags);
 int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c43884f295..8e78aa28a42e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -186,6 +186,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
 	BPF_FLOW_DISSECTOR,
+	BPF_CGROUP_INET4_SOCK_RELEASE,
+	BPF_CGROUP_INET6_SOCK_RELEASE,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..6fa113448580 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1417,6 +1417,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 	case BPF_PROG_TYPE_CGROUP_SOCK:
 		switch (expected_attach_type) {
 		case BPF_CGROUP_INET_SOCK_CREATE:
+		case BPF_CGROUP_INET4_SOCK_RELEASE:
+		case BPF_CGROUP_INET6_SOCK_RELEASE:
 		case BPF_CGROUP_INET4_POST_BIND:
 		case BPF_CGROUP_INET6_POST_BIND:
 			return 0;
@@ -1709,6 +1711,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET4_SOCK_RELEASE:
+	case BPF_CGROUP_INET6_SOCK_RELEASE:
 	case BPF_CGROUP_INET4_POST_BIND:
 	case BPF_CGROUP_INET6_POST_BIND:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
@@ -1791,6 +1795,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_SKB;
 		break;
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET4_SOCK_RELEASE:
+	case BPF_CGROUP_INET6_SOCK_RELEASE:
 	case BPF_CGROUP_INET4_POST_BIND:
 	case BPF_CGROUP_INET6_POST_BIND:
 		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
@@ -1841,6 +1847,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_INGRESS:
 	case BPF_CGROUP_INET_EGRESS:
 	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET4_SOCK_RELEASE:
+	case BPF_CGROUP_INET6_SOCK_RELEASE:
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
 	case BPF_CGROUP_INET4_POST_BIND:
diff --git a/net/core/filter.c b/net/core/filter.c
index 2b3b436ef545..b4da6793fdbc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5885,12 +5885,16 @@ static bool __sock_filter_check_attach_type(int off,
 		switch (attach_type) {
 		case BPF_CGROUP_INET_SOCK_CREATE:
 			goto full_access;
+		case BPF_CGROUP_INET4_SOCK_RELEASE:
+		case BPF_CGROUP_INET6_SOCK_RELEASE:
+			goto read_only;
 		default:
 			return false;
 		}
 	case bpf_ctx_range(struct bpf_sock, src_ip4):
 		switch (attach_type) {
 		case BPF_CGROUP_INET4_POST_BIND:
+		case BPF_CGROUP_INET4_SOCK_RELEASE:
 			goto read_only;
 		default:
 			return false;
@@ -5898,6 +5902,7 @@ static bool __sock_filter_check_attach_type(int off,
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 		switch (attach_type) {
 		case BPF_CGROUP_INET6_POST_BIND:
+		case BPF_CGROUP_INET6_SOCK_RELEASE:
 			goto read_only;
 		default:
 			return false;
@@ -5906,6 +5911,8 @@ static bool __sock_filter_check_attach_type(int off,
 		switch (attach_type) {
 		case BPF_CGROUP_INET4_POST_BIND:
 		case BPF_CGROUP_INET6_POST_BIND:
+		case BPF_CGROUP_INET4_SOCK_RELEASE:
+		case BPF_CGROUP_INET6_SOCK_RELEASE:
 			goto read_only;
 		default:
 			return false;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0dfb72c46671..b703ad242365 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -403,7 +403,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
  *	function we are destroying the object and from then on nobody
  *	should refer to it.
  */
-int inet_release(struct socket *sock)
+int __inet_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 
@@ -429,6 +429,17 @@ int inet_release(struct socket *sock)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(__inet_release);
+
+int inet_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	if (sk && !sk->sk_kern_sock)
+		BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE(sk);
+
+	return __inet_release(sock);
+}
 EXPORT_SYMBOL(inet_release);
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d99753b5e39b..44c86595eba8 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -464,13 +464,16 @@ int inet6_release(struct socket *sock)
 	if (!sk)
 		return -EINVAL;
 
+	if (!sk->sk_kern_sock)
+		BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sock->sk);
+
 	/* Free mc lists */
 	ipv6_sock_mc_close(sk);
 
 	/* Free ac lists */
 	ipv6_sock_ac_close(sk);
 
-	return inet_release(sock);
+	return __inet_release(sock);
 }
 EXPORT_SYMBOL(inet6_release);
 
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 2/5] tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in libbpf/bpftool
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
@ 2019-01-18  0:41 ` Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 3/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_section_names.c Stanislav Fomichev
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  0:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Sync uapi and make sure libbpf and bpftools are aware of the hooks.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/bpf/bpftool/cgroup.c     | 2 ++
 tools/include/uapi/linux/bpf.h | 2 ++
 tools/lib/bpf/libbpf.c         | 4 ++++
 3 files changed, 8 insertions(+)

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 4b5c8da2a7c0..3474f810ba5a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -31,6 +31,8 @@ static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
 	[BPF_CGROUP_INET_EGRESS] = "egress",
 	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+	[BPF_CGROUP_INET4_SOCK_RELEASE] = "sock_release4",
+	[BPF_CGROUP_INET6_SOCK_RELEASE] = "sock_release6",
 	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
 	[BPF_CGROUP_DEVICE] = "device",
 	[BPF_CGROUP_INET4_BIND] = "bind4",
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c43884f295..8e78aa28a42e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -186,6 +186,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
 	BPF_FLOW_DISSECTOR,
+	BPF_CGROUP_INET4_SOCK_RELEASE,
+	BPF_CGROUP_INET6_SOCK_RELEASE,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 169e347c76f6..fdadd1ac22ad 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2626,6 +2626,10 @@ static const struct {
 	BPF_APROG_SEC("cgroup_skb/egress",	BPF_PROG_TYPE_CGROUP_SKB,
 						BPF_CGROUP_INET_EGRESS),
 	BPF_APROG_COMPAT("cgroup/skb",		BPF_PROG_TYPE_CGROUP_SKB),
+	BPF_EAPROG_SEC("cgroup/sock_release4",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET4_SOCK_RELEASE),
+	BPF_EAPROG_SEC("cgroup/sock_release6",	BPF_PROG_TYPE_CGROUP_SOCK,
+						BPF_CGROUP_INET6_SOCK_RELEASE),
 	BPF_APROG_SEC("cgroup/sock",		BPF_PROG_TYPE_CGROUP_SOCK,
 						BPF_CGROUP_INET_SOCK_CREATE),
 	BPF_EAPROG_SEC("cgroup/post_bind4",	BPF_PROG_TYPE_CGROUP_SOCK,
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 3/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_section_names.c
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 2/5] tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in libbpf/bpftool Stanislav Fomichev
@ 2019-01-18  0:41 ` Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 4/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c Stanislav Fomichev
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  0:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Make sure section name detection works for new hooks.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/test_section_names.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
index 7c4f41572b1c..c794005c3093 100644
--- a/tools/testing/selftests/bpf/test_section_names.c
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -55,6 +55,16 @@ static struct sec_name_test tests[] = {
 		{0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
 		{0, BPF_CGROUP_INET_SOCK_CREATE},
 	},
+	{
+		"cgroup/sock_release4",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_SOCK_RELEASE},
+		{0, BPF_CGROUP_INET4_SOCK_RELEASE},
+	},
+	{
+		"cgroup/sock_release6",
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_SOCK_RELEASE},
+		{0, BPF_CGROUP_INET6_SOCK_RELEASE},
+	},
 	{
 		"cgroup/post_bind4",
 		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND},
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 4/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
                   ` (2 preceding siblings ...)
  2019-01-18  0:41 ` [PATCH bpf-next 3/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_section_names.c Stanislav Fomichev
@ 2019-01-18  0:41 ` Stanislav Fomichev
  2019-01-18  0:41 ` [PATCH bpf-next 5/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock_addr.c Stanislav Fomichev
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  0:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Add context access tests for BPF_CGROUP_INET{4,6}_SOCK_RELEASE.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/test_sock.c | 119 ++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 561ffb6d6433..07ef99b52640 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -309,6 +309,125 @@ static struct sock_test tests[] = {
 		0,
 		SUCCESS,
 	},
+	{
+		"sock_release4 load with valid access: src_ip4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		0,
+		SUCCESS,
+	},
+	{
+		"sock_release4 load with valid access: src_port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		0,
+		SUCCESS,
+	},
+	{
+		"sock_release4 load with invalid access: src_ip6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[0])),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"sock_release4 load with valid access: mark",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, mark)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		0,
+		SUCCESS,
+	},
+	{
+		"sock_release6 load with valid access: src_ip6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[0])),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		AF_INET6,
+		SOCK_STREAM,
+		"::",
+		0,
+		SUCCESS,
+	},
+	{
+		"sock_release6 load with valid access: src_port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		AF_INET6,
+		SOCK_STREAM,
+		"::",
+		0,
+		SUCCESS,
+	},
+	{
+		"sock_release6 load with invalid access: src_ip4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
 };
 
 static size_t probe_prog_length(const struct bpf_insn *fp)
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH bpf-next 5/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock_addr.c
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
                   ` (3 preceding siblings ...)
  2019-01-18  0:41 ` [PATCH bpf-next 4/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c Stanislav Fomichev
@ 2019-01-18  0:41 ` Stanislav Fomichev
  2019-01-18  2:45   ` Andrey Ignatov
  2019-01-18  1:02 ` [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Eric Dumazet
  2019-01-18  2:36 ` Andrey Ignatov
  6 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  0:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, ast, daniel, Stanislav Fomichev

Count the number of times hooks were executed in the cgroup
local storage to make sure they trigger.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 tools/testing/selftests/bpf/test_sock_addr.c | 131 ++++++++++++++++++-
 1 file changed, 130 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 3f110eaaf29c..92845bf6fac9 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -82,6 +82,9 @@ struct sock_addr_test {
 	} expected_result;
 };
 
+/* Per-test storage for additional cgroup storage map */
+static int cg_storage_fd;
+
 static int bind4_prog_load(const struct sock_addr_test *test);
 static int bind6_prog_load(const struct sock_addr_test *test);
 static int connect4_prog_load(const struct sock_addr_test *test);
@@ -94,6 +97,7 @@ static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
 static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
 static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
 static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
+static int release46_prog_load(const struct sock_addr_test *test);
 
 static struct sock_addr_test tests[] = {
 	/* bind */
@@ -507,6 +511,35 @@ static struct sock_addr_test tests[] = {
 		SRC6_REWRITE_IP,
 		SYSCALL_EPERM,
 	},
+	/* release */
+	{
+		"release4: make sure the hook triggers",
+		release46_prog_load,
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		BPF_CGROUP_INET4_SOCK_RELEASE,
+		AF_INET,
+		SOCK_STREAM,
+		SERV4_REWRITE_IP,
+		0,
+		SERV4_REWRITE_IP,
+		0,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"release6: make sure the hook triggers",
+		release46_prog_load,
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		BPF_CGROUP_INET6_SOCK_RELEASE,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_REWRITE_IP,
+		0,
+		SERV6_REWRITE_IP,
+		0,
+		NULL,
+		SUCCESS,
+	},
 };
 
 static int mk_sockaddr(int domain, const char *ip, unsigned short port,
@@ -554,7 +587,15 @@ static int load_insns(const struct sock_addr_test *test,
 	int ret;
 
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
-	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+	switch (test->expected_attach_type) {
+	case BPF_CGROUP_INET4_SOCK_RELEASE:
+	case BPF_CGROUP_INET6_SOCK_RELEASE:
+		load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+		break;
+	default:
+		load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+		break;
+	}
 	load_attr.expected_attach_type = test->expected_attach_type;
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
@@ -916,6 +957,35 @@ static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
 	return load_path(test, SENDMSG6_PROG_PATH);
 }
 
+static int release46_prog_load(const struct sock_addr_test *test)
+{
+	struct bpf_cgroup_storage_key key;
+	unsigned long long value;
+	struct bpf_insn insns[] = {
+		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_local_storage),
+		BPF_MOV64_IMM(BPF_REG_1, 1),
+		BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+		BPF_EXIT_INSN(),
+	};
+
+	cg_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+				sizeof(value), 0, 0);
+	if (cg_storage_fd < 0) {
+		printf("Failed to create map: %s\n", strerror(errno));
+		return -1;
+	}
+
+	insns[0].imm = cg_storage_fd;
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
 static int cmp_addr(const struct sockaddr_storage *addr1,
 		    const struct sockaddr_storage *addr2, int cmp_port)
 {
@@ -1346,6 +1416,57 @@ static int run_sendmsg_test_case(const struct sock_addr_test *test)
 	return err;
 }
 
+static int run_release_test_case(const struct sock_addr_test *test)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	struct bpf_cgroup_storage_key key;
+	unsigned long long value;
+	int servfd = -1;
+
+	if (bpf_map_get_next_key(cg_storage_fd, NULL, &key)) {
+		printf("Failed to get the first key in cgroup storage\n");
+		return -1;
+	}
+
+	if (bpf_map_lookup_elem(cg_storage_fd, &key, &value)) {
+		printf("Failed to lookup cgroup storage 0\n");
+		return -1;
+	}
+
+	if (value != 0) {
+		printf("Unexpected data in the cgroup storage: %llu\n", value);
+		return -1;
+	}
+
+	if (init_addrs(test, &requested_addr, &expected_addr, NULL))
+		return -1;
+
+	servfd = start_server(test->type, &requested_addr, addr_len);
+	if (servfd < 0)
+		return -1;
+
+	close(servfd);
+
+	if (bpf_map_get_next_key(cg_storage_fd, NULL, &key)) {
+		printf("Failed to get the first key in cgroup storage\n");
+		return -1;
+	}
+
+	if (bpf_map_lookup_elem(cg_storage_fd, &key, &value)) {
+		printf("Failed to lookup cgroup storage 0\n");
+		return -1;
+	}
+
+	if (value != 1) {
+		printf("Unexpected data in the cgroup storage: %llu\n", value);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int run_test_case(int cgfd, const struct sock_addr_test *test)
 {
 	int progfd = -1;
@@ -1381,6 +1502,10 @@ static int run_test_case(int cgfd, const struct sock_addr_test *test)
 	case BPF_CGROUP_UDP6_SENDMSG:
 		err = run_sendmsg_test_case(test);
 		break;
+	case BPF_CGROUP_INET4_SOCK_RELEASE:
+	case BPF_CGROUP_INET6_SOCK_RELEASE:
+		err = run_release_test_case(test);
+		break;
 	default:
 		goto err;
 	}
@@ -1405,6 +1530,10 @@ static int run_test_case(int cgfd, const struct sock_addr_test *test)
 	/* Detaching w/o checking return code: best effort attempt. */
 	if (progfd != -1)
 		bpf_prog_detach(cgfd, test->attach_type);
+	if (cg_storage_fd > 0) {
+		close(cg_storage_fd);
+		cg_storage_fd = 0;
+	}
 	close(progfd);
 	printf("[%s]\n", err ? "FAIL" : "PASS");
 	return err;
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
                   ` (4 preceding siblings ...)
  2019-01-18  0:41 ` [PATCH bpf-next 5/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock_addr.c Stanislav Fomichev
@ 2019-01-18  1:02 ` Eric Dumazet
  2019-01-18  1:58   ` Stanislav Fomichev
  2019-01-18  2:36 ` Andrey Ignatov
  6 siblings, 1 reply; 16+ messages in thread
From: Eric Dumazet @ 2019-01-18  1:02 UTC (permalink / raw)
  To: Stanislav Fomichev, netdev; +Cc: davem, ast, daniel



On 01/17/2019 04:41 PM, Stanislav Fomichev wrote:
> Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
> socket creation and there is no way to know when the socket is being
> closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
> that trigger when the socket is closed.
> 

Are these hooks enough to capture a disconnect() operation ?

A socket can be reused (different flows) without inet_release() being ever called.


> Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
> Hooks have read-only access to all fields of struct bpf_sock.
> 
> First patch adds hooks, the rest of the patches add uapi and tests to make
> sure these hooks work.
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
  2019-01-18  1:02 ` [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Eric Dumazet
@ 2019-01-18  1:58   ` Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18  1:58 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Netdev, davem, Alexei Starovoitov, Daniel Borkmann

On Thu, Jan 17, 2019 at 5:02 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 01/17/2019 04:41 PM, Stanislav Fomichev wrote:
> > Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
> > socket creation and there is no way to know when the socket is being
> > closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
> > that trigger when the socket is closed.
> >
>
> Are these hooks enough to capture a disconnect() operation ?
>
> A socket can be reused (different flows) without inet_release() being ever called.
I didn't know about reuse, let me spend some time looking into this.

I mostly thought about the following usecase, where user manually calls close():
sys_close() -> sock_close() -> __sock_release -> inet[6]_release

(I also expected stack to call the release whenever process exits)
>
>
> > Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
> > Hooks have read-only access to all fields of struct bpf_sock.
> >
> > First patch adds hooks, the rest of the patches add uapi and tests to make
> > sure these hooks work.
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
  2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
@ 2019-01-18  2:22   ` Andrey Ignatov
  2019-01-18  6:30   ` kbuild test robot
  2019-01-18  7:23   ` kbuild test robot
  2 siblings, 0 replies; 16+ messages in thread
From: Andrey Ignatov @ 2019-01-18  2:22 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, ast, daniel

Stanislav Fomichev <sdf@google.com> [Thu, 2019-01-17 16:41 -0800]:
> These hooks mirror existing BPF_CGROUP_INET{4,6}_SOCK hooks (which
> trigger upon socket creation), but trigger when the socket is closed.
> 
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  include/linux/bpf-cgroup.h |  6 ++++++
>  include/net/inet_common.h  |  1 +
>  include/uapi/linux/bpf.h   |  2 ++
>  kernel/bpf/syscall.c       |  8 ++++++++
>  net/core/filter.c          |  7 +++++++
>  net/ipv4/af_inet.c         | 13 ++++++++++++-
>  net/ipv6/af_inet6.c        |  5 ++++-
>  7 files changed, 40 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
> index 588dd5f0bd85..31fcfe215d80 100644
> --- a/include/linux/bpf-cgroup.h
> +++ b/include/linux/bpf-cgroup.h
> @@ -176,6 +176,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
>  #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
>  	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
>  
> +#define BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE(sk)			       \
> +	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_SOCK_RELEASE)
> +
> +#define BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sk)			       \
> +	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_SOCK_RELEASE)
> +
>  #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)				       \
>  	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)

These macros should also be defined for the case when CONFIG_CGROUP_BPF
is not set. See in this file below:

#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })


> diff --git a/include/net/inet_common.h b/include/net/inet_common.h
> index 975901a95c0f..0e64046afe30 100644
> --- a/include/net/inet_common.h
> +++ b/include/net/inet_common.h
> @@ -17,6 +17,7 @@ struct sockaddr;
>  struct socket;
>  
>  int inet_release(struct socket *sock);
> +int __inet_release(struct socket *sock);
>  int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>  			int addr_len, int flags);
>  int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 91c43884f295..8e78aa28a42e 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -186,6 +186,8 @@ enum bpf_attach_type {
>  	BPF_CGROUP_UDP6_SENDMSG,
>  	BPF_LIRC_MODE2,
>  	BPF_FLOW_DISSECTOR,
> +	BPF_CGROUP_INET4_SOCK_RELEASE,
> +	BPF_CGROUP_INET6_SOCK_RELEASE,
>  	__MAX_BPF_ATTACH_TYPE
>  };
>  
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index b155cd17c1bd..6fa113448580 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1417,6 +1417,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
>  	case BPF_PROG_TYPE_CGROUP_SOCK:
>  		switch (expected_attach_type) {
>  		case BPF_CGROUP_INET_SOCK_CREATE:
> +		case BPF_CGROUP_INET4_SOCK_RELEASE:
> +		case BPF_CGROUP_INET6_SOCK_RELEASE:
>  		case BPF_CGROUP_INET4_POST_BIND:
>  		case BPF_CGROUP_INET6_POST_BIND:
>  			return 0;
> @@ -1709,6 +1711,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
>  		ptype = BPF_PROG_TYPE_CGROUP_SKB;
>  		break;
>  	case BPF_CGROUP_INET_SOCK_CREATE:
> +	case BPF_CGROUP_INET4_SOCK_RELEASE:
> +	case BPF_CGROUP_INET6_SOCK_RELEASE:
>  	case BPF_CGROUP_INET4_POST_BIND:
>  	case BPF_CGROUP_INET6_POST_BIND:
>  		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
> @@ -1791,6 +1795,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
>  		ptype = BPF_PROG_TYPE_CGROUP_SKB;
>  		break;
>  	case BPF_CGROUP_INET_SOCK_CREATE:
> +	case BPF_CGROUP_INET4_SOCK_RELEASE:
> +	case BPF_CGROUP_INET6_SOCK_RELEASE:
>  	case BPF_CGROUP_INET4_POST_BIND:
>  	case BPF_CGROUP_INET6_POST_BIND:
>  		ptype = BPF_PROG_TYPE_CGROUP_SOCK;
> @@ -1841,6 +1847,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
>  	case BPF_CGROUP_INET_INGRESS:
>  	case BPF_CGROUP_INET_EGRESS:
>  	case BPF_CGROUP_INET_SOCK_CREATE:
> +	case BPF_CGROUP_INET4_SOCK_RELEASE:
> +	case BPF_CGROUP_INET6_SOCK_RELEASE:
>  	case BPF_CGROUP_INET4_BIND:
>  	case BPF_CGROUP_INET6_BIND:
>  	case BPF_CGROUP_INET4_POST_BIND:
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2b3b436ef545..b4da6793fdbc 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5885,12 +5885,16 @@ static bool __sock_filter_check_attach_type(int off,
>  		switch (attach_type) {
>  		case BPF_CGROUP_INET_SOCK_CREATE:
>  			goto full_access;
> +		case BPF_CGROUP_INET4_SOCK_RELEASE:
> +		case BPF_CGROUP_INET6_SOCK_RELEASE:
> +			goto read_only;
>  		default:
>  			return false;
>  		}
>  	case bpf_ctx_range(struct bpf_sock, src_ip4):
>  		switch (attach_type) {
>  		case BPF_CGROUP_INET4_POST_BIND:
> +		case BPF_CGROUP_INET4_SOCK_RELEASE:
>  			goto read_only;
>  		default:
>  			return false;
> @@ -5898,6 +5902,7 @@ static bool __sock_filter_check_attach_type(int off,
>  	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
>  		switch (attach_type) {
>  		case BPF_CGROUP_INET6_POST_BIND:
> +		case BPF_CGROUP_INET6_SOCK_RELEASE:
>  			goto read_only;
>  		default:
>  			return false;
> @@ -5906,6 +5911,8 @@ static bool __sock_filter_check_attach_type(int off,
>  		switch (attach_type) {
>  		case BPF_CGROUP_INET4_POST_BIND:
>  		case BPF_CGROUP_INET6_POST_BIND:
> +		case BPF_CGROUP_INET4_SOCK_RELEASE:
> +		case BPF_CGROUP_INET6_SOCK_RELEASE:
>  			goto read_only;
>  		default:
>  			return false;
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index 0dfb72c46671..b703ad242365 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -403,7 +403,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
>   *	function we are destroying the object and from then on nobody
>   *	should refer to it.
>   */
> -int inet_release(struct socket *sock)
> +int __inet_release(struct socket *sock)
>  {
>  	struct sock *sk = sock->sk;
>  
> @@ -429,6 +429,17 @@ int inet_release(struct socket *sock)
>  	}
>  	return 0;
>  }
> +EXPORT_SYMBOL(__inet_release);
> +
> +int inet_release(struct socket *sock)
> +{
> +	struct sock *sk = sock->sk;
> +
> +	if (sk && !sk->sk_kern_sock)
> +		BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE(sk);
> +
> +	return __inet_release(sock);
> +}
>  EXPORT_SYMBOL(inet_release);
>  
>  int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index d99753b5e39b..44c86595eba8 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -464,13 +464,16 @@ int inet6_release(struct socket *sock)
>  	if (!sk)
>  		return -EINVAL;
>  
> +	if (!sk->sk_kern_sock)
> +		BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sock->sk);

Nit: s/sock->sk/sk/

> +
>  	/* Free mc lists */
>  	ipv6_sock_mc_close(sk);
>  
>  	/* Free ac lists */
>  	ipv6_sock_ac_close(sk);
>  
> -	return inet_release(sock);
> +	return __inet_release(sock);
>  }
>  EXPORT_SYMBOL(inet6_release);
>  
> -- 
> 2.20.1.321.g9e740568ce-goog
> 

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
  2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
                   ` (5 preceding siblings ...)
  2019-01-18  1:02 ` [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Eric Dumazet
@ 2019-01-18  2:36 ` Andrey Ignatov
  2019-01-18 16:50   ` Stanislav Fomichev
  6 siblings, 1 reply; 16+ messages in thread
From: Andrey Ignatov @ 2019-01-18  2:36 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, ast, daniel

Stanislav Fomichev <sdf@google.com> [Thu, 2019-01-17 16:41 -0800]:
> Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
> socket creation and there is no way to know when the socket is being
> closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
> that trigger when the socket is closed.
> 
> Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
> Hooks have read-only access to all fields of struct bpf_sock.

Do you need it for both TCP and UDP?

I was thinking about this hook earlier but since in my case only TCP was
needed I ended up using TCP-BPF. E.g. be BPF_SOCK_OPS_TCP_LISTEN_CB or
BPF_SOCK_OPS_TCP_CONNECT_CB can be used instead of POST{4,6}_BIND to
enable something, and then BPF_SOCK_OPS_STATE_CB can be used instead of
SOCK_RELEASE to disable that something when socket transisions to
BPF_TCP_CLOSE (e.g. BPF_TCP_LISTEN -> BPF_TCP_CLOSE).

That turned out to be much cleaner than POST{4,6}_BIND and also works
fine when socket is disconnected with AF_UNSPEC and then connected again
(what Eric mentioned).


> First patch adds hooks, the rest of the patches add uapi and tests to make
> sure these hooks work.
> 
> Stanislav Fomichev (5):
>   bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
>   tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in
>     libbpf/bpftool
>   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
>     test_section_names.c
>   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c
>   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
>     test_sock_addr.c
> 
>  include/linux/bpf-cgroup.h                    |   6 +
>  include/net/inet_common.h                     |   1 +
>  include/uapi/linux/bpf.h                      |   2 +
>  kernel/bpf/syscall.c                          |   8 ++
>  net/core/filter.c                             |   7 +
>  net/ipv4/af_inet.c                            |  13 +-
>  net/ipv6/af_inet6.c                           |   5 +-
>  tools/bpf/bpftool/cgroup.c                    |   2 +
>  tools/include/uapi/linux/bpf.h                |   2 +
>  tools/lib/bpf/libbpf.c                        |   4 +
>  .../selftests/bpf/test_section_names.c        |  10 ++
>  tools/testing/selftests/bpf/test_sock.c       | 119 ++++++++++++++++
>  tools/testing/selftests/bpf/test_sock_addr.c  | 131 +++++++++++++++++-
>  13 files changed, 307 insertions(+), 3 deletions(-)
> 
> -- 
> 2.20.1.321.g9e740568ce-goog
> 

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 5/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock_addr.c
  2019-01-18  0:41 ` [PATCH bpf-next 5/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock_addr.c Stanislav Fomichev
@ 2019-01-18  2:45   ` Andrey Ignatov
  0 siblings, 0 replies; 16+ messages in thread
From: Andrey Ignatov @ 2019-01-18  2:45 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, ast, daniel

Stanislav Fomichev <sdf@google.com> [Thu, 2019-01-17 16:41 -0800]:
> Count the number of times hooks were executed in the cgroup
> local storage to make sure they trigger.
> 
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  tools/testing/selftests/bpf/test_sock_addr.c | 131 ++++++++++++++++++-
>  1 file changed, 130 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
> index 3f110eaaf29c..92845bf6fac9 100644
> --- a/tools/testing/selftests/bpf/test_sock_addr.c
> +++ b/tools/testing/selftests/bpf/test_sock_addr.c
> @@ -82,6 +82,9 @@ struct sock_addr_test {
>  	} expected_result;
>  };
>  
> +/* Per-test storage for additional cgroup storage map */
> +static int cg_storage_fd;
> +
>  static int bind4_prog_load(const struct sock_addr_test *test);
>  static int bind6_prog_load(const struct sock_addr_test *test);
>  static int connect4_prog_load(const struct sock_addr_test *test);
> @@ -94,6 +97,7 @@ static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
>  static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
>  static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
>  static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
> +static int release46_prog_load(const struct sock_addr_test *test);
>  
>  static struct sock_addr_test tests[] = {
>  	/* bind */
> @@ -507,6 +511,35 @@ static struct sock_addr_test tests[] = {
>  		SRC6_REWRITE_IP,
>  		SYSCALL_EPERM,
>  	},
> +	/* release */
> +	{
> +		"release4: make sure the hook triggers",
> +		release46_prog_load,
> +		BPF_CGROUP_INET4_SOCK_RELEASE,
> +		BPF_CGROUP_INET4_SOCK_RELEASE,
> +		AF_INET,
> +		SOCK_STREAM,
> +		SERV4_REWRITE_IP,
> +		0,
> +		SERV4_REWRITE_IP,
> +		0,
> +		NULL,
> +		SUCCESS,
> +	},
> +	{
> +		"release6: make sure the hook triggers",
> +		release46_prog_load,
> +		BPF_CGROUP_INET6_SOCK_RELEASE,
> +		BPF_CGROUP_INET6_SOCK_RELEASE,
> +		AF_INET6,
> +		SOCK_DGRAM,
> +		SERV6_REWRITE_IP,
> +		0,
> +		SERV6_REWRITE_IP,
> +		0,
> +		NULL,
> +		SUCCESS,
> +	},
>  };
>  
>  static int mk_sockaddr(int domain, const char *ip, unsigned short port,
> @@ -554,7 +587,15 @@ static int load_insns(const struct sock_addr_test *test,
>  	int ret;
>  
>  	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
> -	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
> +	switch (test->expected_attach_type) {
> +	case BPF_CGROUP_INET4_SOCK_RELEASE:
> +	case BPF_CGROUP_INET6_SOCK_RELEASE:
> +		load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
> +		break;
> +	default:
> +		load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
> +		break;
> +	}
>  	load_attr.expected_attach_type = test->expected_attach_type;
>  	load_attr.insns = insns;
>  	load_attr.insns_cnt = insns_cnt;
> @@ -916,6 +957,35 @@ static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
>  	return load_path(test, SENDMSG6_PROG_PATH);
>  }
>  
> +static int release46_prog_load(const struct sock_addr_test *test)
> +{
> +	struct bpf_cgroup_storage_key key;
> +	unsigned long long value;
> +	struct bpf_insn insns[] = {
> +		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
> +		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
> +		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
> +			     BPF_FUNC_get_local_storage),
> +		BPF_MOV64_IMM(BPF_REG_1, 1),
> +		BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
> +		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
> +		BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
> +		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
> +		BPF_EXIT_INSN(),
> +	};
> +
> +	cg_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
> +				sizeof(value), 0, 0);
> +	if (cg_storage_fd < 0) {
> +		printf("Failed to create map: %s\n", strerror(errno));
> +		return -1;
> +	}
> +
> +	insns[0].imm = cg_storage_fd;
> +
> +	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
> +}
> +
>  static int cmp_addr(const struct sockaddr_storage *addr1,
>  		    const struct sockaddr_storage *addr2, int cmp_port)
>  {
> @@ -1346,6 +1416,57 @@ static int run_sendmsg_test_case(const struct sock_addr_test *test)
>  	return err;
>  }
>  
> +static int run_release_test_case(const struct sock_addr_test *test)
> +{
> +	socklen_t addr_len = sizeof(struct sockaddr_storage);
> +	struct sockaddr_storage requested_addr;
> +	struct sockaddr_storage expected_addr;
> +	struct bpf_cgroup_storage_key key;
> +	unsigned long long value;
> +	int servfd = -1;
> +
> +	if (bpf_map_get_next_key(cg_storage_fd, NULL, &key)) {
> +		printf("Failed to get the first key in cgroup storage\n");
> +		return -1;
> +	}
> +
> +	if (bpf_map_lookup_elem(cg_storage_fd, &key, &value)) {
> +		printf("Failed to lookup cgroup storage 0\n");
> +		return -1;
> +	}
> +
> +	if (value != 0) {
> +		printf("Unexpected data in the cgroup storage: %llu\n", value);
> +		return -1;
> +	}
> +
> +	if (init_addrs(test, &requested_addr, &expected_addr, NULL))
> +		return -1;
> +
> +	servfd = start_server(test->type, &requested_addr, addr_len);
> +	if (servfd < 0)
> +		return -1;
> +
> +	close(servfd);
> +
> +	if (bpf_map_get_next_key(cg_storage_fd, NULL, &key)) {
> +		printf("Failed to get the first key in cgroup storage\n");
> +		return -1;
> +	}
> +
> +	if (bpf_map_lookup_elem(cg_storage_fd, &key, &value)) {
> +		printf("Failed to lookup cgroup storage 0\n");
> +		return -1;
> +	}
> +
> +	if (value != 1) {
> +		printf("Unexpected data in the cgroup storage: %llu\n", value);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>  static int run_test_case(int cgfd, const struct sock_addr_test *test)
>  {
>  	int progfd = -1;
> @@ -1381,6 +1502,10 @@ static int run_test_case(int cgfd, const struct sock_addr_test *test)
>  	case BPF_CGROUP_UDP6_SENDMSG:
>  		err = run_sendmsg_test_case(test);
>  		break;
> +	case BPF_CGROUP_INET4_SOCK_RELEASE:
> +	case BPF_CGROUP_INET6_SOCK_RELEASE:
> +		err = run_release_test_case(test);
> +		break;
>  	default:
>  		goto err;
>  	}
> @@ -1405,6 +1530,10 @@ static int run_test_case(int cgfd, const struct sock_addr_test *test)
>  	/* Detaching w/o checking return code: best effort attempt. */
>  	if (progfd != -1)
>  		bpf_prog_detach(cgfd, test->attach_type);
> +	if (cg_storage_fd > 0) {
> +		close(cg_storage_fd);
> +		cg_storage_fd = 0;

Nit: zero is valid fd (stdin) and should not be used as "undefined" value.
That's why -1 is used with progfd and other descriptors in the test.


> +	}
>  	close(progfd);
>  	printf("[%s]\n", err ? "FAIL" : "PASS");
>  	return err;
> -- 
> 2.20.1.321.g9e740568ce-goog
> 

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
  2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
  2019-01-18  2:22   ` Andrey Ignatov
@ 2019-01-18  6:30   ` kbuild test robot
  2019-01-18  7:23   ` kbuild test robot
  2 siblings, 0 replies; 16+ messages in thread
From: kbuild test robot @ 2019-01-18  6:30 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: kbuild-all, netdev, davem, ast, daniel, Stanislav Fomichev

[-- Attachment #1: Type: text/plain, Size: 1908 bytes --]

Hi Stanislav,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Stanislav-Fomichev/bpf-add-BPF_CGROUP_INET-4-6-_SOCK_RELEASE-hooks/20190118-141334
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-x005-201902 (attached as .config)
compiler: gcc-8 (Debian 8.2.0-14) 8.2.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   net//ipv4/af_inet.c: In function 'inet_release':
>> net//ipv4/af_inet.c:439:3: error: implicit declaration of function 'BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE'; did you mean 'BPF_CGROUP_RUN_PROG_INET_SOCK'? [-Werror=implicit-function-declaration]
      BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE(sk);
      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      BPF_CGROUP_RUN_PROG_INET_SOCK
   cc1: some warnings being treated as errors
--
   net//ipv6/af_inet6.c: In function 'inet6_release':
>> net//ipv6/af_inet6.c:468:3: error: implicit declaration of function 'BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE'; did you mean 'BPF_CGROUP_RUN_PROG_INET_SOCK'? [-Werror=implicit-function-declaration]
      BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sock->sk);
      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      BPF_CGROUP_RUN_PROG_INET_SOCK
   cc1: some warnings being treated as errors

vim +439 net//ipv4/af_inet.c

   433	
   434	int inet_release(struct socket *sock)
   435	{
   436		struct sock *sk = sock->sk;
   437	
   438		if (sk && !sk->sk_kern_sock)
 > 439			BPF_CGROUP_RUN_PROG_INET4_SOCK_RELEASE(sk);
   440	
   441		return __inet_release(sock);
   442	}
   443	EXPORT_SYMBOL(inet_release);
   444	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 29393 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
  2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
  2019-01-18  2:22   ` Andrey Ignatov
  2019-01-18  6:30   ` kbuild test robot
@ 2019-01-18  7:23   ` kbuild test robot
  2 siblings, 0 replies; 16+ messages in thread
From: kbuild test robot @ 2019-01-18  7:23 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: kbuild-all, netdev, davem, ast, daniel, Stanislav Fomichev

[-- Attachment #1: Type: text/plain, Size: 1580 bytes --]

Hi Stanislav,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Stanislav-Fomichev/bpf-add-BPF_CGROUP_INET-4-6-_SOCK_RELEASE-hooks/20190118-141334
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: i386-randconfig-a2-201902 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.4-2) 4.9.4
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   net/ipv6/af_inet6.c: In function 'inet6_release':
>> net/ipv6/af_inet6.c:468:3: error: implicit declaration of function 'BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE' [-Werror=implicit-function-declaration]
      BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sock->sk);
      ^
   cc1: some warnings being treated as errors

vim +/BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE +468 net/ipv6/af_inet6.c

   459	
   460	int inet6_release(struct socket *sock)
   461	{
   462		struct sock *sk = sock->sk;
   463	
   464		if (!sk)
   465			return -EINVAL;
   466	
   467		if (!sk->sk_kern_sock)
 > 468			BPF_CGROUP_RUN_PROG_INET6_SOCK_RELEASE(sock->sk);
   469	
   470		/* Free mc lists */
   471		ipv6_sock_mc_close(sk);
   472	
   473		/* Free ac lists */
   474		ipv6_sock_ac_close(sk);
   475	
   476		return __inet_release(sock);
   477	}
   478	EXPORT_SYMBOL(inet6_release);
   479	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 32601 bytes --]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
  2019-01-18  2:36 ` Andrey Ignatov
@ 2019-01-18 16:50   ` Stanislav Fomichev
  2019-01-18 22:39     ` Andrey Ignatov
  0 siblings, 1 reply; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-18 16:50 UTC (permalink / raw)
  To: Andrey Ignatov; +Cc: Stanislav Fomichev, netdev, davem, ast, daniel, edumazet

On 01/18, Andrey Ignatov wrote:
> Stanislav Fomichev <sdf@google.com> [Thu, 2019-01-17 16:41 -0800]:
> > Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
> > socket creation and there is no way to know when the socket is being
> > closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
> > that trigger when the socket is closed.
> > 
> > Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
> > Hooks have read-only access to all fields of struct bpf_sock.
> 
> Do you need it for both TCP and UDP?
Yes, we need both TCP and UDP. Although, UDP is tricky in general with
the connected/unconnected cases.

> I was thinking about this hook earlier but since in my case only TCP was
> needed I ended up using TCP-BPF. E.g. be BPF_SOCK_OPS_TCP_LISTEN_CB or
> BPF_SOCK_OPS_TCP_CONNECT_CB can be used instead of POST{4,6}_BIND to
> enable something, and then BPF_SOCK_OPS_STATE_CB can be used instead of
> SOCK_RELEASE to disable that something when socket transisions to
> BPF_TCP_CLOSE (e.g. BPF_TCP_LISTEN -> BPF_TCP_CLOSE).
> 
> That turned out to be much cleaner than POST{4,6}_BIND and also works
> fine when socket is disconnected with AF_UNSPEC and then connected again
> (what Eric mentioned).

What if we do something like the patch below? Add pre_release hook (like we
currently have for pre_connect) and call it from connect(AF_UNSPEC) and
from inet_release? Any concerns here?

(I agree that TCP is probably better handled via BPF_SOCK_OPS_TCP_XYZ hooks,
but we need something for UDP as well)

-- 

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b703ad242365..ee3dc181df8f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -568,8 +568,11 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 
        if (addr_len < sizeof(uaddr->sa_family))
                return -EINVAL;
-       if (uaddr->sa_family == AF_UNSPEC)
+       if (uaddr->sa_family == AF_UNSPEC) {
+               if (BPF_CGROUP_PRE_RELEASE_ENABLED(sk))
+                       sk->sk_prot->pre_release(sk);
                return sk->sk_prot->disconnect(sk, flags);
+       }
 
        if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
                err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
@@ -632,6 +635,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                        return -EINVAL;
 
                if (uaddr->sa_family == AF_UNSPEC) {
+                       if (BPF_CGROUP_PRE_RELEASE_ENABLED(sk))
+                               sk->sk_prot->pre_release(sk);
                        err = sk->sk_prot->disconnect(sk, flags);
                        sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
                        goto out; 

> > First patch adds hooks, the rest of the patches add uapi and tests to make
> > sure these hooks work.
> > 
> > Stanislav Fomichev (5):
> >   bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
> >   tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in
> >     libbpf/bpftool
> >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
> >     test_section_names.c
> >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c
> >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
> >     test_sock_addr.c
> > 
> >  include/linux/bpf-cgroup.h                    |   6 +
> >  include/net/inet_common.h                     |   1 +
> >  include/uapi/linux/bpf.h                      |   2 +
> >  kernel/bpf/syscall.c                          |   8 ++
> >  net/core/filter.c                             |   7 +
> >  net/ipv4/af_inet.c                            |  13 +-
> >  net/ipv6/af_inet6.c                           |   5 +-
> >  tools/bpf/bpftool/cgroup.c                    |   2 +
> >  tools/include/uapi/linux/bpf.h                |   2 +
> >  tools/lib/bpf/libbpf.c                        |   4 +
> >  .../selftests/bpf/test_section_names.c        |  10 ++
> >  tools/testing/selftests/bpf/test_sock.c       | 119 ++++++++++++++++
> >  tools/testing/selftests/bpf/test_sock_addr.c  | 131 +++++++++++++++++-
> >  13 files changed, 307 insertions(+), 3 deletions(-)
> > 
> > -- 
> > 2.20.1.321.g9e740568ce-goog
> > 
> 
> -- 
> Andrey Ignatov

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
  2019-01-18 16:50   ` Stanislav Fomichev
@ 2019-01-18 22:39     ` Andrey Ignatov
  2019-01-22 17:00       ` Stanislav Fomichev
  0 siblings, 1 reply; 16+ messages in thread
From: Andrey Ignatov @ 2019-01-18 22:39 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Stanislav Fomichev, netdev, davem, ast, daniel, edumazet

Stanislav Fomichev <sdf@fomichev.me> [Fri, 2019-01-18 08:50 -0800]:
> On 01/18, Andrey Ignatov wrote:
> > Stanislav Fomichev <sdf@google.com> [Thu, 2019-01-17 16:41 -0800]:
> > > Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
> > > socket creation and there is no way to know when the socket is being
> > > closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
> > > that trigger when the socket is closed.
> > > 
> > > Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
> > > Hooks have read-only access to all fields of struct bpf_sock.
> > 
> > Do you need it for both TCP and UDP?
> Yes, we need both TCP and UDP. Although, UDP is tricky in general with
> the connected/unconnected cases.
> 
> > I was thinking about this hook earlier but since in my case only TCP was
> > needed I ended up using TCP-BPF. E.g. be BPF_SOCK_OPS_TCP_LISTEN_CB or
> > BPF_SOCK_OPS_TCP_CONNECT_CB can be used instead of POST{4,6}_BIND to
> > enable something, and then BPF_SOCK_OPS_STATE_CB can be used instead of
> > SOCK_RELEASE to disable that something when socket transisions to
> > BPF_TCP_CLOSE (e.g. BPF_TCP_LISTEN -> BPF_TCP_CLOSE).
> > 
> > That turned out to be much cleaner than POST{4,6}_BIND and also works
> > fine when socket is disconnected with AF_UNSPEC and then connected again
> > (what Eric mentioned).
> 
> What if we do something like the patch below? Add pre_release hook (like we
> currently have for pre_connect) and call it from connect(AF_UNSPEC) and
> from inet_release? Any concerns here?

That's hard to say w/o fully understanding the use-case. Could you
provide more details on it please?

Is my understanding correct that some statistics is needed only for
_client_ sockets (since we're talking about connect) and these client
sockets are always bound to local IP:port (or maybe IP only with
IP_BIND_ADDRESS_NO_PORT?) before sending data and that's why
POST{4,6}_BIND is used to start gathering statistics?

And then later, there should be some event to cleanup statistics and you
chose ops->release for this? If so what's kind of statistics is needed?
Is it per-destination statistics (since AF_UNSPEC matters?)? If so then
it should be cleaned up whenever destination is changed or when socket
is disconnected the last time.

For UDP socket connect(2) can be called many times with different
destinations even w/o AF_UNSPEC in between (in contrast to TCP). So your
patch with pre_release below won't handle it. Furthermore even if
connect(2) was used to set destination, sendmsg(2) can override it for
individual datagram. 

If per-destination statistics is needed for UDP client socket then IMO
both "start events" should be handled: BPF_CGROUP_INET{4,6}_CONNECT
(connected UDP) and BPF_CGROUP_UDP{4,6}_SENDMSG (unconnected UDP). The
former can be used to "close" previous statistics "window" (if it's not
the first connect) as well. The latter can be used to created "separate"
statistics "window" for destination of current datagram only.  And the
only case left, from what I see, is when socket is stopped being used
completely. That's, again, hard to say how to handle it better w/o
understanding use-case (e.g. since it's only UDP problem it may make
sense to implement in UDP stack).

> (I agree that TCP is probably better handled via BPF_SOCK_OPS_TCP_XYZ hooks,
> but we need something for UDP as well)

Do you think you may handle TCP with TCP-BPF and this patch set may
focus on UDP only if there is no good attach point that covers both TCP
and UDP?  That may lead to different BPF programs for TCP and UDP but
that may be hard to avoid anyway (BPF_CGROUP_UDP{4,6}_SENDMSG is a good
example here).

> 
> -- 
> 
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index b703ad242365..ee3dc181df8f 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -568,8 +568,11 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
>  
>         if (addr_len < sizeof(uaddr->sa_family))
>                 return -EINVAL;
> -       if (uaddr->sa_family == AF_UNSPEC)
> +       if (uaddr->sa_family == AF_UNSPEC) {
> +               if (BPF_CGROUP_PRE_RELEASE_ENABLED(sk))
> +                       sk->sk_prot->pre_release(sk);
>                 return sk->sk_prot->disconnect(sk, flags);
> +       }
>  
>         if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
>                 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
> @@ -632,6 +635,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>                         return -EINVAL;
>  
>                 if (uaddr->sa_family == AF_UNSPEC) {
> +                       if (BPF_CGROUP_PRE_RELEASE_ENABLED(sk))
> +                               sk->sk_prot->pre_release(sk);
>                         err = sk->sk_prot->disconnect(sk, flags);
>                         sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
>                         goto out; 
> 
> > > First patch adds hooks, the rest of the patches add uapi and tests to make
> > > sure these hooks work.
> > > 
> > > Stanislav Fomichev (5):
> > >   bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
> > >   tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in
> > >     libbpf/bpftool
> > >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
> > >     test_section_names.c
> > >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c
> > >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
> > >     test_sock_addr.c
> > > 
> > >  include/linux/bpf-cgroup.h                    |   6 +
> > >  include/net/inet_common.h                     |   1 +
> > >  include/uapi/linux/bpf.h                      |   2 +
> > >  kernel/bpf/syscall.c                          |   8 ++
> > >  net/core/filter.c                             |   7 +
> > >  net/ipv4/af_inet.c                            |  13 +-
> > >  net/ipv6/af_inet6.c                           |   5 +-
> > >  tools/bpf/bpftool/cgroup.c                    |   2 +
> > >  tools/include/uapi/linux/bpf.h                |   2 +
> > >  tools/lib/bpf/libbpf.c                        |   4 +
> > >  .../selftests/bpf/test_section_names.c        |  10 ++
> > >  tools/testing/selftests/bpf/test_sock.c       | 119 ++++++++++++++++
> > >  tools/testing/selftests/bpf/test_sock_addr.c  | 131 +++++++++++++++++-
> > >  13 files changed, 307 insertions(+), 3 deletions(-)
> > > 
> > > -- 
> > > 2.20.1.321.g9e740568ce-goog
> > > 
> > 
> > -- 
> > Andrey Ignatov

-- 
Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close
  2019-01-18 22:39     ` Andrey Ignatov
@ 2019-01-22 17:00       ` Stanislav Fomichev
  0 siblings, 0 replies; 16+ messages in thread
From: Stanislav Fomichev @ 2019-01-22 17:00 UTC (permalink / raw)
  To: Andrey Ignatov; +Cc: Stanislav Fomichev, netdev, davem, ast, daniel, edumazet

On 01/18, Andrey Ignatov wrote:
> Stanislav Fomichev <sdf@fomichev.me> [Fri, 2019-01-18 08:50 -0800]:
> > On 01/18, Andrey Ignatov wrote:
> > > Stanislav Fomichev <sdf@google.com> [Thu, 2019-01-17 16:41 -0800]:
> > > > Currently, we have BPF_CGROUP_INET_SOCK_CREATE hook that triggers on
> > > > socket creation and there is no way to know when the socket is being
> > > > closed. Add new set of hooks BPF_CGROUP_INET{4,6}_SOCK_RELEASE
> > > > that trigger when the socket is closed.
> > > > 
> > > > Initial intended usecase is to cleanup statistics after POST{4,6}_BIND.
> > > > Hooks have read-only access to all fields of struct bpf_sock.
> > > 
> > > Do you need it for both TCP and UDP?
> > Yes, we need both TCP and UDP. Although, UDP is tricky in general with
> > the connected/unconnected cases.
> > 
> > > I was thinking about this hook earlier but since in my case only TCP was
> > > needed I ended up using TCP-BPF. E.g. be BPF_SOCK_OPS_TCP_LISTEN_CB or
> > > BPF_SOCK_OPS_TCP_CONNECT_CB can be used instead of POST{4,6}_BIND to
> > > enable something, and then BPF_SOCK_OPS_STATE_CB can be used instead of
> > > SOCK_RELEASE to disable that something when socket transisions to
> > > BPF_TCP_CLOSE (e.g. BPF_TCP_LISTEN -> BPF_TCP_CLOSE).
> > > 
> > > That turned out to be much cleaner than POST{4,6}_BIND and also works
> > > fine when socket is disconnected with AF_UNSPEC and then connected again
> > > (what Eric mentioned).
> > 
> > What if we do something like the patch below? Add pre_release hook (like we
> > currently have for pre_connect) and call it from connect(AF_UNSPEC) and
> > from inet_release? Any concerns here?
> 
> That's hard to say w/o fully understanding the use-case. Could you
> provide more details on it please?
The use-case is a simple lightweight per-cgroup bpf audit (without involving
audit subsystem).
Basically record the following events: when the socket is created, when (and
where) it's connected/bound and when it's torn down (plus, _maybe_ rx/tx
stats).

> Is my understanding correct that some statistics is needed only for
> _client_ sockets (since we're talking about connect) and these client
> sockets are always bound to local IP:port (or maybe IP only with
> IP_BIND_ADDRESS_NO_PORT?) before sending data and that's why
> POST{4,6}_BIND is used to start gathering statistics?
> 
> And then later, there should be some event to cleanup statistics and you
> chose ops->release for this? If so what's kind of statistics is needed?
> Is it per-destination statistics (since AF_UNSPEC matters?)? If so then
> it should be cleaned up whenever destination is changed or when socket
> is disconnected the last time.
> 
> For UDP socket connect(2) can be called many times with different
> destinations even w/o AF_UNSPEC in between (in contrast to TCP). So your
> patch with pre_release below won't handle it. Furthermore even if
> connect(2) was used to set destination, sendmsg(2) can override it for
> individual datagram. 
> 
> If per-destination statistics is needed for UDP client socket then IMO
> both "start events" should be handled: BPF_CGROUP_INET{4,6}_CONNECT
> (connected UDP) and BPF_CGROUP_UDP{4,6}_SENDMSG (unconnected UDP). The
> former can be used to "close" previous statistics "window" (if it's not
> the first connect) as well. The latter can be used to created "separate"
> statistics "window" for destination of current datagram only.  And the
> only case left, from what I see, is when socket is stopped being used
> completely. That's, again, hard to say how to handle it better w/o
> understanding use-case (e.g. since it's only UDP problem it may make
> sense to implement in UDP stack).
> 
> > (I agree that TCP is probably better handled via BPF_SOCK_OPS_TCP_XYZ hooks,
> > but we need something for UDP as well)
> 
> Do you think you may handle TCP with TCP-BPF and this patch set may
> focus on UDP only if there is no good attach point that covers both TCP
> and UDP?  That may lead to different BPF programs for TCP and UDP but
> that may be hard to avoid anyway (BPF_CGROUP_UDP{4,6}_SENDMSG is a good
> example here).
Let me look closer into TCP-BPF hooks, it looks like that would be
enough for TCP at least. I'll get back to you in case there is an issue.

I'll also think about UDP use case. Maybe we should do as you suggested
and support release only in UDP. (Or maybe have release for TCP+UDP and an
additional 'disconnect' hook to handle connect(AF_UNSPEC), that might
be more generic).

But thanks for you input, really appreciate it!

> 
> > 
> > -- 
> > 
> > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> > index b703ad242365..ee3dc181df8f 100644
> > --- a/net/ipv4/af_inet.c
> > +++ b/net/ipv4/af_inet.c
> > @@ -568,8 +568,11 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
> >  
> >         if (addr_len < sizeof(uaddr->sa_family))
> >                 return -EINVAL;
> > -       if (uaddr->sa_family == AF_UNSPEC)
> > +       if (uaddr->sa_family == AF_UNSPEC) {
> > +               if (BPF_CGROUP_PRE_RELEASE_ENABLED(sk))
> > +                       sk->sk_prot->pre_release(sk);
> >                 return sk->sk_prot->disconnect(sk, flags);
> > +       }
> >  
> >         if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) {
> >                 err = sk->sk_prot->pre_connect(sk, uaddr, addr_len);
> > @@ -632,6 +635,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> >                         return -EINVAL;
> >  
> >                 if (uaddr->sa_family == AF_UNSPEC) {
> > +                       if (BPF_CGROUP_PRE_RELEASE_ENABLED(sk))
> > +                               sk->sk_prot->pre_release(sk);
> >                         err = sk->sk_prot->disconnect(sk, flags);
> >                         sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
> >                         goto out; 
> > 
> > > > First patch adds hooks, the rest of the patches add uapi and tests to make
> > > > sure these hooks work.
> > > > 
> > > > Stanislav Fomichev (5):
> > > >   bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks
> > > >   tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in
> > > >     libbpf/bpftool
> > > >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
> > > >     test_section_names.c
> > > >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c
> > > >   selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to
> > > >     test_sock_addr.c
> > > > 
> > > >  include/linux/bpf-cgroup.h                    |   6 +
> > > >  include/net/inet_common.h                     |   1 +
> > > >  include/uapi/linux/bpf.h                      |   2 +
> > > >  kernel/bpf/syscall.c                          |   8 ++
> > > >  net/core/filter.c                             |   7 +
> > > >  net/ipv4/af_inet.c                            |  13 +-
> > > >  net/ipv6/af_inet6.c                           |   5 +-
> > > >  tools/bpf/bpftool/cgroup.c                    |   2 +
> > > >  tools/include/uapi/linux/bpf.h                |   2 +
> > > >  tools/lib/bpf/libbpf.c                        |   4 +
> > > >  .../selftests/bpf/test_section_names.c        |  10 ++
> > > >  tools/testing/selftests/bpf/test_sock.c       | 119 ++++++++++++++++
> > > >  tools/testing/selftests/bpf/test_sock_addr.c  | 131 +++++++++++++++++-
> > > >  13 files changed, 307 insertions(+), 3 deletions(-)
> > > > 
> > > > -- 
> > > > 2.20.1.321.g9e740568ce-goog
> > > > 
> > > 
> > > -- 
> > > Andrey Ignatov
> 
> -- 
> Andrey Ignatov

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2019-01-22 17:06 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-18  0:41 [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Stanislav Fomichev
2019-01-18  0:41 ` [PATCH bpf-next 1/5] bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE hooks Stanislav Fomichev
2019-01-18  2:22   ` Andrey Ignatov
2019-01-18  6:30   ` kbuild test robot
2019-01-18  7:23   ` kbuild test robot
2019-01-18  0:41 ` [PATCH bpf-next 2/5] tools: bpf: support BPF_CGROUP_INET{4,6}_SOCK_RELEASE in libbpf/bpftool Stanislav Fomichev
2019-01-18  0:41 ` [PATCH bpf-next 3/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_section_names.c Stanislav Fomichev
2019-01-18  0:41 ` [PATCH bpf-next 4/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock.c Stanislav Fomichev
2019-01-18  0:41 ` [PATCH bpf-next 5/5] selftests/bpf: add BPF_CGROUP_INET{4,6}_SOCK_RELEASE to test_sock_addr.c Stanislav Fomichev
2019-01-18  2:45   ` Andrey Ignatov
2019-01-18  1:02 ` [PATCH bpf-next 0/5] add bpf cgroup hooks that trigger on socket close Eric Dumazet
2019-01-18  1:58   ` Stanislav Fomichev
2019-01-18  2:36 ` Andrey Ignatov
2019-01-18 16:50   ` Stanislav Fomichev
2019-01-18 22:39     ` Andrey Ignatov
2019-01-22 17:00       ` Stanislav Fomichev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.