All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next v3 01/15] net: bpf: add bpf_seq_afinfo in tcp_iter_state
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 02/15] net: bpf: implement bpf iterator for tcp Yonghong Song
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

A new field bpf_seq_afinfo is added to tcp_iter_state
to provide bpf tcp iterator afinfo. There are two
reasons on why we did this.

First, the current way to get afinfo from PDE_DATA
does not work for bpf iterator as its seq_file
inode does not conform to /proc/net/{tcp,tcp6}
inode structures. More specifically, anonymous
bpf iterator will use an anonymous inode which
is shared in the system and we cannot change inode
private data structure at all.

Second, bpf iterator for tcp/tcp6 wants to
traverse all tcp and tcp6 sockets in one pass
and bpf program can control whether they want
to skip one sk_family or not. Having a different
afinfo with family AF_UNSPEC make it easier
to understand in the code.

This patch does not change /proc/net/{tcp,tcp6} behavior
as the bpf_seq_afinfo will be NULL for these two proc files.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/net/tcp.h   |  1 +
 net/ipv4/tcp_ipv4.c | 30 ++++++++++++++++++++++++------
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4de9485f73d9..eab1c7d0facb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1935,6 +1935,7 @@ struct tcp_iter_state {
 	struct seq_net_private	p;
 	enum tcp_seq_states	state;
 	struct sock		*syn_wait_sk;
+	struct tcp_seq_afinfo	*bpf_seq_afinfo;
 	int			bucket, offset, sbucket, num;
 	loff_t			last_pos;
 };
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad6435ba6d72..9cb65ee4ec63 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
  */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct tcp_seq_afinfo *afinfo;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct inet_listen_hashbucket *ilb;
 	struct hlist_nulls_node *node;
 	struct sock *sk = cur;
 
+	if (st->bpf_seq_afinfo)
+		afinfo = st->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	if (!sk) {
 get_head:
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
@@ -2235,7 +2240,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	sk_nulls_for_each_from(sk, node) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
-		if (sk->sk_family == afinfo->family)
+		if (afinfo->family == AF_UNSPEC ||
+		    sk->sk_family == afinfo->family)
 			return sk;
 	}
 	spin_unlock(&ilb->lock);
@@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
  */
 static void *established_get_first(struct seq_file *seq)
 {
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct tcp_seq_afinfo *afinfo;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
 
+	if (st->bpf_seq_afinfo)
+		afinfo = st->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	st->offset = 0;
 	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
 		struct sock *sk;
@@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq)
 
 		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-			if (sk->sk_family != afinfo->family ||
+			if ((afinfo->family != AF_UNSPEC &&
+			     sk->sk_family != afinfo->family) ||
 			    !net_eq(sock_net(sk), net)) {
 				continue;
 			}
@@ -2304,19 +2316,25 @@ static void *established_get_first(struct seq_file *seq)
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct tcp_seq_afinfo *afinfo;
 	struct sock *sk = cur;
 	struct hlist_nulls_node *node;
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 
+	if (st->bpf_seq_afinfo)
+		afinfo = st->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	++st->num;
 	++st->offset;
 
 	sk = sk_nulls_next(sk);
 
 	sk_nulls_for_each_from(sk, node) {
-		if (sk->sk_family == afinfo->family &&
+		if ((afinfo->family == AF_UNSPEC ||
+		     sk->sk_family == afinfo->family) &&
 		    net_eq(sock_net(sk), net))
 			return sk;
 	}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets
@ 2020-06-23  0:36 Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 01/15] net: bpf: add bpf_seq_afinfo in tcp_iter_state Yonghong Song
                   ` (14 more replies)
  0 siblings, 15 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

bpf iterator implments traversal of kernel data structures and these
data structures are passed to a bpf program for processing.
This gives great flexibility for users to examine kernel data
structure without using e.g. /proc/net which has limited and
fixed format.

Commit 138d0be35b14 ("net: bpf: Add netlink and ipv6_route bpf_iter targets")
implemented bpf iterators for netlink and ipv6_route.
This patch set intends to implement bpf iterators for tcp and udp.

Currently, /proc/net/tcp is used to print tcp4 stats and /proc/net/tcp6
is used to print tcp6 stats. /proc/net/udp[6] have similar usage model.
In contrast, only one tcp iterator is implemented and it is bpf program
resposibility to filter based on socket family. The same is for udp.
This will avoid another unnecessary traversal pass if users want
to check both tcp4 and tcp6.

Several helpers are also implemented in this patch
  bpf_skc_to_{tcp, tcp6, tcp_timewait, tcp_request, udp6}_sock
The argument for these helpers is not a fixed btf_id. For example,
  bpf_skc_to_tcp(struct sock_common *), or
  bpf_skc_to_tcp(struct sock *), or
  bpf_skc_to_tcp(struct inet_sock *), ...
are all valid. At runtime, the helper will check whether pointer cast
is legal or not. Please see Patch #5 for details.

Since btf_id's for both arguments and return value are known at
build time, the btf_id's are pre-computed once vmlinux btf becomes
valid. Jiri's "adding d_path helper" patch set
  https://lore.kernel.org/bpf/20200616100512.2168860-1-jolsa@kernel.org/T/
provides a way to pre-compute btf id during vmlinux build time.
This can be applied here as well. A followup patch can convert
to build time btf id computation after Jiri's patch landed.

Changelogs:
  v2 -> v3:
    - change sock_cast*/SOCK_CAST* names to btf_sock* names for generality (Martin)
    - change gpl_license to false (Martin)
    - fix helper to cast to tcp timewait/request socket. (Martin)
  v1 -> v2:
    - guard init_sock_cast_types() defination properly with CONFIG_NET (Martin)
    - reuse the btf_ids, computed for new helper argument, for return
      values (Martin)
    - using BTF_TYPE_EMIT to express intent of btf type generation (Andrii)
    - abstract out common net macros into bpf_tracing_net.h (Andrii)

Yonghong Song (15):
  net: bpf: add bpf_seq_afinfo in tcp_iter_state
  net: bpf: implement bpf iterator for tcp
  bpf: support 'X' in bpf_seq_printf() helper
  bpf: allow tracing programs to use bpf_jiffies64() helper
  bpf: add bpf_skc_to_tcp6_sock() helper
  bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers
  net: bpf: add bpf_seq_afinfo in udp_iter_state
  net: bpf: implement bpf iterator for udp
  bpf: add bpf_skc_to_udp6_sock() helper
  bpf/selftests: move newer bpf_iter_* type redefining to a new header
    file
  tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h
  tools/libbpf: add more common macros to bpf_tracing_net.h
  tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs
  tools/bpf: add udp4/udp6 bpf iterator
  bpf/selftests: add tcp/udp iterator programs to selftests

 include/linux/bpf.h                           |  16 ++
 include/net/tcp.h                             |   1 +
 include/net/udp.h                             |   1 +
 include/uapi/linux/bpf.h                      |  37 ++-
 kernel/bpf/btf.c                              |   1 +
 kernel/bpf/verifier.c                         |  43 ++-
 kernel/trace/bpf_trace.c                      |  15 +-
 net/core/filter.c                             | 156 +++++++++++
 net/ipv4/tcp_ipv4.c                           | 153 ++++++++++-
 net/ipv4/udp.c                                | 144 +++++++++-
 scripts/bpf_helpers_doc.py                    |  10 +
 tools/include/uapi/linux/bpf.h                |  37 ++-
 tools/lib/bpf/Makefile                        |   1 +
 tools/lib/bpf/bpf_tracing_net.h               |  51 ++++
 .../selftests/bpf/prog_tests/bpf_iter.c       |  68 +++++
 tools/testing/selftests/bpf/progs/bpf_iter.h  |  80 ++++++
 .../selftests/bpf/progs/bpf_iter_bpf_map.c    |  18 +-
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c |  25 +-
 .../selftests/bpf/progs/bpf_iter_netlink.c    |  22 +-
 .../selftests/bpf/progs/bpf_iter_task.c       |  18 +-
 .../selftests/bpf/progs/bpf_iter_task_file.c  |  20 +-
 .../selftests/bpf/progs/bpf_iter_tcp4.c       | 235 ++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_tcp6.c       | 250 ++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_test_kern3.c |  17 +-
 .../selftests/bpf/progs/bpf_iter_test_kern4.c |  17 +-
 .../bpf/progs/bpf_iter_test_kern_common.h     |  18 +-
 .../selftests/bpf/progs/bpf_iter_udp4.c       |  71 +++++
 .../selftests/bpf/progs/bpf_iter_udp6.c       |  79 ++++++
 28 files changed, 1435 insertions(+), 169 deletions(-)
 create mode 100644 tools/lib/bpf/bpf_tracing_net.h
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter.h
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c

-- 
2.24.1


^ permalink raw reply	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 02/15] net: bpf: implement bpf iterator for tcp
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 01/15] net: bpf: add bpf_seq_afinfo in tcp_iter_state Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 03/15] bpf: support 'X' in bpf_seq_printf() helper Yonghong Song
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

The bpf iterator for tcp is implemented. Both tcp4 and tcp6
sockets will be traversed. It is up to bpf program to
filter for tcp4 or tcp6 only, or both families of sockets.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 net/ipv4/tcp_ipv4.c | 123 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9cb65ee4ec63..ea0df9fd7618 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2613,6 +2613,74 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__tcp {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct sock_common *, sk_common);
+	uid_t uid __aligned(8);
+};
+
+static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+			     struct sock_common *sk_common, uid_t uid)
+{
+	struct bpf_iter__tcp ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.sk_common = sk_common;
+	ctx.uid = uid;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	struct sock *sk = v;
+	uid_t uid;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	if (sk->sk_state == TCP_TIME_WAIT) {
+		uid = 0;
+	} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
+		const struct request_sock *req = v;
+
+		uid = from_kuid_munged(seq_user_ns(seq),
+				       sock_i_uid(req->rsk_listener));
+	} else {
+		uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+	}
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	return tcp_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)tcp_prog_seq_show(prog, &meta, v, 0);
+	}
+
+	tcp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_tcp_seq_ops = {
+	.show		= bpf_iter_tcp_seq_show,
+	.start		= tcp_seq_start,
+	.next		= tcp_seq_next,
+	.stop		= bpf_iter_tcp_seq_stop,
+};
+#endif
+
 static const struct seq_operations tcp4_seq_ops = {
 	.show		= tcp4_seq_show,
 	.start		= tcp_seq_start,
@@ -2844,8 +2912,63 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
        .exit_batch = tcp_sk_exit_batch,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
+		     struct sock_common *sk_common, uid_t uid)
+
+static int bpf_iter_init_tcp(void *priv_data)
+{
+	struct tcp_iter_state *st = priv_data;
+	struct tcp_seq_afinfo *afinfo;
+	int ret;
+
+	afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+	if (!afinfo)
+		return -ENOMEM;
+
+	afinfo->family = AF_UNSPEC;
+	st->bpf_seq_afinfo = afinfo;
+	ret = bpf_iter_init_seq_net(priv_data);
+	if (ret)
+		kfree(afinfo);
+	return ret;
+}
+
+static void bpf_iter_fini_tcp(void *priv_data)
+{
+	struct tcp_iter_state *st = priv_data;
+
+	kfree(st->bpf_seq_afinfo);
+	bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_reg tcp_reg_info = {
+	.target			= "tcp",
+	.seq_ops		= &bpf_iter_tcp_seq_ops,
+	.init_seq_private	= bpf_iter_init_tcp,
+	.fini_seq_private	= bpf_iter_fini_tcp,
+	.seq_priv_size		= sizeof(struct tcp_iter_state),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__tcp, sk_common),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static void __init bpf_iter_register(void)
+{
+	if (bpf_iter_reg_target(&tcp_reg_info))
+		pr_warn("Warning: could not register bpf iterator tcp\n");
+}
+
+#endif
+
 void __init tcp_v4_init(void)
 {
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_register();
+#endif
 }
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 03/15] bpf: support 'X' in bpf_seq_printf() helper
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 01/15] net: bpf: add bpf_seq_afinfo in tcp_iter_state Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 02/15] net: bpf: implement bpf iterator for tcp Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 04/15] bpf: allow tracing programs to use bpf_jiffies64() helper Yonghong Song
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

'X' tells kernel to print hex with upper case letters.
/proc/net/tcp{4,6} seq_file show() used this, and
supports it in bpf_seq_printf() helper too.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 kernel/trace/bpf_trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e729c9e587a0..dbee30e2ad91 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -681,7 +681,8 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 		}
 
 		if (fmt[i] != 'i' && fmt[i] != 'd' &&
-		    fmt[i] != 'u' && fmt[i] != 'x') {
+		    fmt[i] != 'u' && fmt[i] != 'x' &&
+		    fmt[i] != 'X') {
 			err = -EINVAL;
 			goto out;
 		}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 04/15] bpf: allow tracing programs to use bpf_jiffies64() helper
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (2 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 03/15] bpf: support 'X' in bpf_seq_printf() helper Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper Yonghong Song
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

/proc/net/tcp{4,6} uses jiffies for various computations.
Let us add bpf_jiffies64() helper to tracing program
so bpf_iter and other programs can use it.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 kernel/trace/bpf_trace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dbee30e2ad91..afaec7e082d9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1135,6 +1135,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ringbuf_discard_proto;
 	case BPF_FUNC_ringbuf_query:
 		return &bpf_ringbuf_query_proto;
+	case BPF_FUNC_jiffies64:
+		return &bpf_jiffies64_proto;
 	default:
 		return NULL;
 	}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (3 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 04/15] bpf: allow tracing programs to use bpf_jiffies64() helper Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  5:46     ` kernel test robot
                     ` (2 more replies)
  2020-06-23  0:36 ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers Yonghong Song
                   ` (9 subsequent siblings)
  14 siblings, 3 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

The helper is used in tracing programs to cast a socket
pointer to a tcp6_sock pointer.
The return value could be NULL if the casting is illegal.

A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
so the verifier is able to deduce proper return types for the helper.

Different from the previous BTF_ID based helpers,
the bpf_skc_to_tcp6_sock() argument can be several possible
btf_ids. More specifically, all possible socket data structures
with sock_common appearing in the first in the memory layout.
This patch only added socket types related to tcp and udp.

All possible argument btf_id and return value btf_id
for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
cached. In the future, it is even possible to precompute
these btf_id's at kernel build time.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            | 12 +++++
 include/uapi/linux/bpf.h       |  9 +++-
 kernel/bpf/btf.c               |  1 +
 kernel/bpf/verifier.c          | 43 +++++++++++++-----
 kernel/trace/bpf_trace.c       |  2 +
 net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 +
 tools/include/uapi/linux/bpf.h |  9 +++-
 8 files changed, 146 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1e1501ee53ce..e2b9f2075e5b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -265,6 +265,7 @@ enum bpf_return_type {
 	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
 	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
+	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -287,6 +288,12 @@ struct bpf_func_proto {
 		enum bpf_arg_type arg_type[5];
 	};
 	int *btf_id; /* BTF ids of arguments */
+	bool (*check_btf_id)(u32 btf_id, u32 arg); /* if the argument btf_id is
+						    * valid. Often used if more
+						    * than one btf id is permitted
+						    * for this argument.
+						    */
+	int *ret_btf_id; /* return value btf_id */
 };
 
 /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -1638,6 +1645,7 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
 extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
 extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
 extern const struct bpf_func_proto bpf_ringbuf_query_proto;
+extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -1661,6 +1669,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				struct bpf_insn *insn_buf,
 				struct bpf_prog *prog,
 				u32 *target_size);
+void init_btf_sock_ids(struct btf *btf);
 #else
 static inline bool bpf_sock_common_is_valid_access(int off, int size,
 						   enum bpf_access_type type,
@@ -1682,6 +1691,9 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 {
 	return 0;
 }
+static inline void init_btf_sock_ids(struct btf *btf)
+{
+}
 #endif
 
 #ifdef CONFIG_INET
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 19684813faae..394fcba27b6a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3252,6 +3252,12 @@ union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3389,7 +3395,8 @@ union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(skc_to_tcp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e377d1981730..4c3007f428b1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3674,6 +3674,7 @@ struct btf *btf_parse_vmlinux(void)
 		goto errout;
 
 	bpf_struct_ops_init(btf, log);
+	init_btf_sock_ids(btf);
 
 	btf_verifier_env_free(env);
 	refcount_set(&btf->refcnt, 1);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7460f967cb75..7de98906ddf4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3800,12 +3800,14 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
 	return -EINVAL;
 }
 
-static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
-			  enum bpf_arg_type arg_type,
-			  struct bpf_call_arg_meta *meta)
+static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
+			  struct bpf_call_arg_meta *meta,
+			  const struct bpf_func_proto *fn)
 {
+	u32 regno = BPF_REG_1 + arg;
 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 	enum bpf_reg_type expected_type, type = reg->type;
+	enum bpf_arg_type arg_type = fn->arg_type[arg];
 	int err = 0;
 
 	if (arg_type == ARG_DONTCARE)
@@ -3885,9 +3887,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		expected_type = PTR_TO_BTF_ID;
 		if (type != expected_type)
 			goto err_type;
-		if (reg->btf_id != meta->btf_id) {
-			verbose(env, "Helper has type %s got %s in R%d\n",
-				kernel_type_name(meta->btf_id),
+		if (!fn->check_btf_id) {
+			if (reg->btf_id != meta->btf_id) {
+				verbose(env, "Helper has type %s got %s in R%d\n",
+					kernel_type_name(meta->btf_id),
+					kernel_type_name(reg->btf_id), regno);
+
+				return -EACCES;
+			}
+		} else if (!fn->check_btf_id(reg->btf_id, arg)) {
+			verbose(env, "Helper does not support %s in R%d\n",
 				kernel_type_name(reg->btf_id), regno);
 
 			return -EACCES;
@@ -4709,10 +4718,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	meta.func_id = func_id;
 	/* check args */
 	for (i = 0; i < 5; i++) {
-		err = btf_resolve_helper_id(&env->log, fn, i);
-		if (err > 0)
-			meta.btf_id = err;
-		err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
+		if (!fn->check_btf_id) {
+			err = btf_resolve_helper_id(&env->log, fn, i);
+			if (err > 0)
+				meta.btf_id = err;
+		}
+		err = check_func_arg(env, i, &meta, fn);
 		if (err)
 			return err;
 	}
@@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
 		regs[BPF_REG_0].id = ++env->id_gen;
 		regs[BPF_REG_0].mem_size = meta.mem_size;
+	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
+		int ret_btf_id;
+
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+		ret_btf_id = *fn->ret_btf_id;
+		if (ret_btf_id == 0) {
+			verbose(env, "invalid return type %d of func %s#%d\n",
+				fn->ret_type, func_id_name(func_id), func_id);
+			return -EINVAL;
+		}
+		regs[BPF_REG_0].btf_id = ret_btf_id;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index afaec7e082d9..478c10d1ec33 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1515,6 +1515,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_output_proto;
 	case BPF_FUNC_xdp_output:
 		return &bpf_xdp_output_proto;
+	case BPF_FUNC_skc_to_tcp6_sock:
+		return &bpf_skc_to_tcp6_sock_proto;
 #endif
 	case BPF_FUNC_seq_printf:
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/net/core/filter.c b/net/core/filter.c
index 73395384afe2..32efc1fc16cf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,7 @@
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <net/sch_generic.h>
 #include <net/cls_cgroup.h>
 #include <net/dst_metadata.h>
@@ -9191,3 +9192,82 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
 {
 	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
 }
+
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first argument in its memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock")			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock")	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock")	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock")	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock")		\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock")			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common")		\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock")			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock")	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock")	\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock")			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock")			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+static const char *bpf_sock_types[] = {
+#define BTF_SOCK_TYPE(name, str) str,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+};
+
+static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
+
+void init_btf_sock_ids(struct btf *btf)
+{
+	int i, btf_id;
+
+	for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
+		btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
+					       BTF_KIND_STRUCT);
+		if (btf_id > 0)
+			btf_sock_ids[i] = btf_id;
+	}
+}
+
+static bool check_arg_btf_id(u32 btf_id, u32 arg)
+{
+	int i;
+
+	/* only one argument, no need to check arg */
+	for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
+		if (btf_sock_ids[i] == btf_id)
+			return true;
+	return false;
+}
+
+BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
+{
+	/* tcp6_sock type is not generated in dwarf and hence btf,
+	 * trigger an explicit type generation here.
+	 */
+	BTF_TYPE_EMIT(struct tcp6_sock);
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+	    sk->sk_family == AF_INET6)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
+	.func			= bpf_skc_to_tcp6_sock,
+	.gpl_only		= false,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID,
+	.check_btf_id		= check_arg_btf_id,
+	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
+};
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 91fa668fa860..6c2f64118651 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -421,6 +421,7 @@ class PrinterHelpers(Printer):
             'struct sockaddr',
             'struct tcphdr',
             'struct seq_file',
+            'struct tcp6_sock',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -458,6 +459,7 @@ class PrinterHelpers(Printer):
             'struct sockaddr',
             'struct tcphdr',
             'struct seq_file',
+            'struct tcp6_sock',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 19684813faae..394fcba27b6a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3252,6 +3252,12 @@ union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3389,7 +3395,8 @@ union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(skc_to_tcp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (4 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  5:18     ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers kernel test robot
  2020-06-23  6:39     ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers kernel test robot
  2020-06-23  0:36 ` [PATCH bpf-next v3 07/15] net: bpf: add bpf_seq_afinfo in udp_iter_state Yonghong Song
                   ` (8 subsequent siblings)
  14 siblings, 2 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

Three more helpers are added to cast a sock_common pointer to
an tcp_sock, tcp_timewait_sock or a tcp_request_sock for
tracing programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |  3 ++
 include/uapi/linux/bpf.h       | 23 ++++++++++++++-
 kernel/trace/bpf_trace.c       |  6 ++++
 net/core/filter.c              | 54 ++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  6 ++++
 tools/include/uapi/linux/bpf.h | 23 ++++++++++++++-
 6 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e2b9f2075e5b..cc3f89827b89 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1646,6 +1646,9 @@ extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
 extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
 extern const struct bpf_func_proto bpf_ringbuf_query_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
+extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
+extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
+extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 394fcba27b6a..e256417d94c2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3258,6 +3258,24 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3396,7 +3414,10 @@ union bpf_attr {
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
 	FN(csum_level),			\
-	FN(skc_to_tcp6_sock),
+	FN(skc_to_tcp6_sock),		\
+	FN(skc_to_tcp_sock),		\
+	FN(skc_to_tcp_timewait_sock),	\
+	FN(skc_to_tcp_request_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 478c10d1ec33..de5fbe66e1ca 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1517,6 +1517,12 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_output_proto;
 	case BPF_FUNC_skc_to_tcp6_sock:
 		return &bpf_skc_to_tcp6_sock_proto;
+	case BPF_FUNC_skc_to_tcp_sock:
+		return &bpf_skc_to_tcp_sock_proto;
+	case BPF_FUNC_skc_to_tcp_timewait_sock:
+		return &bpf_skc_to_tcp_timewait_sock_proto;
+	case BPF_FUNC_skc_to_tcp_request_sock:
+		return &bpf_skc_to_tcp_request_sock_proto;
 #endif
 	case BPF_FUNC_seq_printf:
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/net/core/filter.c b/net/core/filter.c
index 32efc1fc16cf..140fc0fdf3e1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -74,6 +74,7 @@
 #include <net/lwtunnel.h>
 #include <net/ipv6_stubs.h>
 #include <net/bpf_sk_storage.h>
+#include <net/transp_v6.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -9271,3 +9272,56 @@ const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
 	.check_btf_id		= check_arg_btf_id,
 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
 };
+
+BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
+{
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
+	.func			= bpf_skc_to_tcp_sock,
+	.gpl_only		= false,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID,
+	.check_btf_id		= check_arg_btf_id,
+	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
+{
+	if ((sk->sk_prot == &tcp_prot || sk->sk_prot == &tcpv6_prot) &&
+	    sk->sk_state == TCP_TIME_WAIT)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
+	.func			= bpf_skc_to_tcp_timewait_sock,
+	.gpl_only		= false,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID,
+	.check_btf_id		= check_arg_btf_id,
+	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
+{
+	if ((sk->sk_prot == &tcp_prot || sk->sk_prot == &tcpv6_prot) &&
+	    sk->sk_state == TCP_NEW_SYN_RECV)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
+	.func			= bpf_skc_to_tcp_request_sock,
+	.gpl_only		= false,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID,
+	.check_btf_id		= check_arg_btf_id,
+	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
+};
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 6c2f64118651..d886657c6aaa 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -422,6 +422,9 @@ class PrinterHelpers(Printer):
             'struct tcphdr',
             'struct seq_file',
             'struct tcp6_sock',
+            'struct tcp_sock',
+            'struct tcp_timewait_sock',
+            'struct tcp_request_sock',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -460,6 +463,9 @@ class PrinterHelpers(Printer):
             'struct tcphdr',
             'struct seq_file',
             'struct tcp6_sock',
+            'struct tcp_sock',
+            'struct tcp_timewait_sock',
+            'struct tcp_request_sock',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 394fcba27b6a..e256417d94c2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3258,6 +3258,24 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3396,7 +3414,10 @@ union bpf_attr {
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
 	FN(csum_level),			\
-	FN(skc_to_tcp6_sock),
+	FN(skc_to_tcp6_sock),		\
+	FN(skc_to_tcp_sock),		\
+	FN(skc_to_tcp_timewait_sock),	\
+	FN(skc_to_tcp_request_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 07/15] net: bpf: add bpf_seq_afinfo in udp_iter_state
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (5 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 08/15] net: bpf: implement bpf iterator for udp Yonghong Song
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

Similar to tcp_iter_state, a new field bpf_seq_afinfo is
added to udp_iter_state to provide bpf udp iterator
afinfo.

This does not change /proc/net/{udp, udp6} behavior. But
it enables bpf iterator to avoid get afinfo from PDE_DATA
and iterate through all udp and udp6 sockets in one pass.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/net/udp.h |  1 +
 net/ipv4/udp.c    | 28 +++++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/include/net/udp.h b/include/net/udp.h
index a8fa6c0c6ded..67c8b7368845 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -440,6 +440,7 @@ struct udp_seq_afinfo {
 struct udp_iter_state {
 	struct seq_net_private  p;
 	int			bucket;
+	struct udp_seq_afinfo	*bpf_seq_afinfo;
 };
 
 void *udp_seq_start(struct seq_file *seq, loff_t *pos);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b7ebbcae497..90355301b266 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2826,10 +2826,15 @@ EXPORT_SYMBOL(udp_prot);
 static struct sock *udp_get_first(struct seq_file *seq, int start)
 {
 	struct sock *sk;
-	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct udp_seq_afinfo *afinfo;
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
+	if (state->bpf_seq_afinfo)
+		afinfo = state->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
 	     ++state->bucket) {
 		struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
@@ -2841,7 +2846,8 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 		sk_for_each(sk, &hslot->head) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
-			if (sk->sk_family == afinfo->family)
+			if (afinfo->family == AF_UNSPEC ||
+			    sk->sk_family == afinfo->family)
 				goto found;
 		}
 		spin_unlock_bh(&hslot->lock);
@@ -2853,13 +2859,20 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
 
 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 {
-	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct udp_seq_afinfo *afinfo;
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
+	if (state->bpf_seq_afinfo)
+		afinfo = state->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	do {
 		sk = sk_next(sk);
-	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
+	} while (sk && (!net_eq(sock_net(sk), net) ||
+			(afinfo->family != AF_UNSPEC &&
+			 sk->sk_family != afinfo->family)));
 
 	if (!sk) {
 		if (state->bucket <= afinfo->udp_table->mask)
@@ -2904,9 +2917,14 @@ EXPORT_SYMBOL(udp_seq_next);
 
 void udp_seq_stop(struct seq_file *seq, void *v)
 {
-	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+	struct udp_seq_afinfo *afinfo;
 	struct udp_iter_state *state = seq->private;
 
+	if (state->bpf_seq_afinfo)
+		afinfo = state->bpf_seq_afinfo;
+	else
+		afinfo = PDE_DATA(file_inode(seq->file));
+
 	if (state->bucket <= afinfo->udp_table->mask)
 		spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
 }
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 08/15] net: bpf: implement bpf iterator for udp
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (6 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 07/15] net: bpf: add bpf_seq_afinfo in udp_iter_state Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper Yonghong Song
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

The bpf iterator for udp is implemented. Both udp4 and udp6
sockets will be traversed. It is up to bpf program to
filter for udp4 or udp6 only, or both families of sockets.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 net/ipv4/udp.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 90355301b266..31530129f137 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2968,6 +2968,67 @@ int udp4_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__udp {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct udp_sock *, udp_sk);
+	uid_t uid __aligned(8);
+	int bucket __aligned(8);
+};
+
+static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+			     struct udp_sock *udp_sk, uid_t uid, int bucket)
+{
+	struct bpf_iter__udp ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.udp_sk = udp_sk;
+	ctx.uid = uid;
+	ctx.bucket = bucket;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
+{
+	struct udp_iter_state *state = seq->private;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	struct sock *sk = v;
+	uid_t uid;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+}
+
+static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)udp_prog_seq_show(prog, &meta, v, 0, 0);
+	}
+
+	udp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_udp_seq_ops = {
+	.start		= udp_seq_start,
+	.next		= udp_seq_next,
+	.stop		= bpf_iter_udp_seq_stop,
+	.show		= bpf_iter_udp_seq_show,
+};
+#endif
+
 const struct seq_operations udp_seq_ops = {
 	.start		= udp_seq_start,
 	.next		= udp_seq_next,
@@ -3085,6 +3146,57 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
 	.init	= udp_sysctl_init,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
+		     struct udp_sock *udp_sk, uid_t uid, int bucket)
+
+static int bpf_iter_init_udp(void *priv_data)
+{
+	struct udp_iter_state *st = priv_data;
+	struct udp_seq_afinfo *afinfo;
+	int ret;
+
+	afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+	if (!afinfo)
+		return -ENOMEM;
+
+	afinfo->family = AF_UNSPEC;
+	afinfo->udp_table = &udp_table;
+	st->bpf_seq_afinfo = afinfo;
+	ret = bpf_iter_init_seq_net(priv_data);
+	if (ret)
+		kfree(afinfo);
+	return ret;
+}
+
+static void bpf_iter_fini_udp(void *priv_data)
+{
+	struct udp_iter_state *st = priv_data;
+
+	kfree(st->bpf_seq_afinfo);
+	bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_reg udp_reg_info = {
+	.target			= "udp",
+	.seq_ops		= &bpf_iter_udp_seq_ops,
+	.init_seq_private	= bpf_iter_init_udp,
+	.fini_seq_private	= bpf_iter_fini_udp,
+	.seq_priv_size		= sizeof(struct udp_iter_state),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__udp, udp_sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static void __init bpf_iter_register(void)
+{
+	if (bpf_iter_reg_target(&udp_reg_info))
+		pr_warn("Warning: could not register bpf iterator udp\n");
+}
+#endif
+
 void __init udp_init(void)
 {
 	unsigned long limit;
@@ -3110,4 +3222,8 @@ void __init udp_init(void)
 
 	if (register_pernet_subsys(&udp_sysctl_ops))
 		panic("UDP: failed to init sysctl parameters.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_register();
+#endif
 }
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (7 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 08/15] net: bpf: implement bpf iterator for udp Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  1:47   ` Eric Dumazet
  2020-06-23  0:36 ` [PATCH bpf-next v3 10/15] bpf/selftests: move newer bpf_iter_* type redefining to a new header file Yonghong Song
                   ` (5 subsequent siblings)
  14 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

The helper is used in tracing programs to cast a socket
pointer to a udp6_sock pointer.
The return value could be NULL if the casting is illegal.

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  9 ++++++++-
 kernel/trace/bpf_trace.c       |  2 ++
 net/core/filter.c              | 22 ++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |  2 ++
 tools/include/uapi/linux/bpf.h |  9 ++++++++-
 6 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc3f89827b89..3f5c6bb5e3a7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1649,6 +1649,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
+extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e256417d94c2..3f4b12c5c563 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3276,6 +3276,12 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3417,7 +3423,8 @@ union bpf_attr {
 	FN(skc_to_tcp6_sock),		\
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
-	FN(skc_to_tcp_request_sock),
+	FN(skc_to_tcp_request_sock),	\
+	FN(skc_to_udp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index de5fbe66e1ca..d10ab16c4a2f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1523,6 +1523,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skc_to_tcp_timewait_sock_proto;
 	case BPF_FUNC_skc_to_tcp_request_sock:
 		return &bpf_skc_to_tcp_request_sock_proto;
+	case BPF_FUNC_skc_to_udp6_sock:
+		return &bpf_skc_to_udp6_sock_proto;
 #endif
 	case BPF_FUNC_seq_printf:
 		return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/net/core/filter.c b/net/core/filter.c
index 140fc0fdf3e1..9a98f3616273 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9325,3 +9325,25 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
 	.check_btf_id		= check_arg_btf_id,
 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
 };
+
+BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
+{
+	/* udp6_sock type is not generated in dwarf and hence btf,
+	 * trigger an explicit type generation here.
+	 */
+	BTF_TYPE_EMIT(struct udp6_sock);
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+	    sk->sk_family == AF_INET6)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
+	.func			= bpf_skc_to_udp6_sock,
+	.gpl_only		= false,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID,
+	.check_btf_id		= check_arg_btf_id,
+	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
+};
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index d886657c6aaa..6bab40ff442e 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -425,6 +425,7 @@ class PrinterHelpers(Printer):
             'struct tcp_sock',
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
+            'struct udp6_sock',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -466,6 +467,7 @@ class PrinterHelpers(Printer):
             'struct tcp_sock',
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
+            'struct udp6_sock',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e256417d94c2..3f4b12c5c563 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3276,6 +3276,12 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3417,7 +3423,8 @@ union bpf_attr {
 	FN(skc_to_tcp6_sock),		\
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
-	FN(skc_to_tcp_request_sock),
+	FN(skc_to_tcp_request_sock),	\
+	FN(skc_to_udp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 10/15] bpf/selftests: move newer bpf_iter_* type redefining to a new header file
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (8 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h Yonghong Song
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team,
	Martin KaFai Lau, Andrii Nakryiko

Commit b9f4c01f3e0b ("selftest/bpf: Make bpf_iter selftest
compilable against old vmlinux.h") and Commit dda18a5c0b75
("selftests/bpf: Convert bpf_iter_test_kern{3, 4}.c to define
own bpf_iter_meta") redefined newly introduced types
in bpf programs so the bpf program can still compile
properly with old kernels although loading may fail.

Since this patch set introduced new types and the same
workaround is needed, so let us move the workaround
to a separate header file so they do not clutter
bpf programs.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/testing/selftests/bpf/progs/bpf_iter.h  | 49 +++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_bpf_map.c    | 18 +------
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 18 +------
 .../selftests/bpf/progs/bpf_iter_netlink.c    | 18 +------
 .../selftests/bpf/progs/bpf_iter_task.c       | 18 +------
 .../selftests/bpf/progs/bpf_iter_task_file.c  | 20 +-------
 .../selftests/bpf/progs/bpf_iter_test_kern3.c | 17 +------
 .../selftests/bpf/progs/bpf_iter_test_kern4.c | 17 +------
 .../bpf/progs/bpf_iter_test_kern_common.h     | 18 +------
 9 files changed, 57 insertions(+), 136 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter.h

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..d8e6820e49e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+	struct bpf_iter_meta *meta;
+	struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+	struct bpf_iter_meta *meta;
+	struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+	struct bpf_iter_meta *meta; 
+	struct task_struct *task;
+	__u32 fd;
+	struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b57bd6fef208..08651b23edba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,27 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
-	struct bpf_iter_meta *meta;
-	struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
 SEC("iter/bpf_map")
 int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index c8e9ca74c87b..93a452d1d136 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,25 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__ipv6_route
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
-	struct bpf_iter_meta *meta;
-	struct fib6_info *rt;
-} __attribute__((preserve_access_index));
-
 char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index e7b8753eac0b..fda5036fdf75 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,11 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__netlink
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -14,17 +9,6 @@ char _license[] SEC("license") = "GPL";
 #define sk_rmem_alloc	sk_backlog.rmem_alloc
 #define sk_refcnt	__sk_common.skc_refcnt
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
-	struct bpf_iter_meta *meta;
-	struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index ee754021f98e..4983087852a0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -1,27 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 0f0ec3db20ba..8b787baa2654 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,29 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task_file
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-	__u32 fd;
-	struct file *file;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 13c2c90c835f..2a4647f20c46 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,25 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index 0aa71b333cf3..ee49493dc125 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,25 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
-	struct bpf_iter_meta *meta;
-	struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
 __u32 map1_id = 0, map2_id = 0;
 __u32 map1_accessed = 0, map2_accessed = 0;
 __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index dee1339e6905..d5e3df66ad9a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,27 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 int count = 0;
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (9 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 10/15] bpf/selftests: move newer bpf_iter_* type redefining to a new header file Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  6:45   ` Andrii Nakryiko
  2020-06-23  0:36 ` [PATCH bpf-next v3 12/15] tools/libbpf: add more common macros to bpf_tracing_net.h Yonghong Song
                   ` (3 subsequent siblings)
  14 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

Refactor bpf_iter_ipv6_route.c and bpf_iter_netlink.c
so net macros, originally from various include/linux header
files, are moved to a new libbpf installable header file
bpf_tracing_net.h. The goal is to improve reuse so
networking tracing programs do not need to
copy these macros every time they use them.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/lib/bpf/Makefile                           |  1 +
 tools/lib/bpf/bpf_tracing_net.h                  | 16 ++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c    |  7 +------
 .../selftests/bpf/progs/bpf_iter_netlink.c       |  4 +---
 4 files changed, 19 insertions(+), 9 deletions(-)
 create mode 100644 tools/lib/bpf/bpf_tracing_net.h

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index bf8ed134cb8a..3d766c80eb78 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -257,6 +257,7 @@ install_headers: $(BPF_HELPER_DEFS)
 		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
 		$(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
+		$(call do_install,bpf_tracing_net.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
 		$(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
 
diff --git a/tools/lib/bpf/bpf_tracing_net.h b/tools/lib/bpf/bpf_tracing_net.h
new file mode 100644
index 000000000000..1f38a1098727
--- /dev/null
+++ b/tools/lib/bpf/bpf_tracing_net.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
+#define IFNAMSIZ		16
+
+#define RTF_GATEWAY		0x0002
+
+#define fib_nh_dev		nh_common.nhc_dev
+#define fib_nh_gw_family	nh_common.nhc_gw_family
+#define fib_nh_gw6		nh_common.nhc_gw.ipv6
+
+#define sk_rmem_alloc		sk_backlog.rmem_alloc
+#define sk_refcnt		__sk_common.skc_refcnt
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index 93a452d1d136..64f952d68710 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -3,17 +3,12 @@
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <bpf/bpf_tracing_net.h>
 
 char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
 
-#define RTF_GATEWAY		0x0002
-#define IFNAMSIZ		16
-#define fib_nh_gw_family	nh_common.nhc_gw_family
-#define fib_nh_gw6		nh_common.nhc_gw.ipv6
-#define fib_nh_dev		nh_common.nhc_dev
-
 SEC("iter/ipv6_route")
 int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index fda5036fdf75..cde88ad957a2 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -3,12 +3,10 @@
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <bpf/bpf_tracing_net.h>
 
 char _license[] SEC("license") = "GPL";
 
-#define sk_rmem_alloc	sk_backlog.rmem_alloc
-#define sk_refcnt	__sk_common.skc_refcnt
-
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 12/15] tools/libbpf: add more common macros to bpf_tracing_net.h
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (10 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs Yonghong Song
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

These newly added macros will be used in subsequent bpf iterator
tcp{4,6} and udp{4,6} programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/lib/bpf/bpf_tracing_net.h | 35 +++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tools/lib/bpf/bpf_tracing_net.h b/tools/lib/bpf/bpf_tracing_net.h
index 1f38a1098727..01378911252b 100644
--- a/tools/lib/bpf/bpf_tracing_net.h
+++ b/tools/lib/bpf/bpf_tracing_net.h
@@ -2,15 +2,50 @@
 #ifndef __BPF_TRACING_NET_H__
 #define __BPF_TRACING_NET_H__
 
+#define AF_INET			2
+#define AF_INET6		10
+
+#define ICSK_TIME_RETRANS	1
+#define ICSK_TIME_PROBE0	3
+#define ICSK_TIME_LOSS_PROBE	5
+#define ICSK_TIME_REO_TIMEOUT	6
+
 #define IFNAMSIZ		16
 
 #define RTF_GATEWAY		0x0002
 
+#define TCP_INFINITE_SSTHRESH	0x7fffffff
+#define TCP_PINGPONG_THRESH	3
+
 #define fib_nh_dev		nh_common.nhc_dev
 #define fib_nh_gw_family	nh_common.nhc_gw_family
 #define fib_nh_gw6		nh_common.nhc_gw.ipv6
 
+#define inet_daddr		sk.__sk_common.skc_daddr
+#define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
+#define inet_dport		sk.__sk_common.skc_dport
+
+#define ir_loc_addr		req.__req_common.skc_rcv_saddr
+#define ir_num			req.__req_common.skc_num
+#define ir_rmt_addr		req.__req_common.skc_daddr
+#define ir_rmt_port		req.__req_common.skc_dport
+#define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
+
+#define sk_family		__sk_common.skc_family
 #define sk_rmem_alloc		sk_backlog.rmem_alloc
 #define sk_refcnt		__sk_common.skc_refcnt
+#define sk_state		__sk_common.skc_state
+#define sk_v6_daddr		__sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
+
+#define s6_addr32		in6_u.u6_addr32
+
+#define tw_daddr		__tw_common.skc_daddr
+#define tw_rcv_saddr		__tw_common.skc_rcv_saddr
+#define tw_dport		__tw_common.skc_dport
+#define tw_refcnt		__tw_common.skc_refcnt
+#define tw_v6_daddr		__tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr		__tw_common.skc_v6_rcv_saddr
 
 #endif
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (11 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 12/15] tools/libbpf: add more common macros to bpf_tracing_net.h Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  6:56   ` Andrii Nakryiko
  2020-06-23  0:36 ` [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator Yonghong Song
  2020-06-23  0:36 ` [PATCH bpf-next v3 15/15] bpf/selftests: add tcp/udp iterator programs to selftests Yonghong Song
  14 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

In my VM, I got identical result compared to /proc/net/{tcp,tcp6}.
For tcp6:
  $ cat /proc/net/tcp6
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
     0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0

  $ cat /sys/fs/bpf/p1
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
     0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0

For tcp:
  $ cat /proc/net/tcp
  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
   0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
  $ cat /sys/fs/bpf/p2
  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
   1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 tools/testing/selftests/bpf/progs/bpf_iter.h  |  15 ++
 .../selftests/bpf/progs/bpf_iter_tcp4.c       | 235 ++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_tcp6.c       | 250 ++++++++++++++++++
 3 files changed, 500 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index d8e6820e49e6..ab3ed904d391 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -7,6 +7,8 @@
 #define bpf_iter__netlink bpf_iter__netlink___not_used
 #define bpf_iter__task bpf_iter__task___not_used
 #define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -14,6 +16,8 @@
 #undef bpf_iter__netlink
 #undef bpf_iter__task
 #undef bpf_iter__task_file
+#undef bpf_iter__tcp
+#undef tcp6_sock
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -47,3 +51,14 @@ struct bpf_iter__bpf_map {
 	struct bpf_iter_meta *meta;
 	struct bpf_map *map;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+	struct bpf_iter_meta *meta;
+	struct sock_common *sk_common;
+	uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+	struct tcp_sock	tcp;
+	struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..9d0d3e56e444
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_tracing_net.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+        return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+	return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig __weak;
+
+#define USER_HZ		100
+#define NSEC_PER_SEC	1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here tailored to a particular
+	 * setting of USER_HZ.
+	 */
+	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+	if ((tick_nsec % user_hz_nsec) == 0) {
+		if (CONFIG_HZ < USER_HZ)
+			return x * (USER_HZ / CONFIG_HZ);
+		else
+			return x / (CONFIG_HZ / USER_HZ);
+	}
+	return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+	if (delta <= 0)
+		return 0;
+
+	return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
+			 uid_t uid, __u32 seq_num)
+{
+	const struct inet_connection_sock *icsk;
+	const struct fastopen_queue *fastopenq;
+	const struct inet_sock *inet;
+	unsigned long timer_expires;
+	const struct sock *sp;
+	__u16 destp, srcp;
+	__be32 dest, src;
+	int timer_active;
+	int rx_queue;
+	int state;
+
+	icsk = &tp->inet_conn;
+	inet = &icsk->icsk_inet;
+	sp = &inet->sk;
+	fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+	dest = inet->inet_daddr;
+	src = inet->inet_rcv_saddr;
+	destp = bpf_ntohs(inet->inet_dport);
+	srcp = bpf_ntohs(inet->inet_sport);
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+		timer_active = 1;
+		timer_expires = icsk->icsk_timeout;
+	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+		timer_active = 4;
+		timer_expires = icsk->icsk_timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active = 2;
+		timer_expires = sp->sk_timer.expires;
+	} else {
+		timer_active = 0;
+		timer_expires = bpf_jiffies64();
+	}
+
+	state = sp->sk_state;
+	if (state == TCP_LISTEN) {
+		rx_queue = sp->sk_ack_backlog;
+	} else {
+		rx_queue = tp->rcv_nxt - tp->copied_seq;
+		if (rx_queue < 0)
+			rx_queue = 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, destp, destp);
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+		       state,
+		       tp->write_seq - tp->snd_una, rx_queue,
+		       timer_active,
+		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+		       icsk->icsk_retransmits, uid,
+		       icsk->icsk_probes_out,
+		       sock_i_ino(sp),
+		       sp->sk_refcnt.refs.counter);
+	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+		       tp,
+		       jiffies_to_clock_t(icsk->icsk_rto),
+		       jiffies_to_clock_t(icsk->icsk_ack.ato),
+		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+		       tp->snd_cwnd,
+		       state == TCP_LISTEN ? fastopenq->max_qlen
+				: (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+		      );
+
+	return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+			uid_t uid, __u32 seq_num)
+{
+	struct inet_timewait_sock *tw = &ttw->tw_sk;
+	__u16 destp, srcp;
+	__be32 dest, src;
+	long delta;
+
+	delta = tw->tw_timer.expires - bpf_jiffies64();
+	dest = tw->tw_daddr;
+	src  = tw->tw_rcv_saddr;
+	destp = bpf_ntohs(tw->tw_dport);
+	srcp  = bpf_ntohs(tw->tw_sport);
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, dest, destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       tw->tw_substate, 0, 0,
+		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+		       tw->tw_refcnt.refs.counter, tw);
+
+	return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+			 uid_t uid, __u32 seq_num)
+{
+	struct inet_request_sock *irsk = &treq->req;
+	struct request_sock *req = &irsk->req;
+	long ttd;
+
+	ttd = req->rsk_timer.expires - bpf_jiffies64();
+
+	if (ttd < 0)
+		ttd = 0;
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, irsk->ir_loc_addr,
+		       irsk->ir_num, irsk->ir_rmt_addr,
+		       bpf_ntohs(irsk->ir_rmt_port));
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+		       req->num_timeout, uid, 0, 0, 0, req);
+
+	return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	struct seq_file *seq = ctx->meta->seq;
+	struct tcp_timewait_sock *tw;
+	struct tcp_request_sock *req;
+	struct tcp_sock *tp;
+	uid_t uid = ctx->uid;
+	__u32 seq_num;
+
+	if (sk_common == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "  sl  "
+				    "local_address "
+				    "rem_address   "
+				    "st tx_queue rx_queue tr tm->when retrnsmt"
+				    "   uid  timeout inode\n");
+
+	if (sk_common->skc_family != AF_INET)
+		return 0;
+
+	tp = bpf_skc_to_tcp_sock(sk_common);
+	if (tp) {
+		return dump_tcp_sock(seq, tp, uid, seq_num);
+	}
+
+	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+	if (tw)
+		return dump_tw_sock(seq, tw, uid, seq_num);
+
+	req = bpf_skc_to_tcp_request_sock(sk_common);
+	if (req)
+		return dump_req_sock(seq, req, uid, seq_num);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..32b2209d1cde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_tracing_net.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+        return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+	return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig __weak;
+
+#define USER_HZ		100
+#define NSEC_PER_SEC	1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here tailored to a particular
+	 * setting of USER_HZ.
+	 */
+	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+	if ((tick_nsec % user_hz_nsec) == 0) {
+		if (CONFIG_HZ < USER_HZ)
+			return x * (USER_HZ / CONFIG_HZ);
+		else
+			return x / (CONFIG_HZ / USER_HZ);
+	}
+	return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+	if (delta <= 0)
+		return 0;
+
+	return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
+			 uid_t uid, __u32 seq_num)
+{
+	const struct inet_connection_sock *icsk;
+	const struct fastopen_queue *fastopenq;
+	const struct in6_addr *dest, *src;
+	const struct inet_sock *inet;
+	unsigned long timer_expires;
+	const struct sock *sp;
+	__u16 destp, srcp;
+	int timer_active;
+	int rx_queue;
+	int state;
+
+	icsk = &tp->tcp.inet_conn;
+	inet = &icsk->icsk_inet;
+	sp = &inet->sk;
+	fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+	dest = &sp->sk_v6_daddr;
+	src = &sp->sk_v6_rcv_saddr;
+	destp = bpf_ntohs(inet->inet_dport);
+	srcp = bpf_ntohs(inet->inet_sport);
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+		timer_active = 1;
+		timer_expires = icsk->icsk_timeout;
+	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+		timer_active = 4;
+		timer_expires = icsk->icsk_timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active = 2;
+		timer_expires = sp->sk_timer.expires;
+	} else {
+		timer_active = 0;
+		timer_expires = bpf_jiffies64();
+	}
+
+	state = sp->sk_state;
+	if (state == TCP_LISTEN) {
+		rx_queue = sp->sk_ack_backlog;
+	} else {
+		rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+		if (rx_queue < 0)
+			rx_queue = 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+		       state,
+		       tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+		       timer_active,
+		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+		       icsk->icsk_retransmits, uid,
+		       icsk->icsk_probes_out,
+		       sock_i_ino(sp),
+		       sp->sk_refcnt.refs.counter);
+	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+		       tp,
+		       jiffies_to_clock_t(icsk->icsk_rto),
+		       jiffies_to_clock_t(icsk->icsk_ack.ato),
+		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+		       tp->tcp.snd_cwnd,
+		       state == TCP_LISTEN ? fastopenq->max_qlen
+				: (tcp_in_initial_slowstart(&tp->tcp) ? -1
+								      : tp->tcp.snd_ssthresh)
+		      );
+
+	return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+			uid_t uid, __u32 seq_num)
+{
+	struct inet_timewait_sock *tw = &ttw->tw_sk;
+	const struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+	long delta;
+
+	delta = tw->tw_timer.expires - bpf_jiffies64();
+	dest = &tw->tw_v6_daddr;
+	src  = &tw->tw_v6_rcv_saddr;
+	destp = bpf_ntohs(tw->tw_dport);
+	srcp  = bpf_ntohs(tw->tw_sport);
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       tw->tw_substate, 0, 0,
+		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+		       tw->tw_refcnt.refs.counter, tw);
+
+	return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+			 uid_t uid, __u32 seq_num)
+{
+	struct inet_request_sock *irsk = &treq->req;
+	struct request_sock *req = &irsk->req;
+	struct in6_addr *src, *dest;
+	long ttd;
+
+	ttd = req->rsk_timer.expires - bpf_jiffies64();
+	src = &irsk->ir_v6_loc_addr;
+	dest = &irsk->ir_v6_rmt_addr;
+
+	if (ttd < 0)
+		ttd = 0;
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3],
+		       irsk->ir_num,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3],
+		       bpf_ntohs(irsk->ir_rmt_port));
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+		       req->num_timeout, uid, 0, 0, 0, req);
+
+	return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	struct seq_file *seq = ctx->meta->seq;
+	struct tcp_timewait_sock *tw;
+	struct tcp_request_sock *req;
+	struct tcp6_sock *tp;
+	uid_t uid = ctx->uid;
+	__u32 seq_num;
+
+	if (sk_common == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "  sl  "
+				    "local_address                         "
+				    "remote_address                        "
+				    "st tx_queue rx_queue tr tm->when retrnsmt"
+				    "   uid  timeout inode\n");
+
+	if (sk_common->skc_family != AF_INET6)
+		return 0;
+
+	tp = bpf_skc_to_tcp6_sock(sk_common);
+	if (tp)
+		return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+	if (tw)
+		return dump_tw_sock(seq, tw, uid, seq_num);
+
+	req = bpf_skc_to_tcp_request_sock(sk_common);
+	if (req)
+		return dump_req_sock(seq, req, uid, seq_num);
+
+	return 0;
+}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (12 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  6:57   ` Andrii Nakryiko
  2020-06-23  0:36 ` [PATCH bpf-next v3 15/15] bpf/selftests: add tcp/udp iterator programs to selftests Yonghong Song
  14 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

On my VM, I got identical results between /proc/net/udp[6] and
the udp{4,6} bpf iterator.

For udp6:
  $ cat /sys/fs/bpf/p1
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
  $ cat /proc/net/udp6
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0

For udp4:
  $ cat /sys/fs/bpf/p4
    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
  $ cat /proc/net/udp
    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
---
 tools/testing/selftests/bpf/progs/bpf_iter.h  | 16 ++++
 .../selftests/bpf/progs/bpf_iter_udp4.c       | 71 +++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_udp6.c       | 79 +++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index ab3ed904d391..d2b9e26efbe0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -9,6 +9,8 @@
 #define bpf_iter__task_file bpf_iter__task_file___not_used
 #define bpf_iter__tcp bpf_iter__tcp___not_used
 #define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -18,6 +20,8 @@
 #undef bpf_iter__task_file
 #undef bpf_iter__tcp
 #undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -62,3 +66,15 @@ struct tcp6_sock {
 	struct tcp_sock	tcp;
 	struct ipv6_pinfo inet6;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+	struct bpf_iter_meta *meta;
+	struct udp_sock *udp_sk;
+	uid_t uid __attribute__((aligned(8)));
+	int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+	struct udp_sock	udp;
+	struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..8a06b7f4a05d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_tracing_net.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/udp")
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct udp_sock *udp_sk = ctx->udp_sk;
+	struct inet_sock *inet;
+	__u16 srcp, destp;
+	__be32 dest, src;
+	__u32 seq_num;
+	int rqueue;
+
+	if (udp_sk == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq,
+			       "  sl  local_address rem_address   st tx_queue "
+			       "rx_queue tr tm->when retrnsmt   uid  timeout "
+			       "inode ref pointer drops\n");
+
+	/* filter out udp6 sockets */
+	inet = &udp_sk->inet;
+	if (inet->sk.sk_family == AF_INET6)
+		return 0;
+
+	inet = &udp_sk->inet;
+	dest = inet->inet_daddr;
+	src = inet->inet_rcv_saddr;
+	srcp = bpf_ntohs(inet->inet_sport);
+	destp = bpf_ntohs(inet->inet_dport);
+	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+
+	BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
+		       ctx->bucket, src, srcp, dest, destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		       inet->sk.sk_state,
+		       inet->sk.sk_wmem_alloc.refs.counter - 1,
+		       rqueue,
+		       0, 0L, 0, ctx->uid, 0,
+		       sock_i_ino(&inet->sk),
+		       inet->sk.sk_refcnt.refs.counter, udp_sk,
+		       inet->sk.sk_drops.counter);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..f5735baf45aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_tracing_net.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER				\
+	"  sl  "					\
+	"local_address                         "	\
+	"remote_address                        "	\
+	"st tx_queue rx_queue tr tm->when retrnsmt"	\
+	"   uid  timeout inode ref pointer drops\n"
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/udp")
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct udp_sock *udp_sk = ctx->udp_sk;
+	const struct in6_addr *dest, *src;
+	struct udp6_sock *udp6_sk;
+	struct inet_sock *inet;
+	__u16 srcp, destp;
+	__u32 seq_num;
+	int rqueue;
+
+	if (udp_sk == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
+	udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
+	if (udp6_sk == (void *)0)
+		return 0;
+
+	inet = &udp_sk->inet;
+	srcp = bpf_ntohs(inet->inet_sport);
+	destp = bpf_ntohs(inet->inet_dport);
+	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+	dest  = &inet->sk.sk_v6_daddr;
+	src   = &inet->sk.sk_v6_rcv_saddr;
+
+	BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       ctx->bucket,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		       inet->sk.sk_state,
+		       inet->sk.sk_wmem_alloc.refs.counter - 1,
+		       rqueue,
+		       0, 0L, 0, ctx->uid, 0,
+		       sock_i_ino(&inet->sk),
+		       inet->sk.sk_refcnt.refs.counter, udp_sk,
+		       inet->sk.sk_drops.counter);
+
+	return 0;
+}
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* [PATCH bpf-next v3 15/15] bpf/selftests: add tcp/udp iterator programs to selftests
  2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
                   ` (13 preceding siblings ...)
  2020-06-23  0:36 ` [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator Yonghong Song
@ 2020-06-23  0:36 ` Yonghong Song
  2020-06-23  6:59   ` Andrii Nakryiko
  14 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  0:36 UTC (permalink / raw)
  To: bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau

Added tcp{4,6} and udp{4,6} bpf programs into test_progs
selftest so that they at least can load successfully.
  $ ./test_progs -n 3
  ...
  #3/7 tcp4:OK
  #3/8 tcp6:OK
  #3/9 udp4:OK
  #3/10 udp6:OK
  ...
  #3 bpf_iter:OK
  Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf9..1e2e0fced6e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -6,6 +6,10 @@
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
 #include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -120,6 +124,62 @@ static void test_task_file(void)
 	bpf_iter_task_file__destroy(skel);
 }
 
+static void test_tcp4(void)
+{
+	struct bpf_iter_tcp4 *skel;
+
+	skel = bpf_iter_tcp4__open_and_load();
+	if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp4);
+
+	bpf_iter_tcp4__destroy(skel);
+}
+
+static void test_tcp6(void)
+{
+	struct bpf_iter_tcp6 *skel;
+
+	skel = bpf_iter_tcp6__open_and_load();
+	if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp6);
+
+	bpf_iter_tcp6__destroy(skel);
+}
+
+static void test_udp4(void)
+{
+	struct bpf_iter_udp4 *skel;
+
+	skel = bpf_iter_udp4__open_and_load();
+	if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp4);
+
+	bpf_iter_udp4__destroy(skel);
+}
+
+static void test_udp6(void)
+{
+	struct bpf_iter_udp6 *skel;
+
+	skel = bpf_iter_udp6__open_and_load();
+	if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp6);
+
+	bpf_iter_udp6__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
@@ -394,6 +454,14 @@ void test_bpf_iter(void)
 		test_task();
 	if (test__start_subtest("task_file"))
 		test_task_file();
+	if (test__start_subtest("tcp4"))
+		test_tcp4();
+	if (test__start_subtest("tcp6"))
+		test_tcp6();
+	if (test__start_subtest("udp4"))
+		test_udp4();
+	if (test__start_subtest("udp6"))
+		test_udp6();
 	if (test__start_subtest("anon"))
 		test_anon_iter(false);
 	if (test__start_subtest("anon-read-one-char"))
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23  0:36 ` [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper Yonghong Song
@ 2020-06-23  1:47   ` Eric Dumazet
  2020-06-23  2:22     ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Eric Dumazet @ 2020-06-23  1:47 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau



On 6/22/20 5:36 PM, Yonghong Song wrote:
> The helper is used in tracing programs to cast a socket
> pointer to a udp6_sock pointer.
> The return value could be NULL if the casting is illegal.
> 
> Acked-by: Martin KaFai Lau <kafai@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       |  9 ++++++++-
>  kernel/trace/bpf_trace.c       |  2 ++
>  net/core/filter.c              | 22 ++++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  2 ++
>  tools/include/uapi/linux/bpf.h |  9 ++++++++-
>  6 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index cc3f89827b89..3f5c6bb5e3a7 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1649,6 +1649,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
>  extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
>  extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
>  extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
> +extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
>  
>  const struct bpf_func_proto *bpf_tracing_func_proto(
>  	enum bpf_func_id func_id, const struct bpf_prog *prog);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e256417d94c2..3f4b12c5c563 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3276,6 +3276,12 @@ union bpf_attr {
>   *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
>   *	Return
>   *		*sk* if casting is valid, or NULL otherwise.
> + *
> + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
> + * 	Description
> + *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
> + *	Return
> + *		*sk* if casting is valid, or NULL otherwise.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -3417,7 +3423,8 @@ union bpf_attr {
>  	FN(skc_to_tcp6_sock),		\
>  	FN(skc_to_tcp_sock),		\
>  	FN(skc_to_tcp_timewait_sock),	\
> -	FN(skc_to_tcp_request_sock),
> +	FN(skc_to_tcp_request_sock),	\
> +	FN(skc_to_udp6_sock),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index de5fbe66e1ca..d10ab16c4a2f 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -1523,6 +1523,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  		return &bpf_skc_to_tcp_timewait_sock_proto;
>  	case BPF_FUNC_skc_to_tcp_request_sock:
>  		return &bpf_skc_to_tcp_request_sock_proto;
> +	case BPF_FUNC_skc_to_udp6_sock:
> +		return &bpf_skc_to_udp6_sock_proto;
>  #endif
>  	case BPF_FUNC_seq_printf:
>  		return prog->expected_attach_type == BPF_TRACE_ITER ?
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 140fc0fdf3e1..9a98f3616273 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -9325,3 +9325,25 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
>  	.check_btf_id		= check_arg_btf_id,
>  	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
>  };
> +
> +BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
> +{
> +	/* udp6_sock type is not generated in dwarf and hence btf,
> +	 * trigger an explicit type generation here.
> +	 */
> +	BTF_TYPE_EMIT(struct udp6_sock);
> +	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&

Why is the sk_fullsock(sk) needed ?

> +	    sk->sk_family == AF_INET6)
> +		return (unsigned long)sk;
> +
> +	return (unsigned long)NULL;
> +}
> +
> +const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
> +	.func			= bpf_skc_to_udp6_sock,
> +	.gpl_only		= false,
> +	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
> +	.arg1_type		= ARG_PTR_TO_BTF_ID,
> +	.check_btf_id		= check_arg_btf_id,
> +	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
> +};
> diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
> index d886657c6aaa..6bab40ff442e 100755
> --- a/scripts/bpf_helpers_doc.py
> +++ b/scripts/bpf_helpers_doc.py
> @@ -425,6 +425,7 @@ class PrinterHelpers(Printer):
>              'struct tcp_sock',
>              'struct tcp_timewait_sock',
>              'struct tcp_request_sock',
> +            'struct udp6_sock',
>  
>              'struct __sk_buff',
>              'struct sk_msg_md',
> @@ -466,6 +467,7 @@ class PrinterHelpers(Printer):
>              'struct tcp_sock',
>              'struct tcp_timewait_sock',
>              'struct tcp_request_sock',
> +            'struct udp6_sock',
>      }
>      mapped_types = {
>              'u8': '__u8',
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index e256417d94c2..3f4b12c5c563 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3276,6 +3276,12 @@ union bpf_attr {
>   *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
>   *	Return
>   *		*sk* if casting is valid, or NULL otherwise.
> + *
> + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
> + * 	Description
> + *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
> + *	Return
> + *		*sk* if casting is valid, or NULL otherwise.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -3417,7 +3423,8 @@ union bpf_attr {
>  	FN(skc_to_tcp6_sock),		\
>  	FN(skc_to_tcp_sock),		\
>  	FN(skc_to_tcp_timewait_sock),	\
> -	FN(skc_to_tcp_request_sock),
> +	FN(skc_to_tcp_request_sock),	\
> +	FN(skc_to_udp6_sock),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23  1:47   ` Eric Dumazet
@ 2020-06-23  2:22     ` Yonghong Song
  2020-06-23 16:27       ` Eric Dumazet
  0 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23  2:22 UTC (permalink / raw)
  To: Eric Dumazet, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau



On 6/22/20 6:47 PM, Eric Dumazet wrote:
> 
> 
> On 6/22/20 5:36 PM, Yonghong Song wrote:
>> The helper is used in tracing programs to cast a socket
>> pointer to a udp6_sock pointer.
>> The return value could be NULL if the casting is illegal.
>>
>> Acked-by: Martin KaFai Lau <kafai@fb.com>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   include/linux/bpf.h            |  1 +
>>   include/uapi/linux/bpf.h       |  9 ++++++++-
>>   kernel/trace/bpf_trace.c       |  2 ++
>>   net/core/filter.c              | 22 ++++++++++++++++++++++
>>   scripts/bpf_helpers_doc.py     |  2 ++
>>   tools/include/uapi/linux/bpf.h |  9 ++++++++-
>>   6 files changed, 43 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index cc3f89827b89..3f5c6bb5e3a7 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1649,6 +1649,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto;
>>   extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
>>   extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
>>   extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
>> +extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
>>   
>>   const struct bpf_func_proto *bpf_tracing_func_proto(
>>   	enum bpf_func_id func_id, const struct bpf_prog *prog);
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index e256417d94c2..3f4b12c5c563 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -3276,6 +3276,12 @@ union bpf_attr {
>>    *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
>>    *	Return
>>    *		*sk* if casting is valid, or NULL otherwise.
>> + *
>> + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
>> + * 	Description
>> + *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
>> + *	Return
>> + *		*sk* if casting is valid, or NULL otherwise.
>>    */
>>   #define __BPF_FUNC_MAPPER(FN)		\
>>   	FN(unspec),			\
>> @@ -3417,7 +3423,8 @@ union bpf_attr {
>>   	FN(skc_to_tcp6_sock),		\
>>   	FN(skc_to_tcp_sock),		\
>>   	FN(skc_to_tcp_timewait_sock),	\
>> -	FN(skc_to_tcp_request_sock),
>> +	FN(skc_to_tcp_request_sock),	\
>> +	FN(skc_to_udp6_sock),
>>   
>>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>>    * function eBPF program intends to call
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index de5fbe66e1ca..d10ab16c4a2f 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -1523,6 +1523,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>>   		return &bpf_skc_to_tcp_timewait_sock_proto;
>>   	case BPF_FUNC_skc_to_tcp_request_sock:
>>   		return &bpf_skc_to_tcp_request_sock_proto;
>> +	case BPF_FUNC_skc_to_udp6_sock:
>> +		return &bpf_skc_to_udp6_sock_proto;
>>   #endif
>>   	case BPF_FUNC_seq_printf:
>>   		return prog->expected_attach_type == BPF_TRACE_ITER ?
>> diff --git a/net/core/filter.c b/net/core/filter.c
>> index 140fc0fdf3e1..9a98f3616273 100644
>> --- a/net/core/filter.c
>> +++ b/net/core/filter.c
>> @@ -9325,3 +9325,25 @@ const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
>>   	.check_btf_id		= check_arg_btf_id,
>>   	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
>>   };
>> +
>> +BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
>> +{
>> +	/* udp6_sock type is not generated in dwarf and hence btf,
>> +	 * trigger an explicit type generation here.
>> +	 */
>> +	BTF_TYPE_EMIT(struct udp6_sock);
>> +	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
> 
> Why is the sk_fullsock(sk) needed ?

The parameter 'sk' could be a sock_common. That is why the
helper name bpf_skc_to_udp6_sock implies. The sock_common cannot
access sk_protocol, hence we requires sk_fullsock(sk) here.
Did I miss anything?

> 
>> +	    sk->sk_family == AF_INET6)
>> +		return (unsigned long)sk;
>> +
>> +	return (unsigned long)NULL;
>> +}
>> +
>> +const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
>> +	.func			= bpf_skc_to_udp6_sock,
>> +	.gpl_only		= false,
>> +	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
>> +	.arg1_type		= ARG_PTR_TO_BTF_ID,
>> +	.check_btf_id		= check_arg_btf_id,
>> +	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
>> +};
[...]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers
  2020-06-23  0:36 ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers Yonghong Song
@ 2020-06-23  5:18     ` kernel test robot
  2020-06-23  6:39     ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers kernel test robot
  1 sibling, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  5:18 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: kbuild-all, Alexei Starovoitov, Daniel Borkmann, kernel-team,
	Martin KaFai Lau

[-- Attachment #1: Type: text/plain, Size: 1694 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: openrisc-defconfig (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   or1k-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_timewait_sock':
   filter.c:(.text+0x3e60): undefined reference to `tcpv6_prot'
>> or1k-linux-ld: filter.c:(.text+0x3e64): undefined reference to `tcpv6_prot'
   or1k-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_request_sock':
   filter.c:(.text+0x3ee0): undefined reference to `tcpv6_prot'
   or1k-linux-ld: filter.c:(.text+0x3ee4): undefined reference to `tcpv6_prot'
   or1k-linux-ld: net/core/filter.o: in function `init_btf_sock_ids':
   filter.c:(.text+0x11384): undefined reference to `btf_find_by_name_kind'
   filter.c:(.text+0x11384): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 8143 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers
@ 2020-06-23  5:18     ` kernel test robot
  0 siblings, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  5:18 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1730 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: openrisc-defconfig (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=openrisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   or1k-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_timewait_sock':
   filter.c:(.text+0x3e60): undefined reference to `tcpv6_prot'
>> or1k-linux-ld: filter.c:(.text+0x3e64): undefined reference to `tcpv6_prot'
   or1k-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_request_sock':
   filter.c:(.text+0x3ee0): undefined reference to `tcpv6_prot'
   or1k-linux-ld: filter.c:(.text+0x3ee4): undefined reference to `tcpv6_prot'
   or1k-linux-ld: net/core/filter.o: in function `init_btf_sock_ids':
   filter.c:(.text+0x11384): undefined reference to `btf_find_by_name_kind'
   filter.c:(.text+0x11384): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 8143 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23  0:36 ` [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper Yonghong Song
@ 2020-06-23  5:46     ` kernel test robot
  2020-06-23  5:53     ` kernel test robot
  2020-06-23  6:39   ` Andrii Nakryiko
  2 siblings, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  5:46 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: kbuild-all, Alexei Starovoitov, Daniel Borkmann, kernel-team,
	Martin KaFai Lau

[-- Attachment #1: Type: text/plain, Size: 1107 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: parisc-defconfig (attached as .config)
compiler: hppa-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=parisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   hppa-linux-ld: net/core/filter.o: in function `init_btf_sock_ids':
>> (.text+0xe878): undefined reference to `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 18577 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
@ 2020-06-23  5:46     ` kernel test robot
  0 siblings, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  5:46 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1136 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: parisc-defconfig (attached as .config)
compiler: hppa-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=parisc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   hppa-linux-ld: net/core/filter.o: in function `init_btf_sock_ids':
>> (.text+0xe878): undefined reference to `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 18577 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23  0:36 ` [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper Yonghong Song
@ 2020-06-23  5:53     ` kernel test robot
  2020-06-23  5:53     ` kernel test robot
  2020-06-23  6:39   ` Andrii Nakryiko
  2 siblings, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  5:53 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: kbuild-all, Alexei Starovoitov, Daniel Borkmann, kernel-team,
	Martin KaFai Lau

[-- Attachment #1: Type: text/plain, Size: 1191 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: arc-defconfig (attached as .config)
compiler: arc-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arc-elf-ld: net/core/filter.o: in function `init_btf_sock_ids':
   filter.c:(.text+0xaf22): undefined reference to `btf_find_by_name_kind'
>> arc-elf-ld: filter.c:(.text+0xaf22): undefined reference to `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 9333 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
@ 2020-06-23  5:53     ` kernel test robot
  0 siblings, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  5:53 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1221 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: arc-defconfig (attached as .config)
compiler: arc-elf-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arc-elf-ld: net/core/filter.o: in function `init_btf_sock_ids':
   filter.c:(.text+0xaf22): undefined reference to `btf_find_by_name_kind'
>> arc-elf-ld: filter.c:(.text+0xaf22): undefined reference to `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 9333 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23  0:36 ` [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper Yonghong Song
  2020-06-23  5:46     ` kernel test robot
  2020-06-23  5:53     ` kernel test robot
@ 2020-06-23  6:39   ` Andrii Nakryiko
  2020-06-23 14:52     ` Yonghong Song
  2 siblings, 1 reply; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23  6:39 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>
> The helper is used in tracing programs to cast a socket
> pointer to a tcp6_sock pointer.
> The return value could be NULL if the casting is illegal.
>
> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
> so the verifier is able to deduce proper return types for the helper.
>
> Different from the previous BTF_ID based helpers,
> the bpf_skc_to_tcp6_sock() argument can be several possible
> btf_ids. More specifically, all possible socket data structures
> with sock_common appearing in the first in the memory layout.
> This patch only added socket types related to tcp and udp.
>
> All possible argument btf_id and return value btf_id
> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
> cached. In the future, it is even possible to precompute
> these btf_id's at kernel build time.
>
> Acked-by: Martin KaFai Lau <kafai@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

Looks good to me as is, but see a few suggestions, they will probably
save me time at some point as well :)

Acked-by: Andrii Nakryiko <andriin@fb.com>


>  include/linux/bpf.h            | 12 +++++
>  include/uapi/linux/bpf.h       |  9 +++-
>  kernel/bpf/btf.c               |  1 +
>  kernel/bpf/verifier.c          | 43 +++++++++++++-----
>  kernel/trace/bpf_trace.c       |  2 +
>  net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
>  scripts/bpf_helpers_doc.py     |  2 +
>  tools/include/uapi/linux/bpf.h |  9 +++-
>  8 files changed, 146 insertions(+), 12 deletions(-)
>

[...]

> @@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
>                 regs[BPF_REG_0].id = ++env->id_gen;
>                 regs[BPF_REG_0].mem_size = meta.mem_size;
> +       } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
> +               int ret_btf_id;
> +
> +               mark_reg_known_zero(env, regs, BPF_REG_0);
> +               regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
> +               ret_btf_id = *fn->ret_btf_id;

might be a good idea to check fb->ret_btf_id for NULL and print a
warning + return -EFAULT. It's not supposed to happen on properly
configured kernel, but during development this will save a bunch of
time and frustration for next person trying to add something with
RET_PTR_TO_BTF_ID_OR_NULL.

> +               if (ret_btf_id == 0) {

This also has to be struct/union (after typedef/mods stripping, of
course). Or are there other cases?

> +                       verbose(env, "invalid return type %d of func %s#%d\n",
> +                               fn->ret_type, func_id_name(func_id), func_id);
> +                       return -EINVAL;
> +               }
> +               regs[BPF_REG_0].btf_id = ret_btf_id;
>         } else {
>                 verbose(env, "unknown return type %d of func %s#%d\n",
>                         fn->ret_type, func_id_name(func_id), func_id);

[...]

> +void init_btf_sock_ids(struct btf *btf)
> +{
> +       int i, btf_id;
> +
> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
> +               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
> +                                              BTF_KIND_STRUCT);
> +               if (btf_id > 0)
> +                       btf_sock_ids[i] = btf_id;
> +       }
> +}

This will hopefully go away with Jiri's work on static BTF IDs, right?
So looking forward to that :)

> +
> +static bool check_arg_btf_id(u32 btf_id, u32 arg)
> +{
> +       int i;
> +
> +       /* only one argument, no need to check arg */
> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
> +               if (btf_sock_ids[i] == btf_id)
> +                       return true;
> +       return false;
> +}
> +

[...]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers
  2020-06-23  0:36 ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers Yonghong Song
@ 2020-06-23  6:39     ` kernel test robot
  2020-06-23  6:39     ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers kernel test robot
  1 sibling, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  6:39 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: kbuild-all, Alexei Starovoitov, Daniel Borkmann, kernel-team,
	Martin KaFai Lau

[-- Attachment #1: Type: text/plain, Size: 1671 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: nios2-defconfig (attached as .config)
compiler: nios2-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nios2 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   nios2-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_timewait_sock':
   filter.c:(.text+0x367c): undefined reference to `tcpv6_prot'
>> nios2-linux-ld: filter.c:(.text+0x3680): undefined reference to `tcpv6_prot'
   nios2-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_request_sock':
   filter.c:(.text+0x36c0): undefined reference to `tcpv6_prot'
   nios2-linux-ld: filter.c:(.text+0x36c4): undefined reference to `tcpv6_prot'
   nios2-linux-ld: net/core/filter.o: in function `init_btf_sock_ids':
   filter.c:(.text+0xe65c): undefined reference to `btf_find_by_name_kind'
   filter.c:(.text+0xe65c): relocation truncated to fit: R_NIOS2_CALL26 against `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 9865 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers
@ 2020-06-23  6:39     ` kernel test robot
  0 siblings, 0 replies; 43+ messages in thread
From: kernel test robot @ 2020-06-23  6:39 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 1707 bytes --]

Hi Yonghong,

I love your patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/Yonghong-Song/implement-bpf-iterator-for-tcp-and-udp-sockets/20200623-090149
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: nios2-defconfig (attached as .config)
compiler: nios2-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nios2 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   nios2-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_timewait_sock':
   filter.c:(.text+0x367c): undefined reference to `tcpv6_prot'
>> nios2-linux-ld: filter.c:(.text+0x3680): undefined reference to `tcpv6_prot'
   nios2-linux-ld: net/core/filter.o: in function `bpf_skc_to_tcp_request_sock':
   filter.c:(.text+0x36c0): undefined reference to `tcpv6_prot'
   nios2-linux-ld: filter.c:(.text+0x36c4): undefined reference to `tcpv6_prot'
   nios2-linux-ld: net/core/filter.o: in function `init_btf_sock_ids':
   filter.c:(.text+0xe65c): undefined reference to `btf_find_by_name_kind'
   filter.c:(.text+0xe65c): relocation truncated to fit: R_NIOS2_CALL26 against `btf_find_by_name_kind'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

[-- Attachment #2: config.gz --]
[-- Type: application/gzip, Size: 9865 bytes --]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h
  2020-06-23  0:36 ` [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h Yonghong Song
@ 2020-06-23  6:45   ` Andrii Nakryiko
  2020-06-23 14:56     ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23  6:45 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>
> Refactor bpf_iter_ipv6_route.c and bpf_iter_netlink.c
> so net macros, originally from various include/linux header
> files, are moved to a new libbpf installable header file
> bpf_tracing_net.h. The goal is to improve reuse so
> networking tracing programs do not need to
> copy these macros every time they use them.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  tools/lib/bpf/Makefile                           |  1 +
>  tools/lib/bpf/bpf_tracing_net.h                  | 16 ++++++++++++++++
>  .../selftests/bpf/progs/bpf_iter_ipv6_route.c    |  7 +------
>  .../selftests/bpf/progs/bpf_iter_netlink.c       |  4 +---
>  4 files changed, 19 insertions(+), 9 deletions(-)
>  create mode 100644 tools/lib/bpf/bpf_tracing_net.h
>
> diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
> index bf8ed134cb8a..3d766c80eb78 100644
> --- a/tools/lib/bpf/Makefile
> +++ b/tools/lib/bpf/Makefile
> @@ -257,6 +257,7 @@ install_headers: $(BPF_HELPER_DEFS)
>                 $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
>                 $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
>                 $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
> +               $(call do_install,bpf_tracing_net.h,$(prefix)/include/bpf,644); \
>                 $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
>                 $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
>
> diff --git a/tools/lib/bpf/bpf_tracing_net.h b/tools/lib/bpf/bpf_tracing_net.h
> new file mode 100644
> index 000000000000..1f38a1098727
> --- /dev/null
> +++ b/tools/lib/bpf/bpf_tracing_net.h
> @@ -0,0 +1,16 @@
> +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
> +#ifndef __BPF_TRACING_NET_H__
> +#define __BPF_TRACING_NET_H__
> +
> +#define IFNAMSIZ               16
> +
> +#define RTF_GATEWAY            0x0002
> +
> +#define fib_nh_dev             nh_common.nhc_dev
> +#define fib_nh_gw_family       nh_common.nhc_gw_family
> +#define fib_nh_gw6             nh_common.nhc_gw.ipv6
> +
> +#define sk_rmem_alloc          sk_backlog.rmem_alloc
> +#define sk_refcnt              __sk_common.skc_refcnt

Question to networking guys. How probable it is for these and similar
definitions to ever be changed?

I'm a bit hesitant to make any stability guarantees (which is implied
by libbpf-provided headers). I don't want us to get into the game of
trying to maintain this across multiple kernel versions, if they are
going to be changed.

Let's for now keep bpf_tracing_net.h under selftests/bpf? It's still
good to have these definitions, because we can point people to it.

> +
> +#endif

[...]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs
  2020-06-23  0:36 ` [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs Yonghong Song
@ 2020-06-23  6:56   ` Andrii Nakryiko
  2020-06-23 15:03     ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23  6:56 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>
> In my VM, I got identical result compared to /proc/net/{tcp,tcp6}.
> For tcp6:
>   $ cat /proc/net/tcp6
>     sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>      0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0
>
>   $ cat /sys/fs/bpf/p1
>     sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>      0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0
>
> For tcp:
>   $ cat /proc/net/tcp
>   sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>    0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
>   $ cat /sys/fs/bpf/p2
>   sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>    1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

Looks reasonable, to the extent possible ;)

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  tools/testing/selftests/bpf/progs/bpf_iter.h  |  15 ++
>  .../selftests/bpf/progs/bpf_iter_tcp4.c       | 235 ++++++++++++++++
>  .../selftests/bpf/progs/bpf_iter_tcp6.c       | 250 ++++++++++++++++++
>  3 files changed, 500 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
>

[...]

> +static int hlist_unhashed_lockless(const struct hlist_node *h)
> +{
> +        return !(h->pprev);
> +}
> +
> +static int timer_pending(const struct timer_list * timer)
> +{
> +       return !hlist_unhashed_lockless(&timer->entry);
> +}
> +
> +extern unsigned CONFIG_HZ __kconfig __weak;

Why the __weak? We expect to have /proc/kconfig.gz in other tests
anyway? __weak will make CONFIG_HZ to be a zero and you'll get a bunch
of divisions by zero.

> +
> +#define USER_HZ                100
> +#define NSEC_PER_SEC   1000000000ULL
> +static clock_t jiffies_to_clock_t(unsigned long x)
> +{
> +       /* The implementation here tailored to a particular
> +        * setting of USER_HZ.
> +        */
> +       u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
> +       u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
> +
> +       if ((tick_nsec % user_hz_nsec) == 0) {
> +               if (CONFIG_HZ < USER_HZ)
> +                       return x * (USER_HZ / CONFIG_HZ);
> +               else
> +                       return x / (CONFIG_HZ / USER_HZ);
> +       }
> +       return x * tick_nsec/user_hz_nsec;
> +}
> +

[...]

> +       if (sk_common->skc_family != AF_INET)
> +               return 0;
> +
> +       tp = bpf_skc_to_tcp_sock(sk_common);
> +       if (tp) {
> +               return dump_tcp_sock(seq, tp, uid, seq_num);
> +       }

nit: unnecessary {}

> +
> +       tw = bpf_skc_to_tcp_timewait_sock(sk_common);
> +       if (tw)
> +               return dump_tw_sock(seq, tw, uid, seq_num);
> +
> +       req = bpf_skc_to_tcp_request_sock(sk_common);
> +       if (req)
> +               return dump_req_sock(seq, req, uid, seq_num);
> +
> +       return 0;
> +}

[...]

> +static int timer_pending(const struct timer_list * timer)
> +{
> +       return !hlist_unhashed_lockless(&timer->entry);
> +}
> +
> +extern unsigned CONFIG_HZ __kconfig __weak;

same about __weak here

> +
> +#define USER_HZ                100
> +#define NSEC_PER_SEC   1000000000ULL
> +static clock_t jiffies_to_clock_t(unsigned long x)
> +{
> +       /* The implementation here tailored to a particular
> +        * setting of USER_HZ.
> +        */
> +       u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
> +       u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
> +
> +       if ((tick_nsec % user_hz_nsec) == 0) {
> +               if (CONFIG_HZ < USER_HZ)
> +                       return x * (USER_HZ / CONFIG_HZ);
> +               else
> +                       return x / (CONFIG_HZ / USER_HZ);
> +       }
> +       return x * tick_nsec/user_hz_nsec;
> +}

nit: jiffies_to_clock_t() implementation looks like an overkill for
this use case... Would it be just `x / CONFIG_HZ * NSEC_PER_SEC` with
some potential rounding error?

> +
> +static clock_t jiffies_delta_to_clock_t(long delta)
> +{
> +       if (delta <= 0)
> +               return 0;
> +
> +       return jiffies_to_clock_t(delta);
> +}
> +

[...]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator
  2020-06-23  0:36 ` [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator Yonghong Song
@ 2020-06-23  6:57   ` Andrii Nakryiko
  2020-06-23 15:03     ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23  6:57 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>
> On my VM, I got identical results between /proc/net/udp[6] and
> the udp{4,6} bpf iterator.
>
> For udp6:
>   $ cat /sys/fs/bpf/p1
>     sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>    1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
>   $ cat /proc/net/udp6
>     sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>    1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
>
> For udp4:
>   $ cat /sys/fs/bpf/p4
>     sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>    2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
>   $ cat /proc/net/udp
>     sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>    2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
> ---

patch subject prefix is misleading: tools/bpf -> selftests/bpf?

Otherwise:

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  tools/testing/selftests/bpf/progs/bpf_iter.h  | 16 ++++
>  .../selftests/bpf/progs/bpf_iter_udp4.c       | 71 +++++++++++++++++
>  .../selftests/bpf/progs/bpf_iter_udp6.c       | 79 +++++++++++++++++++
>  3 files changed, 166 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
>

[...]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 15/15] bpf/selftests: add tcp/udp iterator programs to selftests
  2020-06-23  0:36 ` [PATCH bpf-next v3 15/15] bpf/selftests: add tcp/udp iterator programs to selftests Yonghong Song
@ 2020-06-23  6:59   ` Andrii Nakryiko
  0 siblings, 0 replies; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23  6:59 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>
> Added tcp{4,6} and udp{4,6} bpf programs into test_progs
> selftest so that they at least can load successfully.
>   $ ./test_progs -n 3
>   ...
>   #3/7 tcp4:OK
>   #3/8 tcp6:OK
>   #3/9 udp4:OK
>   #3/10 udp6:OK
>   ...
>   #3 bpf_iter:OK
>   Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---

nit: subject "bpf/selftest" -> "selftests/bpf"

Acked-by: Andrii Nakryiko <andriin@fb.com>

>  .../selftests/bpf/prog_tests/bpf_iter.c       | 68 +++++++++++++++++++
>  1 file changed, 68 insertions(+)
>

[...]

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23  6:39   ` Andrii Nakryiko
@ 2020-06-23 14:52     ` Yonghong Song
  2020-06-23 18:23       ` Andrii Nakryiko
  0 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 14:52 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau



On 6/22/20 11:39 PM, Andrii Nakryiko wrote:
> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> The helper is used in tracing programs to cast a socket
>> pointer to a tcp6_sock pointer.
>> The return value could be NULL if the casting is illegal.
>>
>> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
>> so the verifier is able to deduce proper return types for the helper.
>>
>> Different from the previous BTF_ID based helpers,
>> the bpf_skc_to_tcp6_sock() argument can be several possible
>> btf_ids. More specifically, all possible socket data structures
>> with sock_common appearing in the first in the memory layout.
>> This patch only added socket types related to tcp and udp.
>>
>> All possible argument btf_id and return value btf_id
>> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
>> cached. In the future, it is even possible to precompute
>> these btf_id's at kernel build time.
>>
>> Acked-by: Martin KaFai Lau <kafai@fb.com>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
> 
> Looks good to me as is, but see a few suggestions, they will probably
> save me time at some point as well :)
> 
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> 
> 
>>   include/linux/bpf.h            | 12 +++++
>>   include/uapi/linux/bpf.h       |  9 +++-
>>   kernel/bpf/btf.c               |  1 +
>>   kernel/bpf/verifier.c          | 43 +++++++++++++-----
>>   kernel/trace/bpf_trace.c       |  2 +
>>   net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
>>   scripts/bpf_helpers_doc.py     |  2 +
>>   tools/include/uapi/linux/bpf.h |  9 +++-
>>   8 files changed, 146 insertions(+), 12 deletions(-)
>>
> 
> [...]
> 
>> @@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>>                  regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
>>                  regs[BPF_REG_0].id = ++env->id_gen;
>>                  regs[BPF_REG_0].mem_size = meta.mem_size;
>> +       } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
>> +               int ret_btf_id;
>> +
>> +               mark_reg_known_zero(env, regs, BPF_REG_0);
>> +               regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
>> +               ret_btf_id = *fn->ret_btf_id;
> 
> might be a good idea to check fb->ret_btf_id for NULL and print a
> warning + return -EFAULT. It's not supposed to happen on properly
> configured kernel, but during development this will save a bunch of
> time and frustration for next person trying to add something with
> RET_PTR_TO_BTF_ID_OR_NULL.

I would like prefer to delay this with current code. Otherwise,
it gives an impression fn->ret_btf_id might be NULL and it is
actually never NULL. We can add NULL check if the future change
requires it. I am not sure what the future change could be,
but you need some way to get the return btf_id, the above is
one of them.

> 
>> +               if (ret_btf_id == 0) {
> 
> This also has to be struct/union (after typedef/mods stripping, of
> course). Or are there other cases?

This is an "int". The func_proto difinition is below:
int *ret_btf_id; /* return value btf_id */

> 
>> +                       verbose(env, "invalid return type %d of func %s#%d\n",
>> +                               fn->ret_type, func_id_name(func_id), func_id);
>> +                       return -EINVAL;
>> +               }
>> +               regs[BPF_REG_0].btf_id = ret_btf_id;
>>          } else {
>>                  verbose(env, "unknown return type %d of func %s#%d\n",
>>                          fn->ret_type, func_id_name(func_id), func_id);
> 
> [...]
> 
>> +void init_btf_sock_ids(struct btf *btf)
>> +{
>> +       int i, btf_id;
>> +
>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
>> +               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
>> +                                              BTF_KIND_STRUCT);
>> +               if (btf_id > 0)
>> +                       btf_sock_ids[i] = btf_id;
>> +       }
>> +}
> 
> This will hopefully go away with Jiri's work on static BTF IDs, right?
> So looking forward to that :)

Yes. That's the plan.

> 
>> +
>> +static bool check_arg_btf_id(u32 btf_id, u32 arg)
>> +{
>> +       int i;
>> +
>> +       /* only one argument, no need to check arg */
>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
>> +               if (btf_sock_ids[i] == btf_id)
>> +                       return true;
>> +       return false;
>> +}
>> +
> 
> [...]
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h
  2020-06-23  6:45   ` Andrii Nakryiko
@ 2020-06-23 14:56     ` Yonghong Song
  0 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 14:56 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau



On 6/22/20 11:45 PM, Andrii Nakryiko wrote:
> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> Refactor bpf_iter_ipv6_route.c and bpf_iter_netlink.c
>> so net macros, originally from various include/linux header
>> files, are moved to a new libbpf installable header file
>> bpf_tracing_net.h. The goal is to improve reuse so
>> networking tracing programs do not need to
>> copy these macros every time they use them.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   tools/lib/bpf/Makefile                           |  1 +
>>   tools/lib/bpf/bpf_tracing_net.h                  | 16 ++++++++++++++++
>>   .../selftests/bpf/progs/bpf_iter_ipv6_route.c    |  7 +------
>>   .../selftests/bpf/progs/bpf_iter_netlink.c       |  4 +---
>>   4 files changed, 19 insertions(+), 9 deletions(-)
>>   create mode 100644 tools/lib/bpf/bpf_tracing_net.h
>>
>> diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
>> index bf8ed134cb8a..3d766c80eb78 100644
>> --- a/tools/lib/bpf/Makefile
>> +++ b/tools/lib/bpf/Makefile
>> @@ -257,6 +257,7 @@ install_headers: $(BPF_HELPER_DEFS)
>>                  $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
>>                  $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
>>                  $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
>> +               $(call do_install,bpf_tracing_net.h,$(prefix)/include/bpf,644); \
>>                  $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
>>                  $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
>>
>> diff --git a/tools/lib/bpf/bpf_tracing_net.h b/tools/lib/bpf/bpf_tracing_net.h
>> new file mode 100644
>> index 000000000000..1f38a1098727
>> --- /dev/null
>> +++ b/tools/lib/bpf/bpf_tracing_net.h
>> @@ -0,0 +1,16 @@
>> +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
>> +#ifndef __BPF_TRACING_NET_H__
>> +#define __BPF_TRACING_NET_H__
>> +
>> +#define IFNAMSIZ               16
>> +
>> +#define RTF_GATEWAY            0x0002
>> +
>> +#define fib_nh_dev             nh_common.nhc_dev
>> +#define fib_nh_gw_family       nh_common.nhc_gw_family
>> +#define fib_nh_gw6             nh_common.nhc_gw.ipv6
>> +
>> +#define sk_rmem_alloc          sk_backlog.rmem_alloc
>> +#define sk_refcnt              __sk_common.skc_refcnt
> 
> Question to networking guys. How probable it is for these and similar
> definitions to ever be changed?
> 
> I'm a bit hesitant to make any stability guarantees (which is implied
> by libbpf-provided headers). I don't want us to get into the game of
> trying to maintain this across multiple kernel versions, if they are
> going to be changed.
> 
> Let's for now keep bpf_tracing_net.h under selftests/bpf? It's still
> good to have these definitions, because we can point people to it.

I am using bpf_tracing_net.h name to somehow signal that it may
*potentially* change. But it does have long-time stability issue.
Will move to selftests/bpf then.

> 
>> +
>> +#endif
> 
> [...]
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs
  2020-06-23  6:56   ` Andrii Nakryiko
@ 2020-06-23 15:03     ` Yonghong Song
  0 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 15:03 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau



On 6/22/20 11:56 PM, Andrii Nakryiko wrote:
> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> In my VM, I got identical result compared to /proc/net/{tcp,tcp6}.
>> For tcp6:
>>    $ cat /proc/net/tcp6
>>      sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>>       0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0
>>
>>    $ cat /sys/fs/bpf/p1
>>      sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>>       0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0
>>
>> For tcp:
>>    $ cat /proc/net/tcp
>>    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>>     0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
>>    $ cat /sys/fs/bpf/p2
>>    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
>>     1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
> 
> Looks reasonable, to the extent possible ;)
> 
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> 
>>   tools/testing/selftests/bpf/progs/bpf_iter.h  |  15 ++
>>   .../selftests/bpf/progs/bpf_iter_tcp4.c       | 235 ++++++++++++++++
>>   .../selftests/bpf/progs/bpf_iter_tcp6.c       | 250 ++++++++++++++++++
>>   3 files changed, 500 insertions(+)
>>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
>>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
>>
> 
> [...]
> 
>> +static int hlist_unhashed_lockless(const struct hlist_node *h)
>> +{
>> +        return !(h->pprev);
>> +}
>> +
>> +static int timer_pending(const struct timer_list * timer)
>> +{
>> +       return !hlist_unhashed_lockless(&timer->entry);
>> +}
>> +
>> +extern unsigned CONFIG_HZ __kconfig __weak;
> 
> Why the __weak? We expect to have /proc/kconfig.gz in other tests
> anyway? __weak will make CONFIG_HZ to be a zero and you'll get a bunch
> of divisions by zero.

Make sense. Will change.

> 
>> +
>> +#define USER_HZ                100
>> +#define NSEC_PER_SEC   1000000000ULL
>> +static clock_t jiffies_to_clock_t(unsigned long x)
>> +{
>> +       /* The implementation here tailored to a particular
>> +        * setting of USER_HZ.
>> +        */
>> +       u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
>> +       u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
>> +
>> +       if ((tick_nsec % user_hz_nsec) == 0) {
>> +               if (CONFIG_HZ < USER_HZ)
>> +                       return x * (USER_HZ / CONFIG_HZ);
>> +               else
>> +                       return x / (CONFIG_HZ / USER_HZ);
>> +       }
>> +       return x * tick_nsec/user_hz_nsec;
>> +}
>> +
> 
> [...]
> 
>> +       if (sk_common->skc_family != AF_INET)
>> +               return 0;
>> +
>> +       tp = bpf_skc_to_tcp_sock(sk_common);
>> +       if (tp) {
>> +               return dump_tcp_sock(seq, tp, uid, seq_num);
>> +       }
> 
> nit: unnecessary {}
> 
>> +
>> +       tw = bpf_skc_to_tcp_timewait_sock(sk_common);
>> +       if (tw)
>> +               return dump_tw_sock(seq, tw, uid, seq_num);
>> +
>> +       req = bpf_skc_to_tcp_request_sock(sk_common);
>> +       if (req)
>> +               return dump_req_sock(seq, req, uid, seq_num);
>> +
>> +       return 0;
>> +}
> 
> [...]
> 
>> +static int timer_pending(const struct timer_list * timer)
>> +{
>> +       return !hlist_unhashed_lockless(&timer->entry);
>> +}
>> +
>> +extern unsigned CONFIG_HZ __kconfig __weak;
> 
> same about __weak here
> 
>> +
>> +#define USER_HZ                100
>> +#define NSEC_PER_SEC   1000000000ULL
>> +static clock_t jiffies_to_clock_t(unsigned long x)
>> +{
>> +       /* The implementation here tailored to a particular
>> +        * setting of USER_HZ.
>> +        */
>> +       u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
>> +       u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
>> +
>> +       if ((tick_nsec % user_hz_nsec) == 0) {
>> +               if (CONFIG_HZ < USER_HZ)
>> +                       return x * (USER_HZ / CONFIG_HZ);
>> +               else
>> +                       return x / (CONFIG_HZ / USER_HZ);
>> +       }
>> +       return x * tick_nsec/user_hz_nsec;
>> +}
> 
> nit: jiffies_to_clock_t() implementation looks like an overkill for
> this use case... Would it be just `x / CONFIG_HZ * NSEC_PER_SEC` with
> some potential rounding error?

We really want to have the output the same as /proc/net/{tcp,tcp6}.
Otherwise, it may cause confusion when comparing bpf_iter_tcp[6] outputs
vs. /proc/net/tcp[6] outputs.

> 
>> +
>> +static clock_t jiffies_delta_to_clock_t(long delta)
>> +{
>> +       if (delta <= 0)
>> +               return 0;
>> +
>> +       return jiffies_to_clock_t(delta);
>> +}
>> +
> 
> [...]
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator
  2020-06-23  6:57   ` Andrii Nakryiko
@ 2020-06-23 15:03     ` Yonghong Song
  0 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 15:03 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau



On 6/22/20 11:57 PM, Andrii Nakryiko wrote:
> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> On my VM, I got identical results between /proc/net/udp[6] and
>> the udp{4,6} bpf iterator.
>>
>> For udp6:
>>    $ cat /sys/fs/bpf/p1
>>      sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>>     1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
>>    $ cat /proc/net/udp6
>>      sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>>     1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
>>
>> For udp4:
>>    $ cat /sys/fs/bpf/p4
>>      sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>>     2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
>>    $ cat /proc/net/udp
>>      sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
>>     2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
>> ---
> 
> patch subject prefix is misleading: tools/bpf -> selftests/bpf?

Sure I can do this.

> 
> Otherwise:
> 
> Acked-by: Andrii Nakryiko <andriin@fb.com>
> 
>>   tools/testing/selftests/bpf/progs/bpf_iter.h  | 16 ++++
>>   .../selftests/bpf/progs/bpf_iter_udp4.c       | 71 +++++++++++++++++
>>   .../selftests/bpf/progs/bpf_iter_udp6.c       | 79 +++++++++++++++++++
>>   3 files changed, 166 insertions(+)
>>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
>>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
>>
> 
> [...]
> 

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23  2:22     ` Yonghong Song
@ 2020-06-23 16:27       ` Eric Dumazet
  2020-06-23 17:03         ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Eric Dumazet @ 2020-06-23 16:27 UTC (permalink / raw)
  To: Yonghong Song, Eric Dumazet, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau



On 6/22/20 7:22 PM, Yonghong Song wrote:
> 
> 
> On 6/22/20 6:47 PM, Eric Dumazet wrote:
&
>>
>> Why is the sk_fullsock(sk) needed ?
> 
> The parameter 'sk' could be a sock_common. That is why the
> helper name bpf_skc_to_udp6_sock implies. The sock_common cannot
> access sk_protocol, hence we requires sk_fullsock(sk) here.
> Did I miss anything?

OK, if arbitrary sockets can land here, you need also to check sk_type



^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23 16:27       ` Eric Dumazet
@ 2020-06-23 17:03         ` Yonghong Song
  2020-06-23 22:11           ` Eric Dumazet
  0 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 17:03 UTC (permalink / raw)
  To: Eric Dumazet, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau



On 6/23/20 9:27 AM, Eric Dumazet wrote:
> 
> 
> On 6/22/20 7:22 PM, Yonghong Song wrote:
>>
>>
>> On 6/22/20 6:47 PM, Eric Dumazet wrote:
> &
>>>
>>> Why is the sk_fullsock(sk) needed ?
>>
>> The parameter 'sk' could be a sock_common. That is why the
>> helper name bpf_skc_to_udp6_sock implies. The sock_common cannot
>> access sk_protocol, hence we requires sk_fullsock(sk) here.
>> Did I miss anything?
> 
> OK, if arbitrary sockets can land here, you need also to check sk_type

The current check is:
         if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
             sk->sk_family == AF_INET6)
                 return (unsigned long)sk;
it checks to ensure it is full socket, it is a ipv6 socket and then 
check protocol.

Are you suggesting to add the following check?
   sk->sk_type == SOCK_DGRAM

Not a networking expert. Maybe you can explain when we could have
protocol is IPPROTO_UDP and sk_type not SOCK_DGRAM?

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23 14:52     ` Yonghong Song
@ 2020-06-23 18:23       ` Andrii Nakryiko
  2020-06-23 19:45         ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23 18:23 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Tue, Jun 23, 2020 at 7:52 AM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 6/22/20 11:39 PM, Andrii Nakryiko wrote:
> > On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> The helper is used in tracing programs to cast a socket
> >> pointer to a tcp6_sock pointer.
> >> The return value could be NULL if the casting is illegal.
> >>
> >> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
> >> so the verifier is able to deduce proper return types for the helper.
> >>
> >> Different from the previous BTF_ID based helpers,
> >> the bpf_skc_to_tcp6_sock() argument can be several possible
> >> btf_ids. More specifically, all possible socket data structures
> >> with sock_common appearing in the first in the memory layout.
> >> This patch only added socket types related to tcp and udp.
> >>
> >> All possible argument btf_id and return value btf_id
> >> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
> >> cached. In the future, it is even possible to precompute
> >> these btf_id's at kernel build time.
> >>
> >> Acked-by: Martin KaFai Lau <kafai@fb.com>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >
> > Looks good to me as is, but see a few suggestions, they will probably
> > save me time at some point as well :)
> >
> > Acked-by: Andrii Nakryiko <andriin@fb.com>
> >
> >
> >>   include/linux/bpf.h            | 12 +++++
> >>   include/uapi/linux/bpf.h       |  9 +++-
> >>   kernel/bpf/btf.c               |  1 +
> >>   kernel/bpf/verifier.c          | 43 +++++++++++++-----
> >>   kernel/trace/bpf_trace.c       |  2 +
> >>   net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
> >>   scripts/bpf_helpers_doc.py     |  2 +
> >>   tools/include/uapi/linux/bpf.h |  9 +++-
> >>   8 files changed, 146 insertions(+), 12 deletions(-)
> >>
> >
> > [...]
> >
> >> @@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
> >>                  regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
> >>                  regs[BPF_REG_0].id = ++env->id_gen;
> >>                  regs[BPF_REG_0].mem_size = meta.mem_size;
> >> +       } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
> >> +               int ret_btf_id;
> >> +
> >> +               mark_reg_known_zero(env, regs, BPF_REG_0);
> >> +               regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
> >> +               ret_btf_id = *fn->ret_btf_id;
> >
> > might be a good idea to check fb->ret_btf_id for NULL and print a
> > warning + return -EFAULT. It's not supposed to happen on properly
> > configured kernel, but during development this will save a bunch of
> > time and frustration for next person trying to add something with
> > RET_PTR_TO_BTF_ID_OR_NULL.
>
> I would like prefer to delay this with current code. Otherwise,
> it gives an impression fn->ret_btf_id might be NULL and it is
> actually never NULL. We can add NULL check if the future change
> requires it. I am not sure what the future change could be,
> but you need some way to get the return btf_id, the above is
> one of them.

It's not **supposed** to be NULL, same as a bunch of other invariants
about BPF helper proto definitions, but verifier does check sanity for
such cases, instead of crashing. But up to you. I'm pretty sure
someone will trip up on this.

>
> >
> >> +               if (ret_btf_id == 0) {
> >
> > This also has to be struct/union (after typedef/mods stripping, of
> > course). Or are there other cases?
>
> This is an "int". The func_proto difinition is below:
> int *ret_btf_id; /* return value btf_id */

I meant the BTF type itself that this btf_id points to. Is there any
use case where this won't be a pointer to struct/union and instead
something like a pointer to an int?

>
> >
> >> +                       verbose(env, "invalid return type %d of func %s#%d\n",
> >> +                               fn->ret_type, func_id_name(func_id), func_id);
> >> +                       return -EINVAL;
> >> +               }
> >> +               regs[BPF_REG_0].btf_id = ret_btf_id;
> >>          } else {
> >>                  verbose(env, "unknown return type %d of func %s#%d\n",
> >>                          fn->ret_type, func_id_name(func_id), func_id);
> >
> > [...]
> >
> >> +void init_btf_sock_ids(struct btf *btf)
> >> +{
> >> +       int i, btf_id;
> >> +
> >> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
> >> +               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
> >> +                                              BTF_KIND_STRUCT);
> >> +               if (btf_id > 0)
> >> +                       btf_sock_ids[i] = btf_id;
> >> +       }
> >> +}
> >
> > This will hopefully go away with Jiri's work on static BTF IDs, right?
> > So looking forward to that :)
>
> Yes. That's the plan.
>
> >
> >> +
> >> +static bool check_arg_btf_id(u32 btf_id, u32 arg)
> >> +{
> >> +       int i;
> >> +
> >> +       /* only one argument, no need to check arg */
> >> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
> >> +               if (btf_sock_ids[i] == btf_id)
> >> +                       return true;
> >> +       return false;
> >> +}
> >> +
> >
> > [...]
> >

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23 18:23       ` Andrii Nakryiko
@ 2020-06-23 19:45         ` Yonghong Song
  2020-06-23 20:11           ` Andrii Nakryiko
  0 siblings, 1 reply; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 19:45 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau



On 6/23/20 11:23 AM, Andrii Nakryiko wrote:
> On Tue, Jun 23, 2020 at 7:52 AM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 6/22/20 11:39 PM, Andrii Nakryiko wrote:
>>> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>>>>
>>>> The helper is used in tracing programs to cast a socket
>>>> pointer to a tcp6_sock pointer.
>>>> The return value could be NULL if the casting is illegal.
>>>>
>>>> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
>>>> so the verifier is able to deduce proper return types for the helper.
>>>>
>>>> Different from the previous BTF_ID based helpers,
>>>> the bpf_skc_to_tcp6_sock() argument can be several possible
>>>> btf_ids. More specifically, all possible socket data structures
>>>> with sock_common appearing in the first in the memory layout.
>>>> This patch only added socket types related to tcp and udp.
>>>>
>>>> All possible argument btf_id and return value btf_id
>>>> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
>>>> cached. In the future, it is even possible to precompute
>>>> these btf_id's at kernel build time.
>>>>
>>>> Acked-by: Martin KaFai Lau <kafai@fb.com>
>>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>>> ---
>>>
>>> Looks good to me as is, but see a few suggestions, they will probably
>>> save me time at some point as well :)
>>>
>>> Acked-by: Andrii Nakryiko <andriin@fb.com>
>>>
>>>
>>>>    include/linux/bpf.h            | 12 +++++
>>>>    include/uapi/linux/bpf.h       |  9 +++-
>>>>    kernel/bpf/btf.c               |  1 +
>>>>    kernel/bpf/verifier.c          | 43 +++++++++++++-----
>>>>    kernel/trace/bpf_trace.c       |  2 +
>>>>    net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
>>>>    scripts/bpf_helpers_doc.py     |  2 +
>>>>    tools/include/uapi/linux/bpf.h |  9 +++-
>>>>    8 files changed, 146 insertions(+), 12 deletions(-)
>>>>
>>>
>>> [...]
>>>
>>>> @@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>>>>                   regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
>>>>                   regs[BPF_REG_0].id = ++env->id_gen;
>>>>                   regs[BPF_REG_0].mem_size = meta.mem_size;
>>>> +       } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
>>>> +               int ret_btf_id;
>>>> +
>>>> +               mark_reg_known_zero(env, regs, BPF_REG_0);
>>>> +               regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
>>>> +               ret_btf_id = *fn->ret_btf_id;
>>>
>>> might be a good idea to check fb->ret_btf_id for NULL and print a
>>> warning + return -EFAULT. It's not supposed to happen on properly
>>> configured kernel, but during development this will save a bunch of
>>> time and frustration for next person trying to add something with
>>> RET_PTR_TO_BTF_ID_OR_NULL.
>>
>> I would like prefer to delay this with current code. Otherwise,
>> it gives an impression fn->ret_btf_id might be NULL and it is
>> actually never NULL. We can add NULL check if the future change
>> requires it. I am not sure what the future change could be,
>> but you need some way to get the return btf_id, the above is
>> one of them.
> 
> It's not **supposed** to be NULL, same as a bunch of other invariants
> about BPF helper proto definitions, but verifier does check sanity for
> such cases, instead of crashing. But up to you. I'm pretty sure
> someone will trip up on this.

I think there are certain expectation for argument reg_state vs. certain
fields in the structure.

int btf_resolve_helper_id(struct bpf_verifier_log *log,
                           const struct bpf_func_proto *fn, int arg)
{
         int *btf_id = &fn->btf_id[arg];
         int ret;

         if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
                 return -EINVAL;

         ret = READ_ONCE(*btf_id);
	...
}

If ARG_PTR_TO_BTF_ID, the verifier did not really check
whether btf_id pointer is valid or not. It just use it.

The same applies to the new return type. If in func_proto,
somebody sets RET_PTR_TO_BTF_ID_OR_NULL, it is expected
that func_proto->ret_btf_id is valid.

Code review or feature selftest should catch errors
if they are out-of-sync.

> 
>>
>>>
>>>> +               if (ret_btf_id == 0) {
>>>
>>> This also has to be struct/union (after typedef/mods stripping, of
>>> course). Or are there other cases?
>>
>> This is an "int". The func_proto difinition is below:
>> int *ret_btf_id; /* return value btf_id */
> 
> I meant the BTF type itself that this btf_id points to. Is there any
> use case where this won't be a pointer to struct/union and instead
> something like a pointer to an int?

Maybe you misunderstood. The mechanism is similar to the argument btf_id 
encoding in func_proto's:

static int bpf_seq_printf_btf_ids[5];
...
         .btf_id         = bpf_seq_printf_btf_ids,

func_proto->ret_btf_id will be a pointer to int which encodes the 
btf_id, not the btf_type.

> 
>>
>>>
>>>> +                       verbose(env, "invalid return type %d of func %s#%d\n",
>>>> +                               fn->ret_type, func_id_name(func_id), func_id);
>>>> +                       return -EINVAL;
>>>> +               }
>>>> +               regs[BPF_REG_0].btf_id = ret_btf_id;
>>>>           } else {
>>>>                   verbose(env, "unknown return type %d of func %s#%d\n",
>>>>                           fn->ret_type, func_id_name(func_id), func_id);
>>>
>>> [...]
>>>
>>>> +void init_btf_sock_ids(struct btf *btf)
>>>> +{
>>>> +       int i, btf_id;
>>>> +
>>>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
>>>> +               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
>>>> +                                              BTF_KIND_STRUCT);
>>>> +               if (btf_id > 0)
>>>> +                       btf_sock_ids[i] = btf_id;
>>>> +       }
>>>> +}
>>>
>>> This will hopefully go away with Jiri's work on static BTF IDs, right?
>>> So looking forward to that :)
>>
>> Yes. That's the plan.
>>
>>>
>>>> +
>>>> +static bool check_arg_btf_id(u32 btf_id, u32 arg)
>>>> +{
>>>> +       int i;
>>>> +
>>>> +       /* only one argument, no need to check arg */
>>>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
>>>> +               if (btf_sock_ids[i] == btf_id)
>>>> +                       return true;
>>>> +       return false;
>>>> +}
>>>> +
>>>
>>> [...]
>>>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23 19:45         ` Yonghong Song
@ 2020-06-23 20:11           ` Andrii Nakryiko
  2020-06-23 20:46             ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Andrii Nakryiko @ 2020-06-23 20:11 UTC (permalink / raw)
  To: Yonghong Song
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau

On Tue, Jun 23, 2020 at 12:47 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 6/23/20 11:23 AM, Andrii Nakryiko wrote:
> > On Tue, Jun 23, 2020 at 7:52 AM Yonghong Song <yhs@fb.com> wrote:
> >>
> >>
> >>
> >> On 6/22/20 11:39 PM, Andrii Nakryiko wrote:
> >>> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
> >>>>
> >>>> The helper is used in tracing programs to cast a socket
> >>>> pointer to a tcp6_sock pointer.
> >>>> The return value could be NULL if the casting is illegal.
> >>>>
> >>>> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
> >>>> so the verifier is able to deduce proper return types for the helper.
> >>>>
> >>>> Different from the previous BTF_ID based helpers,
> >>>> the bpf_skc_to_tcp6_sock() argument can be several possible
> >>>> btf_ids. More specifically, all possible socket data structures
> >>>> with sock_common appearing in the first in the memory layout.
> >>>> This patch only added socket types related to tcp and udp.
> >>>>
> >>>> All possible argument btf_id and return value btf_id
> >>>> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
> >>>> cached. In the future, it is even possible to precompute
> >>>> these btf_id's at kernel build time.
> >>>>
> >>>> Acked-by: Martin KaFai Lau <kafai@fb.com>
> >>>> Signed-off-by: Yonghong Song <yhs@fb.com>
> >>>> ---
> >>>
> >>> Looks good to me as is, but see a few suggestions, they will probably
> >>> save me time at some point as well :)
> >>>
> >>> Acked-by: Andrii Nakryiko <andriin@fb.com>
> >>>
> >>>
> >>>>    include/linux/bpf.h            | 12 +++++
> >>>>    include/uapi/linux/bpf.h       |  9 +++-
> >>>>    kernel/bpf/btf.c               |  1 +
> >>>>    kernel/bpf/verifier.c          | 43 +++++++++++++-----
> >>>>    kernel/trace/bpf_trace.c       |  2 +
> >>>>    net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
> >>>>    scripts/bpf_helpers_doc.py     |  2 +
> >>>>    tools/include/uapi/linux/bpf.h |  9 +++-
> >>>>    8 files changed, 146 insertions(+), 12 deletions(-)
> >>>>
> >>>
> >>> [...]
> >>>
> >>>> @@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
> >>>>                   regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
> >>>>                   regs[BPF_REG_0].id = ++env->id_gen;
> >>>>                   regs[BPF_REG_0].mem_size = meta.mem_size;
> >>>> +       } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
> >>>> +               int ret_btf_id;
> >>>> +
> >>>> +               mark_reg_known_zero(env, regs, BPF_REG_0);
> >>>> +               regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
> >>>> +               ret_btf_id = *fn->ret_btf_id;
> >>>
> >>> might be a good idea to check fb->ret_btf_id for NULL and print a
> >>> warning + return -EFAULT. It's not supposed to happen on properly
> >>> configured kernel, but during development this will save a bunch of
> >>> time and frustration for next person trying to add something with
> >>> RET_PTR_TO_BTF_ID_OR_NULL.
> >>
> >> I would like prefer to delay this with current code. Otherwise,
> >> it gives an impression fn->ret_btf_id might be NULL and it is
> >> actually never NULL. We can add NULL check if the future change
> >> requires it. I am not sure what the future change could be,
> >> but you need some way to get the return btf_id, the above is
> >> one of them.
> >
> > It's not **supposed** to be NULL, same as a bunch of other invariants
> > about BPF helper proto definitions, but verifier does check sanity for
> > such cases, instead of crashing. But up to you. I'm pretty sure
> > someone will trip up on this.
>
> I think there are certain expectation for argument reg_state vs. certain
> fields in the structure.
>
> int btf_resolve_helper_id(struct bpf_verifier_log *log,
>                            const struct bpf_func_proto *fn, int arg)
> {
>          int *btf_id = &fn->btf_id[arg];
>          int ret;
>
>          if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
>                  return -EINVAL;
>
>          ret = READ_ONCE(*btf_id);
>         ...
> }
>
> If ARG_PTR_TO_BTF_ID, the verifier did not really check
> whether btf_id pointer is valid or not. It just use it.

Right, it's not a universal rule. But grep for "misconfigured" in
kernel/bpf/verifier.c to see a bunch of places where the verifier
could crash on NULL dereference, but instead emits an error message
and returns failure.

This was a suggestion, I'll stop asking for this :)

>
> The same applies to the new return type. If in func_proto,
> somebody sets RET_PTR_TO_BTF_ID_OR_NULL, it is expected
> that func_proto->ret_btf_id is valid.
>
> Code review or feature selftest should catch errors
> if they are out-of-sync.
>
> >
> >>
> >>>
> >>>> +               if (ret_btf_id == 0) {
> >>>
> >>> This also has to be struct/union (after typedef/mods stripping, of
> >>> course). Or are there other cases?
> >>
> >> This is an "int". The func_proto difinition is below:
> >> int *ret_btf_id; /* return value btf_id */
> >
> > I meant the BTF type itself that this btf_id points to. Is there any
> > use case where this won't be a pointer to struct/union and instead
> > something like a pointer to an int?
>
> Maybe you misunderstood. The mechanism is similar to the argument btf_id
> encoding in func_proto's:
>
> static int bpf_seq_printf_btf_ids[5];
> ...
>          .btf_id         = bpf_seq_printf_btf_ids,
>
> func_proto->ret_btf_id will be a pointer to int which encodes the
> btf_id, not the btf_type.

I understand that. Say it points to value 25. BTF type with ID=25 is
going to be BTF_KIND_PTR -> BTF_KIND_STRUCT. I was wondering if we
want/need to check that it's always BTF_KIND_PTR -> (modifier)* ->
BTF_KIND_STRUCT/BTF_KIND_UNION. That's it.

>
> >
> >>
> >>>
> >>>> +                       verbose(env, "invalid return type %d of func %s#%d\n",
> >>>> +                               fn->ret_type, func_id_name(func_id), func_id);
> >>>> +                       return -EINVAL;
> >>>> +               }
> >>>> +               regs[BPF_REG_0].btf_id = ret_btf_id;
> >>>>           } else {
> >>>>                   verbose(env, "unknown return type %d of func %s#%d\n",
> >>>>                           fn->ret_type, func_id_name(func_id), func_id);
> >>>
> >>> [...]
> >>>
> >>>> +void init_btf_sock_ids(struct btf *btf)
> >>>> +{
> >>>> +       int i, btf_id;
> >>>> +
> >>>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
> >>>> +               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
> >>>> +                                              BTF_KIND_STRUCT);
> >>>> +               if (btf_id > 0)
> >>>> +                       btf_sock_ids[i] = btf_id;
> >>>> +       }
> >>>> +}
> >>>
> >>> This will hopefully go away with Jiri's work on static BTF IDs, right?
> >>> So looking forward to that :)
> >>
> >> Yes. That's the plan.
> >>
> >>>
> >>>> +
> >>>> +static bool check_arg_btf_id(u32 btf_id, u32 arg)
> >>>> +{
> >>>> +       int i;
> >>>> +
> >>>> +       /* only one argument, no need to check arg */
> >>>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
> >>>> +               if (btf_sock_ids[i] == btf_id)
> >>>> +                       return true;
> >>>> +       return false;
> >>>> +}
> >>>> +
> >>>
> >>> [...]
> >>>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper
  2020-06-23 20:11           ` Andrii Nakryiko
@ 2020-06-23 20:46             ` Yonghong Song
  0 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 20:46 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, Networking, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team, Martin KaFai Lau



On 6/23/20 1:11 PM, Andrii Nakryiko wrote:
> On Tue, Jun 23, 2020 at 12:47 PM Yonghong Song <yhs@fb.com> wrote:
>>
>>
>>
>> On 6/23/20 11:23 AM, Andrii Nakryiko wrote:
>>> On Tue, Jun 23, 2020 at 7:52 AM Yonghong Song <yhs@fb.com> wrote:
>>>>
>>>>
>>>>
>>>> On 6/22/20 11:39 PM, Andrii Nakryiko wrote:
>>>>> On Mon, Jun 22, 2020 at 5:38 PM Yonghong Song <yhs@fb.com> wrote:
>>>>>>
>>>>>> The helper is used in tracing programs to cast a socket
>>>>>> pointer to a tcp6_sock pointer.
>>>>>> The return value could be NULL if the casting is illegal.
>>>>>>
>>>>>> A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
>>>>>> so the verifier is able to deduce proper return types for the helper.
>>>>>>
>>>>>> Different from the previous BTF_ID based helpers,
>>>>>> the bpf_skc_to_tcp6_sock() argument can be several possible
>>>>>> btf_ids. More specifically, all possible socket data structures
>>>>>> with sock_common appearing in the first in the memory layout.
>>>>>> This patch only added socket types related to tcp and udp.
>>>>>>
>>>>>> All possible argument btf_id and return value btf_id
>>>>>> for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
>>>>>> cached. In the future, it is even possible to precompute
>>>>>> these btf_id's at kernel build time.
>>>>>>
>>>>>> Acked-by: Martin KaFai Lau <kafai@fb.com>
>>>>>> Signed-off-by: Yonghong Song <yhs@fb.com>
>>>>>> ---
>>>>>
>>>>> Looks good to me as is, but see a few suggestions, they will probably
>>>>> save me time at some point as well :)
>>>>>
>>>>> Acked-by: Andrii Nakryiko <andriin@fb.com>
>>>>>
>>>>>
>>>>>>     include/linux/bpf.h            | 12 +++++
>>>>>>     include/uapi/linux/bpf.h       |  9 +++-
>>>>>>     kernel/bpf/btf.c               |  1 +
>>>>>>     kernel/bpf/verifier.c          | 43 +++++++++++++-----
>>>>>>     kernel/trace/bpf_trace.c       |  2 +
>>>>>>     net/core/filter.c              | 80 ++++++++++++++++++++++++++++++++++
>>>>>>     scripts/bpf_helpers_doc.py     |  2 +
>>>>>>     tools/include/uapi/linux/bpf.h |  9 +++-
>>>>>>     8 files changed, 146 insertions(+), 12 deletions(-)
>>>>>>
>>>>>
>>>>> [...]
>>>>>
>>>>>> @@ -4815,6 +4826,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
>>>>>>                    regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
>>>>>>                    regs[BPF_REG_0].id = ++env->id_gen;
>>>>>>                    regs[BPF_REG_0].mem_size = meta.mem_size;
>>>>>> +       } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
>>>>>> +               int ret_btf_id;
>>>>>> +
>>>>>> +               mark_reg_known_zero(env, regs, BPF_REG_0);
>>>>>> +               regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
>>>>>> +               ret_btf_id = *fn->ret_btf_id;
[...]
>>>
>>>>
>>>>>
>>>>>> +               if (ret_btf_id == 0) {
>>>>>
>>>>> This also has to be struct/union (after typedef/mods stripping, of
>>>>> course). Or are there other cases?
>>>>
>>>> This is an "int". The func_proto difinition is below:
>>>> int *ret_btf_id; /* return value btf_id */
>>>
>>> I meant the BTF type itself that this btf_id points to. Is there any
>>> use case where this won't be a pointer to struct/union and instead
>>> something like a pointer to an int?
>>
>> Maybe you misunderstood. The mechanism is similar to the argument btf_id
>> encoding in func_proto's:
>>
>> static int bpf_seq_printf_btf_ids[5];
>> ...
>>           .btf_id         = bpf_seq_printf_btf_ids,
>>
>> func_proto->ret_btf_id will be a pointer to int which encodes the
>> btf_id, not the btf_type.
> 
> I understand that. Say it points to value 25. BTF type with ID=25 is
> going to be BTF_KIND_PTR -> BTF_KIND_STRUCT. I was wondering if we
> want/need to check that it's always BTF_KIND_PTR -> (modifier)* ->
> BTF_KIND_STRUCT/BTF_KIND_UNION. That's it.

Just to be clear. The ret_btf_id returned here is the btf id is the
type id of the pointee, so in this case it is BTF_KIND_STRUCT/....

These id's are pre-calculated and stored in memory. Unless the whole
thing is mess up, there is no need to check...

> 
>>
>>>
>>>>
>>>>>
>>>>>> +                       verbose(env, "invalid return type %d of func %s#%d\n",
>>>>>> +                               fn->ret_type, func_id_name(func_id), func_id);
>>>>>> +                       return -EINVAL;
>>>>>> +               }
>>>>>> +               regs[BPF_REG_0].btf_id = ret_btf_id;
>>>>>>            } else {
>>>>>>                    verbose(env, "unknown return type %d of func %s#%d\n",
>>>>>>                            fn->ret_type, func_id_name(func_id), func_id);
>>>>>
>>>>> [...]
>>>>>
>>>>>> +void init_btf_sock_ids(struct btf *btf)
>>>>>> +{
>>>>>> +       int i, btf_id;
>>>>>> +
>>>>>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
>>>>>> +               btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
>>>>>> +                                              BTF_KIND_STRUCT);
>>>>>> +               if (btf_id > 0)
>>>>>> +                       btf_sock_ids[i] = btf_id;
>>>>>> +       }
>>>>>> +}
>>>>>
>>>>> This will hopefully go away with Jiri's work on static BTF IDs, right?
>>>>> So looking forward to that :)
>>>>
>>>> Yes. That's the plan.
>>>>
>>>>>
>>>>>> +
>>>>>> +static bool check_arg_btf_id(u32 btf_id, u32 arg)
>>>>>> +{
>>>>>> +       int i;
>>>>>> +
>>>>>> +       /* only one argument, no need to check arg */
>>>>>> +       for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
>>>>>> +               if (btf_sock_ids[i] == btf_id)
>>>>>> +                       return true;
>>>>>> +       return false;
>>>>>> +}
>>>>>> +
>>>>>
>>>>> [...]
>>>>>

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23 17:03         ` Yonghong Song
@ 2020-06-23 22:11           ` Eric Dumazet
  2020-06-23 22:44             ` Yonghong Song
  0 siblings, 1 reply; 43+ messages in thread
From: Eric Dumazet @ 2020-06-23 22:11 UTC (permalink / raw)
  To: Yonghong Song, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau



On 6/23/20 10:03 AM, Yonghong Song wrote:
> 
> 
> On 6/23/20 9:27 AM, Eric Dumazet wrote:
>>
>>
>> On 6/22/20 7:22 PM, Yonghong Song wrote:
>>>
>>>
>>> On 6/22/20 6:47 PM, Eric Dumazet wrote:
>> &
>>>>
>>>> Why is the sk_fullsock(sk) needed ?
>>>
>>> The parameter 'sk' could be a sock_common. That is why the
>>> helper name bpf_skc_to_udp6_sock implies. The sock_common cannot
>>> access sk_protocol, hence we requires sk_fullsock(sk) here.
>>> Did I miss anything?
>>
>> OK, if arbitrary sockets can land here, you need also to check sk_type
> 
> The current check is:
>         if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
>             sk->sk_family == AF_INET6)
>                 return (unsigned long)sk;
> it checks to ensure it is full socket, it is a ipv6 socket and then check protocol.
> 
> Are you suggesting to add the following check?
>   sk->sk_type == SOCK_DGRAM
> 
> Not a networking expert. Maybe you can explain when we could have
> protocol is IPPROTO_UDP and sk_type not SOCK_DGRAM?


RAW sockets for instance.

Look at :

commit 940ba14986657a50c15f694efca1beba31fa568f
Author: Eric Dumazet <edumazet@google.com>
Date:   Tue Jan 21 23:17:14 2020 -0800

    gtp: make sure only SOCK_DGRAM UDP sockets are accepted
    
    A malicious user could use RAW sockets and fool
    GTP using them as standard SOCK_DGRAM UDP sockets.

^ permalink raw reply	[flat|nested] 43+ messages in thread

* Re: [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper
  2020-06-23 22:11           ` Eric Dumazet
@ 2020-06-23 22:44             ` Yonghong Song
  0 siblings, 0 replies; 43+ messages in thread
From: Yonghong Song @ 2020-06-23 22:44 UTC (permalink / raw)
  To: Eric Dumazet, bpf, netdev
  Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, Martin KaFai Lau



On 6/23/20 3:11 PM, Eric Dumazet wrote:
> 
> 
> On 6/23/20 10:03 AM, Yonghong Song wrote:
>>
>>
>> On 6/23/20 9:27 AM, Eric Dumazet wrote:
>>>
>>>
>>> On 6/22/20 7:22 PM, Yonghong Song wrote:
>>>>
>>>>
>>>> On 6/22/20 6:47 PM, Eric Dumazet wrote:
>>> &
>>>>>
>>>>> Why is the sk_fullsock(sk) needed ?
>>>>
>>>> The parameter 'sk' could be a sock_common. That is why the
>>>> helper name bpf_skc_to_udp6_sock implies. The sock_common cannot
>>>> access sk_protocol, hence we requires sk_fullsock(sk) here.
>>>> Did I miss anything?
>>>
>>> OK, if arbitrary sockets can land here, you need also to check sk_type
>>
>> The current check is:
>>          if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
>>              sk->sk_family == AF_INET6)
>>                  return (unsigned long)sk;
>> it checks to ensure it is full socket, it is a ipv6 socket and then check protocol.
>>
>> Are you suggesting to add the following check?
>>    sk->sk_type == SOCK_DGRAM
>>
>> Not a networking expert. Maybe you can explain when we could have
>> protocol is IPPROTO_UDP and sk_type not SOCK_DGRAM?
> 
> 
> RAW sockets for instance.
> 
> Look at :
> 
> commit 940ba14986657a50c15f694efca1beba31fa568f
> Author: Eric Dumazet <edumazet@google.com>
> Date:   Tue Jan 21 23:17:14 2020 -0800
> 
>      gtp: make sure only SOCK_DGRAM UDP sockets are accepted
>      
>      A malicious user could use RAW sockets and fool
>      GTP using them as standard SOCK_DGRAM UDP sockets.

Thanks for the pointer! Will fix it in the next revision.

^ permalink raw reply	[flat|nested] 43+ messages in thread

end of thread, other threads:[~2020-06-23 22:44 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-23  0:36 [PATCH bpf-next v3 00/15] implement bpf iterator for tcp and udp sockets Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 01/15] net: bpf: add bpf_seq_afinfo in tcp_iter_state Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 02/15] net: bpf: implement bpf iterator for tcp Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 03/15] bpf: support 'X' in bpf_seq_printf() helper Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 04/15] bpf: allow tracing programs to use bpf_jiffies64() helper Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 05/15] bpf: add bpf_skc_to_tcp6_sock() helper Yonghong Song
2020-06-23  5:46   ` kernel test robot
2020-06-23  5:46     ` kernel test robot
2020-06-23  5:53   ` kernel test robot
2020-06-23  5:53     ` kernel test robot
2020-06-23  6:39   ` Andrii Nakryiko
2020-06-23 14:52     ` Yonghong Song
2020-06-23 18:23       ` Andrii Nakryiko
2020-06-23 19:45         ` Yonghong Song
2020-06-23 20:11           ` Andrii Nakryiko
2020-06-23 20:46             ` Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers Yonghong Song
2020-06-23  5:18   ` kernel test robot
2020-06-23  5:18     ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers kernel test robot
2020-06-23  6:39   ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp,tcp_timewait,tcp_request}_sock() helpers kernel test robot
2020-06-23  6:39     ` [PATCH bpf-next v3 06/15] bpf: add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers kernel test robot
2020-06-23  0:36 ` [PATCH bpf-next v3 07/15] net: bpf: add bpf_seq_afinfo in udp_iter_state Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 08/15] net: bpf: implement bpf iterator for udp Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 09/15] bpf: add bpf_skc_to_udp6_sock() helper Yonghong Song
2020-06-23  1:47   ` Eric Dumazet
2020-06-23  2:22     ` Yonghong Song
2020-06-23 16:27       ` Eric Dumazet
2020-06-23 17:03         ` Yonghong Song
2020-06-23 22:11           ` Eric Dumazet
2020-06-23 22:44             ` Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 10/15] bpf/selftests: move newer bpf_iter_* type redefining to a new header file Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 11/15] tools/bpf: refactor some net macros to libbpf bpf_tracing_net.h Yonghong Song
2020-06-23  6:45   ` Andrii Nakryiko
2020-06-23 14:56     ` Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 12/15] tools/libbpf: add more common macros to bpf_tracing_net.h Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 13/15] tools/bpf: selftests: implement sample tcp/tcp6 bpf_iter programs Yonghong Song
2020-06-23  6:56   ` Andrii Nakryiko
2020-06-23 15:03     ` Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 14/15] tools/bpf: add udp4/udp6 bpf iterator Yonghong Song
2020-06-23  6:57   ` Andrii Nakryiko
2020-06-23 15:03     ` Yonghong Song
2020-06-23  0:36 ` [PATCH bpf-next v3 15/15] bpf/selftests: add tcp/udp iterator programs to selftests Yonghong Song
2020-06-23  6:59   ` Andrii Nakryiko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.