All of lore.kernel.org
 help / color / mirror / Atom feed
* [Patch net-next v3] sock_map: dump socket map id via diag
@ 2023-03-19 19:19 Cong Wang
  2023-03-20 18:13 ` Stanislav Fomichev
  0 siblings, 1 reply; 7+ messages in thread
From: Cong Wang @ 2023-03-19 19:19 UTC (permalink / raw)
  To: netdev; +Cc: bpf, sdf, Cong Wang, John Fastabend, Jakub Sitnicki

From: Cong Wang <cong.wang@bytedance.com>

Currently there is no way to know which sockmap a socket has been added
to from outside, especially for that a socket can be added to multiple
sockmap's. We could dump this via socket diag, as shown below.

Sample output:

  # ./iproute2/misc/ss -tnaie --bpf-map
  ESTAB  0      344329     127.0.0.1:1234     127.0.0.1:40912 ino:21098 sk:5 cgroup:/user.slice/user-0.slice/session-c1.scope <-> sockmap: 1

  # bpftool map
  1: sockmap  flags 0x0
  	key 4B  value 4B  max_entries 2  memlock 4096B
	pids echo-sockmap(549)
  4: array  name pid_iter.rodata  flags 0x480
	key 4B  value 4B  max_entries 1  memlock 4096B
	btf_id 10  frozen
	pids bpftool(624)

In the future, we could dump other sockmap related stats too, hence I
make it a nested attribute.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
v3: remove redundant rcu read lock
    use likely() for psock check

v2: rename enum's with more generic names
    sock_map_idiag_dump -> sock_map_diag_dump()
    make sock_map_diag_dump() return number of maps

 include/linux/bpf.h            |  1 +
 include/uapi/linux/inet_diag.h |  1 +
 include/uapi/linux/sock_diag.h |  8 ++++++
 include/uapi/linux/unix_diag.h |  1 +
 net/core/sock_map.c            | 46 ++++++++++++++++++++++++++++++++++
 net/ipv4/inet_diag.c           |  5 ++++
 net/unix/diag.c                |  6 +++++
 7 files changed, 68 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6792a7940e1e..4cc315ce26a9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2638,6 +2638,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
 void sock_map_unhash(struct sock *sk);
 void sock_map_destroy(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
+int sock_map_diag_dump(struct sock *sk, struct sk_buff *skb, int attr);
 #else
 static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
 					    struct bpf_prog_aux *prog_aux)
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 50655de04c9b..d1f1e4522633 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -161,6 +161,7 @@ enum {
 	INET_DIAG_SK_BPF_STORAGES,
 	INET_DIAG_CGROUP_ID,
 	INET_DIAG_SOCKOPT,
+	INET_DIAG_BPF_MAP,
 	__INET_DIAG_MAX,
 };
 
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 5f74a5f6091d..7c961940b408 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -62,4 +62,12 @@ enum {
 
 #define SK_DIAG_BPF_STORAGE_MAX        (__SK_DIAG_BPF_STORAGE_MAX - 1)
 
+enum {
+	SK_DIAG_BPF_MAP_NONE,
+	SK_DIAG_BPF_MAP_IDS,
+	__SK_DIAG_BPF_MAP_MAX,
+};
+
+#define SK_DIAG_BPF_MAP_MAX        (__SK_DIAG_BPF_MAP_MAX - 1)
+
 #endif /* _UAPI__SOCK_DIAG_H__ */
diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h
index a1988576fa8a..b95a2b33521d 100644
--- a/include/uapi/linux/unix_diag.h
+++ b/include/uapi/linux/unix_diag.h
@@ -42,6 +42,7 @@ enum {
 	UNIX_DIAG_MEMINFO,
 	UNIX_DIAG_SHUTDOWN,
 	UNIX_DIAG_UID,
+	UNIX_DIAG_BPF_MAP,
 
 	__UNIX_DIAG_MAX,
 };
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9b854e236d23..c4049095f64e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1656,6 +1656,52 @@ void sock_map_close(struct sock *sk, long timeout)
 }
 EXPORT_SYMBOL_GPL(sock_map_close);
 
+int sock_map_diag_dump(struct sock *sk, struct sk_buff *skb, int attrtype)
+{
+	struct sk_psock_link *link;
+	struct nlattr *nla, *attr;
+	int nr_links = 0, ret = 0;
+	struct sk_psock *psock;
+	u32 *ids;
+
+	psock = sk_psock_get(sk);
+	if (likely(!psock))
+		return 0;
+
+	nla = nla_nest_start_noflag(skb, attrtype);
+	if (!nla) {
+		sk_psock_put(sk, psock);
+		return -EMSGSIZE;
+	}
+	spin_lock_bh(&psock->link_lock);
+	list_for_each_entry(link, &psock->link, list)
+		nr_links++;
+
+	attr = nla_reserve(skb, SK_DIAG_BPF_MAP_IDS,
+			   sizeof(link->map->id) * nr_links);
+	if (!attr) {
+		ret = -EMSGSIZE;
+		goto unlock;
+	}
+
+	ids = nla_data(attr);
+	list_for_each_entry(link, &psock->link, list) {
+		*ids = link->map->id;
+		ids++;
+	}
+unlock:
+	spin_unlock_bh(&psock->link_lock);
+	sk_psock_put(sk, psock);
+	if (ret) {
+		nla_nest_cancel(skb, nla);
+	} else {
+		ret = nr_links;
+		nla_nest_end(skb, nla);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sock_map_diag_dump);
+
 static int sock_map_iter_attach_target(struct bpf_prog *prog,
 				       union bpf_iter_link_info *linfo,
 				       struct bpf_iter_aux_info *aux)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b812eb36f0e3..0949909d5b46 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -197,6 +197,11 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 		    &inet_sockopt))
 		goto errout;
 
+#ifdef CONFIG_BPF_SYSCALL
+	if (sock_map_diag_dump(sk, skb, INET_DIAG_BPF_MAP) < 0)
+		goto errout;
+#endif
+
 	return 0;
 errout:
 	return 1;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 616b55c5b890..54aa8da2831e 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -6,6 +6,7 @@
 #include <linux/skbuff.h>
 #include <linux/module.h>
 #include <linux/uidgid.h>
+#include <linux/bpf.h>
 #include <net/netlink.h>
 #include <net/af_unix.h>
 #include <net/tcp_states.h>
@@ -172,6 +173,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	    sk_diag_dump_uid(sk, skb, user_ns))
 		goto out_nlmsg_trim;
 
+#ifdef CONFIG_BPF_SYSCALL
+	if (sock_map_diag_dump(sk, skb, UNIX_DIAG_BPF_MAP) < 0)
+		goto out_nlmsg_trim;
+#endif
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [Patch net-next v3] sock_map: dump socket map id via diag
  2023-03-19 19:19 [Patch net-next v3] sock_map: dump socket map id via diag Cong Wang
@ 2023-03-20 18:13 ` Stanislav Fomichev
  2023-03-20 20:29   ` Martin KaFai Lau
  2023-03-25 20:07   ` Cong Wang
  0 siblings, 2 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2023-03-20 18:13 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, bpf, Cong Wang, John Fastabend, Jakub Sitnicki

On Sun, Mar 19, 2023 at 12:19 PM Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
> From: Cong Wang <cong.wang@bytedance.com>
>
> Currently there is no way to know which sockmap a socket has been added
> to from outside, especially for that a socket can be added to multiple
> sockmap's. We could dump this via socket diag, as shown below.
>
> Sample output:
>
>   # ./iproute2/misc/ss -tnaie --bpf-map
>   ESTAB  0      344329     127.0.0.1:1234     127.0.0.1:40912 ino:21098 sk:5 cgroup:/user.slice/user-0.slice/session-c1.scope <-> sockmap: 1
>
>   # bpftool map
>   1: sockmap  flags 0x0
>         key 4B  value 4B  max_entries 2  memlock 4096B
>         pids echo-sockmap(549)
>   4: array  name pid_iter.rodata  flags 0x480
>         key 4B  value 4B  max_entries 1  memlock 4096B
>         btf_id 10  frozen
>         pids bpftool(624)
>
> In the future, we could dump other sockmap related stats too, hence I
> make it a nested attribute.
>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

Looks fine from my POW, will let others comment.

One thing I still don't understand here: what is missing from the
socket iterators to implement this? Is it all the sk_psock_get magic?
I remember you dismissed Yonghong's suggestion on v1, but have you
actually tried it?

Also: a test would be nice to have. I know you've tested it with the
iproute2, but having something regularly exercised by the ci seems
good to have (and not a ton of work).

> ---
> v3: remove redundant rcu read lock
>     use likely() for psock check
>
> v2: rename enum's with more generic names
>     sock_map_idiag_dump -> sock_map_diag_dump()
>     make sock_map_diag_dump() return number of maps
>
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/inet_diag.h |  1 +
>  include/uapi/linux/sock_diag.h |  8 ++++++
>  include/uapi/linux/unix_diag.h |  1 +
>  net/core/sock_map.c            | 46 ++++++++++++++++++++++++++++++++++
>  net/ipv4/inet_diag.c           |  5 ++++
>  net/unix/diag.c                |  6 +++++
>  7 files changed, 68 insertions(+)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 6792a7940e1e..4cc315ce26a9 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2638,6 +2638,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr,
>  void sock_map_unhash(struct sock *sk);
>  void sock_map_destroy(struct sock *sk);
>  void sock_map_close(struct sock *sk, long timeout);
> +int sock_map_diag_dump(struct sock *sk, struct sk_buff *skb, int attr);
>  #else
>  static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
>                                             struct bpf_prog_aux *prog_aux)
> diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
> index 50655de04c9b..d1f1e4522633 100644
> --- a/include/uapi/linux/inet_diag.h
> +++ b/include/uapi/linux/inet_diag.h
> @@ -161,6 +161,7 @@ enum {
>         INET_DIAG_SK_BPF_STORAGES,
>         INET_DIAG_CGROUP_ID,
>         INET_DIAG_SOCKOPT,
> +       INET_DIAG_BPF_MAP,
>         __INET_DIAG_MAX,
>  };
>
> diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
> index 5f74a5f6091d..7c961940b408 100644
> --- a/include/uapi/linux/sock_diag.h
> +++ b/include/uapi/linux/sock_diag.h
> @@ -62,4 +62,12 @@ enum {
>
>  #define SK_DIAG_BPF_STORAGE_MAX        (__SK_DIAG_BPF_STORAGE_MAX - 1)
>
> +enum {
> +       SK_DIAG_BPF_MAP_NONE,
> +       SK_DIAG_BPF_MAP_IDS,
> +       __SK_DIAG_BPF_MAP_MAX,
> +};
> +
> +#define SK_DIAG_BPF_MAP_MAX        (__SK_DIAG_BPF_MAP_MAX - 1)
> +
>  #endif /* _UAPI__SOCK_DIAG_H__ */
> diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h
> index a1988576fa8a..b95a2b33521d 100644
> --- a/include/uapi/linux/unix_diag.h
> +++ b/include/uapi/linux/unix_diag.h
> @@ -42,6 +42,7 @@ enum {
>         UNIX_DIAG_MEMINFO,
>         UNIX_DIAG_SHUTDOWN,
>         UNIX_DIAG_UID,
> +       UNIX_DIAG_BPF_MAP,
>
>         __UNIX_DIAG_MAX,
>  };
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 9b854e236d23..c4049095f64e 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -1656,6 +1656,52 @@ void sock_map_close(struct sock *sk, long timeout)
>  }
>  EXPORT_SYMBOL_GPL(sock_map_close);
>
> +int sock_map_diag_dump(struct sock *sk, struct sk_buff *skb, int attrtype)
> +{
> +       struct sk_psock_link *link;
> +       struct nlattr *nla, *attr;
> +       int nr_links = 0, ret = 0;
> +       struct sk_psock *psock;
> +       u32 *ids;
> +
> +       psock = sk_psock_get(sk);
> +       if (likely(!psock))
> +               return 0;
> +
> +       nla = nla_nest_start_noflag(skb, attrtype);
> +       if (!nla) {
> +               sk_psock_put(sk, psock);
> +               return -EMSGSIZE;
> +       }
> +       spin_lock_bh(&psock->link_lock);
> +       list_for_each_entry(link, &psock->link, list)
> +               nr_links++;
> +
> +       attr = nla_reserve(skb, SK_DIAG_BPF_MAP_IDS,
> +                          sizeof(link->map->id) * nr_links);
> +       if (!attr) {
> +               ret = -EMSGSIZE;
> +               goto unlock;
> +       }
> +
> +       ids = nla_data(attr);
> +       list_for_each_entry(link, &psock->link, list) {
> +               *ids = link->map->id;
> +               ids++;
> +       }
> +unlock:
> +       spin_unlock_bh(&psock->link_lock);
> +       sk_psock_put(sk, psock);
> +       if (ret) {
> +               nla_nest_cancel(skb, nla);
> +       } else {
> +               ret = nr_links;
> +               nla_nest_end(skb, nla);
> +       }
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(sock_map_diag_dump);
> +
>  static int sock_map_iter_attach_target(struct bpf_prog *prog,
>                                        union bpf_iter_link_info *linfo,
>                                        struct bpf_iter_aux_info *aux)
> diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
> index b812eb36f0e3..0949909d5b46 100644
> --- a/net/ipv4/inet_diag.c
> +++ b/net/ipv4/inet_diag.c
> @@ -197,6 +197,11 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
>                     &inet_sockopt))
>                 goto errout;
>
> +#ifdef CONFIG_BPF_SYSCALL
> +       if (sock_map_diag_dump(sk, skb, INET_DIAG_BPF_MAP) < 0)
> +               goto errout;
> +#endif
> +
>         return 0;
>  errout:
>         return 1;
> diff --git a/net/unix/diag.c b/net/unix/diag.c
> index 616b55c5b890..54aa8da2831e 100644
> --- a/net/unix/diag.c
> +++ b/net/unix/diag.c
> @@ -6,6 +6,7 @@
>  #include <linux/skbuff.h>
>  #include <linux/module.h>
>  #include <linux/uidgid.h>
> +#include <linux/bpf.h>
>  #include <net/netlink.h>
>  #include <net/af_unix.h>
>  #include <net/tcp_states.h>
> @@ -172,6 +173,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
>             sk_diag_dump_uid(sk, skb, user_ns))
>                 goto out_nlmsg_trim;
>
> +#ifdef CONFIG_BPF_SYSCALL
> +       if (sock_map_diag_dump(sk, skb, UNIX_DIAG_BPF_MAP) < 0)
> +               goto out_nlmsg_trim;
> +#endif
> +
>         nlmsg_end(skb, nlh);
>         return 0;
>
> --
> 2.34.1
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch net-next v3] sock_map: dump socket map id via diag
  2023-03-20 18:13 ` Stanislav Fomichev
@ 2023-03-20 20:29   ` Martin KaFai Lau
  2023-03-25 20:27     ` Cong Wang
  2023-03-25 20:07   ` Cong Wang
  1 sibling, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2023-03-20 20:29 UTC (permalink / raw)
  To: Stanislav Fomichev, Cong Wang
  Cc: netdev, bpf, Cong Wang, John Fastabend, Jakub Sitnicki

On 3/20/23 11:13 AM, Stanislav Fomichev wrote:
> One thing I still don't understand here: what is missing from the
> socket iterators to implement this? Is it all the sk_psock_get magic?
> I remember you dismissed Yonghong's suggestion on v1, but have you
> actually tried it?
would be useful to know what is missing to print the bpf map id without adding 
kernel code. There is new bpf_rdonly_cast() which will be useful here also.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch net-next v3] sock_map: dump socket map id via diag
  2023-03-20 18:13 ` Stanislav Fomichev
  2023-03-20 20:29   ` Martin KaFai Lau
@ 2023-03-25 20:07   ` Cong Wang
  2023-03-27 16:33     ` Stanislav Fomichev
  1 sibling, 1 reply; 7+ messages in thread
From: Cong Wang @ 2023-03-25 20:07 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, bpf, Cong Wang, John Fastabend, Jakub Sitnicki

On Mon, Mar 20, 2023 at 11:13:03AM -0700, Stanislav Fomichev wrote:
> On Sun, Mar 19, 2023 at 12:19 PM Cong Wang <xiyou.wangcong@gmail.com> wrote:
> >
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > Currently there is no way to know which sockmap a socket has been added
> > to from outside, especially for that a socket can be added to multiple
> > sockmap's. We could dump this via socket diag, as shown below.
> >
> > Sample output:
> >
> >   # ./iproute2/misc/ss -tnaie --bpf-map
> >   ESTAB  0      344329     127.0.0.1:1234     127.0.0.1:40912 ino:21098 sk:5 cgroup:/user.slice/user-0.slice/session-c1.scope <-> sockmap: 1
> >
> >   # bpftool map
> >   1: sockmap  flags 0x0
> >         key 4B  value 4B  max_entries 2  memlock 4096B
> >         pids echo-sockmap(549)
> >   4: array  name pid_iter.rodata  flags 0x480
> >         key 4B  value 4B  max_entries 1  memlock 4096B
> >         btf_id 10  frozen
> >         pids bpftool(624)
> >
> > In the future, we could dump other sockmap related stats too, hence I
> > make it a nested attribute.
> >
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: Jakub Sitnicki <jakub@cloudflare.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> 
> Looks fine from my POW, will let others comment.
> 
> One thing I still don't understand here: what is missing from the
> socket iterators to implement this? Is it all the sk_psock_get magic?
> I remember you dismissed Yonghong's suggestion on v1, but have you
> actually tried it?

I am very confused. So in order to figure out which sockmap a socket has
been added to, I have to dump *all* sockmap's??? It seems you are
suggesting to solve this with a more complex and unnecessary approach?
Please tell me why, I am really lost, I don't even see there is a point
to make here.

> 
> Also: a test would be nice to have. I know you've tested it with the
> iproute2, but having something regularly exercised by the ci seems
> good to have (and not a ton of work).

Sure, so where are the tests for socket diag? I don't see any within the
tree:

$ git grep INET_DIAG_SOCKOPT -- tools/
$

Note, this is not suitable for bpf selftests, because it is less relevant
to bpf, much more relevant to socket diag. I thought this is obvious.

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch net-next v3] sock_map: dump socket map id via diag
  2023-03-20 20:29   ` Martin KaFai Lau
@ 2023-03-25 20:27     ` Cong Wang
  2023-03-25 22:10       ` Martin KaFai Lau
  0 siblings, 1 reply; 7+ messages in thread
From: Cong Wang @ 2023-03-25 20:27 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Stanislav Fomichev, netdev, bpf, Cong Wang, John Fastabend,
	Jakub Sitnicki

On Mon, Mar 20, 2023 at 01:29:39PM -0700, Martin KaFai Lau wrote:
> On 3/20/23 11:13 AM, Stanislav Fomichev wrote:
> > One thing I still don't understand here: what is missing from the
> > socket iterators to implement this? Is it all the sk_psock_get magic?
> > I remember you dismissed Yonghong's suggestion on v1, but have you
> > actually tried it?
> would be useful to know what is missing to print the bpf map id without
> adding kernel code. There is new bpf_rdonly_cast() which will be useful here
> also.

So you don't consider eBPF code as kernel code, right? Interestingly
enough, eBPF code runs in kernel... and you still need to write eBPF
code. So what is the point of "without adding kernel code" here?

What is even more interesting is that even your own code does not agree
with you here, for example, you introduced INET_DIAG_SK_BPF_STORAGES, so
what was missing to print sk bpf storage without adding kernel code?

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch net-next v3] sock_map: dump socket map id via diag
  2023-03-25 20:27     ` Cong Wang
@ 2023-03-25 22:10       ` Martin KaFai Lau
  0 siblings, 0 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2023-03-25 22:10 UTC (permalink / raw)
  To: Cong Wang
  Cc: Stanislav Fomichev, netdev, bpf, Cong Wang, John Fastabend,
	Jakub Sitnicki

On 3/25/23 1:27 PM, Cong Wang wrote:
> On Mon, Mar 20, 2023 at 01:29:39PM -0700, Martin KaFai Lau wrote:
>> On 3/20/23 11:13 AM, Stanislav Fomichev wrote:
>>> One thing I still don't understand here: what is missing from the
>>> socket iterators to implement this? Is it all the sk_psock_get magic?
>>> I remember you dismissed Yonghong's suggestion on v1, but have you
>>> actually tried it?
>> would be useful to know what is missing to print the bpf map id without
>> adding kernel code. There is new bpf_rdonly_cast() which will be useful here
>> also.
> 
> So you don't consider eBPF code as kernel code, right? Interestingly
> enough, eBPF code runs in kernel... and you still need to write eBPF
> code. So what is the point of "without adding kernel code" here?

Interesting, how is it the same? if it needs to print something other than the 
map id in the future, even putting aside further kernel maintenance and 
additional review, does a new bpf prog need to upgrade and reboot the kernel?

Based on your idea, all possible sk_filter automatically qualify to be added to 
the kernel source tree now because they also run in the kernel?

> 
> What is even more interesting is that even your own code does not agree
> with you here, for example, you introduced INET_DIAG_SK_BPF_STORAGES, so
> what was missing to print sk bpf storage without adding kernel code?

Yep, you are absolutely correct. Only if bpf-iter was available earlier. If 
bpf-iter was available before INET_DIAG_SK_BPF_STORAGES was added, there was no 
need to add INET_DIAG_SK_BPF_STORAGES and it would be no brainer to explore the 
bpf-iter approach first. Things have been improving.

The question (from a few people) was to figure out what was missing in the 
bpf-iter approach to print this bpf bits and trying to help. It is the only 
reason I replied. Apparently, you have not even tried and not interested.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Patch net-next v3] sock_map: dump socket map id via diag
  2023-03-25 20:07   ` Cong Wang
@ 2023-03-27 16:33     ` Stanislav Fomichev
  0 siblings, 0 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2023-03-27 16:33 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, bpf, Cong Wang, John Fastabend, Jakub Sitnicki

On 03/25, Cong Wang wrote:
> On Mon, Mar 20, 2023 at 11:13:03AM -0700, Stanislav Fomichev wrote:
> > On Sun, Mar 19, 2023 at 12:19 PM Cong Wang <xiyou.wangcong@gmail.com>  
> wrote:
> > >
> > > From: Cong Wang <cong.wang@bytedance.com>
> > >
> > > Currently there is no way to know which sockmap a socket has been  
> added
> > > to from outside, especially for that a socket can be added to multiple
> > > sockmap's. We could dump this via socket diag, as shown below.
> > >
> > > Sample output:
> > >
> > >   # ./iproute2/misc/ss -tnaie --bpf-map
> > >   ESTAB  0      344329     127.0.0.1:1234     127.0.0.1:40912  
> ino:21098 sk:5 cgroup:/user.slice/user-0.slice/session-c1.scope <->  
> sockmap: 1
> > >
> > >   # bpftool map
> > >   1: sockmap  flags 0x0
> > >         key 4B  value 4B  max_entries 2  memlock 4096B
> > >         pids echo-sockmap(549)
> > >   4: array  name pid_iter.rodata  flags 0x480
> > >         key 4B  value 4B  max_entries 1  memlock 4096B
> > >         btf_id 10  frozen
> > >         pids bpftool(624)
> > >
> > > In the future, we could dump other sockmap related stats too, hence I
> > > make it a nested attribute.
> > >
> > > Cc: John Fastabend <john.fastabend@gmail.com>
> > > Cc: Jakub Sitnicki <jakub@cloudflare.com>
> > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> >
> > Looks fine from my POW, will let others comment.
> >
> > One thing I still don't understand here: what is missing from the
> > socket iterators to implement this? Is it all the sk_psock_get magic?
> > I remember you dismissed Yonghong's suggestion on v1, but have you
> > actually tried it?

> I am very confused. So in order to figure out which sockmap a socket has
> been added to, I have to dump *all* sockmap's??? It seems you are
> suggesting to solve this with a more complex and unnecessary approach?
> Please tell me why, I am really lost, I don't even see there is a point
> to make here.

With a socket iter, you can iterate over all sockets and run some bpf
program on it do dump some state. So you'd iterate over the sockets,
not sockmaps. For each socket you get a pointer to sock and you do the same
sk_psock_get+list_for_each_entry(psock->link).

(in theory; would be interesting to see whether it works in practice)

> >
> > Also: a test would be nice to have. I know you've tested it with the
> > iproute2, but having something regularly exercised by the ci seems
> > good to have (and not a ton of work).

> Sure, so where are the tests for socket diag? I don't see any within the
> tree:

> $ git grep INET_DIAG_SOCKOPT -- tools/
> $

> Note, this is not suitable for bpf selftests, because it is less relevant
> to bpf, much more relevant to socket diag. I thought this is obvious.

Never too late to start testing those sock diag paths :-)
Put them in the net selftests if you don't think bpf selftests is the right
fit?

> Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-03-27 16:33 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-19 19:19 [Patch net-next v3] sock_map: dump socket map id via diag Cong Wang
2023-03-20 18:13 ` Stanislav Fomichev
2023-03-20 20:29   ` Martin KaFai Lau
2023-03-25 20:27     ` Cong Wang
2023-03-25 22:10       ` Martin KaFai Lau
2023-03-25 20:07   ` Cong Wang
2023-03-27 16:33     ` Stanislav Fomichev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.