linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2022-01-15  2:38 zhudi (E)
  2022-01-15  2:53 ` Andrii Nakryiko
  0 siblings, 1 reply; 16+ messages in thread
From: zhudi (E) @ 2022-01-15  2:38 UTC (permalink / raw)
  To: Andrii Nakryiko, Jakub Sitnicki
  Cc: Alexei Starovoitov, David S. Miller, Daniel Borkmann,
	Andrii Nakryiko, Martin Lau, Song Liu, Yonghong Song,
	john fastabend, KP Singh, bpf, open list, Luzhihao (luzhihao,
	Euler), Chenxiang (EulerOS)

> On Thu, Jan 13, 2022 at 8:15 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
> >
> > On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:
> > > Right now there is no way to query whether BPF programs are
> > > attached to a sockmap or not.
> > >
> > > we can use the standard interface in libbpf to query, such as:
> > > bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> > > the mapFd is the fd of sockmap.
> > >
> > > Signed-off-by: Di Zhu <zhudi2@huawei.com>
> > > Acked-by: Yonghong Song <yhs@fb.com>
> > > ---
> > >  include/linux/bpf.h  |  9 +++++
> > >  kernel/bpf/syscall.c |  5 +++
> > >  net/core/sock_map.c  | 78
> ++++++++++++++++++++++++++++++++++++++++----
> > >  3 files changed, 85 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index 6e947cd91152..c4ca14c9f838 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog
> *prog,
> > >  int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog
> *prog);
> > >  int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type
> ptype);
> > >  int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> *value, u64 flags);
> > > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > > +                         union bpf_attr __user *uattr);
> > > +
> > >  void sock_map_unhash(struct sock *sk);
> > >  void sock_map_close(struct sock *sk, long timeout);
> > >  #else
> > > @@ -2124,6 +2127,12 @@ static inline int
> sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> > >  {
> > >       return -EOPNOTSUPP;
> > >  }
> > > +
> > > +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > > +                                       union bpf_attr __user
> *uattr)
> > > +{
> > > +     return -EINVAL;
> > > +}
> > >  #endif /* CONFIG_BPF_SYSCALL */
> > >  #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
> > >
> > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > index fa4505f9b611..9e0631f091a6 100644
> > > --- a/kernel/bpf/syscall.c
> > > +++ b/kernel/bpf/syscall.c
> > > @@ -3318,6 +3318,11 @@ static int bpf_prog_query(const union bpf_attr
> *attr,
> > >       case BPF_FLOW_DISSECTOR:
> > >       case BPF_SK_LOOKUP:
> > >               return netns_bpf_prog_query(attr, uattr);
> > > +     case BPF_SK_SKB_STREAM_PARSER:
> > > +     case BPF_SK_SKB_STREAM_VERDICT:
> > > +     case BPF_SK_MSG_VERDICT:
> > > +     case BPF_SK_SKB_VERDICT:
> > > +             return sock_map_bpf_prog_query(attr, uattr);
> > >       default:
> > >               return -EINVAL;
> > >       }
> > > diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> > > index 1827669eedd6..a424f51041ca 100644
> > > --- a/net/core/sock_map.c
> > > +++ b/net/core/sock_map.c
> > > @@ -1416,38 +1416,50 @@ static struct sk_psock_progs
> *sock_map_progs(struct bpf_map *map)
> > >       return NULL;
> > >  }
> > >
> > > -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> *prog,
> > > -                             struct bpf_prog *old, u32 which)
> > > +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog
> ***pprog,
> > > +                             u32 which)
> > >  {
> > >       struct sk_psock_progs *progs = sock_map_progs(map);
> > > -     struct bpf_prog **pprog;
> > >
> > >       if (!progs)
> > >               return -EOPNOTSUPP;
> > >
> > >       switch (which) {
> > >       case BPF_SK_MSG_VERDICT:
> > > -             pprog = &progs->msg_parser;
> > > +             *pprog = &progs->msg_parser;
> > >               break;
> > >  #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > >       case BPF_SK_SKB_STREAM_PARSER:
> > > -             pprog = &progs->stream_parser;
> > > +             *pprog = &progs->stream_parser;
> > >               break;
> > >  #endif
> > >       case BPF_SK_SKB_STREAM_VERDICT:
> > >               if (progs->skb_verdict)
> > >                       return -EBUSY;
> > > -             pprog = &progs->stream_verdict;
> > > +             *pprog = &progs->stream_verdict;
> > >               break;
> > >       case BPF_SK_SKB_VERDICT:
> > >               if (progs->stream_verdict)
> > >                       return -EBUSY;
> > > -             pprog = &progs->skb_verdict;
> > > +             *pprog = &progs->skb_verdict;
> > >               break;
> > >       default:
> > >               return -EOPNOTSUPP;
> > >       }
> > >
> > > +     return 0;
> > > +}
> > > +
> > > +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> *prog,
> > > +                             struct bpf_prog *old, u32 which)
> > > +{
> > > +     struct bpf_prog **pprog;
> > > +     int ret;
> > > +
> > > +     ret = sock_map_prog_lookup(map, &pprog, which);
> > > +     if (ret)
> > > +             return ret;
> > > +
> > >       if (old)
> > >               return psock_replace_prog(pprog, prog, old);
> > >
> > > @@ -1455,6 +1467,58 @@ static int sock_map_prog_update(struct bpf_map
> *map, struct bpf_prog *prog,
> > >       return 0;
> > >  }
> > >
> > > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > > +                         union bpf_attr __user *uattr)
> > > +{
> > > +     __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > > +     u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > > +     struct bpf_prog **pprog;
> > > +     struct bpf_prog *prog;
> > > +     struct bpf_map *map;
> > > +     struct fd f;
> > > +     u32 id = 0;
> > > +     int ret;
> > > +
> > > +     if (attr->query.query_flags)
> > > +             return -EINVAL;
> > > +
> > > +     f = fdget(ufd);
> > > +     map = __bpf_map_get(f);
> > > +     if (IS_ERR(map))
> > > +             return PTR_ERR(map);
> > > +
> > > +     rcu_read_lock();
> > > +
> > > +     ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > > +     if (ret)
> > > +             goto end;
> > > +
> > > +     prog = *pprog;
> > > +     prog_cnt = !prog ? 0 : 1;
> > > +
> > > +     if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > > +             goto end;
> > > +
> > > +     id = prog->aux->id;
> >
> > ^ This looks like a concurrent read/write.
> 
> You mean that bpf_prog_load() might be setting it in a different
> thread? I think ID is allocated and fixed before prog FD is available
> to the user-space, so prog->aux->id is set in stone and immutable in
> that regard.

What we're talking about here is that bpf_prog_free_id() may write to the id
field concurrently (clearing it when the prog is released).

> 
> >
> > Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
> > bpf_prog_free_id(). See [1] for rationale.
> >
> > [1]
> https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_O
> NCE.md
> >
> > > +
> > > +     /* we do not hold the refcnt, the bpf prog may be released
> > > +      * asynchronously and the id would be set to 0.
> > > +      */
> > > +     if (id == 0)
> > > +             prog_cnt = 0;
> > > +
> > > +end:
> > > +     rcu_read_unlock();
> > > +
> > > +     if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> > > +         (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> > > +         copy_to_user(&uattr->query.prog_cnt, &prog_cnt,
> sizeof(prog_cnt)))
> > > +             ret = -EFAULT;
> > > +
> > > +     fdput(f);
> > > +     return ret;
> > > +}
> > > +
> > >  static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> > >  {
> > >       switch (link->map->map_type) {

^ permalink raw reply	[flat|nested] 16+ messages in thread
* Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2022-01-14  5:44 zhudi (E)
  0 siblings, 0 replies; 16+ messages in thread
From: zhudi (E) @ 2022-01-14  5:44 UTC (permalink / raw)
  To: Jakub Sitnicki
  Cc: ast, davem, daniel, andrii, kafai, songliubraving, yhs,
	john.fastabend, kpsingh, bpf, linux-kernel, Luzhihao (luzhihao,
	Euler), Chenxiang (EulerOS)

> On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:
> > Right now there is no way to query whether BPF programs are
> > attached to a sockmap or not.
> >
> > we can use the standard interface in libbpf to query, such as:
> > bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> > the mapFd is the fd of sockmap.
> >
> > Signed-off-by: Di Zhu <zhudi2@huawei.com>
> > Acked-by: Yonghong Song <yhs@fb.com>
> > ---
> >  include/linux/bpf.h  |  9 +++++
> >  kernel/bpf/syscall.c |  5 +++
> >  net/core/sock_map.c  | 78
> ++++++++++++++++++++++++++++++++++++++++----
> >  3 files changed, 85 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 6e947cd91152..c4ca14c9f838 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog
> *prog,
> >  int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog
> *prog);
> >  int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type
> ptype);
> >  int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
> u64 flags);


	.......


> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > +			    union bpf_attr __user *uattr)
> > +{
> > +	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > +	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > +	struct bpf_prog **pprog;
> > +	struct bpf_prog *prog;
> > +	struct bpf_map *map;
> > +	struct fd f;
> > +	u32 id = 0;
> > +	int ret;
> > +
> > +	if (attr->query.query_flags)
> > +		return -EINVAL;
> > +
> > +	f = fdget(ufd);
> > +	map = __bpf_map_get(f);
> > +	if (IS_ERR(map))
> > +		return PTR_ERR(map);
> > +
> > +	rcu_read_lock();
> > +
> > +	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > +	if (ret)
> > +		goto end;
> > +
> > +	prog = *pprog;
> > +	prog_cnt = !prog ? 0 : 1;
> > +
> > +	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > +		goto end;
> > +
> > +	id = prog->aux->id;
> 
> ^ This looks like a concurrent read/write.
> 
> Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
> bpf_prog_free_id(). See [1] for rationale.
> 
> [1]
> https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_O
> NCE.md


Thanks for your advice, I will modify this code.

 
> > +
> > +	/* we do not hold the refcnt, the bpf prog may be released
> > +	 * asynchronously and the id would be set to 0.
> > +	 */
> > +	if (id == 0)
> > +		prog_cnt = 0;
> > +
> > +end:
> > +	rcu_read_unlock();
> > +
> > +	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> > +	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> > +	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
> > +		ret = -EFAULT;
> > +
> > +	fdput(f);
> > +	return ret;
> > +}
> > +
> >  static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> >  {
> >  	switch (link->map->map_type) {

^ permalink raw reply	[flat|nested] 16+ messages in thread
* [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2022-01-13  9:00 Di Zhu
  2022-01-13 16:15 ` Jakub Sitnicki
  0 siblings, 1 reply; 16+ messages in thread
From: Di Zhu @ 2022-01-13  9:00 UTC (permalink / raw)
  To: ast, davem, daniel, andrii, kafai, songliubraving, yhs,
	john.fastabend, kpsingh, jakub
  Cc: bpf, linux-kernel, zhudi2, luzhihao, rose.chen

Right now there is no way to query whether BPF programs are
attached to a sockmap or not.

We can use the standard interface in libbpf to query, such as:
bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
where mapFd is the fd of the sockmap.

Signed-off-by: Di Zhu <zhudi2@huawei.com>
Acked-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h  |  9 +++++
 kernel/bpf/syscall.c |  5 +++
 net/core/sock_map.c  | 78 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..c4ca14c9f838 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2124,6 +2127,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+					  union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fa4505f9b611..9e0631f091a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3318,6 +3318,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_FLOW_DISSECTOR:
 	case BPF_SK_LOOKUP:
 		return netns_bpf_prog_query(attr, uattr);
+	case BPF_SK_SKB_STREAM_PARSER:
+	case BPF_SK_SKB_STREAM_VERDICT:
+	case BPF_SK_MSG_VERDICT:
+	case BPF_SK_SKB_VERDICT:
+		return sock_map_bpf_prog_query(attr, uattr);
 	default:
 		return -EINVAL;
 	}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1827669eedd6..a424f51041ca 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 	return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-				struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+				u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
-	struct bpf_prog **pprog;
 
 	if (!progs)
 		return -EOPNOTSUPP;
 
 	switch (which) {
 	case BPF_SK_MSG_VERDICT:
-		pprog = &progs->msg_parser;
+		*pprog = &progs->msg_parser;
 		break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
-		pprog = &progs->stream_parser;
+		*pprog = &progs->stream_parser;
 		break;
 #endif
 	case BPF_SK_SKB_STREAM_VERDICT:
 		if (progs->skb_verdict)
 			return -EBUSY;
-		pprog = &progs->stream_verdict;
+		*pprog = &progs->stream_verdict;
 		break;
 	case BPF_SK_SKB_VERDICT:
 		if (progs->stream_verdict)
 			return -EBUSY;
-		pprog = &progs->skb_verdict;
+		*pprog = &progs->skb_verdict;
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which)
+{
+	struct bpf_prog **pprog;
+	int ret;
+
+	ret = sock_map_prog_lookup(map, &pprog, which);
+	if (ret)
+		return ret;
+
 	if (old)
 		return psock_replace_prog(pprog, prog, old);
 
@@ -1455,6 +1467,58 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+	struct bpf_prog **pprog;
+	struct bpf_prog *prog;
+	struct bpf_map *map;
+	struct fd f;
+	u32 id = 0;
+	int ret;
+
+	if (attr->query.query_flags)
+		return -EINVAL;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	rcu_read_lock();
+
+	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+	if (ret)
+		goto end;
+
+	prog = *pprog;
+	prog_cnt = !prog ? 0 : 1;
+
+	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+		goto end;
+
+	id = prog->aux->id;
+
+	/* we do not hold the refcnt, the bpf prog may be released
+	 * asynchronously and the id would be set to 0.
+	 */
+	if (id == 0)
+		prog_cnt = 0;
+
+end:
+	rcu_read_unlock();
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+		ret = -EFAULT;
+
+	fdput(f);
+	return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
 	switch (link->map->map_type) {
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread
* Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2021-11-08  2:13 zhudi (E)
  0 siblings, 0 replies; 16+ messages in thread
From: zhudi (E) @ 2021-11-08  2:13 UTC (permalink / raw)
  To: Cong Wang
  Cc: davem, ast, daniel, andrii, kafai, songliubraving, yhs,
	john.fastabend, kpsingh, jakub, bpf, linux-kernel

> On Thu, Nov 04, 2021 at 09:07:44AM +0800, Di Zhu wrote:
> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > +			    union bpf_attr __user *uattr)
> > +{
> > +	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > +	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > +	struct bpf_prog **pprog;
> > +	struct bpf_prog *prog;
> > +	struct bpf_map *map;
> > +	struct fd f;
> > +	u32 id = 0;
> > +	int ret;
> > +
> > +	if (attr->query.query_flags)
> > +		return -EINVAL;
> > +
> > +	f = fdget(ufd);
> > +	map = __bpf_map_get(f);
> > +	if (IS_ERR(map))
> > +		return PTR_ERR(map);
> > +
> > +	rcu_read_lock();
> > +
> > +	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > +	if (ret)
> > +		goto end;
> > +
> > +	prog = *pprog;
> > +	prog_cnt = (!prog) ? 0 : 1;
> > +
> > +	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > +		goto end;
> 

> This sanity check (except prog_cnt) can be moved before RCU read lock?

I think we should call sock_map_prog_lookup() in any case, because we may
need to return query results (such as -EOPNOTSUPP) that do not depend on
the prog_ids.

So this sanity check should be right behind that call and must be inside the RCU critical section.

> > +
> > +	id = prog->aux->id;
> > +	if (id == 0)
> > +		prog_cnt = 0;
> 
> The id seems generic, so why not handle it in bpf_prog_query() for all progs?

The prog id is generic, but different progs have different organizational forms,
so they can only be handled differently at present...

> > +
> > +end:
> > +	rcu_read_unlock();
> > +
> > +	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> 
> 'flags' is always 0 here, right? So this is not needed as uattr has been already
> cleared in __sys_bpf().

 I rechecked the code; it seems that __sys_bpf() does not do this clearing.

 
> Thanks.

^ permalink raw reply	[flat|nested] 16+ messages in thread
* [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2021-11-05  1:57 Di Zhu
  2021-11-05  4:23 ` Yonghong Song
  0 siblings, 1 reply; 16+ messages in thread
From: Di Zhu @ 2021-11-05  1:57 UTC (permalink / raw)
  To: davem, ast, daniel, andrii, kafai, songliubraving, yhs,
	john.fastabend, kpsingh, jakub
  Cc: bpf, linux-kernel, zhudi2

Right now there is no way to query whether BPF programs are
attached to a sockmap or not.

We can use the standard interface in libbpf to query, such as:
bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
where mapFd is the fd of the sockmap.

Signed-off-by: Di Zhu <zhudi2@huawei.com>
---
/* v2 */
- John Fastabend <john.fastabend@gmail.com>
  - add selftest code

/* v3 */
 - avoid sleeping caused by copy_to_user() in rcu critical zone

/* v4 */
 - Alexei Starovoitov <alexei.starovoitov@gmail.com>
  -split into two patches, one for core code and one for selftest.

/* v5 */
 - Yonghong Song <yhs@fb.com>
  -Some naming and formatting changes
  -use ASSERT_* macros in selftest code
  -Add the necessary comments
---
 include/linux/bpf.h  |  9 +++++
 kernel/bpf/syscall.c |  5 +++
 net/core/sock_map.c  | 78 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d604c8251d88..235ea7fc5fd8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1961,6 +1961,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2014,6 +2017,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+					  union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..748102c3e0c9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3275,6 +3275,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_FLOW_DISSECTOR:
 	case BPF_SK_LOOKUP:
 		return netns_bpf_prog_query(attr, uattr);
+	case BPF_SK_SKB_STREAM_PARSER:
+	case BPF_SK_SKB_STREAM_VERDICT:
+	case BPF_SK_MSG_VERDICT:
+	case BPF_SK_SKB_VERDICT:
+		return sock_map_bpf_prog_query(attr, uattr);
 	default:
 		return -EINVAL;
 	}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e252b8ec2b85..3801d565bddd 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1412,38 +1412,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 	return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-				struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+				u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
-	struct bpf_prog **pprog;
 
 	if (!progs)
 		return -EOPNOTSUPP;
 
 	switch (which) {
 	case BPF_SK_MSG_VERDICT:
-		pprog = &progs->msg_parser;
+		*pprog = &progs->msg_parser;
 		break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
-		pprog = &progs->stream_parser;
+		*pprog = &progs->stream_parser;
 		break;
 #endif
 	case BPF_SK_SKB_STREAM_VERDICT:
 		if (progs->skb_verdict)
 			return -EBUSY;
-		pprog = &progs->stream_verdict;
+		*pprog = &progs->stream_verdict;
 		break;
 	case BPF_SK_SKB_VERDICT:
 		if (progs->stream_verdict)
 			return -EBUSY;
-		pprog = &progs->skb_verdict;
+		*pprog = &progs->skb_verdict;
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which)
+{
+	struct bpf_prog **pprog;
+	int ret;
+
+	ret = sock_map_prog_lookup(map, &pprog, which);
+	if (ret)
+		return ret;
+
 	if (old)
 		return psock_replace_prog(pprog, prog, old);
 
@@ -1451,6 +1463,58 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+	struct bpf_prog **pprog;
+	struct bpf_prog *prog;
+	struct bpf_map *map;
+	struct fd f;
+	u32 id = 0;
+	int ret;
+
+	if (attr->query.query_flags)
+		return -EINVAL;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	rcu_read_lock();
+
+	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+	if (ret)
+		goto end;
+
+	prog = *pprog;
+	prog_cnt = !prog ? 0 : 1;
+
+	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+		goto end;
+
+	id = prog->aux->id;
+
+	/* we do not hold the refcnt, the bpf prog may be released
+	 * asynchronously and the id would be set to 0.
+	 */
+	if (id == 0)
+		prog_cnt = 0;
+
+end:
+	rcu_read_unlock();
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+		ret = -EFAULT;
+
+	fdput(f);
+	return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
 	switch (link->map->map_type) {
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread
* Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2021-11-04  6:35 zhudi (E)
  0 siblings, 0 replies; 16+ messages in thread
From: zhudi (E) @ 2021-11-04  6:35 UTC (permalink / raw)
  To: Yonghong Song, davem, ast, daniel, andrii, kafai, songliubraving,
	john.fastabend, kpsingh, jakub
  Cc: bpf, linux-kernel



> -----邮件原件-----
> 发件人: Yonghong Song [mailto:yhs@fb.com]
> 发送时间: 2021年11月4日 14:31
> 收件人: zhudi (E) <zhudi2@huawei.com>; davem@davemloft.net;
> ast@kernel.org; daniel@iogearbox.net; andrii@kernel.org; kafai@fb.com;
> songliubraving@fb.com; john.fastabend@gmail.com; kpsingh@kernel.org;
> jakub@cloudflare.com
> 抄送: bpf@vger.kernel.org; linux-kernel@vger.kernel.org
> 主题: Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs
> attached to sockmap
> 
> 
> 
> On 11/3/21 11:07 PM, zhudi (E) wrote:
> >   > On 11/3/21 6:07 PM, Di Zhu wrote:
> >>> Right now there is no way to query whether BPF programs are
> >>> attached to a sockmap or not.
> >>>
> >>> we can use the standard interface in libbpf to query, such as:
> >>> bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> >>> the mapFd is the fd of sockmap.
> >>>
> >>> Signed-off-by: Di Zhu <zhudi2@huawei.com>
> >>> ---
> >>> /* v2 */
> >>> - John Fastabend <john.fastabend@gmail.com>
> >>>     - add selftest code
> >>>
> >>> /* v3 */
> >>>    - avoid sleeping caused by copy_to_user() in rcu critical zone
> >>>
> >>> /* v4 */
> >>>    - Alexei Starovoitov <alexei.starovoitov@gmail.com>
> >>>     -split into two patches, one for core code and one for selftest.
> >>>
> >>> /* v5 */
> >>>    - Yonghong Song <yhs@fb.com>
> >>>     -Some naming and formatting changes
> >>> ---
> >>>    include/linux/bpf.h  |  9 ++++++
> >>>    kernel/bpf/syscall.c |  5 +++
> >>>    net/core/sock_map.c  | 74
> >> +++++++++++++++++++++++++++++++++++++++-----
> >>>    3 files changed, 81 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >>> index d604c8251d88..235ea7fc5fd8 100644
> >>> --- a/include/linux/bpf.h
> >>> +++ b/include/linux/bpf.h
> >>> @@ -1961,6 +1961,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog
> >> *prog,
> >>>    int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog
> >> *prog);
> >>>    int sock_map_prog_detach(const union bpf_attr *attr, enum
> bpf_prog_type
> >> ptype);
> >>>    int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> >> *value, u64 flags);
> >>> +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> >>> +			    union bpf_attr __user *uattr);
> >>> +
> >>>    void sock_map_unhash(struct sock *sk);
> >>>    void sock_map_close(struct sock *sk, long timeout);
> >>>    #else
> >>> @@ -2014,6 +2017,12 @@ static inline int
> sock_map_update_elem_sys(struct
> >> bpf_map *map, void *key, void
> >>>    {
> >>>    	return -EOPNOTSUPP;
> >>>    }
> >>> +
> >>> +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
> >>> +					  union bpf_attr __user *uattr)
> >>> +{
> >>> +	return -EINVAL;
> >>> +}
> >>>    #endif /* CONFIG_BPF_SYSCALL */
> >>>    #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
> >>>
> >>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >>> index 4e50c0bfdb7d..748102c3e0c9 100644
> >>> --- a/kernel/bpf/syscall.c
> >>> +++ b/kernel/bpf/syscall.c
> >>> @@ -3275,6 +3275,11 @@ static int bpf_prog_query(const union bpf_attr
> >> *attr,
> >>>    	case BPF_FLOW_DISSECTOR:
> >>>    	case BPF_SK_LOOKUP:
> >>>    		return netns_bpf_prog_query(attr, uattr);
> >>> +	case BPF_SK_SKB_STREAM_PARSER:
> >>> +	case BPF_SK_SKB_STREAM_VERDICT:
> >>> +	case BPF_SK_MSG_VERDICT:
> >>> +	case BPF_SK_SKB_VERDICT:
> >>> +		return sock_map_bpf_prog_query(attr, uattr);
> >>>    	default:
> >>>    		return -EINVAL;
> >>>    	}
> >>> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> >>> index e252b8ec2b85..0320d27550fe 100644
> >>> --- a/net/core/sock_map.c
> >>> +++ b/net/core/sock_map.c
> >>> @@ -1412,38 +1412,50 @@ static struct sk_psock_progs
> >> *sock_map_progs(struct bpf_map *map)
> >>>    	return NULL;
> >>>    }
> >>>
> >>> -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> >> *prog,
> >>> -				struct bpf_prog *old, u32 which)
> >>> +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog
> >> ***pprog,
> >>> +				u32 which)
> >>>    {
> >>>    	struct sk_psock_progs *progs = sock_map_progs(map);
> >>> -	struct bpf_prog **pprog;
> >>>
> >>>    	if (!progs)
> >>>    		return -EOPNOTSUPP;
> >>>
> >>>    	switch (which) {
> >>>    	case BPF_SK_MSG_VERDICT:
> >>> -		pprog = &progs->msg_parser;
> >>> +		*pprog = &progs->msg_parser;
> >>>    		break;
> >>>    #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> >>>    	case BPF_SK_SKB_STREAM_PARSER:
> >>> -		pprog = &progs->stream_parser;
> >>> +		*pprog = &progs->stream_parser;
> >>>    		break;
> >>>    #endif
> >>>    	case BPF_SK_SKB_STREAM_VERDICT:
> >>>    		if (progs->skb_verdict)
> >>>    			return -EBUSY;
> >>> -		pprog = &progs->stream_verdict;
> >>> +		*pprog = &progs->stream_verdict;
> >>>    		break;
> >>>    	case BPF_SK_SKB_VERDICT:
> >>>    		if (progs->stream_verdict)
> >>>    			return -EBUSY;
> >>> -		pprog = &progs->skb_verdict;
> >>> +		*pprog = &progs->skb_verdict;
> >>>    		break;
> >>>    	default:
> >>>    		return -EOPNOTSUPP;
> >>>    	}
> >>>
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> >> *prog,
> >>> +				struct bpf_prog *old, u32 which)
> >>> +{
> >>> +	struct bpf_prog **pprog;
> >>> +	int ret;
> >>> +
> >>> +	ret = sock_map_prog_lookup(map, &pprog, which);
> >>> +	if (ret)
> >>> +		return ret;
> >>> +
> >>>    	if (old)
> >>>    		return psock_replace_prog(pprog, prog, old);
> >>>
> >>> @@ -1451,6 +1463,54 @@ static int sock_map_prog_update(struct
> bpf_map
> >> *map, struct bpf_prog *prog,
> >>>    	return 0;
> >>>    }
> >>>
> >>> +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> >>> +			    union bpf_attr __user *uattr)
> >>> +{
> >>> +	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> >>> +	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> >>> +	struct bpf_prog **pprog;
> >>> +	struct bpf_prog *prog;
> >>> +	struct bpf_map *map;
> >>> +	struct fd f;
> >>> +	u32 id = 0;
> >>
> >> There is no need to initialize 'id = 0'. id will be assigned later.
> >
> >
> > if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) is met, the id will not be
> assigned later.
> > At the end of the function, we judge whether to copy the program ID by the
> value of the id.
> 
> Ya, that is true. id = 0 is indeed needed.
> 
> >
> >
> >>
> >>> +	int ret;
> >>> +
> >>> +	if (attr->query.query_flags)
> >>> +		return -EINVAL;
> >>> +
> >>> +	f = fdget(ufd);
> >>> +	map = __bpf_map_get(f);
> >>> +	if (IS_ERR(map))
> >>> +		return PTR_ERR(map);
> >>> +
> >>> +	rcu_read_lock();
> >>> +
> >>> +	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> >>> +	if (ret)
> >>> +		goto end;
> >>> +
> >>> +	prog = *pprog;
> >>> +	prog_cnt = (!prog) ? 0 : 1;
> >>
> >> (!prog) => !prog ?
> >
> > Yes, It's just my habit
> >
> >>
> >>> +
> >>> +	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> >>> +		goto end;
> >>> +
> >>> +	id = prog->aux->id;
> >>> +	if (id == 0)
> >>> +		prog_cnt = 0;
> >>
> >>
> >> id will never be 0, see function bpf_prog_alloc_id() is syscall.c.
> >> So 'if (id == 0)' check is not needed.
> >
> >
> > Because we do not hold the reference count, the program may be released
> synchronously
> 
>    synchronously => asynchronously
> 
> > and its ID will be set to 0 in bpf_prog_free_id().
> >
> > Is that right?
> 
> Just checked the code again. You are right. Maybe add a comment here to
> make it clear why we check id == 0 here?


Yes, I'll add a comment


> >
> >
> >>
> >>> +
> >>> +end:
> >>> +	rcu_read_unlock();
> >>> +
> >>> +	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> >>> +	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> >>> +	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt,
> sizeof(prog_cnt)))
> >>> +		ret = -EFAULT;
> >>> +
> >>> +	fdput(f);
> >>> +	return ret;
> >>> +}
> >>> +
> >>>    static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> >>>    {
> >>>    	switch (link->map->map_type) {
> >>>

^ permalink raw reply	[flat|nested] 16+ messages in thread
* Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2021-11-04  6:07 zhudi (E)
  2021-11-04  6:30 ` Yonghong Song
  0 siblings, 1 reply; 16+ messages in thread
From: zhudi (E) @ 2021-11-04  6:07 UTC (permalink / raw)
  To: Yonghong Song, davem, ast, daniel, andrii, kafai, songliubraving,
	john.fastabend, kpsingh, jakub
  Cc: bpf, linux-kernel

 > On 11/3/21 6:07 PM, Di Zhu wrote:
> > Right now there is no way to query whether BPF programs are
> > attached to a sockmap or not.
> >
> > we can use the standard interface in libbpf to query, such as:
> > bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> > the mapFd is the fd of sockmap.
> >
> > Signed-off-by: Di Zhu <zhudi2@huawei.com>
> > ---
> > /* v2 */
> > - John Fastabend <john.fastabend@gmail.com>
> >    - add selftest code
> >
> > /* v3 */
> >   - avoid sleeping caused by copy_to_user() in rcu critical zone
> >
> > /* v4 */
> >   - Alexei Starovoitov <alexei.starovoitov@gmail.com>
> >    -split into two patches, one for core code and one for selftest.
> >
> > /* v5 */
> >   - Yonghong Song <yhs@fb.com>
> >    -Some naming and formatting changes
> > ---
> >   include/linux/bpf.h  |  9 ++++++
> >   kernel/bpf/syscall.c |  5 +++
> >   net/core/sock_map.c  | 74
> +++++++++++++++++++++++++++++++++++++++-----
> >   3 files changed, 81 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index d604c8251d88..235ea7fc5fd8 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -1961,6 +1961,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog
> *prog,
> >   int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog
> *prog);
> >   int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type
> ptype);
> >   int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> *value, u64 flags);
> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > +			    union bpf_attr __user *uattr);
> > +
> >   void sock_map_unhash(struct sock *sk);
> >   void sock_map_close(struct sock *sk, long timeout);
> >   #else
> > @@ -2014,6 +2017,12 @@ static inline int sock_map_update_elem_sys(struct
> bpf_map *map, void *key, void
> >   {
> >   	return -EOPNOTSUPP;
> >   }
> > +
> > +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > +					  union bpf_attr __user *uattr)
> > +{
> > +	return -EINVAL;
> > +}
> >   #endif /* CONFIG_BPF_SYSCALL */
> >   #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
> >
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 4e50c0bfdb7d..748102c3e0c9 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -3275,6 +3275,11 @@ static int bpf_prog_query(const union bpf_attr
> *attr,
> >   	case BPF_FLOW_DISSECTOR:
> >   	case BPF_SK_LOOKUP:
> >   		return netns_bpf_prog_query(attr, uattr);
> > +	case BPF_SK_SKB_STREAM_PARSER:
> > +	case BPF_SK_SKB_STREAM_VERDICT:
> > +	case BPF_SK_MSG_VERDICT:
> > +	case BPF_SK_SKB_VERDICT:
> > +		return sock_map_bpf_prog_query(attr, uattr);
> >   	default:
> >   		return -EINVAL;
> >   	}
> > diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> > index e252b8ec2b85..0320d27550fe 100644
> > --- a/net/core/sock_map.c
> > +++ b/net/core/sock_map.c
> > @@ -1412,38 +1412,50 @@ static struct sk_psock_progs
> *sock_map_progs(struct bpf_map *map)
> >   	return NULL;
> >   }
> >
> > -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> *prog,
> > -				struct bpf_prog *old, u32 which)
> > +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog
> ***pprog,
> > +				u32 which)
> >   {
> >   	struct sk_psock_progs *progs = sock_map_progs(map);
> > -	struct bpf_prog **pprog;
> >
> >   	if (!progs)
> >   		return -EOPNOTSUPP;
> >
> >   	switch (which) {
> >   	case BPF_SK_MSG_VERDICT:
> > -		pprog = &progs->msg_parser;
> > +		*pprog = &progs->msg_parser;
> >   		break;
> >   #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> >   	case BPF_SK_SKB_STREAM_PARSER:
> > -		pprog = &progs->stream_parser;
> > +		*pprog = &progs->stream_parser;
> >   		break;
> >   #endif
> >   	case BPF_SK_SKB_STREAM_VERDICT:
> >   		if (progs->skb_verdict)
> >   			return -EBUSY;
> > -		pprog = &progs->stream_verdict;
> > +		*pprog = &progs->stream_verdict;
> >   		break;
> >   	case BPF_SK_SKB_VERDICT:
> >   		if (progs->stream_verdict)
> >   			return -EBUSY;
> > -		pprog = &progs->skb_verdict;
> > +		*pprog = &progs->skb_verdict;
> >   		break;
> >   	default:
> >   		return -EOPNOTSUPP;
> >   	}
> >
> > +	return 0;
> > +}
> > +
> > +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> *prog,
> > +				struct bpf_prog *old, u32 which)
> > +{
> > +	struct bpf_prog **pprog;
> > +	int ret;
> > +
> > +	ret = sock_map_prog_lookup(map, &pprog, which);
> > +	if (ret)
> > +		return ret;
> > +
> >   	if (old)
> >   		return psock_replace_prog(pprog, prog, old);
> >
> > @@ -1451,6 +1463,54 @@ static int sock_map_prog_update(struct bpf_map
> *map, struct bpf_prog *prog,
> >   	return 0;
> >   }
> >
> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > +			    union bpf_attr __user *uattr)
> > +{
> > +	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > +	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > +	struct bpf_prog **pprog;
> > +	struct bpf_prog *prog;
> > +	struct bpf_map *map;
> > +	struct fd f;
> > +	u32 id = 0;
> 
> There is no need to initialize 'id = 0'. id will be assigned later.


If the condition (!attr->query.prog_cnt || !prog_ids || !prog_cnt) is met, the id will not be assigned later.
At the end of the function, we decide whether to copy the program ID based on the value of id. 


> 
> > +	int ret;
> > +
> > +	if (attr->query.query_flags)
> > +		return -EINVAL;
> > +
> > +	f = fdget(ufd);
> > +	map = __bpf_map_get(f);
> > +	if (IS_ERR(map))
> > +		return PTR_ERR(map);
> > +
> > +	rcu_read_lock();
> > +
> > +	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > +	if (ret)
> > +		goto end;
> > +
> > +	prog = *pprog;
> > +	prog_cnt = (!prog) ? 0 : 1;
> 
> (!prog) => !prog ?

Yes, it's just my habit.

> 
> > +
> > +	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > +		goto end;
> > +
> > +	id = prog->aux->id;
> > +	if (id == 0)
> > +		prog_cnt = 0;
> 
> 
> id will never be 0, see function bpf_prog_alloc_id() is syscall.c.
> So 'if (id == 0)' check is not needed.


Because we do not hold the reference count, the program may be released asynchronously 
and its ID will be set to 0 in bpf_prog_free_id().

Is that right?


> 
> > +
> > +end:
> > +	rcu_read_unlock();
> > +
> > +	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> > +	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> > +	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
> > +		ret = -EFAULT;
> > +
> > +	fdput(f);
> > +	return ret;
> > +}
> > +
> >   static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> >   {
> >   	switch (link->map->map_type) {
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread
* [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
@ 2021-11-04  1:07 Di Zhu
  2021-11-04  5:31 ` Yonghong Song
  2021-11-05 19:51 ` Cong Wang
  0 siblings, 2 replies; 16+ messages in thread
From: Di Zhu @ 2021-11-04  1:07 UTC (permalink / raw)
  To: davem, ast, daniel, andrii, kafai, songliubraving, yhs,
	john.fastabend, kpsingh, jakub
  Cc: bpf, linux-kernel, zhudi2

Right now there is no way to query whether BPF programs are
attached to a sockmap or not.

We can use the standard interface in libbpf to query, such as:
bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
the mapFd is the fd of sockmap.

Signed-off-by: Di Zhu <zhudi2@huawei.com>
---
/* v2 */
- John Fastabend <john.fastabend@gmail.com>
  - add selftest code

/* v3 */
 - avoid sleeping caused by copy_to_user() in an RCU critical section

/* v4 */
 - Alexei Starovoitov <alexei.starovoitov@gmail.com>
  -split into two patches, one for core code and one for selftest.

/* v5 */
 - Yonghong Song <yhs@fb.com>
  -Some naming and formatting changes
---
 include/linux/bpf.h  |  9 ++++++
 kernel/bpf/syscall.c |  5 +++
 net/core/sock_map.c  | 74 +++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d604c8251d88..235ea7fc5fd8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1961,6 +1961,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr);
+
 void sock_map_unhash(struct sock *sk);
 void sock_map_close(struct sock *sk, long timeout);
 #else
@@ -2014,6 +2017,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+					  union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..748102c3e0c9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3275,6 +3275,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_FLOW_DISSECTOR:
 	case BPF_SK_LOOKUP:
 		return netns_bpf_prog_query(attr, uattr);
+	case BPF_SK_SKB_STREAM_PARSER:
+	case BPF_SK_SKB_STREAM_VERDICT:
+	case BPF_SK_MSG_VERDICT:
+	case BPF_SK_SKB_VERDICT:
+		return sock_map_bpf_prog_query(attr, uattr);
 	default:
 		return -EINVAL;
 	}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e252b8ec2b85..0320d27550fe 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1412,38 +1412,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
 	return NULL;
 }
 
-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
-				struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+				u32 which)
 {
 	struct sk_psock_progs *progs = sock_map_progs(map);
-	struct bpf_prog **pprog;
 
 	if (!progs)
 		return -EOPNOTSUPP;
 
 	switch (which) {
 	case BPF_SK_MSG_VERDICT:
-		pprog = &progs->msg_parser;
+		*pprog = &progs->msg_parser;
 		break;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 	case BPF_SK_SKB_STREAM_PARSER:
-		pprog = &progs->stream_parser;
+		*pprog = &progs->stream_parser;
 		break;
 #endif
 	case BPF_SK_SKB_STREAM_VERDICT:
 		if (progs->skb_verdict)
 			return -EBUSY;
-		pprog = &progs->stream_verdict;
+		*pprog = &progs->stream_verdict;
 		break;
 	case BPF_SK_SKB_VERDICT:
 		if (progs->stream_verdict)
 			return -EBUSY;
-		pprog = &progs->skb_verdict;
+		*pprog = &progs->skb_verdict;
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+				struct bpf_prog *old, u32 which)
+{
+	struct bpf_prog **pprog;
+	int ret;
+
+	ret = sock_map_prog_lookup(map, &pprog, which);
+	if (ret)
+		return ret;
+
 	if (old)
 		return psock_replace_prog(pprog, prog, old);
 
@@ -1451,6 +1463,54 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 	return 0;
 }
 
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+			    union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+	struct bpf_prog **pprog;
+	struct bpf_prog *prog;
+	struct bpf_map *map;
+	struct fd f;
+	u32 id = 0;
+	int ret;
+
+	if (attr->query.query_flags)
+		return -EINVAL;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	rcu_read_lock();
+
+	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+	if (ret)
+		goto end;
+
+	prog = *pprog;
+	prog_cnt = (!prog) ? 0 : 1;
+
+	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+		goto end;
+
+	id = prog->aux->id;
+	if (id == 0)
+		prog_cnt = 0;
+
+end:
+	rcu_read_unlock();
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+		ret = -EFAULT;
+
+	fdput(f);
+	return ret;
+}
+
 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
 	switch (link->map->map_type) {
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2022-01-15 19:10 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-15  2:38 [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap zhudi (E)
2022-01-15  2:53 ` Andrii Nakryiko
2022-01-15 19:09   ` Jakub Sitnicki
  -- strict thread matches above, loose matches on Subject: below --
2022-01-14  5:44 zhudi (E)
2022-01-13  9:00 Di Zhu
2022-01-13 16:15 ` Jakub Sitnicki
2022-01-15  1:22   ` Andrii Nakryiko
2021-11-08  2:13 zhudi (E)
2021-11-05  1:57 Di Zhu
2021-11-05  4:23 ` Yonghong Song
2021-11-04  6:35 zhudi (E)
2021-11-04  6:07 zhudi (E)
2021-11-04  6:30 ` Yonghong Song
2021-11-04  1:07 Di Zhu
2021-11-04  5:31 ` Yonghong Song
2021-11-05 19:51 ` Cong Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).