All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yhs@fb.com>
To: Di Zhu <zhudi2@huawei.com>, <davem@davemloft.net>,
	<ast@kernel.org>, <daniel@iogearbox.net>, <andrii@kernel.org>,
	<kafai@fb.com>, <songliubraving@fb.com>,
	<john.fastabend@gmail.com>, <kpsingh@kernel.org>,
	<jakub@cloudflare.com>
Cc: <bpf@vger.kernel.org>, <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH bpf-next v4 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap
Date: Tue, 2 Nov 2021 13:11:11 -0700	[thread overview]
Message-ID: <7511b8fd-c5b6-96b3-8b1d-e7eeeb0b2c33@fb.com> (raw)
In-Reply-To: <20211102084856.483534-1-zhudi2@huawei.com>



On 11/2/21 1:48 AM, Di Zhu wrote:
> Right now there is no way to query whether BPF programs are
> attached to a sockmap or not.
> 
> we can use the standard interface in libbpf to query, such as:
> bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> the mapFd is the fd of sockmap.
> 
> Signed-off-by: Di Zhu <zhudi2@huawei.com>
> ---
>   include/linux/bpf.h  |  9 +++++
>   kernel/bpf/syscall.c |  5 +++
>   net/core/sock_map.c  | 88 ++++++++++++++++++++++++++++++++++++++++----
>   3 files changed, 95 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index d604c8251d88..594ca91992db 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1961,6 +1961,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
>   int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
>   int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
>   int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
> +int sockmap_bpf_prog_query(const union bpf_attr *attr,
> +			   union bpf_attr __user *uattr);

All previous functions are with prefix "sock_map". Why you choose
a different prefix "sockmap"?

> +
>   void sock_map_unhash(struct sock *sk);
>   void sock_map_close(struct sock *sk, long timeout);
>   #else
> @@ -2014,6 +2017,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
>   {
>   	return -EOPNOTSUPP;
>   }
> +
> +static inline int sockmap_bpf_prog_query(const union bpf_attr *attr,
> +					 union bpf_attr __user *uattr)
> +{
> +	return -EINVAL;
> +}
>   #endif /* CONFIG_BPF_SYSCALL */
>   #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
>   
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 4e50c0bfdb7d..17faeff8f85f 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3275,6 +3275,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
>   	case BPF_FLOW_DISSECTOR:
>   	case BPF_SK_LOOKUP:
>   		return netns_bpf_prog_query(attr, uattr);
> +	case BPF_SK_SKB_STREAM_PARSER:
> +	case BPF_SK_SKB_STREAM_VERDICT:
> +	case BPF_SK_MSG_VERDICT:
> +	case BPF_SK_SKB_VERDICT:
> +		return sockmap_bpf_prog_query(attr, uattr);
>   	default:
>   		return -EINVAL;
>   	}
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index e252b8ec2b85..ca65ed0004d3 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -1412,38 +1412,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
>   	return NULL;
>   }
>   
> -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> -				struct bpf_prog *old, u32 which)
> +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog **pprog[],

Can we just change "**pprog[]" to "***pprog"? In the code, you really 
just pass the address of the decl "struct bpf_prog **pprog;" to the 
function.

> +				u32 which)

Some format issue here?

>   {
>   	struct sk_psock_progs *progs = sock_map_progs(map);
> -	struct bpf_prog **pprog;
>   
>   	if (!progs)
>   		return -EOPNOTSUPP;
>   
>   	switch (which) {
>   	case BPF_SK_MSG_VERDICT:
> -		pprog = &progs->msg_parser;
> +		*pprog = &progs->msg_parser;
>   		break;
>   #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
>   	case BPF_SK_SKB_STREAM_PARSER:
> -		pprog = &progs->stream_parser;
> +		*pprog = &progs->stream_parser;
>   		break;
>   #endif
>   	case BPF_SK_SKB_STREAM_VERDICT:
>   		if (progs->skb_verdict)
>   			return -EBUSY;
> -		pprog = &progs->stream_verdict;
> +		*pprog = &progs->stream_verdict;
>   		break;
>   	case BPF_SK_SKB_VERDICT:
>   		if (progs->stream_verdict)
>   			return -EBUSY;
> -		pprog = &progs->skb_verdict;
> +		*pprog = &progs->skb_verdict;
>   		break;
>   	default:
>   		return -EOPNOTSUPP;
>   	}
>   
> +	return 0;
> +}
> +
> +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> +				struct bpf_prog *old, u32 which)

Some format issue here?

> +{
> +	struct bpf_prog **pprog;
> +	int ret;
> +
> +	ret = sock_map_prog_lookup(map, &pprog, which);
> +	if (ret)
> +		return ret;
> +
>   	if (old)
>   		return psock_replace_prog(pprog, prog, old);
>   
> @@ -1451,6 +1463,68 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
>   	return 0;
>   }
>   
> +int sockmap_bpf_prog_query(const union bpf_attr *attr,
> +			   union bpf_attr __user *uattr)

Format issue here?

> +{
> +	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);

Typically we use u32 in the kernel code. But I know there are __u32 
usage as well, esp. with __user attributes. I put a comment here just
in case that somebody else has a different opinion.

> +	u32 prog_cnt = 0, flags = 0;
> +	u32 ufd = attr->target_fd;

You can merge the above u32 together.

> +	struct bpf_prog **pprog;
> +	struct bpf_prog *prog;
> +	struct bpf_map *map;
> +	struct fd f;
> +	int ret;
> +	u32 id = 0;

to maintain reverse christmas tree?

> +
> +	if (attr->query.query_flags)
> +		return -EINVAL;
> +
> +	f = fdget(ufd);
> +	map = __bpf_map_get(f);
> +	if (IS_ERR(map))
> +		return PTR_ERR(map);
> +
> +	rcu_read_lock();
> +
> +	ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> +	if (ret)
> +		goto end;
> +
> +	prog = *pprog;
> +	prog_cnt = (!prog) ? 0 : 1;
> +
> +	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> +		goto end;
> +
> +	prog = bpf_prog_inc_not_zero(prog);

Could you explain why we need bpf_prog_inc_not_zero here?
We are inside rcu_read_lock/unlock region. We got a program
from *pprog. If this program is not NULL, this program should
not disappear since we are in rcu read lock region, right?
Maybe I missed something, it would be good you can explain
the scenario you try to pretect here.

> +	if (IS_ERR(prog)) {
> +		ret = PTR_ERR(prog);
> +		goto end;
> +	}
> +	id = prog->aux->id;
> +	bpf_prog_put(prog);
> +
> +end:
> +	rcu_read_unlock();
> +
> +	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) {
> +		ret = -EFAULT;
> +		goto err;
> +	}
> +	if (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) {
> +		ret = -EFAULT;
> +		goto err;
> +	}
> +	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) {
> +		ret = -EFAULT;
> +		goto err;
> +	}

You can do

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
	    (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
	    copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
		ret = -EFAULT;

to make code a little bit concise.

> +
> +err:
> +	fdput(f);
> +	return ret;
> +}
> +
>   static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
>   {
>   	switch (link->map->map_type) {
> 

  parent reply	other threads:[~2021-11-02 20:11 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-02  8:48 [PATCH bpf-next v4 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap Di Zhu
2021-11-02  8:48 ` [PATCH bpf-next v4 2/2] selftests: bpf: test " Di Zhu
2021-11-02 20:24   ` Yonghong Song
2021-11-02 20:11 ` Yonghong Song [this message]
2021-11-02 21:16   ` [PATCH bpf-next v4 1/2] bpf: support " Alexei Starovoitov
  -- strict thread matches above, loose matches on Subject: below --
2021-11-03  2:25 zhudi (E)
2021-11-03  2:23 zhudi (E)
2021-11-03  2:37 ` Yonghong Song
2021-11-02  5:59 Di Zhu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7511b8fd-c5b6-96b3-8b1d-e7eeeb0b2c33@fb.com \
    --to=yhs@fb.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=jakub@cloudflare.com \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=zhudi2@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.