From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: bpf@vger.kernel.org, "Alexei Starovoitov" <ast@kernel.org>,
	"Andrii Nakryiko" <andrii@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Toke Høiland-Jørgensen" <toke@redhat.com>,
	"Jesper Dangaard Brouer" <hawk@kernel.org>,
	netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: Re: [PATCH bpf-next v1 08/15] bpf: Adapt copy_map_value for multiple offset case
Date: Mon, 21 Feb 2022 23:04:05 -0800	[thread overview]
Message-ID: <20220222070405.i6esgcf7ouqrmoef@ast-mbp.dhcp.thefacebook.com> (raw)
In-Reply-To: <20220220134813.3411982-9-memxor@gmail.com>

On Sun, Feb 20, 2022 at 07:18:06PM +0530, Kumar Kartikeya Dwivedi wrote:
> The changes in this patch deserve a closer look, so they have been split
> into their own independent patch. While earlier we only had to skip at
> most two objects while copying in and out of the map, we now have
> potentially many objects (at most 8 + 2 = 10, due to the
> BPF_MAP_VALUE_OFF_MAX limit).
> 
> Hence, divide the copy_map_value function into an inlined fast path and
> a function call to the slow path. The slow path handles the case of more
> than 3 offsets, while the most common cases (0, 1, 2, or 3 offsets) are
> handled in the inline function itself.
> 
> In copy_map_value_slow, we use 11 offsets, just to keep the for loop
> that copies the value free of edge cases for the last offset, by using
> map->value_size as the final offset from which the remaining area to
> copy is computed.
> 
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> ---
>  include/linux/bpf.h  | 43 +++++++++++++++++++++++++++++++---
>  kernel/bpf/syscall.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 95 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index ae599aaf8d4c..5d845ca02eba 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -253,12 +253,22 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
>  		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
>  	if (unlikely(map_value_has_timer(map)))
>  		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
> +	if (unlikely(map_value_has_ptr_to_btf_id(map))) {
> +		struct bpf_map_value_off *tab = map->ptr_off_tab;
> +		int i;
> +
> +		for (i = 0; i < tab->nr_off; i++)
> +			*(u64 *)(dst + tab->off[i].offset) = 0;
> +	}
>  }
>  
> +void copy_map_value_slow(struct bpf_map *map, void *dst, void *src, u32 s_off,
> +			 u32 s_sz, u32 t_off, u32 t_sz);
> +
>  /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
>  static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
>  {
> -	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
> +	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0, p_off = 0, p_sz = 0;
>  
>  	if (unlikely(map_value_has_spin_lock(map))) {
>  		s_off = map->spin_lock_off;
> @@ -268,13 +278,40 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
>  		t_off = map->timer_off;
>  		t_sz = sizeof(struct bpf_timer);
>  	}
> +	/* Multiple offset case is slow, offload to function */
> +	if (unlikely(map_value_has_ptr_to_btf_id(map))) {
> +		struct bpf_map_value_off *tab = map->ptr_off_tab;
> +
> +		/* Inline the likely common case */
> +		if (likely(tab->nr_off == 1)) {
> +			p_off = tab->off[0].offset;
> +			p_sz = sizeof(u64);
> +		} else {
> +			copy_map_value_slow(map, dst, src, s_off, s_sz, t_off, t_sz);
> +			return;
> +		}
> +	}
> +
> +	if (unlikely(s_sz || t_sz || p_sz)) {
> +		/* The order is p_off, t_off, s_off, use insertion sort */
>  
> -	if (unlikely(s_sz || t_sz)) {
> +		if (t_off < p_off || !t_sz) {
> +			swap(t_off, p_off);
> +			swap(t_sz, p_sz);
> +		}
>  		if (s_off < t_off || !s_sz) {
>  			swap(s_off, t_off);
>  			swap(s_sz, t_sz);
> +			if (t_off < p_off || !t_sz) {
> +				swap(t_off, p_off);
> +				swap(t_sz, p_sz);
> +			}
>  		}
> -		memcpy(dst, src, t_off);
> +
> +		memcpy(dst, src, p_off);
> +		memcpy(dst + p_off + p_sz,
> +		       src + p_off + p_sz,
> +		       t_off - p_off - p_sz);
>  		memcpy(dst + t_off + t_sz,
>  		       src + t_off + t_sz,
>  		       s_off - t_off - t_sz);
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index beb96866f34d..83d71d6912f5 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -30,6 +30,7 @@
>  #include <linux/pgtable.h>
>  #include <linux/bpf_lsm.h>
>  #include <linux/poll.h>
> +#include <linux/sort.h>
>  #include <linux/bpf-netns.h>
>  #include <linux/rcupdate_trace.h>
>  #include <linux/memcontrol.h>
> @@ -230,6 +231,60 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
>  	return err;
>  }
>  
> +static int copy_map_value_cmp(const void *_a, const void *_b)
> +{
> +	const u32 a = *(const u32 *)_a;
> +	const u32 b = *(const u32 *)_b;
> +
> +	/* We only need to sort based on offset */
> +	if (a < b)
> +		return -1;
> +	else if (a > b)
> +		return 1;
> +	return 0;
> +}
> +
> +void copy_map_value_slow(struct bpf_map *map, void *dst, void *src, u32 s_off,
> +			 u32 s_sz, u32 t_off, u32 t_sz)
> +{
> +	struct bpf_map_value_off *tab = map->ptr_off_tab; /* already set to non-NULL */
> +	/* 3 = 2 for bpf_timer, bpf_spin_lock, 1 for map->value_size sentinel */
> +	struct {
> +		u32 off;
> +		u32 sz;
> +	} off_arr[BPF_MAP_VALUE_OFF_MAX + 3];
> +	int i, cnt = 0;
> +
> +	/* Reconsider stack usage when bumping BPF_MAP_VALUE_OFF_MAX */
> +	BUILD_BUG_ON(sizeof(off_arr) != 88);
> +
> +	for (i = 0; i < tab->nr_off; i++) {
> +		off_arr[cnt].off = tab->off[i].offset;
> +		off_arr[cnt++].sz = sizeof(u64);
> +	}
> +	if (s_sz) {
> +		off_arr[cnt].off = s_off;
> +		off_arr[cnt++].sz = s_sz;
> +	}
> +	if (t_sz) {
> +		off_arr[cnt].off = t_off;
> +		off_arr[cnt++].sz = t_sz;
> +	}
> +	off_arr[cnt].off = map->value_size;
> +
> +	sort(off_arr, cnt, sizeof(off_arr[0]), copy_map_value_cmp, NULL);

Ouch. Sorting every time we need to copy a map value?
Please sort it once; 88 bytes in the map are worth it.
Especially since the "slow" version will trigger with just 2 kptrs
(if I understand this correctly).
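
For illustration, here is a minimal userspace sketch of the "sort it once"
idea: compute the sorted (offset, size) pairs a single time when the map is
created, store them next to the map, and let the copy routine just walk the
precomputed array. The names below (fake_map, map_precompute_off_arr,
map_copy_value, MAX_SPECIAL_FIELDS) are hypothetical stand-ins for the kernel
structures, not the actual bpf map API.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_SPECIAL_FIELDS 11	/* e.g. 8 kptrs + spin_lock + timer + sentinel */

struct off_sz {
	uint32_t off;
	uint32_t sz;
};

struct fake_map {
	uint32_t value_size;
	/* Precomputed once; the copy path never sorts. */
	struct off_sz off_arr[MAX_SPECIAL_FIELDS];
	int nr_off;
};

static int cmp_off(const void *a, const void *b)
{
	uint32_t oa = ((const struct off_sz *)a)->off;
	uint32_t ob = ((const struct off_sz *)b)->off;

	return (oa > ob) - (oa < ob);
}

/* Done once at "map creation" time. */
static void map_precompute_off_arr(struct fake_map *map,
				   const struct off_sz *fields, int n)
{
	memcpy(map->off_arr, fields, n * sizeof(*fields));
	qsort(map->off_arr, n, sizeof(map->off_arr[0]), cmp_off);
	/* Sentinel so the copy loop needs no special case for the tail. */
	map->off_arr[n].off = map->value_size;
	map->off_arr[n].sz = 0;
	map->nr_off = n;
}

/* Copy everything except the special fields; no sorting on this path. */
static void map_copy_value(const struct fake_map *map, void *dst, const void *src)
{
	uint32_t cur = 0;
	int i;

	for (i = 0; i <= map->nr_off; i++) {
		uint32_t next = map->off_arr[i].off;

		memcpy((char *)dst + cur, (const char *)src + cur, next - cur);
		cur = next + map->off_arr[i].sz;
	}
}

int main(void)
{
	struct fake_map map = { .value_size = 32 };
	struct off_sz fields[] = { { 16, 8 }, { 0, 8 } };	/* two "kptr" slots */
	char src[32], dst[32] = { 0 };

	memset(src, 0xab, sizeof(src));
	map_precompute_off_arr(&map, fields, 2);
	map_copy_value(&map, dst, src);

	/* Bytes 0-7 and 16-23 stay zero; the rest is copied. */
	printf("dst[8]=0x%02x dst[16]=0x%02x\n",
	       (unsigned char)dst[8], (unsigned char)dst[16]);
	return 0;
}

With the sorted array stored in the map, the per-copy cost is just a linear
walk over at most a handful of regions, regardless of how many kptr fields
the value has.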


Thread overview: 38+ messages
2022-02-20 13:47 [PATCH bpf-next v1 00/15] Introduce typed pointer support in BPF maps Kumar Kartikeya Dwivedi
2022-02-20 13:47 ` [PATCH bpf-next v1 01/15] bpf: Factor out fd returning from bpf_btf_find_by_name_kind Kumar Kartikeya Dwivedi
2022-02-22  5:28   ` Alexei Starovoitov
2022-02-23  3:05     ` Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 02/15] bpf: Make btf_find_field more generic Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 03/15] bpf: Allow storing PTR_TO_BTF_ID in map Kumar Kartikeya Dwivedi
2022-02-22  6:46   ` Alexei Starovoitov
2022-02-23  3:09     ` Kumar Kartikeya Dwivedi
2022-02-23 21:46       ` Alexei Starovoitov
2022-02-20 13:48 ` [PATCH bpf-next v1 04/15] bpf: Allow storing referenced " Kumar Kartikeya Dwivedi
2022-02-22  6:53   ` Alexei Starovoitov
2022-02-22  7:10     ` Kumar Kartikeya Dwivedi
2022-02-22 16:20       ` Alexei Starovoitov
2022-02-23  3:04         ` Kumar Kartikeya Dwivedi
2022-02-23 21:52           ` Alexei Starovoitov
2022-02-24  8:43             ` Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 05/15] bpf: Allow storing PTR_TO_PERCPU_BTF_ID " Kumar Kartikeya Dwivedi
2022-02-20 20:40   ` kernel test robot
2022-02-20 13:48 ` [PATCH bpf-next v1 06/15] bpf: Allow storing __user PTR_TO_BTF_ID " Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 07/15] bpf: Prevent escaping of pointers loaded from maps Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 08/15] bpf: Adapt copy_map_value for multiple offset case Kumar Kartikeya Dwivedi
2022-02-22  7:04   ` Alexei Starovoitov [this message]
2022-02-23  3:13     ` Kumar Kartikeya Dwivedi
2022-02-23 21:41       ` Alexei Starovoitov
2022-02-20 13:48 ` [PATCH bpf-next v1 09/15] bpf: Populate pairs of btf_id and destructor kfunc in btf Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 10/15] bpf: Wire up freeing of referenced PTR_TO_BTF_ID in map Kumar Kartikeya Dwivedi
2022-02-20 21:43   ` kernel test robot
2022-02-20 22:55   ` kernel test robot
2022-02-21  0:39   ` kernel test robot
2022-02-20 13:48 ` [PATCH bpf-next v1 11/15] bpf: Teach verifier about kptr_get style kfunc helpers Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 12/15] net/netfilter: Add bpf_ct_kptr_get helper Kumar Kartikeya Dwivedi
2022-02-21  4:35   ` kernel test robot
2022-02-20 13:48 ` [PATCH bpf-next v1 13/15] libbpf: Add __kptr* macros to bpf_helpers.h Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 14/15] selftests/bpf: Add C tests for PTR_TO_BTF_ID in map Kumar Kartikeya Dwivedi
2022-02-20 13:48 ` [PATCH bpf-next v1 15/15] selftests/bpf: Add verifier " Kumar Kartikeya Dwivedi
2022-02-22  6:05 ` [PATCH bpf-next v1 00/15] Introduce typed pointer support in BPF maps Song Liu
2022-02-22  8:21   ` Kumar Kartikeya Dwivedi
2022-02-23  7:29     ` Song Liu
