From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: bpf@vger.kernel.org, "Alexei Starovoitov" <ast@kernel.org>,
	"Andrii Nakryiko" <andrii@kernel.org>,
	"Daniel Borkmann" <daniel@iogearbox.net>,
	"Toke Høiland-Jørgensen" <toke@redhat.com>,
	"Jesper Dangaard Brouer" <brouer@redhat.com>
Subject: Re: [PATCH bpf-next v2 08/15] bpf: Adapt copy_map_value for multiple offset case
Date: Sat, 19 Mar 2022 11:34:40 -0700
Message-ID: <20220319183440.jkp25f4lo5o2xdck@ast-mbp.dhcp.thefacebook.com>
In-Reply-To: <20220317115957.3193097-9-memxor@gmail.com>

On Thu, Mar 17, 2022 at 05:29:50PM +0530, Kumar Kartikeya Dwivedi wrote:
> Since there can now be at most 10 offsets that need handling in
> copy_map_value, the manual shuffling and special casing is no longer
> going to work. Hence, let's generalise the copy_map_value function by
> using a sorted array of offsets to skip the regions that must be
> avoided while copying into and out of a map value.
> 
> When the map is created, we populate the offset array in struct
> bpf_map, with one extra element for map->value_size, which is used as
> the final offset to subtract the previous offset from. Since there can
> only be three sizes, we can avoid recording the size in struct bpf_map
> and only store the sorted offsets. Later, the size for each offset can
> be determined by comparing it to timer_off and spin_lock_off;
> otherwise it must be sizeof(u64) for a kptr.
> 
> Then, copy_map_value uses this sorted offset array to memcpy while
> skipping the timer, spin lock, and kptr fields.
> 
> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> ---
>  include/linux/bpf.h  | 59 +++++++++++++++++++++++++-------------------
>  kernel/bpf/syscall.c | 47 +++++++++++++++++++++++++++++++++++
>  2 files changed, 80 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 8ac3070aa5e6..f0f1e0d3bb2e 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -158,6 +158,10 @@ struct bpf_map_ops {
>  enum {
>  	/* Support at most 8 pointers in a BPF map value */
>  	BPF_MAP_VALUE_OFF_MAX = 8,
> +	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
> +				1 + /* for bpf_spin_lock */
> +				1 + /* for bpf_timer */
> +				1,  /* for map->value_size sentinel */
>  };
>  
>  enum {
> @@ -208,7 +212,12 @@ struct bpf_map {
>  	char name[BPF_OBJ_NAME_LEN];
>  	bool bypass_spec_v1;
>  	bool frozen; /* write-once; write-protected by freeze_mutex */
> -	/* 6 bytes hole */
> +	/* 2 bytes hole */
> +	struct {
> +		u32 off[BPF_MAP_OFF_ARR_MAX];
> +		u32 cnt;
> +	} off_arr;
> +	/* 20 bytes hole */
>  
>  	/* The 3rd and 4th cacheline with misc members to avoid false sharing
>  	 * particularly with refcounting.
> @@ -252,36 +261,34 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
>  		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
>  	if (unlikely(map_value_has_timer(map)))
>  		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
> +	if (unlikely(map_value_has_kptr(map))) {
> +		struct bpf_map_value_off *tab = map->kptr_off_tab;
> +		int i;
> +
> +		for (i = 0; i < tab->nr_off; i++)
> +			*(u64 *)(dst + tab->off[i].offset) = 0;
> +	}
>  }
>  
>  /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
>  static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
>  {
> -	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
> -
> -	if (unlikely(map_value_has_spin_lock(map))) {
> -		s_off = map->spin_lock_off;
> -		s_sz = sizeof(struct bpf_spin_lock);
> -	}
> -	if (unlikely(map_value_has_timer(map))) {
> -		t_off = map->timer_off;
> -		t_sz = sizeof(struct bpf_timer);
> -	}
> -
> -	if (unlikely(s_sz || t_sz)) {
> -		if (s_off < t_off || !s_sz) {
> -			swap(s_off, t_off);
> -			swap(s_sz, t_sz);
> -		}
> -		memcpy(dst, src, t_off);
> -		memcpy(dst + t_off + t_sz,
> -		       src + t_off + t_sz,
> -		       s_off - t_off - t_sz);
> -		memcpy(dst + s_off + s_sz,
> -		       src + s_off + s_sz,
> -		       map->value_size - s_off - s_sz);
> -	} else {
> -		memcpy(dst, src, map->value_size);
> +	int i;
> +
> +	memcpy(dst, src, map->off_arr.off[0]);
> +	for (i = 1; i < map->off_arr.cnt; i++) {
> +		u32 curr_off = map->off_arr.off[i - 1];
> +		u32 next_off = map->off_arr.off[i];
> +		u32 curr_sz;
> +
> +		if (map_value_has_spin_lock(map) && map->spin_lock_off == curr_off)
> +			curr_sz = sizeof(struct bpf_spin_lock);
> +		else if (map_value_has_timer(map) && map->timer_off == curr_off)
> +			curr_sz = sizeof(struct bpf_timer);
> +		else
> +			curr_sz = sizeof(u64);

Let's store the size in off_arr as well.
The memory consumption of a few u8-s is worth it.
A single load is faster than two if-s and a bunch of loads.
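
Something along these lines (untested sketch, just to illustrate the
idea; names are made up):

	struct {
		u32 off[BPF_MAP_OFF_ARR_MAX];
		u8  sz[BPF_MAP_OFF_ARR_MAX];	/* size of the field at off[i] */
		u32 cnt;
	} off_arr;

	/* the copy loop then only loads the size, no branches on map flags */
	memcpy(dst, src, map->off_arr.off[0]);
	for (i = 1; i < map->off_arr.cnt; i++) {
		u32 curr_off = map->off_arr.off[i - 1] + map->off_arr.sz[i - 1];
		u32 next_off = map->off_arr.off[i];

		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
	}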

> +		curr_off += curr_sz;
> +		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
>  	}
>  }
>  void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 87263b07f40b..69e8ea1be432 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -30,6 +30,7 @@
>  #include <linux/pgtable.h>
>  #include <linux/bpf_lsm.h>
>  #include <linux/poll.h>
> +#include <linux/sort.h>
>  #include <linux/bpf-netns.h>
>  #include <linux/rcupdate_trace.h>
>  #include <linux/memcontrol.h>
> @@ -850,6 +851,50 @@ int map_check_no_btf(const struct bpf_map *map,
>  	return -ENOTSUPP;
>  }
>  
> +static int map_off_arr_cmp(const void *_a, const void *_b)
> +{
> +	const u32 a = *(const u32 *)_a;
> +	const u32 b = *(const u32 *)_b;
> +
> +	if (a < b)
> +		return -1;
> +	else if (a > b)
> +		return 1;
> +	return 0;
> +}
> +
> +static void map_populate_off_arr(struct bpf_map *map)
> +{
> +	u32 i;
> +
> +	map->off_arr.cnt = 0;
> +	if (map_value_has_spin_lock(map)) {
> +		i = map->off_arr.cnt;
> +
> +		map->off_arr.off[i] = map->spin_lock_off;
> +		map->off_arr.cnt++;
> +	}
> +	if (map_value_has_timer(map)) {
> +		i = map->off_arr.cnt;
> +
> +		map->off_arr.off[i] = map->timer_off;
> +		map->off_arr.cnt++;
> +	}
> +	if (map_value_has_kptr(map)) {
> +		struct bpf_map_value_off *tab = map->kptr_off_tab;
> +		u32 j = map->off_arr.cnt;
> +
> +		for (i = 0; i < tab->nr_off; i++)
> +			map->off_arr.off[j + i] = tab->off[i].offset;
> +		map->off_arr.cnt += tab->nr_off;
> +	}
> +
> +	map->off_arr.off[map->off_arr.cnt++] = map->value_size;
> +	if (map->off_arr.cnt == 1)
> +		return;
> +	sort(map->off_arr.off, map->off_arr.cnt, sizeof(map->off_arr.off[0]), map_off_arr_cmp, NULL);
> +}
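
Just to make the layout concrete (made-up example, not taken from the
patch): for a 40 byte value with a bpf_spin_lock at offset 0, a
bpf_timer at offset 8 and one kptr at offset 24, this ends up with:

	off_arr.off = { 0, 8, 24, 40 }	/* sorted offsets + value_size sentinel */
	off_arr.cnt = 4

copy_map_value() would then copy only bytes [4, 8) and [32, 40),
skipping the 4 byte spin lock, the 16 byte timer and the 8 byte kptr.
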
> +
>  static int map_check_btf(struct bpf_map *map, const struct btf *btf,
>  			 u32 btf_key_id, u32 btf_value_id)
>  {
> @@ -1015,6 +1060,8 @@ static int map_create(union bpf_attr *attr)
>  			attr->btf_vmlinux_value_type_id;
>  	}
>  
> +	map_populate_off_arr(map);
> +
>  	err = security_bpf_map_alloc(map);
>  	if (err)
>  		goto free_map;
> -- 
> 2.35.1
> 

-- 

