BPF Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
@ 2021-01-27 17:12 Denis Salopek
  2021-02-09  5:44 ` Andrii Nakryiko
       [not found] ` <74e61161-3330-88b8-aa18-84d7357cd945@iogearbox.net>
  0 siblings, 2 replies; 7+ messages in thread
From: Denis Salopek @ 2021-01-27 17:12 UTC (permalink / raw)
  To: bpf; +Cc: luka.perkov, luka.oreskovic, juraj.vijtiuk

Extend the existing bpf_map_lookup_and_delete_elem() functionality to
hashtab maps, in addition to stacks and queues.
Create a new hashtab bpf_map_ops function that does lookup and deletion
of the element under the same bucket lock and add the created map_ops to
bpf.h.
Add the appropriate test case to 'maps' selftests.

Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
Cc: Luka Perkov <luka.perkov@sartura.hr>
---
 include/linux/bpf.h                     |  1 +
 kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
 kernel/bpf/syscall.c                    |  9 ++++++
 tools/testing/selftests/bpf/test_maps.c |  7 +++++
 4 files changed, 55 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af12fed..003c1505f0e3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -77,6 +77,7 @@ struct bpf_map_ops {
 
 	/* funcs callable from userspace and from eBPF programs */
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, void *value);
 	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
 	int (*map_delete_elem)(struct bpf_map *map, void *key);
 	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index c1ac7f964bc9..8d8463e0ea34 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -973,6 +973,43 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 	return 0;
 }
 
+/* Called from syscall or from eBPF program */
+static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, void *value)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_head *head;
+	struct bucket *b;
+	struct htab_elem *l;
+	unsigned long flags;
+	u32 hash, key_size;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	b = __select_bucket(htab, hash);
+	head = &b->head;
+
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+
+	if (l) {
+		copy_map_value(map, value, l->key + round_up(key_size, 8));
+		hlist_nulls_del_rcu(&l->hash_node);
+		free_htab_elem(htab, l);
+	} else {
+		ret = -ENOENT;
+	}
+
+	htab_unlock_bucket(htab, b, hash, flags);
+	return ret;
+}
+
 /* Called from syscall or from eBPF program */
 static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 				u64 map_flags)
@@ -1877,6 +1914,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e5999d86c76e..4ff45c8d1077 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1505,6 +1505,15 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 	    map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_pop_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_HASH) {
+		if (!bpf_map_is_dev_bound(map)) {
+			bpf_disable_instrumentation();
+			rcu_read_lock();
+			err = map->ops->map_lookup_and_delete_elem(map, key, value);
+			rcu_read_unlock();
+			bpf_enable_instrumentation();
+			maybe_wait_bpf_programs(map);
+		}
 	} else {
 		err = -ENOTSUPP;
 	}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..3e1900e46e1d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -65,6 +65,13 @@ static void test_hashmap(unsigned int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
 	key = 2;
+	value = 1234;
+	/* Insert key=2 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+	/* Check that key=2 matches the value and delete it */
+	assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
 	/* Check that key=2 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
-- 
2.26.2


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-01-27 17:12 [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab Denis Salopek
@ 2021-02-09  5:44 ` Andrii Nakryiko
  2021-02-16 18:00   ` Denis Salopek
       [not found] ` <74e61161-3330-88b8-aa18-84d7357cd945@iogearbox.net>
  1 sibling, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2021-02-09  5:44 UTC (permalink / raw)
  To: Denis Salopek; +Cc: bpf, Luka Perkov, Luka Oreskovic, Juraj Vijtiuk

On Wed, Jan 27, 2021 at 9:15 AM Denis Salopek <denis.salopek@sartura.hr> wrote:
>
> Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> hashtab maps, in addition to stacks and queues.
> Create a new hashtab bpf_map_ops function that does lookup and deletion
> of the element under the same bucket lock and add the created map_ops to
> bpf.h.
> Add the appropriate test case to 'maps' selftests.
>
> Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> Cc: Luka Perkov <luka.perkov@sartura.hr>
> ---

I think this patch somehow got lost, even though it seems like a good
addition. I'd recommend rebasing and re-submitting to let people take
a fresh look at this.

It would also be nice to have a test_progs test added, not just
test_maps. I'd also look at supporting lookup_and_delete for other
kinds of hash maps (LRU, per-CPU), so that the support is more
complete. Thanks!

>  include/linux/bpf.h                     |  1 +
>  kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
>  kernel/bpf/syscall.c                    |  9 ++++++
>  tools/testing/selftests/bpf/test_maps.c |  7 +++++
>  4 files changed, 55 insertions(+)
>

[...]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
       [not found] ` <74e61161-3330-88b8-aa18-84d7357cd945@iogearbox.net>
@ 2021-02-10 17:56   ` Denis Salopek
  2021-02-11  0:25     ` Daniel Borkmann
  0 siblings, 1 reply; 7+ messages in thread
From: Denis Salopek @ 2021-02-10 17:56 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: bpf, luka.perkov, luka.oreskovic, juraj.vijtiuk, andrii.nakryiko

On Fri, Jan 29, 2021 at 04:58:12PM +0100, Daniel Borkmann wrote:
> On 1/27/21 6:12 PM, Denis Salopek wrote:
> > Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> > hashtab maps, in addition to stacks and queues.
> > Create a new hashtab bpf_map_ops function that does lookup and deletion
> > of the element under the same bucket lock and add the created map_ops to
> > bpf.h.
> > Add the appropriate test case to 'maps' selftests.
> > 
> > Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> > Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> > Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> > Cc: Luka Perkov <luka.perkov@sartura.hr>
> 
> This is already possible in a more generic form, that is, via bpf(2) cmd
> BPF_MAP_LOOKUP_AND_DELETE_BATCH which is implemented by the different htab
> map flavors which also take the bucket lock. Did you have a try at that?
> 
> > ---
> >   include/linux/bpf.h                     |  1 +
> >   kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
> >   kernel/bpf/syscall.c                    |  9 ++++++
> >   tools/testing/selftests/bpf/test_maps.c |  7 +++++
> >   4 files changed, 55 insertions(+)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 1aac2af12fed..003c1505f0e3 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -77,6 +77,7 @@ struct bpf_map_ops {
> >   	/* funcs callable from userspace and from eBPF programs */
> >   	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> > +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, void *value);
> >   	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
> >   	int (*map_delete_elem)(struct bpf_map *map, void *key);
> >   	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
> > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > index c1ac7f964bc9..8d8463e0ea34 100644
> > --- a/kernel/bpf/hashtab.c
> > +++ b/kernel/bpf/hashtab.c
> > @@ -973,6 +973,43 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
> >   	return 0;
> >   }
> > +/* Called from syscall or from eBPF program */
> > +static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, void *value)
> > +{
> > +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
> > +	struct hlist_nulls_head *head;
> > +	struct bucket *b;
> > +	struct htab_elem *l;
> > +	unsigned long flags;
> > +	u32 hash, key_size;
> > +	int ret;
> > +
> > +	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
> > +
> > +	key_size = map->key_size;
> > +
> > +	hash = htab_map_hash(key, key_size, htab->hashrnd);
> > +	b = __select_bucket(htab, hash);
> > +	head = &b->head;
> > +
> > +	ret = htab_lock_bucket(htab, b, hash, &flags);
> > +	if (ret)
> > +		return ret;
> > +
> > +	l = lookup_elem_raw(head, hash, key, key_size);
> > +
> > +	if (l) {
> > +		copy_map_value(map, value, l->key + round_up(key_size, 8));
> > +		hlist_nulls_del_rcu(&l->hash_node);
> > +		free_htab_elem(htab, l);
> > +	} else {
> > +		ret = -ENOENT;
> > +	}
> > +
> > +	htab_unlock_bucket(htab, b, hash, flags);
> > +	return ret;
> > +}
> > +
> >   /* Called from syscall or from eBPF program */
> >   static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
> >   				u64 map_flags)
> > @@ -1877,6 +1914,7 @@ const struct bpf_map_ops htab_map_ops = {
> >   	.map_free = htab_map_free,
> >   	.map_get_next_key = htab_map_get_next_key,
> >   	.map_lookup_elem = htab_map_lookup_elem,
> > +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
> >   	.map_update_elem = htab_map_update_elem,
> >   	.map_delete_elem = htab_map_delete_elem,
> >   	.map_gen_lookup = htab_map_gen_lookup,
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index e5999d86c76e..4ff45c8d1077 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -1505,6 +1505,15 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >   	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> >   	    map->map_type == BPF_MAP_TYPE_STACK) {
> >   		err = map->ops->map_pop_elem(map, value);
> > +	} else if (map->map_type == BPF_MAP_TYPE_HASH) {
> > +		if (!bpf_map_is_dev_bound(map)) {
> > +			bpf_disable_instrumentation();
> > +			rcu_read_lock();
> > +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
> > +			rcu_read_unlock();
> > +			bpf_enable_instrumentation();
> > +			maybe_wait_bpf_programs(map);
> > +		}
> >   	} else {
> >   		err = -ENOTSUPP;
> >   	}

Hi,

Is there something wrong with adding hashmap functionality to the
map_lookup_and_delete_elem() function? As per commit
bd513cd08f10cbe28856f99ae951e86e86803861, adding this functionality for
other map types was the plan at the time this function was implemented,
so wouldn't this addition be useful? Otherwise, this function would only
be used for stack/queue popping. So why does it even exist in the first
place, if the _batch function can be used instead?

I understand the _batch function can do the same job, but is there a
reason why using it would be a better option? There is also a
performance impact in the _batch function when compared to my function -
not too big, but still worth the mention. In my benchmarks, the _batch
function takes 15% longer for lookup and deletion of one element.

Denis

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-02-10 17:56   ` Denis Salopek
@ 2021-02-11  0:25     ` Daniel Borkmann
  2021-02-16 19:46       ` Denis Salopek
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2021-02-11  0:25 UTC (permalink / raw)
  To: Denis Salopek
  Cc: bpf, luka.perkov, luka.oreskovic, juraj.vijtiuk, andrii.nakryiko

On 2/10/21 6:56 PM, Denis Salopek wrote:
> On Fri, Jan 29, 2021 at 04:58:12PM +0100, Daniel Borkmann wrote:
>> On 1/27/21 6:12 PM, Denis Salopek wrote:
>>> Extend the existing bpf_map_lookup_and_delete_elem() functionality to
>>> hashtab maps, in addition to stacks and queues.
>>> Create a new hashtab bpf_map_ops function that does lookup and deletion
>>> of the element under the same bucket lock and add the created map_ops to
>>> bpf.h.
>>> Add the appropriate test case to 'maps' selftests.
>>>
>>> Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
>>> Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
>>> Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
>>> Cc: Luka Perkov <luka.perkov@sartura.hr>
>>
>> This is already possible in a more generic form, that is, via bpf(2) cmd
>> BPF_MAP_LOOKUP_AND_DELETE_BATCH which is implemented by the different htab
>> map flavors which also take the bucket lock. Did you have a try at that?
>>
>>> ---
>>>    include/linux/bpf.h                     |  1 +
>>>    kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
>>>    kernel/bpf/syscall.c                    |  9 ++++++
>>>    tools/testing/selftests/bpf/test_maps.c |  7 +++++
>>>    4 files changed, 55 insertions(+)
>>>
>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>> index 1aac2af12fed..003c1505f0e3 100644
>>> --- a/include/linux/bpf.h
>>> +++ b/include/linux/bpf.h
>>> @@ -77,6 +77,7 @@ struct bpf_map_ops {
>>>    	/* funcs callable from userspace and from eBPF programs */
>>>    	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
>>> +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, void *value);
>>>    	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
>>>    	int (*map_delete_elem)(struct bpf_map *map, void *key);
>>>    	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
>>> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
>>> index c1ac7f964bc9..8d8463e0ea34 100644
>>> --- a/kernel/bpf/hashtab.c
>>> +++ b/kernel/bpf/hashtab.c
>>> @@ -973,6 +973,43 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
>>>    	return 0;
>>>    }
>>> +/* Called from syscall or from eBPF program */
>>> +static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, void *value)
>>> +{
>>> +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>>> +	struct hlist_nulls_head *head;
>>> +	struct bucket *b;
>>> +	struct htab_elem *l;
>>> +	unsigned long flags;
>>> +	u32 hash, key_size;
>>> +	int ret;
>>> +
>>> +	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
>>> +
>>> +	key_size = map->key_size;
>>> +
>>> +	hash = htab_map_hash(key, key_size, htab->hashrnd);
>>> +	b = __select_bucket(htab, hash);
>>> +	head = &b->head;
>>> +
>>> +	ret = htab_lock_bucket(htab, b, hash, &flags);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	l = lookup_elem_raw(head, hash, key, key_size);
>>> +
>>> +	if (l) {
>>> +		copy_map_value(map, value, l->key + round_up(key_size, 8));
>>> +		hlist_nulls_del_rcu(&l->hash_node);
>>> +		free_htab_elem(htab, l);
>>> +	} else {
>>> +		ret = -ENOENT;
>>> +	}
>>> +
>>> +	htab_unlock_bucket(htab, b, hash, flags);
>>> +	return ret;
>>> +}
>>> +
>>>    /* Called from syscall or from eBPF program */
>>>    static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
>>>    				u64 map_flags)
>>> @@ -1877,6 +1914,7 @@ const struct bpf_map_ops htab_map_ops = {
>>>    	.map_free = htab_map_free,
>>>    	.map_get_next_key = htab_map_get_next_key,
>>>    	.map_lookup_elem = htab_map_lookup_elem,
>>> +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
>>>    	.map_update_elem = htab_map_update_elem,
>>>    	.map_delete_elem = htab_map_delete_elem,
>>>    	.map_gen_lookup = htab_map_gen_lookup,
>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>> index e5999d86c76e..4ff45c8d1077 100644
>>> --- a/kernel/bpf/syscall.c
>>> +++ b/kernel/bpf/syscall.c
>>> @@ -1505,6 +1505,15 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>>>    	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
>>>    	    map->map_type == BPF_MAP_TYPE_STACK) {
>>>    		err = map->ops->map_pop_elem(map, value);
>>> +	} else if (map->map_type == BPF_MAP_TYPE_HASH) {
>>> +		if (!bpf_map_is_dev_bound(map)) {
>>> +			bpf_disable_instrumentation();
>>> +			rcu_read_lock();
>>> +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
>>> +			rcu_read_unlock();
>>> +			bpf_enable_instrumentation();
>>> +			maybe_wait_bpf_programs(map);
>>> +		}
>>>    	} else {
>>>    		err = -ENOTSUPP;
>>>    	}
> 
> Hi,
> 
> Is there something wrong with adding hashmap functionality to the
> map_lookup_and_delete_elem() function? As per commit
> bd513cd08f10cbe28856f99ae951e86e86803861, adding this functionality for
> other map types was the plan at the time this function was implemented,
> so wouldn't this addition be useful? Otherwise, this function would only
> be used for stack/queue popping. So why does it even exist in the first
> place, if the _batch function can be used instead?
> 
> I understand the _batch function can do the same job, but is there a
> reason why using it would be a better option? There is also a
> performance impact in the _batch function when compared to my function -
> not too big, but still worth the mention. In my benchmarks, the _batch
> function takes 15% longer for lookup and deletion of one element.

Thanks for the benchmark, out of curiosity do you happen to have numbers
at what point it pays off (2/3/more elems)? I don't think anything speaks
against single lookup + delete functionality given the cmd API is already
set in stone and it might be easier to use (though libbpf could also hide
this behind api), but it would be nice if we could ideally reuse the batch
lookup + delete code (e.g. bpf_map_do_batch()) in order to get this
transparently for free for the various other map types as well.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-02-09  5:44 ` Andrii Nakryiko
@ 2021-02-16 18:00   ` Denis Salopek
  2021-02-23  0:53     ` Andrii Nakryiko
  0 siblings, 1 reply; 7+ messages in thread
From: Denis Salopek @ 2021-02-16 18:00 UTC (permalink / raw)
  To: Andrii Nakryiko; +Cc: bpf, Luka Perkov, Luka Oreskovic, Juraj Vijtiuk

On Mon, Feb 08, 2021 at 09:44:59PM -0800, Andrii Nakryiko wrote:
> On Wed, Jan 27, 2021 at 9:15 AM Denis Salopek <denis.salopek@sartura.hr> wrote:
> >
> > Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> > hashtab maps, in addition to stacks and queues.
> > Create a new hashtab bpf_map_ops function that does lookup and deletion
> > of the element under the same bucket lock and add the created map_ops to
> > bpf.h.
> > Add the appropriate test case to 'maps' selftests.
> >
> > Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> > Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> > Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> > Cc: Luka Perkov <luka.perkov@sartura.hr>
> > ---
> 
> I think this patch somehow got lost, even though it seems like a good
> addition. I'd recommend rebasing and re-submitting to let people take
> a fresh look at this.
> 
> It would also be nice to have a test_progs test added, not just
> test_maps. I'd also look at supporting lookup_and_delete for other
> kinds of hash maps (LRU, per-CPU), so that the support is more
> complete. Thanks!
> 

Hi Andrii,

I'll also implement the LRU and per-CPU ones and resubmit. I don't quite
understand the test_progs, what kind of test(s) exactly should I add there?

Denis

> >  include/linux/bpf.h                     |  1 +
> >  kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
> >  kernel/bpf/syscall.c                    |  9 ++++++
> >  tools/testing/selftests/bpf/test_maps.c |  7 +++++
> >  4 files changed, 55 insertions(+)
> >
> 
> [...]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-02-11  0:25     ` Daniel Borkmann
@ 2021-02-16 19:46       ` Denis Salopek
  0 siblings, 0 replies; 7+ messages in thread
From: Denis Salopek @ 2021-02-16 19:46 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: bpf, luka.perkov, luka.oreskovic, juraj.vijtiuk, andrii.nakryiko

On Thu, Feb 11, 2021 at 01:25:00AM +0100, Daniel Borkmann wrote:
> On 2/10/21 6:56 PM, Denis Salopek wrote:
> > On Fri, Jan 29, 2021 at 04:58:12PM +0100, Daniel Borkmann wrote:
> > > On 1/27/21 6:12 PM, Denis Salopek wrote:
> > > > Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> > > > hashtab maps, in addition to stacks and queues.
> > > > Create a new hashtab bpf_map_ops function that does lookup and deletion
> > > > of the element under the same bucket lock and add the created map_ops to
> > > > bpf.h.
> > > > Add the appropriate test case to 'maps' selftests.
> > > > 
> > > > Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> > > > Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> > > > Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> > > > Cc: Luka Perkov <luka.perkov@sartura.hr>
> > > 
> > > This is already possible in a more generic form, that is, via bpf(2) cmd
> > > BPF_MAP_LOOKUP_AND_DELETE_BATCH which is implemented by the different htab
> > > map flavors which also take the bucket lock. Did you have a try at that?
> > > 
> > > > ---
> > > >    include/linux/bpf.h                     |  1 +
> > > >    kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
> > > >    kernel/bpf/syscall.c                    |  9 ++++++
> > > >    tools/testing/selftests/bpf/test_maps.c |  7 +++++
> > > >    4 files changed, 55 insertions(+)
> > > > 
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 1aac2af12fed..003c1505f0e3 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -77,6 +77,7 @@ struct bpf_map_ops {
> > > >    	/* funcs callable from userspace and from eBPF programs */
> > > >    	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> > > > +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, void *value);
> > > >    	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
> > > >    	int (*map_delete_elem)(struct bpf_map *map, void *key);
> > > >    	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
> > > > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > > > index c1ac7f964bc9..8d8463e0ea34 100644
> > > > --- a/kernel/bpf/hashtab.c
> > > > +++ b/kernel/bpf/hashtab.c
> > > > @@ -973,6 +973,43 @@ static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
> > > >    	return 0;
> > > >    }
> > > > +/* Called from syscall or from eBPF program */
> > > > +static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, void *value)
> > > > +{
> > > > +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
> > > > +	struct hlist_nulls_head *head;
> > > > +	struct bucket *b;
> > > > +	struct htab_elem *l;
> > > > +	unsigned long flags;
> > > > +	u32 hash, key_size;
> > > > +	int ret;
> > > > +
> > > > +	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
> > > > +
> > > > +	key_size = map->key_size;
> > > > +
> > > > +	hash = htab_map_hash(key, key_size, htab->hashrnd);
> > > > +	b = __select_bucket(htab, hash);
> > > > +	head = &b->head;
> > > > +
> > > > +	ret = htab_lock_bucket(htab, b, hash, &flags);
> > > > +	if (ret)
> > > > +		return ret;
> > > > +
> > > > +	l = lookup_elem_raw(head, hash, key, key_size);
> > > > +
> > > > +	if (l) {
> > > > +		copy_map_value(map, value, l->key + round_up(key_size, 8));
> > > > +		hlist_nulls_del_rcu(&l->hash_node);
> > > > +		free_htab_elem(htab, l);
> > > > +	} else {
> > > > +		ret = -ENOENT;
> > > > +	}
> > > > +
> > > > +	htab_unlock_bucket(htab, b, hash, flags);
> > > > +	return ret;
> > > > +}
> > > > +
> > > >    /* Called from syscall or from eBPF program */
> > > >    static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
> > > >    				u64 map_flags)
> > > > @@ -1877,6 +1914,7 @@ const struct bpf_map_ops htab_map_ops = {
> > > >    	.map_free = htab_map_free,
> > > >    	.map_get_next_key = htab_map_get_next_key,
> > > >    	.map_lookup_elem = htab_map_lookup_elem,
> > > > +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
> > > >    	.map_update_elem = htab_map_update_elem,
> > > >    	.map_delete_elem = htab_map_delete_elem,
> > > >    	.map_gen_lookup = htab_map_gen_lookup,
> > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > > index e5999d86c76e..4ff45c8d1077 100644
> > > > --- a/kernel/bpf/syscall.c
> > > > +++ b/kernel/bpf/syscall.c
> > > > @@ -1505,6 +1505,15 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> > > >    	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> > > >    	    map->map_type == BPF_MAP_TYPE_STACK) {
> > > >    		err = map->ops->map_pop_elem(map, value);
> > > > +	} else if (map->map_type == BPF_MAP_TYPE_HASH) {
> > > > +		if (!bpf_map_is_dev_bound(map)) {
> > > > +			bpf_disable_instrumentation();
> > > > +			rcu_read_lock();
> > > > +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
> > > > +			rcu_read_unlock();
> > > > +			bpf_enable_instrumentation();
> > > > +			maybe_wait_bpf_programs(map);
> > > > +		}
> > > >    	} else {
> > > >    		err = -ENOTSUPP;
> > > >    	}
> > 
> > Hi,
> > 
> > Is there something wrong with adding hashmap functionality to the
> > map_lookup_and_delete_elem() function? As per commit
> > bd513cd08f10cbe28856f99ae951e86e86803861, adding this functionality for
> > other map types was the plan at the time this function was implemented,
> > so wouldn't this addition be useful? Otherwise, this function would only
> > be used for stack/queue popping. So why does it even exist in the first
> > place, if the _batch function can be used instead?
> > 
> > I understand the _batch function can do the same job, but is there a
> > reason why using it would be a better option? There is also a
> > performance impact in the _batch function when compared to my function -
> > not too big, but still worth the mention. In my benchmarks, the _batch
> > function takes 15% longer for lookup and deletion of one element.
> 
> Thanks for the benchmark, out of curiosity do you happen to have numbers
> at what point it pays off (2/3/more elems)? I don't think anything speaks
> against single lookup + delete functionality given the cmd API is already
> set in stone and it might be easier to use (though libbpf could also hide
> this behind api), but it would be nice if we could ideally reuse the batch
> lookup + delete code (e.g. bpf_map_do_batch()) in order to get this
> transparently for free for the various other map types as well.
> 
> Thanks,
> Daniel

Hi Daniel,

I made a couple of benchmarks with different batch sizes and noticed
that the _batch function performance depends on the size of the map
we're working on - the number of elements it "pays off" depends on the
max entries of the map.
For a map with 10 elements, 100,000 L&Ds of 1 element takes:
batch   : 0.055s
nobatch : 0.050s
Same map, 100,000 L&Ds of 3 elements, batch is faster:
batch   : 0.111s
nobatch : 0.138s

A map with 1,000 elements, 100,000 L&Ds of 1 element takes:
batch   : 0.149s
nobatch : 0.050s
Same map, 100,000 L&Ds of 9 elements, batch gets faster:
batch   : 0.316s
nobatch : 0.418s

A map with 10,000 elements, 100,000 L&Ds of 1 element takes:
batch   : 1.690s
nobatch : 0.049s
Same map, 100,000 L&Ds of 125 elements, batch gets faster:
batch   : 5.032s
nobatch : 5.041s

My benchmark is simple: I load an XDP program with a hashmap, and run
some dummy "statistics" program L&D-ing the same map in a loop and
updating it with some elements so it's never empty, i.e. the L&D never
returns ENOENT.

I'm not sure how "realistic" usecase this is, or are there any cache
hits/misses influencing the results, so I don't know if this is the best
benchmark.

Denis

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-02-16 18:00   ` Denis Salopek
@ 2021-02-23  0:53     ` Andrii Nakryiko
  0 siblings, 0 replies; 7+ messages in thread
From: Andrii Nakryiko @ 2021-02-23  0:53 UTC (permalink / raw)
  To: Denis Salopek; +Cc: bpf, Luka Perkov, Luka Oreskovic, Juraj Vijtiuk

On Tue, Feb 16, 2021 at 10:00 AM Denis Salopek <denis.salopek@sartura.hr> wrote:
>
> On Mon, Feb 08, 2021 at 09:44:59PM -0800, Andrii Nakryiko wrote:
> > On Wed, Jan 27, 2021 at 9:15 AM Denis Salopek <denis.salopek@sartura.hr> wrote:
> > >
> > > Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> > > hashtab maps, in addition to stacks and queues.
> > > Create a new hashtab bpf_map_ops function that does lookup and deletion
> > > of the element under the same bucket lock and add the created map_ops to
> > > bpf.h.
> > > Add the appropriate test case to 'maps' selftests.
> > >
> > > Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> > > Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> > > Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> > > Cc: Luka Perkov <luka.perkov@sartura.hr>
> > > ---
> >
> > I think this patch somehow got lost, even though it seems like a good
> > addition. I'd recommend rebasing and re-submitting to let people take
> > a fresh look at this.
> >
> > It would also be nice to have a test_progs test added, not just
> > test_maps. I'd also look at supporting lookup_and_delete for other
> > kinds of hash maps (LRU, per-CPU), so that the support is more
> > complete. Thanks!
> >
>
> Hi Andrii,
>
> I'll also implement the LRU and per-CPU ones and resubmit. I don't quite
> understand the test_progs, what kind of test(s) exactly should I add there?

test_progs is our preferred test runner. It's a collection of
independent tests (sometimes including subtests). See any of the
recently added tests to get a feel for it. test_progs' tests resemble
real-world applications much closer than test_verifier or test_maps
tests, so it serves both as a readable test and as an API demo.

>
> Denis
>
> > >  include/linux/bpf.h                     |  1 +
> > >  kernel/bpf/hashtab.c                    | 38 +++++++++++++++++++++++++
> > >  kernel/bpf/syscall.c                    |  9 ++++++
> > >  tools/testing/selftests/bpf/test_maps.c |  7 +++++
> > >  4 files changed, 55 insertions(+)
> > >
> >
> > [...]

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, back to index

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-27 17:12 [PATCH bpf-next] bpf: add lookup_and_delete_elem support to hashtab Denis Salopek
2021-02-09  5:44 ` Andrii Nakryiko
2021-02-16 18:00   ` Denis Salopek
2021-02-23  0:53     ` Andrii Nakryiko
     [not found] ` <74e61161-3330-88b8-aa18-84d7357cd945@iogearbox.net>
2021-02-10 17:56   ` Denis Salopek
2021-02-11  0:25     ` Daniel Borkmann
2021-02-16 19:46       ` Denis Salopek

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org
	public-inbox-index bpf

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git