bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab
@ 2021-02-28  9:40 Denis Salopek
  2021-03-02  2:02 ` Yonghong Song
  0 siblings, 1 reply; 4+ messages in thread
From: Denis Salopek @ 2021-02-28  9:40 UTC (permalink / raw)
  To: bpf; +Cc: juraj.vijtiuk, luka.oreskovic, luka.perkov

Extend the existing bpf_map_lookup_and_delete_elem() functionality to
hashtab maps, in addition to stacks and queues.
Create a new hashtab bpf_map_ops function that does lookup and deletion
of the element under the same bucket lock and add the created map_ops to
bpf.h.
Add the appropriate test cases to 'maps' and 'lru_map' selftests
accompanied with new test_progs.

Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
Cc: Luka Perkov <luka.perkov@sartura.hr>
Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
---
v2: Add functionality for LRU/per-CPU, add test_progs tests.
---
 include/linux/bpf.h                           |   2 +
 kernel/bpf/hashtab.c                          |  80 +++++
 kernel/bpf/syscall.c                          |  14 +-
 .../bpf/prog_tests/lookup_and_delete.c        | 283 ++++++++++++++++++
 .../bpf/progs/test_lookup_and_delete.c        |  26 ++
 tools/testing/selftests/bpf/test_lru_map.c    |   8 +
 tools/testing/selftests/bpf/test_maps.c       |  19 +-
 7 files changed, 430 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4c730863fa77..0bcc4f89af40 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -67,6 +67,8 @@ struct bpf_map_ops {
 	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
 	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
 				union bpf_attr __user *uattr);
+	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
+					  void *value);
 	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
 					   const union bpf_attr *attr,
 					   union bpf_attr __user *uattr);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 330d721dd2af..8c3334d1b6b3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
+static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					     void *value, bool is_lru_map,
+					     bool is_percpu)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 hash, key_size, value_size;
+	struct hlist_nulls_head *head;
+	int cpu, off = 0, ret;
+	struct htab_elem *l;
+	unsigned long flags;
+	void __percpu *pptr;
+	struct bucket *b;
+
+	key_size = map->key_size;
+	value_size = round_up(map->value_size, 8);
+
+	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	b = __select_bucket(htab, hash);
+	head = &b->head;
+
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+	if (l) {
+		if (is_percpu) {
+			pptr = htab_elem_get_ptr(l, key_size);
+			for_each_possible_cpu(cpu) {
+				bpf_long_memcpy(value + off,
+						per_cpu_ptr(pptr, cpu),
+						value_size);
+				off += value_size;
+			}
+		} else {
+			copy_map_value(map, value, l->key + round_up(key_size, 8));
+		}
+
+		hlist_nulls_del_rcu(&l->hash_node);
+		if (!is_lru_map)
+			free_htab_elem(htab, l);
+	} else
+		ret = -ENOENT;
+
+	htab_unlock_bucket(htab, b, hash, flags);
+
+	if (is_lru_map && l)
+		bpf_lru_push_free(&htab->lru, &l->lru_node);
+
+	return ret;
+}
+
+static int htab_map_lookup_and_delete_elem(struct bpf_map *map,
+					   void *key, void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, false, false);
+}
+
+static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+						  void *key, void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, false, true);
+}
+
+static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					       void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, true, false);
+}
+
+static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+						      void *key, void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, true, true);
+}
+
 static int
 __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 				   const union bpf_attr *attr,
@@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
 	.map_update_elem = htab_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_lru_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
 	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
 	.map_update_elem = htab_lru_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_percpu_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
 	.map_update_elem = htab_percpu_map_update_elem,
 	.map_delete_elem = htab_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
 	.map_free = htab_map_free,
 	.map_get_next_key = htab_map_get_next_key,
 	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
 	.map_update_elem = htab_lru_percpu_map_update_elem,
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c859bc46d06c..2634aa4a2f37 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
-	value_size = map->value_size;
+	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
 	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
 	    map->map_type == BPF_MAP_TYPE_STACK) {
 		err = map->ops->map_pop_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
+		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		if (!bpf_map_is_dev_bound(map)) {
+			bpf_disable_instrumentation();
+			rcu_read_lock();
+			err = map->ops->map_lookup_and_delete_elem(map, key, value);
+			rcu_read_unlock();
+			bpf_enable_instrumentation();
+			maybe_wait_bpf_programs(map);
+		}
 	} else {
 		err = -ENOTSUPP;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..05123bbcdc1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+	__u64 key, value = START_VALUE;
+	int err;
+
+	for (key = 1; key < MAX_ENTRIES + 1; key++) {
+		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+	BPF_DECLARE_PERCPU(__u64, value);
+	int i, err;
+	u64 key;
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(value, i) = START_VALUE;
+
+	for (key = 1; key < MAX_ENTRIES + 1; key++) {
+		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+			return -1;
+	}
+
+	return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+						 int *map_fd)
+{
+	struct test_lookup_and_delete *skel;
+	int err;
+
+	skel = test_lookup_and_delete__open();
+	if (CHECK(!skel, "test_lookup_and_delete__open", "failed\n"))
+		return NULL;
+
+	err = bpf_map__set_type(skel->maps.hash_map, map_type);
+	if (CHECK(err, "bpf_map__set_type", "failed\n"))
+		goto error;
+
+	err = test_lookup_and_delete__load(skel);
+	if (CHECK(err, "test_lookup_and_delete__load", "failed\n"))
+		goto error;
+
+	*map_fd = bpf_map__fd(skel->maps.hash_map);
+	if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n"))
+		goto error;
+
+	return skel;
+
+error:
+	test_lookup_and_delete__destroy(skel);
+	return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+		      __u64 value)
+{
+	int err;
+
+	skel->bss->set_pid = getpid();
+	skel->bss->set_key = key;
+	skel->bss->set_value = value;
+
+	err = test_lookup_and_delete__attach(skel);
+	if (CHECK(err, "test_lookup_and_delete__attach", "failed\n"))
+		return -1;
+
+	syscall(__NR_getpgid);
+
+	test_lookup_and_delete__detach(skel);
+
+	return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	__u64 key, value;
+	int map_fd, err;
+
+	/* Setup program and fill the map. */
+	skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+	if (CHECK(!skel, "setup_prog", "failed\n"))
+		return;
+
+	err = fill_values(map_fd);
+	if (CHECK(err, "fill_values", "failed\n"))
+		goto cleanup;
+
+	/* Lookup and delete element. */
+	key = 1;
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	if (CHECK(err, "bpf_map_lookup_and_delete_elem", "failed, errno=%d\n",
+		  errno))
+		goto cleanup;
+
+	/* Fetched value should match the initially set value. */
+	if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+		  "value=%lld\n", value))
+		goto cleanup;
+
+	/* Check that the entry is non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (CHECK(!err, "bpf_map_lookup_elem", "succeded, should fail\n"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	BPF_DECLARE_PERCPU(__u64, value);
+	int map_fd, err, i;
+	__u64 key, val;
+
+	/* Setup program and fill the map. */
+	skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+	if (CHECK(!skel, "setup_prog", "failed\n"))
+		return;
+
+	err = fill_values_percpu(map_fd);
+	if (CHECK(err, "fill_values_percpu", "failed\n"))
+		goto cleanup;
+
+	/* Lookup and delete element. */
+	key = 1;
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+	if (CHECK(err, "bpf_map_lookup_and_delete_elem", "failed, errno=%d\n",
+		  errno))
+		goto cleanup;
+
+	for (i = 0; i < nr_cpus; i++) {
+		if (bpf_percpu(value, i))
+			val = bpf_percpu(value, i);
+	}
+
+	/* Fetched value should match the initially set value. */
+	if (CHECK(val != START_VALUE, "bpf_map_lookup_and_delete_elem",
+		  "val=%lld\n", val))
+		goto cleanup;
+
+	/* Check that the entry is non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, value);
+	if (CHECK(!err, "bpf_map_lookup_elem", "succeded, should fail\n"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	__u64 key, value;
+	int map_fd, err;
+
+	/* Setup program and fill the LRU map. */
+	skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+	if (CHECK(!skel, "setup_prog", "failed\n"))
+		return;
+
+	err = fill_values(map_fd);
+	if (CHECK(err, "fill_values", "failed\n"))
+		goto cleanup;
+
+	/* Insert new element at key=3, should reuse LRU element. */
+	key = 3;
+	err = trigger_tp(skel, key, NEW_VALUE);
+	if (CHECK(err, "trigger_tp", "failed"))
+		goto cleanup;
+
+	/* Lookup and delete element 3. */
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	if (CHECK(err, "bpf_map_lookup_and_delete_elem", "failed, errno=%d\n",
+		  errno))
+		goto cleanup;
+
+	/* Value should match the new value. */
+	if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+		  "value=%lld\n", value))
+		goto cleanup;
+
+	/* Check that entries 3 and 1 are non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (CHECK(!err, "bpf_map_lookup_elem", "succeded, should fail\n"))
+		goto cleanup;
+
+	key = 1;
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (CHECK(!err, "bpf_map_lookup_elem", "succeded, should fail\n"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+	struct test_lookup_and_delete *skel;
+	BPF_DECLARE_PERCPU(__u64, value);
+	int map_fd, err, i;
+	__u64 key, val;
+
+	/* Setup program and fill the LRU map. */
+	skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+	if (CHECK(!skel, "setup_prog", "failed\n"))
+		return;
+
+	err = fill_values_percpu(map_fd);
+	if (CHECK(err, "fill_values_percpu", "failed\n"))
+		goto cleanup;
+
+	/* Insert new element at key=3, should reuse LRU element. */
+	key = 3;
+	err = trigger_tp(skel, key, NEW_VALUE);
+	if (CHECK(err, "trigger_tp", "failed"))
+		goto cleanup;
+
+	/* Lookup and delete element 3. */
+	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	if (CHECK(err, "bpf_map_lookup_and_delete_elem", "failed, errno=%d\n",
+		  errno))
+		goto cleanup;
+
+	for (i = 0; i < nr_cpus; i++) {
+		if (bpf_percpu(value, i))
+			val = bpf_percpu(value, i);
+	}
+
+	/* Value should match the new value. */
+	if (CHECK(val != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+		  "val=%lld\n", val))
+		goto cleanup;
+
+	/* Check that entries 3 and 1 are non existent. */
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (CHECK(!err, "bpf_map_lookup_elem", "succeded, should fail\n"))
+		goto cleanup;
+
+	key = 1;
+	err = bpf_map_lookup_elem(map_fd, &key, &value);
+	if (CHECK(!err, "bpf_map_lookup_elem", "succeded, should fail\n"))
+		goto cleanup;
+
+cleanup:
+	test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+	nr_cpus = bpf_num_possible_cpus();
+
+	if (test__start_subtest("lookup_and_delete"))
+		test_lookup_and_delete_hash();
+	if (test__start_subtest("lookup_and_delete_percpu"))
+		test_lookup_and_delete_percpu_hash();
+	if (test__start_subtest("lookup_and_delete_lru"))
+		test_lookup_and_delete_lru_hash();
+	if (test__start_subtest("lookup_and_delete_lru_percpu"))
+		test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..eb19de8bb415
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid;
+__u64 set_key;
+__u64 set_value;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, __u64);
+	__type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+	if (set_pid == bpf_get_current_pid_tgid() >> 32)
+		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..f33fb6de76bc 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
 	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
 	       errno == ENOENT);
 
+	/* lookup elem key=3 and delete it, than check it doesn't exist */
+	key = 1;
+	assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+	assert(value[0] == 1234);
+
+	/* remove the same element from the expected map */
+	assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
 	close(expected_map_fd);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..dbd5f95e8bde 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -65,6 +65,13 @@ static void test_hashmap(unsigned int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
 	key = 2;
+	value = 1234;
+	/* Insert key=2 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+	/* Check that key=2 matches the value and delete it */
+	assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
 	/* Check that key=2 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
@@ -164,8 +171,18 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 
 	key = 1;
 	/* Insert key=1 element. */
-	assert(!(expected_key_mask & key));
 	assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+	/* Lookup and delete elem key=1 and check value. */
+	assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+	       bpf_percpu(value, 0) == 100);
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(value, i) = i + 100;
+
+	/* Insert key=1 element which should not exist. */
+	assert(!(expected_key_mask & key));
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
 	expected_key_mask |= key;
 
 	/* BPF_NOEXIST means add new element if it doesn't exist. */
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-02-28  9:40 [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab Denis Salopek
@ 2021-03-02  2:02 ` Yonghong Song
  2021-03-13  8:54   ` Denis Salopek
  0 siblings, 1 reply; 4+ messages in thread
From: Yonghong Song @ 2021-03-02  2:02 UTC (permalink / raw)
  To: Denis Salopek, bpf; +Cc: juraj.vijtiuk, luka.oreskovic, luka.perkov



On 2/28/21 1:40 AM, Denis Salopek wrote:
> Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> hashtab maps, in addition to stacks and queues.
> Create a new hashtab bpf_map_ops function that does lookup and deletion
> of the element under the same bucket lock and add the created map_ops to
> bpf.h.
> Add the appropriate test cases to 'maps' and 'lru_map' selftests
> accompanied with new test_progs.
> 
> Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> Cc: Luka Perkov <luka.perkov@sartura.hr>
> Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> ---
> v2: Add functionality for LRU/per-CPU, add test_progs tests.
> ---
>   include/linux/bpf.h                           |   2 +
>   kernel/bpf/hashtab.c                          |  80 +++++
>   kernel/bpf/syscall.c                          |  14 +-
>   .../bpf/prog_tests/lookup_and_delete.c        | 283 ++++++++++++++++++
>   .../bpf/progs/test_lookup_and_delete.c        |  26 ++
>   tools/testing/selftests/bpf/test_lru_map.c    |   8 +
>   tools/testing/selftests/bpf/test_maps.c       |  19 +-
>   7 files changed, 430 insertions(+), 2 deletions(-)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
>   create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4c730863fa77..0bcc4f89af40 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -67,6 +67,8 @@ struct bpf_map_ops {
>   	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
>   	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
>   				union bpf_attr __user *uattr);
> +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
> +					  void *value);
>   	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
>   					   const union bpf_attr *attr,
>   					   union bpf_attr __user *uattr);
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 330d721dd2af..8c3334d1b6b3 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
>   	rcu_read_unlock();
>   }
>   
> +static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
> +					     void *value, bool is_lru_map,
> +					     bool is_percpu)
> +{
> +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
> +	u32 hash, key_size, value_size;
> +	struct hlist_nulls_head *head;
> +	int cpu, off = 0, ret;
> +	struct htab_elem *l;
> +	unsigned long flags;
> +	void __percpu *pptr;
> +	struct bucket *b;
> +
> +	key_size = map->key_size;
> +	value_size = round_up(map->value_size, 8);
> +
> +	hash = htab_map_hash(key, key_size, htab->hashrnd);
> +	b = __select_bucket(htab, hash);
> +	head = &b->head;
> +
> +	ret = htab_lock_bucket(htab, b, hash, &flags);
> +	if (ret)
> +		return ret;
> +
> +	l = lookup_elem_raw(head, hash, key, key_size);
> +	if (l) {
> +		if (is_percpu) {
> +			pptr = htab_elem_get_ptr(l, key_size);
> +			for_each_possible_cpu(cpu) {
> +				bpf_long_memcpy(value + off,
> +						per_cpu_ptr(pptr, cpu),
> +						value_size);
> +				off += value_size;
> +			}
> +		} else {
> +			copy_map_value(map, value, l->key + round_up(key_size, 8));

For hashtab lookup elem, BPF_F_LOCK flag may be set by user, I think 
hashtab lookup_and_delete_elem should also support this flag, so user
can ensure they always get a lock protected sane value.

We have the following libbpf APIs.

LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void 
*value,
                                          __u64 flags);
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
                                               void *value);

Previously, bpf_map_lookup_and_delete_elem only supports queue/stack,
which does not need flags as it does not support BPF_F_LOCK so we
are fine.

Maybe similar to bpf_map_lookup_elem_flags() we add a
bpf_map_lookup_and_delete_elem_flags()? Maybe libbpf v1.0
can consolidate into a better uniform api.

> +		}
> +
> +		hlist_nulls_del_rcu(&l->hash_node);
> +		if (!is_lru_map)
> +			free_htab_elem(htab, l);
> +	} else
> +		ret = -ENOENT;
> +
> +	htab_unlock_bucket(htab, b, hash, flags);
> +
> +	if (is_lru_map && l)
> +		bpf_lru_push_free(&htab->lru, &l->lru_node);
> +
> +	return ret;
> +}
> +
> +static int htab_map_lookup_and_delete_elem(struct bpf_map *map,
> +					   void *key, void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, false, false);
> +}
> +
> +static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
> +						  void *key, void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, false, true);
> +}
> +
> +static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
> +					       void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, true, false);
> +}
> +
> +static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
> +						      void *key, void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, true, true);
> +}
> +
>   static int
>   __htab_map_lookup_and_delete_batch(struct bpf_map *map,
>   				   const union bpf_attr *attr,
> @@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
>   	.map_update_elem = htab_map_update_elem,
>   	.map_delete_elem = htab_map_delete_elem,
>   	.map_gen_lookup = htab_map_gen_lookup,
> @@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_lru_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
>   	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
>   	.map_update_elem = htab_lru_map_update_elem,
>   	.map_delete_elem = htab_lru_map_delete_elem,
> @@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_percpu_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
>   	.map_update_elem = htab_percpu_map_update_elem,
>   	.map_delete_elem = htab_map_delete_elem,
>   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
> @@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
>   	.map_update_elem = htab_lru_percpu_map_update_elem,
>   	.map_delete_elem = htab_lru_map_delete_elem,
>   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index c859bc46d06c..2634aa4a2f37 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>   		goto err_put;
>   	}
>   
> -	value_size = map->value_size;
> +	value_size = bpf_map_value_size(map);
>   
>   	err = -ENOMEM;
>   	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> @@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>   	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
>   	    map->map_type == BPF_MAP_TYPE_STACK) {
>   		err = map->ops->map_pop_elem(map, value);
> +	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
> +		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
> +		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
> +		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
> +		if (!bpf_map_is_dev_bound(map)) {
> +			bpf_disable_instrumentation();
> +			rcu_read_lock();
> +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
> +			rcu_read_unlock();
> +			bpf_enable_instrumentation();
> +			maybe_wait_bpf_programs(map);

maybe_wait_bpf_programs(map) is mostly for map-in-map.
but I think it is okay to put it here in case in the future
we will support map-in-map here. If maybe_wait_bpf_programs()
get inlined which mostly likely is the case, the compiler
should be able to optimize it away.

> +		}
>   	} else {
>   		err = -ENOTSUPP;
>   	}
> diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> new file mode 100644
> index 000000000000..05123bbcdc1c
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> @@ -0,0 +1,283 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <test_progs.h>
> +#include "test_lookup_and_delete.skel.h"
> +
> +#define START_VALUE 1234
> +#define NEW_VALUE 4321
> +#define MAX_ENTRIES 2
> +
> +static int duration;
> +static int nr_cpus;
> +
> +static int fill_values(int map_fd)
> +{
> +	__u64 key, value = START_VALUE;
> +	int err;
> +
> +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
> +		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
> +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))

You can use
	if (!ASSERT_OK(err, "bpf_map_update_elem"))
to save you from explicit "failed" string.
The same for some later other CHECK usages.

> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int fill_values_percpu(int map_fd)
> +{
> +	BPF_DECLARE_PERCPU(__u64, value);
> +	int i, err;
> +	u64 key;
> +
> +	for (i = 0; i < nr_cpus; i++)
> +		bpf_percpu(value, i) = START_VALUE;
> +
> +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
> +		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
> +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
[...]
> diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> new file mode 100644
> index 000000000000..eb19de8bb415
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> @@ -0,0 +1,26 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +
> +__u32 set_pid;
> +__u64 set_key;
> +__u64 set_value;

Please add "= 0" to the above declaration to make
it llvm10 friendly.

> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_HASH);
> +	__uint(max_entries, 2);
> +	__type(key, __u64);
> +	__type(value, __u64);
> +} hash_map SEC(".maps");
> +
> +SEC("tp/syscalls/sys_enter_getpgid")
> +int bpf_lookup_and_delete_test(const void *ctx)
> +{
> +	if (set_pid == bpf_get_current_pid_tgid() >> 32)
> +		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
> +
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
[...]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-03-02  2:02 ` Yonghong Song
@ 2021-03-13  8:54   ` Denis Salopek
  2021-03-15  0:08     ` Yonghong Song
  0 siblings, 1 reply; 4+ messages in thread
From: Denis Salopek @ 2021-03-13  8:54 UTC (permalink / raw)
  To: Yonghong Song; +Cc: bpf, juraj.vijtiuk, luka.oreskovic, luka.perkov

Hello,

Thank you for your feedback and comments.

On Mon, Mar 01, 2021 at 06:02:37PM -0800, Yonghong Song wrote:
> 
> 
> On 2/28/21 1:40 AM, Denis Salopek wrote:
> > Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> > hashtab maps, in addition to stacks and queues.
> > Create a new hashtab bpf_map_ops function that does lookup and deletion
> > of the element under the same bucket lock and add the created map_ops to
> > bpf.h.
> > Add the appropriate test cases to 'maps' and 'lru_map' selftests
> > accompanied with new test_progs.
> > 
> > Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> > Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> > Cc: Luka Perkov <luka.perkov@sartura.hr>
> > Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> > ---
> > v2: Add functionality for LRU/per-CPU, add test_progs tests.
> > ---
> >   include/linux/bpf.h                           |   2 +
> >   kernel/bpf/hashtab.c                          |  80 +++++
> >   kernel/bpf/syscall.c                          |  14 +-
> >   .../bpf/prog_tests/lookup_and_delete.c        | 283 ++++++++++++++++++
> >   .../bpf/progs/test_lookup_and_delete.c        |  26 ++
> >   tools/testing/selftests/bpf/test_lru_map.c    |   8 +
> >   tools/testing/selftests/bpf/test_maps.c       |  19 +-
> >   7 files changed, 430 insertions(+), 2 deletions(-)
> >   create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> >   create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 4c730863fa77..0bcc4f89af40 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -67,6 +67,8 @@ struct bpf_map_ops {
> >   	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
> >   	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
> >   				union bpf_attr __user *uattr);
> > +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
> > +					  void *value);
> >   	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
> >   					   const union bpf_attr *attr,
> >   					   union bpf_attr __user *uattr);
> > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> > index 330d721dd2af..8c3334d1b6b3 100644
> > --- a/kernel/bpf/hashtab.c
> > +++ b/kernel/bpf/hashtab.c
> > @@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
> >   	rcu_read_unlock();
> >   }
> >   
> > +static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
> > +					     void *value, bool is_lru_map,
> > +					     bool is_percpu)
> > +{
> > +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
> > +	u32 hash, key_size, value_size;
> > +	struct hlist_nulls_head *head;
> > +	int cpu, off = 0, ret;
> > +	struct htab_elem *l;
> > +	unsigned long flags;
> > +	void __percpu *pptr;
> > +	struct bucket *b;
> > +
> > +	key_size = map->key_size;
> > +	value_size = round_up(map->value_size, 8);
> > +
> > +	hash = htab_map_hash(key, key_size, htab->hashrnd);
> > +	b = __select_bucket(htab, hash);
> > +	head = &b->head;
> > +
> > +	ret = htab_lock_bucket(htab, b, hash, &flags);
> > +	if (ret)
> > +		return ret;
> > +
> > +	l = lookup_elem_raw(head, hash, key, key_size);
> > +	if (l) {
> > +		if (is_percpu) {
> > +			pptr = htab_elem_get_ptr(l, key_size);
> > +			for_each_possible_cpu(cpu) {
> > +				bpf_long_memcpy(value + off,
> > +						per_cpu_ptr(pptr, cpu),
> > +						value_size);
> > +				off += value_size;
> > +			}
> > +		} else {
> > +			copy_map_value(map, value, l->key + round_up(key_size, 8));
> 
> For hashtab lookup elem, BPF_F_LOCK flag may be set by user, I think 
> hashtab lookup_and_delete_elem should also support this flag, so user
> can ensure they always get a lock protected sane value.
> 
> We have the following libbpf APIs.
> 
> LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
> LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void 
> *value,
>                                           __u64 flags);
> LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
>                                                void *value);
> 
> Previously, bpf_map_lookup_and_delete_elem only supports queue/stack,
> which does not need flags as it does not support BPF_F_LOCK so we
> are fine.
> 
> Maybe similar to bpf_map_lookup_elem_flags() we add a
> bpf_map_lookup_and_delete_elem_flags()? Maybe libbpf v1.0
> can consolidate into a better uniform api.
> 

If I understood correctly, there shouldn't be much changes for this
addition:
- add LIBBPF_API prototype and function in bpf.[hc] - those are
  practically the same as bpf_map_lookup_elem_flags() but we call
  BPF_LOOKUP_AND_DELETE_ELEM syscall,
- add global declaration for bpf_map_lookup_elem_flags() in libbpf.map,
- make the necessary checks for flags and the lock in the functions,
- call copy_map_value_locked() if BPF_F_LOCK is set,
- mask lock with check_and_init_map_lock().

Is this right or is there anything else I've missed?

> > +		}
> > +
> > +		hlist_nulls_del_rcu(&l->hash_node);
> > +		if (!is_lru_map)
> > +			free_htab_elem(htab, l);
> > +	} else
> > +		ret = -ENOENT;
> > +
> > +	htab_unlock_bucket(htab, b, hash, flags);
> > +
> > +	if (is_lru_map && l)
> > +		bpf_lru_push_free(&htab->lru, &l->lru_node);
> > +
> > +	return ret;
> > +}
> > +
> > +static int htab_map_lookup_and_delete_elem(struct bpf_map *map,
> > +					   void *key, void *value)
> > +{
> > +	return __htab_map_lookup_and_delete_elem(map, key, value, false, false);
> > +}
> > +
> > +static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
> > +						  void *key, void *value)
> > +{
> > +	return __htab_map_lookup_and_delete_elem(map, key, value, false, true);
> > +}
> > +
> > +static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
> > +					       void *value)
> > +{
> > +	return __htab_map_lookup_and_delete_elem(map, key, value, true, false);
> > +}
> > +
> > +static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
> > +						      void *key, void *value)
> > +{
> > +	return __htab_map_lookup_and_delete_elem(map, key, value, true, true);
> > +}
> > +
> >   static int
> >   __htab_map_lookup_and_delete_batch(struct bpf_map *map,
> >   				   const union bpf_attr *attr,
> > @@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = {
> >   	.map_free = htab_map_free,
> >   	.map_get_next_key = htab_map_get_next_key,
> >   	.map_lookup_elem = htab_map_lookup_elem,
> > +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
> >   	.map_update_elem = htab_map_update_elem,
> >   	.map_delete_elem = htab_map_delete_elem,
> >   	.map_gen_lookup = htab_map_gen_lookup,
> > @@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
> >   	.map_free = htab_map_free,
> >   	.map_get_next_key = htab_map_get_next_key,
> >   	.map_lookup_elem = htab_lru_map_lookup_elem,
> > +	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
> >   	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
> >   	.map_update_elem = htab_lru_map_update_elem,
> >   	.map_delete_elem = htab_lru_map_delete_elem,
> > @@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
> >   	.map_free = htab_map_free,
> >   	.map_get_next_key = htab_map_get_next_key,
> >   	.map_lookup_elem = htab_percpu_map_lookup_elem,
> > +	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
> >   	.map_update_elem = htab_percpu_map_update_elem,
> >   	.map_delete_elem = htab_map_delete_elem,
> >   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
> > @@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
> >   	.map_free = htab_map_free,
> >   	.map_get_next_key = htab_map_get_next_key,
> >   	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
> > +	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
> >   	.map_update_elem = htab_lru_percpu_map_update_elem,
> >   	.map_delete_elem = htab_lru_map_delete_elem,
> >   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index c859bc46d06c..2634aa4a2f37 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >   		goto err_put;
> >   	}
> >   
> > -	value_size = map->value_size;
> > +	value_size = bpf_map_value_size(map);
> >   
> >   	err = -ENOMEM;
> >   	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> > @@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >   	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> >   	    map->map_type == BPF_MAP_TYPE_STACK) {
> >   		err = map->ops->map_pop_elem(map, value);
> > +	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
> > +		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
> > +		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
> > +		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
> > +		if (!bpf_map_is_dev_bound(map)) {
> > +			bpf_disable_instrumentation();
> > +			rcu_read_lock();
> > +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
> > +			rcu_read_unlock();
> > +			bpf_enable_instrumentation();
> > +			maybe_wait_bpf_programs(map);
> 
> maybe_wait_bpf_programs(map) is mostly for map-in-map.
> but I think it is okay to put it here in case in the future
> we will support map-in-map here. If maybe_wait_bpf_programs()
> get inlined which mostly likely is the case, the compiler
> should be able to optimize it away.
> 

I didn't realise at first it's only for map-in-map and forgot to remove
it later, so I can remove this if you think it's better?

> > +		}
> >   	} else {
> >   		err = -ENOTSUPP;
> >   	}
> > diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> > new file mode 100644
> > index 000000000000..05123bbcdc1c
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> > @@ -0,0 +1,283 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +
> > +#include <test_progs.h>
> > +#include "test_lookup_and_delete.skel.h"
> > +
> > +#define START_VALUE 1234
> > +#define NEW_VALUE 4321
> > +#define MAX_ENTRIES 2
> > +
> > +static int duration;
> > +static int nr_cpus;
> > +
> > +static int fill_values(int map_fd)
> > +{
> > +	__u64 key, value = START_VALUE;
> > +	int err;
> > +
> > +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
> > +		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
> > +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
> 
> You can use
> 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
> to save you from explicit "failed" string.
> The same for some later other CHECK usages.
> 

Ok.

> > +			return -1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int fill_values_percpu(int map_fd)
> > +{
> > +	BPF_DECLARE_PERCPU(__u64, value);
> > +	int i, err;
> > +	u64 key;
> > +
> > +	for (i = 0; i < nr_cpus; i++)
> > +		bpf_percpu(value, i) = START_VALUE;
> > +
> > +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
> > +		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
> > +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
> > +			return -1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> [...]
> > diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> > new file mode 100644
> > index 000000000000..eb19de8bb415
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> > @@ -0,0 +1,26 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#include "vmlinux.h"
> > +#include <bpf/bpf_helpers.h>
> > +
> > +__u32 set_pid;
> > +__u64 set_key;
> > +__u64 set_value;
> 
> Please add "= 0" to the above declaration to make
> it llvm10 friendly.
> 

Ok, I'll add this. Sorry, checkpatch.pl gave me an error with it, that's
why I removed it.

> > +
> > +struct {
> > +	__uint(type, BPF_MAP_TYPE_HASH);
> > +	__uint(max_entries, 2);
> > +	__type(key, __u64);
> > +	__type(value, __u64);
> > +} hash_map SEC(".maps");
> > +
> > +SEC("tp/syscalls/sys_enter_getpgid")
> > +int bpf_lookup_and_delete_test(const void *ctx)
> > +{
> > +	if (set_pid == bpf_get_current_pid_tgid() >> 32)
> > +		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
> > +
> > +	return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
> [...]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab
  2021-03-13  8:54   ` Denis Salopek
@ 2021-03-15  0:08     ` Yonghong Song
  0 siblings, 0 replies; 4+ messages in thread
From: Yonghong Song @ 2021-03-15  0:08 UTC (permalink / raw)
  To: Denis Salopek; +Cc: bpf, juraj.vijtiuk, luka.oreskovic, luka.perkov



On 3/13/21 12:54 AM, Denis Salopek wrote:
> Hello,
> 
> Thank you for your feedback and comments.
> 
> On Mon, Mar 01, 2021 at 06:02:37PM -0800, Yonghong Song wrote:
>>
>>
>> On 2/28/21 1:40 AM, Denis Salopek wrote:
>>> Extend the existing bpf_map_lookup_and_delete_elem() functionality to
>>> hashtab maps, in addition to stacks and queues.
>>> Create a new hashtab bpf_map_ops function that does lookup and deletion
>>> of the element under the same bucket lock and add the created map_ops to
>>> bpf.h.
>>> Add the appropriate test cases to 'maps' and 'lru_map' selftests
>>> accompanied with new test_progs.
>>>
>>> Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
>>> Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
>>> Cc: Luka Perkov <luka.perkov@sartura.hr>
>>> Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
>>> ---
>>> v2: Add functionality for LRU/per-CPU, add test_progs tests.
>>> ---
>>>    include/linux/bpf.h                           |   2 +
>>>    kernel/bpf/hashtab.c                          |  80 +++++
>>>    kernel/bpf/syscall.c                          |  14 +-
>>>    .../bpf/prog_tests/lookup_and_delete.c        | 283 ++++++++++++++++++
>>>    .../bpf/progs/test_lookup_and_delete.c        |  26 ++
>>>    tools/testing/selftests/bpf/test_lru_map.c    |   8 +
>>>    tools/testing/selftests/bpf/test_maps.c       |  19 +-
>>>    7 files changed, 430 insertions(+), 2 deletions(-)
>>>    create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
>>>    create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
>>>
>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>> index 4c730863fa77..0bcc4f89af40 100644
>>> --- a/include/linux/bpf.h
>>> +++ b/include/linux/bpf.h
>>> @@ -67,6 +67,8 @@ struct bpf_map_ops {
>>>    	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
>>>    	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
>>>    				union bpf_attr __user *uattr);
>>> +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
>>> +					  void *value);
>>>    	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
>>>    					   const union bpf_attr *attr,
>>>    					   union bpf_attr __user *uattr);
>>> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
>>> index 330d721dd2af..8c3334d1b6b3 100644
>>> --- a/kernel/bpf/hashtab.c
>>> +++ b/kernel/bpf/hashtab.c
>>> @@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
>>>    	rcu_read_unlock();
>>>    }
>>>    
>>> +static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
>>> +					     void *value, bool is_lru_map,
>>> +					     bool is_percpu)
>>> +{
>>> +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>>> +	u32 hash, key_size, value_size;
>>> +	struct hlist_nulls_head *head;
>>> +	int cpu, off = 0, ret;
>>> +	struct htab_elem *l;
>>> +	unsigned long flags;
>>> +	void __percpu *pptr;
>>> +	struct bucket *b;
>>> +
>>> +	key_size = map->key_size;
>>> +	value_size = round_up(map->value_size, 8);
>>> +
>>> +	hash = htab_map_hash(key, key_size, htab->hashrnd);
>>> +	b = __select_bucket(htab, hash);
>>> +	head = &b->head;
>>> +
>>> +	ret = htab_lock_bucket(htab, b, hash, &flags);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	l = lookup_elem_raw(head, hash, key, key_size);
>>> +	if (l) {
>>> +		if (is_percpu) {
>>> +			pptr = htab_elem_get_ptr(l, key_size);
>>> +			for_each_possible_cpu(cpu) {
>>> +				bpf_long_memcpy(value + off,
>>> +						per_cpu_ptr(pptr, cpu),
>>> +						value_size);
>>> +				off += value_size;
>>> +			}
>>> +		} else {
>>> +			copy_map_value(map, value, l->key + round_up(key_size, 8));
>>
>> For hashtab lookup elem, BPF_F_LOCK flag may be set by user, I think
>> hashtab lookup_and_delete_elem should also support this flag, so user
>> can ensure they always get a lock protected sane value.
>>
>> We have the following libbpf APIs.
>>
>> LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
>> LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void
>> *value,
>>                                            __u64 flags);
>> LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
>>                                                 void *value);
>>
>> Previously, bpf_map_lookup_and_delete_elem only supports queue/stack,
>> which does not need flags as it does not support BPF_F_LOCK so we
>> are fine.
>>
>> Maybe similar to bpf_map_lookup_elem_flags() we add a
>> bpf_map_lookup_and_delete_elem_flags()? Maybe libbpf v1.0
>> can consolidate into a better uniform api.
>>
> 
> If I understood correctly, there shouldn't be much changes for this
> addition:
> - add LIBBPF_API prototype and function in bpf.[hc] - those are
>    practically the same as bpf_map_lookup_elem_flags() but we call
>    BPF_LOOKUP_AND_DELETE_ELEM syscall,

yes.

> - add global declaration for bpf_map_lookup_elem_flags() in libbpf.map,

bpf_map_lookup_and_delete_elem_flags()

> - make the necessary checks for flags and the lock in the functions,

yes.

> - call copy_map_value_locked() if BPF_F_LOCK is set,

yes.

> - mask lock with check_and_init_map_lock().

not sure about, current implementation is supposed to already
take care of this, but please double check.

> 
> Is this right or is there anything else I've missed?

yes, almost right except some minor comments above.

> 
>>> +		}
>>> +
>>> +		hlist_nulls_del_rcu(&l->hash_node);
>>> +		if (!is_lru_map)
>>> +			free_htab_elem(htab, l);
>>> +	} else
>>> +		ret = -ENOENT;
>>> +
>>> +	htab_unlock_bucket(htab, b, hash, flags);
>>> +
>>> +	if (is_lru_map && l)
>>> +		bpf_lru_push_free(&htab->lru, &l->lru_node);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +static int htab_map_lookup_and_delete_elem(struct bpf_map *map,
>>> +					   void *key, void *value)
>>> +{
>>> +	return __htab_map_lookup_and_delete_elem(map, key, value, false, false);
>>> +}
>>> +
>>> +static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
>>> +						  void *key, void *value)
>>> +{
>>> +	return __htab_map_lookup_and_delete_elem(map, key, value, false, true);
>>> +}
>>> +
>>> +static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
>>> +					       void *value)
>>> +{
>>> +	return __htab_map_lookup_and_delete_elem(map, key, value, true, false);
>>> +}
>>> +
>>> +static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
>>> +						      void *key, void *value)
>>> +{
>>> +	return __htab_map_lookup_and_delete_elem(map, key, value, true, true);
>>> +}
>>> +
>>>    static int
>>>    __htab_map_lookup_and_delete_batch(struct bpf_map *map,
>>>    				   const union bpf_attr *attr,
>>> @@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = {
>>>    	.map_free = htab_map_free,
>>>    	.map_get_next_key = htab_map_get_next_key,
>>>    	.map_lookup_elem = htab_map_lookup_elem,
>>> +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
>>>    	.map_update_elem = htab_map_update_elem,
>>>    	.map_delete_elem = htab_map_delete_elem,
>>>    	.map_gen_lookup = htab_map_gen_lookup,
>>> @@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
>>>    	.map_free = htab_map_free,
>>>    	.map_get_next_key = htab_map_get_next_key,
>>>    	.map_lookup_elem = htab_lru_map_lookup_elem,
>>> +	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
>>>    	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
>>>    	.map_update_elem = htab_lru_map_update_elem,
>>>    	.map_delete_elem = htab_lru_map_delete_elem,
>>> @@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
>>>    	.map_free = htab_map_free,
>>>    	.map_get_next_key = htab_map_get_next_key,
>>>    	.map_lookup_elem = htab_percpu_map_lookup_elem,
>>> +	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
>>>    	.map_update_elem = htab_percpu_map_update_elem,
>>>    	.map_delete_elem = htab_map_delete_elem,
>>>    	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
>>> @@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
>>>    	.map_free = htab_map_free,
>>>    	.map_get_next_key = htab_map_get_next_key,
>>>    	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
>>> +	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
>>>    	.map_update_elem = htab_lru_percpu_map_update_elem,
>>>    	.map_delete_elem = htab_lru_map_delete_elem,
>>>    	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>> index c859bc46d06c..2634aa4a2f37 100644
>>> --- a/kernel/bpf/syscall.c
>>> +++ b/kernel/bpf/syscall.c
>>> @@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>>>    		goto err_put;
>>>    	}
>>>    
>>> -	value_size = map->value_size;
>>> +	value_size = bpf_map_value_size(map);
>>>    
>>>    	err = -ENOMEM;
>>>    	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
>>> @@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>>>    	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
>>>    	    map->map_type == BPF_MAP_TYPE_STACK) {
>>>    		err = map->ops->map_pop_elem(map, value);
>>> +	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
>>> +		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
>>> +		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
>>> +		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
>>> +		if (!bpf_map_is_dev_bound(map)) {
>>> +			bpf_disable_instrumentation();
>>> +			rcu_read_lock();
>>> +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
>>> +			rcu_read_unlock();
>>> +			bpf_enable_instrumentation();
>>> +			maybe_wait_bpf_programs(map);
>>
>> maybe_wait_bpf_programs(map) is mostly for map-in-map.
>> but I think it is okay to put it here in case in the future
>> we will support map-in-map here. If maybe_wait_bpf_programs()
>> get inlined which mostly likely is the case, the compiler
>> should be able to optimize it away.
>>
> 
> I didn't realise at first it's only for map-in-map and forgot to remove
> it later, so I can remove this if you think it's better?

Originally I thought to keep it as the compiler should
be able to optimize it away. But since there is no immediate
use case yet for lookup-and-delete for hash of map-in-maps,
so let us remove maybe_wait_bpf_programs() to avoid confusion.
We can add it later if BPF_MAP_TYPE_HASH_OF_MAPS is added.

> 
>>> +		}
>>>    	} else {
>>>    		err = -ENOTSUPP;
>>>    	}
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
>>> new file mode 100644
>>> index 000000000000..05123bbcdc1c
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
>>> @@ -0,0 +1,283 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +
>>> +#include <test_progs.h>
>>> +#include "test_lookup_and_delete.skel.h"
>>> +
>>> +#define START_VALUE 1234
>>> +#define NEW_VALUE 4321
>>> +#define MAX_ENTRIES 2
>>> +
>>> +static int duration;
>>> +static int nr_cpus;
>>> +
>>> +static int fill_values(int map_fd)
>>> +{
>>> +	__u64 key, value = START_VALUE;
>>> +	int err;
>>> +
>>> +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
>>> +		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
>>> +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
>>
>> You can use
>> 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
>> to save you from explicit "failed" string.
>> The same for some later other CHECK usages.
>>
> 
> Ok.
> 
>>> +			return -1;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int fill_values_percpu(int map_fd)
>>> +{
>>> +	BPF_DECLARE_PERCPU(__u64, value);
>>> +	int i, err;
>>> +	u64 key;
>>> +
>>> +	for (i = 0; i < nr_cpus; i++)
>>> +		bpf_percpu(value, i) = START_VALUE;
>>> +
>>> +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
>>> +		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
>>> +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
>>> +			return -1;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>> [...]
>>> diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
>>> new file mode 100644
>>> index 000000000000..eb19de8bb415
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
>>> @@ -0,0 +1,26 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +
>>> +#include "vmlinux.h"
>>> +#include <bpf/bpf_helpers.h>
>>> +
>>> +__u32 set_pid;
>>> +__u64 set_key;
>>> +__u64 set_value;
>>
>> Please add "= 0" to the above declaration to make
>> it llvm10 friendly.
>>
> 
> Ok, I'll add this. Sorry, checkpatch.pl gave me an error with it, that's
> why I removed it.

Song Liu recently added a patch to suppress the warning:
   commit 5b8f82e1a17695c9e5fec5842b234967782d7e5b
   Author: Song Liu <songliubraving@fb.com>
   Date:   Thu Feb 25 17:22:08 2021 -0800

     checkpatch: do not apply "initialise globals to 0" check to BPF progs

You should be good now.

> 
>>> +
>>> +struct {
>>> +	__uint(type, BPF_MAP_TYPE_HASH);
>>> +	__uint(max_entries, 2);
>>> +	__type(key, __u64);
>>> +	__type(value, __u64);
>>> +} hash_map SEC(".maps");
>>> +
>>> +SEC("tp/syscalls/sys_enter_getpgid")
>>> +int bpf_lookup_and_delete_test(const void *ctx)
>>> +{
>>> +	if (set_pid == bpf_get_current_pid_tgid() >> 32)
>>> +		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +char _license[] SEC("license") = "GPL";
>> [...]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-03-15  0:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-28  9:40 [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab Denis Salopek
2021-03-02  2:02 ` Yonghong Song
2021-03-13  8:54   ` Denis Salopek
2021-03-15  0:08     ` Yonghong Song

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).