* [RFC PATCH bpf-next v4 1/7] bpf: rename stack trace map operations
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
@ 2018-10-04 17:12 ` Mauricio Vasquez B
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 2/7] bpf/syscall: allow key to be null in map functions Mauricio Vasquez B
` (5 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:12 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented. In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf_types.h | 2 +-
kernel/bpf/stackmap.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..658509daacd4 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
#ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 8061a439ef18..bb41e293418d 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
put_callchain_buffers();
}
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
.map_alloc = stack_map_alloc,
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [RFC PATCH bpf-next v4 2/7] bpf/syscall: allow key to be null in map functions
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 1/7] bpf: rename stack trace map operations Mauricio Vasquez B
@ 2018-10-04 17:12 ` Mauricio Vasquez B
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall Mauricio Vasquez B
` (4 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:12 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
This commit adds the required logic to allow key being NULL
in case the key_size of the map is 0.
A new __bpf_copy_key function helper only copies the key from
userpsace when key_size != 0, otherwise it enforces that key must be
null.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
kernel/bpf/syscall.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5742df21598c..eb75e8af73ff 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
return -ENOTSUPP;
}
+static void *__bpf_copy_key(void __user *ukey, u64 key_size)
+{
+ if (key_size)
+ return memdup_user(ukey, key_size);
+
+ if (ukey)
+ return ERR_PTR(-EINVAL);
+
+ return NULL;
+}
+
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
@@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -769,7 +780,7 @@ static int map_update_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -871,7 +882,7 @@ static int map_delete_elem(union bpf_attr *attr)
goto err_put;
}
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
@@ -923,7 +934,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
if (ukey) {
- key = memdup_user(ukey, map->key_size);
+ key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
goto err_put;
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 1/7] bpf: rename stack trace map operations Mauricio Vasquez B
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 2/7] bpf/syscall: allow key to be null in map functions Mauricio Vasquez B
@ 2018-10-04 17:12 ` Mauricio Vasquez B
2018-10-04 23:37 ` Alexei Starovoitov
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps Mauricio Vasquez B
` (3 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:12 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
The following patch implements a bpf queue/stack maps that
provides the peek/pop/push functions. There is not a direct
relationship between those functions and the current maps
syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added,
this is mapped to the pop operation in the queue/stack maps
and it is still to implement in other kind of maps.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 84 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 027697b6a22f..98c7eeb6d138 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,7 @@ struct bpf_map_ops {
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
+ void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..3bb94aa2d408 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
};
enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eb75e8af73ff..50957e243bfb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -975,6 +975,85 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
}
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+
+static int map_lookup_and_delete_elem(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_user_ptr(attr->key);
+ void __user *uvalue = u64_to_user_ptr(attr->value);
+ int ufd = attr->map_fd;
+ struct bpf_map *map;
+ void *key, *value, *ptr;
+ u32 value_size;
+ struct fd f;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
+ if (!map->ops->map_lookup_and_delete_elem) {
+ err = -ENOTSUPP;
+ goto err_put;
+ }
+
+ key = __bpf_copy_key(ukey, map->key_size);
+ if (IS_ERR(key)) {
+ err = PTR_ERR(key);
+ goto err_put;
+ }
+
+ value_size = map->value_size;
+
+ err = -ENOMEM;
+ value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ if (!value)
+ goto free_key;
+
+ err = -EFAULT;
+ if (copy_from_user(value, uvalue, value_size) != 0)
+ goto free_value;
+
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+ * inside bpf map update or delete otherwise deadlocks are possible
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ rcu_read_lock();
+ ptr = map->ops->map_lookup_and_delete_elem(map, key);
+ if (ptr)
+ memcpy(value, ptr, value_size);
+ rcu_read_unlock();
+ err = ptr ? 0 : -ENOENT;
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+
+ if (err)
+ goto free_value;
+
+ if (copy_to_user(uvalue, value, value_size) != 0)
+ goto free_value;
+
+ err = 0;
+
+free_value:
+ kfree(value);
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
static const struct bpf_prog_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _name) \
[_id] = & _name ## _prog_ops,
@@ -2448,6 +2527,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
+ case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
+ err = map_lookup_and_delete_elem(&attr);
+ break;
default:
err = -EINVAL;
break;
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall Mauricio Vasquez B
@ 2018-10-04 23:37 ` Alexei Starovoitov
0 siblings, 0 replies; 14+ messages in thread
From: Alexei Starovoitov @ 2018-10-04 23:37 UTC (permalink / raw)
To: Mauricio Vasquez B; +Cc: Alexei Starovoitov, Daniel Borkmann, netdev
On Thu, Oct 04, 2018 at 07:12:39PM +0200, Mauricio Vasquez B wrote:
> The following patch implements a bpf queue/stack maps that
> provides the peek/pop/push functions. There is not a direct
> relationship between those functions and the current maps
> syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added,
> this is mapped to the pop operation in the queue/stack maps
> and it is still to implement in other kind of maps.
>
> Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
> ---
> include/linux/bpf.h | 1 +
> include/uapi/linux/bpf.h | 1 +
> kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 84 insertions(+)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 027697b6a22f..98c7eeb6d138 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -39,6 +39,7 @@ struct bpf_map_ops {
> void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
> int (*map_delete_elem)(struct bpf_map *map, void *key);
> + void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
>
> /* funcs called by prog_array and perf_event_array map */
> void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index f9187b41dff6..3bb94aa2d408 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -103,6 +103,7 @@ enum bpf_cmd {
> BPF_BTF_LOAD,
> BPF_BTF_GET_FD_BY_ID,
> BPF_TASK_FD_QUERY,
> + BPF_MAP_LOOKUP_AND_DELETE_ELEM,
> };
>
> enum bpf_map_type {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index eb75e8af73ff..50957e243bfb 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -975,6 +975,85 @@ static int map_get_next_key(union bpf_attr *attr)
> return err;
> }
>
> +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
> +
> +static int map_lookup_and_delete_elem(union bpf_attr *attr)
> +{
> + void __user *ukey = u64_to_user_ptr(attr->key);
> + void __user *uvalue = u64_to_user_ptr(attr->value);
> + int ufd = attr->map_fd;
> + struct bpf_map *map;
> + void *key, *value, *ptr;
> + u32 value_size;
> + struct fd f;
> + int err;
> +
> + if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
copy-paste error
> + return -EINVAL;
> +
> + f = fdget(ufd);
> + map = __bpf_map_get(f);
> + if (IS_ERR(map))
> + return PTR_ERR(map);
> +
> + if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
> + err = -EPERM;
> + goto err_put;
> + }
> +
> + if (!map->ops->map_lookup_and_delete_elem) {
> + err = -ENOTSUPP;
> + goto err_put;
> + }
> +
> + key = __bpf_copy_key(ukey, map->key_size);
> + if (IS_ERR(key)) {
> + err = PTR_ERR(key);
> + goto err_put;
> + }
> +
> + value_size = map->value_size;
> +
> + err = -ENOMEM;
> + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> + if (!value)
> + goto free_key;
> +
> + err = -EFAULT;
> + if (copy_from_user(value, uvalue, value_size) != 0)
> + goto free_value;
> +
> + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
> + * inside bpf map update or delete otherwise deadlocks are possible
> + */
> + preempt_disable();
> + __this_cpu_inc(bpf_prog_active);
> + rcu_read_lock();
> + ptr = map->ops->map_lookup_and_delete_elem(map, key);
> + if (ptr)
> + memcpy(value, ptr, value_size);
> + rcu_read_unlock();
> + err = ptr ? 0 : -ENOENT;
> + __this_cpu_dec(bpf_prog_active);
> + preempt_enable();
messed up formatting
> +
> + if (err)
> + goto free_value;
> +
> + if (copy_to_user(uvalue, value, value_size) != 0)
> + goto free_value;
> +
> + err = 0;
> +
> +free_value:
> + kfree(value);
> +free_key:
> + kfree(key);
> +err_put:
> + fdput(f);
> + return err;
> +}
> +
> static const struct bpf_prog_ops * const bpf_prog_types[] = {
> #define BPF_PROG_TYPE(_id, _name) \
> [_id] = & _name ## _prog_ops,
> @@ -2448,6 +2527,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
> case BPF_TASK_FD_QUERY:
> err = bpf_task_fd_query(&attr, uattr);
> break;
> + case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
> + err = map_lookup_and_delete_elem(&attr);
> + break;
> default:
> err = -EINVAL;
> break;
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
` (2 preceding siblings ...)
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall Mauricio Vasquez B
@ 2018-10-04 17:12 ` Mauricio Vasquez B
2018-10-04 23:57 ` Alexei Starovoitov
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 5/7] bpf: restrict use of peek/push/pop Mauricio Vasquez B
` (2 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:12 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
Implement two new kind of maps that support the peek, push and pop
operations.
A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
include/linux/bpf.h | 7 +
include/linux/bpf_types.h | 2
include/uapi/linux/bpf.h | 35 ++++-
kernel/bpf/Makefile | 2
kernel/bpf/core.c | 3
kernel/bpf/helpers.c | 43 ++++++
kernel/bpf/queue_stack_maps.c | 300 +++++++++++++++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 31 +++-
kernel/bpf/verifier.c | 14 +-
net/core/filter.c | 6 +
10 files changed, 424 insertions(+), 19 deletions(-)
create mode 100644 kernel/bpf/queue_stack_maps.c
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 98c7eeb6d138..cad3bc5cffd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+ int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+ int (*map_pop_elem)(struct bpf_map *map, void *value);
+ int (*map_peek_elem)(struct bpf_map *map, void *value);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -139,6 +142,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
+ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
/* the following constraints used to prototype bpf_memcmp() and other
* functions that access data on eBPF program stack
@@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
#endif
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..bfa042273fad 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
};
enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -790,14 +814,14 @@ union bpf_attr {
*
* int ret;
* struct bpf_tunnel_key key = {};
- *
+ *
* ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
* if (ret < 0)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* if (key.remote_ipv4 != 0x0a000001)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* return TC_ACT_OK; // accept packet
*
* This interface can also be used with all encapsulation devices
@@ -2304,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
- FN(sk_release),
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..17afae9e65f3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..8d2db076d123 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
+const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
+const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6502115e8f55..ab0d5e3f9892 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
+BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
+{
+ return map->ops->map_push_elem(map, value, flags);
+}
+
+const struct bpf_func_proto bpf_map_push_elem_proto = {
+ .func = bpf_map_push_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_MAP_VALUE,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
+{
+ return map->ops->map_pop_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_pop_elem_proto = {
+ .func = bpf_map_pop_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
+BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
+{
+ return map->ops->map_peek_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_peek_elem_proto = {
+ .func = bpf_map_pop_elem,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.func = bpf_user_rnd_u32,
.gpl_only = false,
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
new file mode 100644
index 000000000000..a597c5ba68f6
--- /dev/null
+++ b/kernel/bpf/queue_stack_maps.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queue_stack_maps.c: BPF queue and stack maps
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "percpu_freelist.h"
+
+#define QUEUE_STACK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+
+struct bpf_queue_stack {
+ struct bpf_map map;
+ raw_spinlock_t lock;
+ u32 head, tail;
+ u32 index_mask;
+ u32 count;
+
+ char elements[0] __aligned(8);
+};
+
+static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
+{
+ return container_of(map, struct bpf_queue_stack, map);
+}
+
+static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
+{
+ return qs->count == 0;
+}
+
+static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
+{
+ return qs->count == qs->map.max_entries;
+}
+
+/* Called from syscall */
+static int queue_stack_map_alloc_check(union bpf_attr *attr)
+{
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 0 ||
+ attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+ return -EINVAL;
+
+ if (attr->value_size > KMALLOC_MAX_SIZE)
+ /* if value_size is bigger, the user space won't be able to
+ * access the elements.
+ */
+ return -E2BIG;
+
+ return 0;
+}
+
+static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
+{
+ int ret, numa_node = bpf_map_attr_numa_node(attr);
+ u32 max_entries, value_size, index_mask;
+ u64 queue_size, cost, mask64;
+ struct bpf_queue_stack *qs;
+
+ max_entries = attr->max_entries;
+ value_size = attr->value_size;
+
+ /* From arraymap.c:
+ * On 32 bit archs roundup_pow_of_two() with max_entries that has
+ * upper most bit set in u32 space is undefined behavior due to
+ * resulting 1U << 32, so do it manually here in u64 space.
+ */
+ mask64 = fls_long(max_entries - 1);
+ mask64 = 1ULL << mask64;
+ mask64 -= 1;
+
+ index_mask = mask64;
+
+ /* Round up queue size to nearest power of 2 */
+ max_entries = index_mask + 1;
+ /* Check for overflows. */
+ if (max_entries < attr->max_entries)
+ return ERR_PTR(-E2BIG);
+
+ queue_size = sizeof(*qs) + (u64) value_size * max_entries;
+
+ cost = queue_size;
+ if (cost >= U32_MAX - PAGE_SIZE)
+ return ERR_PTR(-E2BIG);
+
+ cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = bpf_map_precharge_memlock(cost);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ qs = bpf_map_area_alloc(queue_size, numa_node);
+ if (!qs)
+ return ERR_PTR(-ENOMEM);
+
+ memset(qs, 0, sizeof(*qs));
+
+ bpf_map_init_from_attr(&qs->map, attr);
+
+ qs->map.pages = cost;
+ qs->index_mask = index_mask;
+
+ raw_spin_lock_init(&qs->lock);
+
+ return &qs->map;
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void queue_stack_map_free(struct bpf_map *map)
+{
+ struct bpf_queue_stack *qs = bpf_queue_stack(map);
+
+ /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+ * so the programs (can be more than one that used this map) were
+ * disconnected from events. Wait for outstanding critical sections in
+ * these programs to complete
+ */
+ synchronize_rcu();
+
+ bpf_map_area_free(qs);
+}
+
+static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
+{
+ struct bpf_queue_stack *qs = bpf_queue_stack(map);
+ unsigned long flags;
+ int err = 0;
+ void *ptr;
+
+ raw_spin_lock_irqsave(&qs->lock, flags);
+
+ if (queue_stack_map_is_empty(qs)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ ptr = &qs->elements[qs->tail * qs->map.value_size];
+ memcpy(value, ptr, qs->map.value_size);
+
+ if (delete) {
+ qs->tail = (qs->tail + 1) & qs->index_mask;
+ qs->count--;
+ }
+
+out:
+ raw_spin_unlock_irqrestore(&qs->lock, flags);
+ return err;
+}
+
+
+static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
+{
+ struct bpf_queue_stack *qs = bpf_queue_stack(map);
+ unsigned long flags;
+ int err = 0;
+ void *ptr;
+ u32 index;
+
+ raw_spin_lock_irqsave(&qs->lock, flags);
+
+ if (queue_stack_map_is_empty(qs)) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ index = (qs->head - 1) & qs->index_mask;
+ ptr = &qs->elements[index * qs->map.value_size];
+ memcpy(value, ptr, qs->map.value_size);
+
+ if (delete) {
+ qs->head = (qs->head - 1) & qs->index_mask;
+ qs->count--;
+ }
+
+out:
+ raw_spin_unlock_irqrestore(&qs->lock, flags);
+ return err;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_peek_elem(struct bpf_map *map, void *value)
+{
+ return __queue_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_peek_elem(struct bpf_map *map, void *value)
+{
+ return __stack_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_pop_elem(struct bpf_map *map, void *value)
+{
+ return __queue_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_pop_elem(struct bpf_map *map, void *value)
+{
+ return __stack_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
+ u64 flags)
+{
+ struct bpf_queue_stack *qs = bpf_queue_stack(map);
+ unsigned long irq_flags;
+ int err = 0;
+ void *dst;
+
+ /* BPF_EXIST is used to force making room for a new element in case the
+ * map is full
+ */
+ bool replace = (flags & BPF_EXIST);
+
+ /* Check supported flags for queue and stack maps */
+ if (flags & BPF_NOEXIST || flags > BPF_EXIST)
+ return -EINVAL;
+
+ raw_spin_lock_irqsave(&qs->lock, irq_flags);
+
+ if (queue_stack_map_is_full(qs)) {
+ if (!replace) {
+ err = -E2BIG;
+ goto out;
+ }
+ /* advance tail pointer to overwrite oldest element */
+ qs->tail = (qs->tail + 1) & qs->index_mask;
+ qs->count--;
+ }
+
+ dst = &qs->elements[qs->head * qs->map.value_size];
+ memcpy(dst, value, qs->map.value_size);
+
+ qs->head = (qs->head + 1) & qs->index_mask;
+ qs->count++;
+
+out:
+ raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+ return err;
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ return NULL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ return -EINVAL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return -EINVAL;
+}
+
+/* Called from syscall */
+static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
+ void *next_key)
+{
+ return -EINVAL;
+}
+
+const struct bpf_map_ops queue_map_ops = {
+ .map_alloc_check = queue_stack_map_alloc_check,
+ .map_alloc = queue_stack_map_alloc,
+ .map_free = queue_stack_map_free,
+ .map_lookup_elem = queue_stack_map_lookup_elem,
+ .map_update_elem = queue_stack_map_update_elem,
+ .map_delete_elem = queue_stack_map_delete_elem,
+ .map_push_elem = queue_stack_map_push_elem,
+ .map_pop_elem = queue_map_pop_elem,
+ .map_peek_elem = queue_map_peek_elem,
+ .map_get_next_key = queue_stack_map_get_next_key,
+};
+
+const struct bpf_map_ops stack_map_ops = {
+ .map_alloc_check = queue_stack_map_alloc_check,
+ .map_alloc = queue_stack_map_alloc,
+ .map_free = queue_stack_map_free,
+ .map_lookup_elem = queue_stack_map_lookup_elem,
+ .map_update_elem = queue_stack_map_update_elem,
+ .map_delete_elem = queue_stack_map_delete_elem,
+ .map_push_elem = queue_stack_map_push_elem,
+ .map_pop_elem = stack_map_pop_elem,
+ .map_peek_elem = stack_map_peek_elem,
+ .map_get_next_key = queue_stack_map_get_next_key,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 50957e243bfb..c46bf2d38be3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -727,6 +727,9 @@ static int map_lookup_elem(union bpf_attr *attr)
err = bpf_fd_htab_map_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_peek_elem(map, value);
} else {
rcu_read_lock();
ptr = map->ops->map_lookup_elem(map, key);
@@ -841,6 +844,9 @@ static int map_update_elem(union bpf_attr *attr)
/* rcu_read_lock() is not needed */
err = bpf_fd_reuseport_array_update_elem(map, key, value,
attr->flags);
+ } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_push_elem(map, value, attr->flags);
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, attr->flags);
@@ -1001,11 +1007,6 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
goto err_put;
}
- if (!map->ops->map_lookup_and_delete_elem) {
- err = -ENOTSUPP;
- goto err_put;
- }
-
key = __bpf_copy_key(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
@@ -1028,12 +1029,22 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
*/
preempt_disable();
__this_cpu_inc(bpf_prog_active);
- rcu_read_lock();
- ptr = map->ops->map_lookup_and_delete_elem(map, key);
- if (ptr)
- memcpy(value, ptr, value_size);
- rcu_read_unlock();
+ if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
+ err = map->ops->map_pop_elem(map, value);
+ } else {
+ if (!map->ops->map_lookup_and_delete_elem) {
+ err = -ENOTSUPP;
+ goto free_value;
+ }
+ rcu_read_lock();
+ ptr = map->ops->map_lookup_and_delete_elem(map, key);
+ if (ptr)
+ memcpy(value, ptr, value_size);
+ rcu_read_unlock();
err = ptr ? 0 : -ENOENT;
+ }
+
__this_cpu_dec(bpf_prog_active);
preempt_enable();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 73c81bef6ae8..489667f93061 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2121,7 +2121,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
}
if (arg_type == ARG_PTR_TO_MAP_KEY ||
- arg_type == ARG_PTR_TO_MAP_VALUE) {
+ arg_type == ARG_PTR_TO_MAP_VALUE ||
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
expected_type = PTR_TO_STACK;
if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
type != expected_type)
@@ -2191,7 +2192,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
- } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+ } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+ arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
*/
@@ -2200,9 +2202,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
- NULL);
+ meta);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -2676,7 +2679,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
if (func_id != BPF_FUNC_tail_call &&
func_id != BPF_FUNC_map_lookup_elem &&
func_id != BPF_FUNC_map_update_elem &&
- func_id != BPF_FUNC_map_delete_elem)
+ func_id != BPF_FUNC_map_delete_elem &&
+ func_id != BPF_FUNC_map_push_elem &&
+ func_id != BPF_FUNC_map_pop_elem &&
+ func_id != BPF_FUNC_map_peek_elem)
return 0;
if (meta->map_ptr == NULL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 591c698bc517..40736e0d9cff 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4993,6 +4993,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps Mauricio Vasquez B
@ 2018-10-04 23:57 ` Alexei Starovoitov
[not found] ` <86a2498d-4e2e-7fb9-5139-9738e3796b01@polito.it>
0 siblings, 1 reply; 14+ messages in thread
From: Alexei Starovoitov @ 2018-10-04 23:57 UTC (permalink / raw)
To: Mauricio Vasquez B; +Cc: Alexei Starovoitov, Daniel Borkmann, netdev
On Thu, Oct 04, 2018 at 07:12:44PM +0200, Mauricio Vasquez B wrote:
> Implement two new kind of maps that support the peek, push and pop
> operations.
>
> A use case for this is to keep track of a pool of elements, like
> network ports in a SNAT.
>
> Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
> ---
> include/linux/bpf.h | 7 +
> include/linux/bpf_types.h | 2
> include/uapi/linux/bpf.h | 35 ++++-
> kernel/bpf/Makefile | 2
> kernel/bpf/core.c | 3
> kernel/bpf/helpers.c | 43 ++++++
> kernel/bpf/queue_stack_maps.c | 300 +++++++++++++++++++++++++++++++++++++++++
> kernel/bpf/syscall.c | 31 +++-
> kernel/bpf/verifier.c | 14 +-
> net/core/filter.c | 6 +
> 10 files changed, 424 insertions(+), 19 deletions(-)
> create mode 100644 kernel/bpf/queue_stack_maps.c
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 98c7eeb6d138..cad3bc5cffd1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -40,6 +40,9 @@ struct bpf_map_ops {
> int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
> int (*map_delete_elem)(struct bpf_map *map, void *key);
> void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
> + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
> + int (*map_pop_elem)(struct bpf_map *map, void *value);
> + int (*map_peek_elem)(struct bpf_map *map, void *value);
>
> /* funcs called by prog_array and perf_event_array map */
> void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
> @@ -139,6 +142,7 @@ enum bpf_arg_type {
> ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */
> ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
> ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */
> + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */
>
> /* the following constraints used to prototype bpf_memcmp() and other
> * functions that access data on eBPF program stack
> @@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
> extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
> extern const struct bpf_func_proto bpf_map_update_elem_proto;
> extern const struct bpf_func_proto bpf_map_delete_elem_proto;
> +extern const struct bpf_func_proto bpf_map_push_elem_proto;
> +extern const struct bpf_func_proto bpf_map_pop_elem_proto;
> +extern const struct bpf_func_proto bpf_map_peek_elem_proto;
>
> extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
> extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 658509daacd4..a2ec73aa1ec7 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
> BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
> #endif
> #endif
> +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3bb94aa2d408..bfa042273fad 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -129,6 +129,8 @@ enum bpf_map_type {
> BPF_MAP_TYPE_CGROUP_STORAGE,
> BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
> BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
> + BPF_MAP_TYPE_QUEUE,
> + BPF_MAP_TYPE_STACK,
> };
>
> enum bpf_prog_type {
> @@ -463,6 +465,28 @@ union bpf_attr {
> * Return
> * 0 on success, or a negative error in case of failure.
> *
> + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
> + * Description
> + * Push an element *value* in *map*. *flags* is one of:
> + *
> + * **BPF_EXIST**
> + * If the queue/stack is full, the oldest element is removed to
> + * make room for this.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_pop_elem(struct bpf_map *map, void *value)
> + * Description
> + * Pop an element from *map*.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_peek_elem(struct bpf_map *map, void *value)
> + * Description
> + * Get an element from *map* without removing it.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> + *
> * int bpf_probe_read(void *dst, u32 size, const void *src)
> * Description
> * For tracing programs, safely attempt to read *size* bytes from
> @@ -790,14 +814,14 @@ union bpf_attr {
> *
> * int ret;
> * struct bpf_tunnel_key key = {};
> - *
> + *
> * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
> * if (ret < 0)
> * return TC_ACT_SHOT; // drop packet
> - *
> + *
> * if (key.remote_ipv4 != 0x0a000001)
> * return TC_ACT_SHOT; // drop packet
> - *
> + *
> * return TC_ACT_OK; // accept packet
> *
> * This interface can also be used with all encapsulation devices
> @@ -2304,7 +2328,10 @@ union bpf_attr {
> FN(skb_ancestor_cgroup_id), \
> FN(sk_lookup_tcp), \
> FN(sk_lookup_udp), \
> - FN(sk_release),
> + FN(sk_release), \
> + FN(map_push_elem), \
> + FN(map_pop_elem), \
> + FN(map_peek_elem),
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> * function eBPF program intends to call
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 0488b8258321..17afae9e65f3 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -3,7 +3,7 @@ obj-y := core.o
>
> obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
> obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
> -obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
> +obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
> obj-$(CONFIG_BPF_SYSCALL) += disasm.o
> obj-$(CONFIG_BPF_SYSCALL) += btf.o
> ifeq ($(CONFIG_NET),y)
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 3f5bf1af0826..8d2db076d123 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
> const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
> const struct bpf_func_proto bpf_map_update_elem_proto __weak;
> const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
> +const struct bpf_func_proto bpf_map_push_elem_proto __weak;
> +const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
> +const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
>
> const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
> const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 6502115e8f55..ab0d5e3f9892 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
> .arg2_type = ARG_PTR_TO_MAP_KEY,
> };
>
> +BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
> +{
> + return map->ops->map_push_elem(map, value, flags);
> +}
> +
> +const struct bpf_func_proto bpf_map_push_elem_proto = {
> + .func = bpf_map_push_elem,
> + .gpl_only = false,
> + .pkt_access = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_CONST_MAP_PTR,
> + .arg2_type = ARG_PTR_TO_MAP_VALUE,
> + .arg3_type = ARG_ANYTHING,
> +};
> +
> +BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
> +{
> + return map->ops->map_pop_elem(map, value);
> +}
> +
> +const struct bpf_func_proto bpf_map_pop_elem_proto = {
> + .func = bpf_map_pop_elem,
> + .gpl_only = false,
> + .pkt_access = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_CONST_MAP_PTR,
> + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
> +};
> +
> +BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
> +{
> + return map->ops->map_peek_elem(map, value);
> +}
> +
> +const struct bpf_func_proto bpf_map_peek_elem_proto = {
> + .func = bpf_map_pop_elem,
> + .gpl_only = false,
> + .pkt_access = true,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_CONST_MAP_PTR,
> + .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
> +};
> +
> const struct bpf_func_proto bpf_get_prandom_u32_proto = {
> .func = bpf_user_rnd_u32,
> .gpl_only = false,
> diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
> new file mode 100644
> index 000000000000..a597c5ba68f6
> --- /dev/null
> +++ b/kernel/bpf/queue_stack_maps.c
> @@ -0,0 +1,300 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * queue_stack_maps.c: BPF queue and stack maps
> + *
> + * Copyright (c) 2018 Politecnico di Torino
> + */
> +#include <linux/bpf.h>
> +#include <linux/list.h>
> +#include <linux/slab.h>
> +#include "percpu_freelist.h"
> +
> +#define QUEUE_STACK_CREATE_FLAG_MASK \
> + (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
> +
> +
> +struct bpf_queue_stack {
> + struct bpf_map map;
> + raw_spinlock_t lock;
> + u32 head, tail;
> + u32 index_mask;
> + u32 count;
> +
> + char elements[0] __aligned(8);
> +};
> +
> +static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
> +{
> + return container_of(map, struct bpf_queue_stack, map);
> +}
> +
> +static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
> +{
> + return qs->count == 0;
> +}
> +
> +static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
> +{
> + return qs->count == qs->map.max_entries;
> +}
> +
> +/* Called from syscall */
> +static int queue_stack_map_alloc_check(union bpf_attr *attr)
> +{
> + /* check sanity of attributes */
> + if (attr->max_entries == 0 || attr->key_size != 0 ||
> + attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
> + return -EINVAL;
> +
> + if (attr->value_size > KMALLOC_MAX_SIZE)
> + /* if value_size is bigger, the user space won't be able to
> + * access the elements.
> + */
> + return -E2BIG;
> +
> + return 0;
> +}
> +
> +static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
> +{
> + int ret, numa_node = bpf_map_attr_numa_node(attr);
> + u32 max_entries, value_size, index_mask;
> + u64 queue_size, cost, mask64;
> + struct bpf_queue_stack *qs;
> +
> + max_entries = attr->max_entries;
> + value_size = attr->value_size;
> +
> + /* From arraymap.c:
> + * On 32 bit archs roundup_pow_of_two() with max_entries that has
> + * upper most bit set in u32 space is undefined behavior due to
> + * resulting 1U << 32, so do it manually here in u64 space.
> + */
> + mask64 = fls_long(max_entries - 1);
> + mask64 = 1ULL << mask64;
> + mask64 -= 1;
> +
> + index_mask = mask64;
> +
> + /* Round up queue size to nearest power of 2 */
> + max_entries = index_mask + 1;
what's the point of roundup ?
The memory waste becomes quite large when max_entries are high.
If queue/stack is sized to exact max_entries,
then 'count' can be removed too, right?
> + /* Check for overflows. */
> + if (max_entries < attr->max_entries)
> + return ERR_PTR(-E2BIG);
> +
> + queue_size = sizeof(*qs) + (u64) value_size * max_entries;
> +
> + cost = queue_size;
> + if (cost >= U32_MAX - PAGE_SIZE)
> + return ERR_PTR(-E2BIG);
> +
> + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
> +
> + ret = bpf_map_precharge_memlock(cost);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + qs = bpf_map_area_alloc(queue_size, numa_node);
> + if (!qs)
> + return ERR_PTR(-ENOMEM);
> +
> + memset(qs, 0, sizeof(*qs));
> +
> + bpf_map_init_from_attr(&qs->map, attr);
> +
> + qs->map.pages = cost;
> + qs->index_mask = index_mask;
> +
> + raw_spin_lock_init(&qs->lock);
> +
> + return &qs->map;
> +}
> +
> +/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
> +static void queue_stack_map_free(struct bpf_map *map)
> +{
> + struct bpf_queue_stack *qs = bpf_queue_stack(map);
> +
> + /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
> + * so the programs (can be more than one that used this map) were
> + * disconnected from events. Wait for outstanding critical sections in
> + * these programs to complete
> + */
> + synchronize_rcu();
> +
> + bpf_map_area_free(qs);
> +}
> +
> +static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
> +{
> + struct bpf_queue_stack *qs = bpf_queue_stack(map);
> + unsigned long flags;
> + int err = 0;
> + void *ptr;
> +
> + raw_spin_lock_irqsave(&qs->lock, flags);
> +
> + if (queue_stack_map_is_empty(qs)) {
> + err = -ENOENT;
> + goto out;
> + }
> +
> + ptr = &qs->elements[qs->tail * qs->map.value_size];
> + memcpy(value, ptr, qs->map.value_size);
> +
> + if (delete) {
> + qs->tail = (qs->tail + 1) & qs->index_mask;
> + qs->count--;
> + }
> +
> +out:
> + raw_spin_unlock_irqrestore(&qs->lock, flags);
> + return err;
> +}
> +
> +
> +static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
> +{
> + struct bpf_queue_stack *qs = bpf_queue_stack(map);
> + unsigned long flags;
> + int err = 0;
> + void *ptr;
> + u32 index;
> +
> + raw_spin_lock_irqsave(&qs->lock, flags);
> +
> + if (queue_stack_map_is_empty(qs)) {
> + err = -ENOENT;
> + goto out;
> + }
> +
> + index = (qs->head - 1) & qs->index_mask;
> + ptr = &qs->elements[index * qs->map.value_size];
> + memcpy(value, ptr, qs->map.value_size);
> +
> + if (delete) {
> + qs->head = (qs->head - 1) & qs->index_mask;
> + qs->count--;
> + }
> +
> +out:
> + raw_spin_unlock_irqrestore(&qs->lock, flags);
> + return err;
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int queue_map_peek_elem(struct bpf_map *map, void *value)
> +{
> + return __queue_map_get(map, value, false);
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int stack_map_peek_elem(struct bpf_map *map, void *value)
> +{
> + return __stack_map_get(map, value, false);
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int queue_map_pop_elem(struct bpf_map *map, void *value)
> +{
> + return __queue_map_get(map, value, true);
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int stack_map_pop_elem(struct bpf_map *map, void *value)
> +{
> + return __stack_map_get(map, value, true);
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
> + u64 flags)
> +{
> + struct bpf_queue_stack *qs = bpf_queue_stack(map);
> + unsigned long irq_flags;
> + int err = 0;
> + void *dst;
> +
> + /* BPF_EXIST is used to force making room for a new element in case the
> + * map is full
> + */
> + bool replace = (flags & BPF_EXIST);
> +
> + /* Check supported flags for queue and stack maps */
> + if (flags & BPF_NOEXIST || flags > BPF_EXIST)
> + return -EINVAL;
> +
> + raw_spin_lock_irqsave(&qs->lock, irq_flags);
> +
> + if (queue_stack_map_is_full(qs)) {
> + if (!replace) {
> + err = -E2BIG;
ENOSPC is probably more accurate ?
> + goto out;
> + }
> + /* advance tail pointer to overwrite oldest element */
'oldest' is ambiguous here.
For queue it's true, but for stack it's the last element.
Since stack is poping from the head, push w/exist flag will keep
overwriting the last element.
Pls explain it more clearly in helper description in bpf.h
> + qs->tail = (qs->tail + 1) & qs->index_mask;
> + qs->count--;
> + }
> +
> + dst = &qs->elements[qs->head * qs->map.value_size];
> + memcpy(dst, value, qs->map.value_size);
> +
> + qs->head = (qs->head + 1) & qs->index_mask;
> + qs->count++;
> +
> +out:
> + raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
> + return err;
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> + return NULL;
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
> + void *value, u64 flags)
> +{
> + return -EINVAL;
> +}
> +
> +/* Called from syscall or from eBPF program */
> +static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
> +{
> + return -EINVAL;
> +}
> +
> +/* Called from syscall */
> +static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
> + void *next_key)
> +{
> + return -EINVAL;
> +}
> +
> +const struct bpf_map_ops queue_map_ops = {
> + .map_alloc_check = queue_stack_map_alloc_check,
> + .map_alloc = queue_stack_map_alloc,
> + .map_free = queue_stack_map_free,
> + .map_lookup_elem = queue_stack_map_lookup_elem,
> + .map_update_elem = queue_stack_map_update_elem,
> + .map_delete_elem = queue_stack_map_delete_elem,
> + .map_push_elem = queue_stack_map_push_elem,
> + .map_pop_elem = queue_map_pop_elem,
> + .map_peek_elem = queue_map_peek_elem,
> + .map_get_next_key = queue_stack_map_get_next_key,
> +};
> +
> +const struct bpf_map_ops stack_map_ops = {
> + .map_alloc_check = queue_stack_map_alloc_check,
> + .map_alloc = queue_stack_map_alloc,
> + .map_free = queue_stack_map_free,
> + .map_lookup_elem = queue_stack_map_lookup_elem,
> + .map_update_elem = queue_stack_map_update_elem,
> + .map_delete_elem = queue_stack_map_delete_elem,
> + .map_push_elem = queue_stack_map_push_elem,
> + .map_pop_elem = stack_map_pop_elem,
> + .map_peek_elem = stack_map_peek_elem,
> + .map_get_next_key = queue_stack_map_get_next_key,
> +};
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 50957e243bfb..c46bf2d38be3 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -727,6 +727,9 @@ static int map_lookup_elem(union bpf_attr *attr)
> err = bpf_fd_htab_map_lookup_elem(map, key, value);
> } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
> err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
> + } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> + map->map_type == BPF_MAP_TYPE_STACK) {
> + err = map->ops->map_peek_elem(map, value);
> } else {
> rcu_read_lock();
> ptr = map->ops->map_lookup_elem(map, key);
> @@ -841,6 +844,9 @@ static int map_update_elem(union bpf_attr *attr)
> /* rcu_read_lock() is not needed */
> err = bpf_fd_reuseport_array_update_elem(map, key, value,
> attr->flags);
> + } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> + map->map_type == BPF_MAP_TYPE_STACK) {
> + err = map->ops->map_push_elem(map, value, attr->flags);
> } else {
> rcu_read_lock();
> err = map->ops->map_update_elem(map, key, value, attr->flags);
> @@ -1001,11 +1007,6 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> goto err_put;
> }
>
> - if (!map->ops->map_lookup_and_delete_elem) {
> - err = -ENOTSUPP;
> - goto err_put;
> - }
> -
> key = __bpf_copy_key(ukey, map->key_size);
> if (IS_ERR(key)) {
> err = PTR_ERR(key);
> @@ -1028,12 +1029,22 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
> */
> preempt_disable();
> __this_cpu_inc(bpf_prog_active);
> - rcu_read_lock();
> - ptr = map->ops->map_lookup_and_delete_elem(map, key);
> - if (ptr)
> - memcpy(value, ptr, value_size);
> - rcu_read_unlock();
> + if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> + map->map_type == BPF_MAP_TYPE_STACK) {
> + err = map->ops->map_pop_elem(map, value);
please clean up the paches, so that patch 4 doesn't immediately
deletes the lines that were introduced in patch 3.
Otherwise what was the point of them in patch 3?
> + } else {
> + if (!map->ops->map_lookup_and_delete_elem) {
> + err = -ENOTSUPP;
> + goto free_value;
> + }
> + rcu_read_lock();
> + ptr = map->ops->map_lookup_and_delete_elem(map, key);
> + if (ptr)
> + memcpy(value, ptr, value_size);
> + rcu_read_unlock();
> err = ptr ? 0 : -ENOENT;
> + }
> +
> __this_cpu_dec(bpf_prog_active);
> preempt_enable();
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 73c81bef6ae8..489667f93061 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2121,7 +2121,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
> }
>
> if (arg_type == ARG_PTR_TO_MAP_KEY ||
> - arg_type == ARG_PTR_TO_MAP_VALUE) {
> + arg_type == ARG_PTR_TO_MAP_VALUE ||
> + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
> expected_type = PTR_TO_STACK;
> if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
> type != expected_type)
> @@ -2191,7 +2192,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
> err = check_helper_mem_access(env, regno,
> meta->map_ptr->key_size, false,
> NULL);
> - } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
> + } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
> + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
> /* bpf_map_xxx(..., map_ptr, ..., value) call:
> * check [value, value + map->value_size) validity
> */
> @@ -2200,9 +2202,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
> verbose(env, "invalid map_ptr to access map->value\n");
> return -EACCES;
> }
> + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
> err = check_helper_mem_access(env, regno,
> meta->map_ptr->value_size, false,
> - NULL);
> + meta);
> } else if (arg_type_is_mem_size(arg_type)) {
> bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
>
> @@ -2676,7 +2679,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
> if (func_id != BPF_FUNC_tail_call &&
> func_id != BPF_FUNC_map_lookup_elem &&
> func_id != BPF_FUNC_map_update_elem &&
> - func_id != BPF_FUNC_map_delete_elem)
> + func_id != BPF_FUNC_map_delete_elem &&
> + func_id != BPF_FUNC_map_push_elem &&
> + func_id != BPF_FUNC_map_pop_elem &&
> + func_id != BPF_FUNC_map_peek_elem)
> return 0;
>
> if (meta->map_ptr == NULL) {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 591c698bc517..40736e0d9cff 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4993,6 +4993,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
> return &bpf_map_update_elem_proto;
> case BPF_FUNC_map_delete_elem:
> return &bpf_map_delete_elem_proto;
> + case BPF_FUNC_map_push_elem:
> + return &bpf_map_push_elem_proto;
> + case BPF_FUNC_map_pop_elem:
> + return &bpf_map_pop_elem_proto;
> + case BPF_FUNC_map_peek_elem:
> + return &bpf_map_peek_elem_proto;
> case BPF_FUNC_get_prandom_u32:
> return &bpf_get_prandom_u32_proto;
> case BPF_FUNC_get_smp_processor_id:
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [RFC PATCH bpf-next v4 5/7] bpf: restrict use of peek/push/pop
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
` (3 preceding siblings ...)
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps Mauricio Vasquez B
@ 2018-10-04 17:12 ` Mauricio Vasquez B
2018-10-04 23:57 ` Alexei Starovoitov
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 6/7] Sync uapi/bpf.h to tools/include Mauricio Vasquez B
2018-10-04 17:13 ` [RFC PATCH bpf-next v4 7/7] selftests/bpf: add test cases for queue and stack maps Mauricio Vasquez B
6 siblings, 1 reply; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:12 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
Restrict the use of peek, push and pop helpers only to queue and stack
maps.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
kernel/bpf/verifier.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 489667f93061..8b1f1b348782 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2328,6 +2328,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ if (func_id != BPF_FUNC_map_peek_elem &&
+ func_id != BPF_FUNC_map_pop_elem &&
+ func_id != BPF_FUNC_map_push_elem)
+ goto error;
+ break;
default:
break;
}
@@ -2384,6 +2391,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
goto error;
break;
+ case BPF_FUNC_map_peek_elem:
+ case BPF_FUNC_map_pop_elem:
+ case BPF_FUNC_map_push_elem:
+ if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+ map->map_type != BPF_MAP_TYPE_STACK)
+ goto error;
+ break;
default:
break;
}
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [RFC PATCH bpf-next v4 5/7] bpf: restrict use of peek/push/pop
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 5/7] bpf: restrict use of peek/push/pop Mauricio Vasquez B
@ 2018-10-04 23:57 ` Alexei Starovoitov
0 siblings, 0 replies; 14+ messages in thread
From: Alexei Starovoitov @ 2018-10-04 23:57 UTC (permalink / raw)
To: Mauricio Vasquez B; +Cc: Alexei Starovoitov, Daniel Borkmann, netdev
On Thu, Oct 04, 2018 at 07:12:49PM +0200, Mauricio Vasquez B wrote:
> Restrict the use of peek, push and pop helpers only to queue and stack
> maps.
>
> Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
> ---
> kernel/bpf/verifier.c | 14 ++++++++++++++
> 1 file changed, 14 insertions(+)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 489667f93061..8b1f1b348782 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2328,6 +2328,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
> if (func_id != BPF_FUNC_sk_select_reuseport)
> goto error;
> break;
> + case BPF_MAP_TYPE_QUEUE:
> + case BPF_MAP_TYPE_STACK:
> + if (func_id != BPF_FUNC_map_peek_elem &&
> + func_id != BPF_FUNC_map_pop_elem &&
> + func_id != BPF_FUNC_map_push_elem)
> + goto error;
why this is separate patch?
I think it should be part of previous patch.
> + break;
> default:
> break;
> }
> @@ -2384,6 +2391,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
> if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
> goto error;
> break;
> + case BPF_FUNC_map_peek_elem:
> + case BPF_FUNC_map_pop_elem:
> + case BPF_FUNC_map_push_elem:
> + if (map->map_type != BPF_MAP_TYPE_QUEUE &&
> + map->map_type != BPF_MAP_TYPE_STACK)
> + goto error;
> + break;
> default:
> break;
> }
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [RFC PATCH bpf-next v4 6/7] Sync uapi/bpf.h to tools/include
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
` (4 preceding siblings ...)
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 5/7] bpf: restrict use of peek/push/pop Mauricio Vasquez B
@ 2018-10-04 17:12 ` Mauricio Vasquez B
2018-10-04 17:13 ` [RFC PATCH bpf-next v4 7/7] selftests/bpf: add test cases for queue and stack maps Mauricio Vasquez B
6 siblings, 0 replies; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:12 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
Sync both files.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
tools/include/uapi/linux/bpf.h | 36 ++++++++++++++++++++++++++++++++----
1 file changed, 32 insertions(+), 4 deletions(-)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..bfa042273fad 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -103,6 +103,7 @@ enum bpf_cmd {
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
};
enum bpf_map_type {
@@ -128,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
};
enum bpf_prog_type {
@@ -462,6 +465,28 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
@@ -789,14 +814,14 @@ union bpf_attr {
*
* int ret;
* struct bpf_tunnel_key key = {};
- *
+ *
* ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
* if (ret < 0)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* if (key.remote_ipv4 != 0x0a000001)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* return TC_ACT_OK; // accept packet
*
* This interface can also be used with all encapsulation devices
@@ -2303,7 +2328,10 @@ union bpf_attr {
FN(skb_ancestor_cgroup_id), \
FN(sk_lookup_tcp), \
FN(sk_lookup_udp), \
- FN(sk_release),
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [RFC PATCH bpf-next v4 7/7] selftests/bpf: add test cases for queue and stack maps
2018-10-04 17:12 [RFC PATCH bpf-next v4 0/7] Implement bpf queue/stack maps Mauricio Vasquez B
` (5 preceding siblings ...)
2018-10-04 17:12 ` [RFC PATCH bpf-next v4 6/7] Sync uapi/bpf.h to tools/include Mauricio Vasquez B
@ 2018-10-04 17:13 ` Mauricio Vasquez B
2018-10-04 23:59 ` Alexei Starovoitov
6 siblings, 1 reply; 14+ messages in thread
From: Mauricio Vasquez B @ 2018-10-04 17:13 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, netdev
Two types of tests are done:
- test_maps: only userspace api.
- test_progs: userspace api and ebpf helpers.
Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
---
tools/lib/bpf/bpf.c | 12 ++
tools/lib/bpf/bpf.h | 1
tools/testing/selftests/bpf/Makefile | 6 +
tools/testing/selftests/bpf/bpf_helpers.h | 7 +
tools/testing/selftests/bpf/test_maps.c | 122 ++++++++++++++++++++
tools/testing/selftests/bpf/test_progs.c | 99 ++++++++++++++++
tools/testing/selftests/bpf/test_queue_map.c | 4 +
tools/testing/selftests/bpf/test_queue_stack_map.h | 59 ++++++++++
tools/testing/selftests/bpf/test_stack_map.c | 4 +
9 files changed, 313 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
create mode 100644 tools/testing/selftests/bpf/test_stack_map.c
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 3878a26a2071..13810c88e1b6 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
int bpf_map_delete_elem(int fd, const void *key)
{
union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 6f38164b2618..6134ed9517d3 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -86,6 +86,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
__u64 flags);
int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, const void *value);
int bpf_map_delete_elem(int fd, const void *key);
int bpf_map_get_next_key(int fd, const void *key, void *next_key);
int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1381ab81099c..f78cf72832aa 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -36,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
- test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o
+ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o test_sk_lookup_kern.o \
+ test_queue_map.o test_stack_map.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -114,6 +115,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
+$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
+
BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 1d407b3494f9..58dfcb88f9b4 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value,
(void *) BPF_FUNC_map_update_elem;
static int (*bpf_map_delete_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+ (void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+ (void *) BPF_FUNC_map_peek_elem;
static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
(void *) BPF_FUNC_probe_read;
static unsigned long long (*bpf_ktime_get_ns)(void) =
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 9b552c0fc47d..4db2116e52be 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -15,6 +15,7 @@
#include <string.h>
#include <assert.h>
#include <stdlib.h>
+#include <time.h>
#include <sys/wait.h>
#include <sys/socket.h>
@@ -471,6 +472,122 @@ static void test_devmap(int task, void *data)
close(fd);
}
+static void test_queuemap(int task, void *data)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ int fd, i;
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+ vals[i] = rand();
+
+ /* Invalid key size */
+ fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
+ map_flags);
+ assert(fd < 0 && errno == EINVAL);
+
+ fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
+ map_flags);
+ /* Queue map does not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create queuemap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ /* Push MAP_SIZE elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+ /* Check that element cannot be pushed due to max_entries limit */
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ errno == E2BIG);
+
+ /* Peek element */
+ assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]);
+
+ /* Replace half elements */
+ for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+ /* Pop all elements */
+ for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+ val == vals[i]);
+
+ /* Check that there are not elements left */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ errno == ENOENT);
+
+ /* Check that non supported functions set errno to EINVAL */
+ assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_stackmap(int task, void *data)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ int fd, i;
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++)
+ vals[i] = rand();
+
+ /* Invalid key size */
+ fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
+ map_flags);
+ assert(fd < 0 && errno == EINVAL);
+
+ fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
+ map_flags);
+ /* Stack map does not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create stackmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ /* Push MAP_SIZE elements */
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
+
+ /* Check that element cannot be pushed due to max_entries limit */
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ errno == E2BIG);
+
+ /* Peek element */
+ assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]);
+
+ /* Replace half elements */
+ for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++)
+ assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0);
+
+ /* Pop all elements */
+ for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--)
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 &&
+ val == vals[i]);
+
+ /* Check that there are not elements left */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ errno == ENOENT);
+
+ /* Check that non supported functions set errno to EINVAL */
+ assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
@@ -1434,10 +1551,15 @@ static void run_all_tests(void)
test_map_wronly();
test_reuseport_array();
+
+ test_queuemap(0, NULL);
+ test_stackmap(0, NULL);
}
int main(void)
{
+ srand(time(NULL));
+
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index e8becca9c521..2d3c04f45530 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1735,8 +1735,105 @@ static void test_reference_tracking()
bpf_object__close(obj);
}
+enum {
+ QUEUE,
+ STACK,
+};
+
+static void test_queue_stack_map(int type)
+{
+ const int MAP_SIZE = 32;
+ __u32 vals[MAP_SIZE], duration, retval, size, val;
+ int i, err, prog_fd, map_in_fd, map_out_fd;
+ char file[32], buf[128];
+ struct bpf_object *obj;
+ struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+
+ /* Fill test values to be used */
+ for (i = 0; i < MAP_SIZE; i++)
+ vals[i] = rand();
+
+ if (type == QUEUE)
+ strncpy(file, "./test_queue_map.o", sizeof(file));
+ else if (type == STACK)
+ strncpy(file, "./test_stack_map.o", sizeof(file));
+ else
+ return;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_in_fd = bpf_find_map(__func__, obj, "map_in");
+ if (map_in_fd < 0)
+ goto out;
+
+ map_out_fd = bpf_find_map(__func__, obj, "map_out");
+ if (map_out_fd < 0)
+ goto out;
+
+ /* Push 32 elements to the input map */
+ for (i = 0; i < MAP_SIZE; i++) {
+ err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0);
+ if (err) {
+ error_cnt++;
+ goto out;
+ }
+ }
+
+ /* The eBPF program pushes iph.saddr in the output map,
+ * pops the input map and saves this value in iph.daddr
+ */
+ for (i = 0; i < MAP_SIZE; i++) {
+ if (type == QUEUE) {
+ val = vals[i];
+ pkt_v4.iph.saddr = vals[i] * 5;
+ } else if (type == STACK) {
+ val = vals[MAP_SIZE - 1 - i];
+ pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ if (err || retval || size != sizeof(pkt_v4) ||
+ iph->daddr != val)
+ break;
+ }
+
+ CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
+ "bpf_map_pop_elem",
+ "err %d errno %d retval %d size %d iph->daddr %u\n",
+ err, errno, retval, size, iph->daddr);
+
+ /* Queue is empty, program should return TC_ACT_SHOT */
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
+ "check-queue-stack-map-empty",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ /* Check that the program pushed elements correctly */
+ for (i = 0; i < MAP_SIZE; i++) {
+ err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
+ if (err || val != vals[i] * 5)
+ break;
+ }
+
+ CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
+ "bpf_map_push_elem", "err %d value %u\n", err, val);
+
+out:
+ pkt_v4.iph.saddr = 0;
+ bpf_object__close(obj);
+}
+
int main(void)
{
+ srand(time(NULL));
+
jit_enabled = is_jit_enabled();
test_pkt_access();
@@ -1757,6 +1854,8 @@ int main(void)
test_task_fd_query_rawtp();
test_task_fd_query_tp();
test_reference_tracking();
+ test_queue_stack_map(QUEUE);
+ test_queue_stack_map(STACK);
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c
new file mode 100644
index 000000000000..87db1f9da33d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_QUEUE
+#include "test_queue_stack_map.h"
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
new file mode 100644
index 000000000000..295b9b3bc5c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2018 Politecnico di Torino
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
+ .type = MAP_TYPE,
+ .key_size = 0,
+ .value_size = sizeof(__u32),
+ .max_entries = 32,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
+ .type = MAP_TYPE,
+ .key_size = 0,
+ .value_size = sizeof(__u32),
+ .max_entries = 32,
+ .map_flags = 0,
+};
+
+SEC("test")
+int _test(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ __u32 value;
+ int err;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ err = bpf_map_pop_elem(&map_in, &value);
+ if (err)
+ return TC_ACT_SHOT;
+
+ iph->daddr = value;
+
+ err = bpf_map_push_elem(&map_out, &iph->saddr, 0);
+ if (err)
+ return TC_ACT_SHOT;
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c
new file mode 100644
index 000000000000..31c3880e6da0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stack_map.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Politecnico di Torino
+#define MAP_TYPE BPF_MAP_TYPE_STACK
+#include "test_queue_stack_map.h"
^ permalink raw reply related [flat|nested] 14+ messages in thread