From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexei Starovoitov Subject: Re: [RFC PATCH bpf-next v4 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall Date: Thu, 4 Oct 2018 16:37:44 -0700 Message-ID: <20181004233742.gtnj7ha6uaqggywn@ast-mbp.dhcp.thefacebook.com> References: <153867314370.10087.2202062772042248653.stgit@kernel> <153867315936.10087.889230344298708557.stgit@kernel> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: Alexei Starovoitov , Daniel Borkmann , netdev@vger.kernel.org To: Mauricio Vasquez B Return-path: Received: from mail-pl1-f196.google.com ([209.85.214.196]:45973 "EHLO mail-pl1-f196.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726716AbeJEGdi (ORCPT ); Fri, 5 Oct 2018 02:33:38 -0400 Received: by mail-pl1-f196.google.com with SMTP id y15-v6so5903359plr.12 for ; Thu, 04 Oct 2018 16:37:48 -0700 (PDT) Content-Disposition: inline In-Reply-To: <153867315936.10087.889230344298708557.stgit@kernel> Sender: netdev-owner@vger.kernel.org List-ID: On Thu, Oct 04, 2018 at 07:12:39PM +0200, Mauricio Vasquez B wrote: > The following patch implements a bpf queue/stack maps that > provides the peek/pop/push functions. There is not a direct > relationship between those functions and the current maps > syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added, > this is mapped to the pop operation in the queue/stack maps > and it is still to implement in other kind of maps. 
> > Signed-off-by: Mauricio Vasquez B > --- > include/linux/bpf.h | 1 + > include/uapi/linux/bpf.h | 1 + > kernel/bpf/syscall.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 84 insertions(+) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 027697b6a22f..98c7eeb6d138 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -39,6 +39,7 @@ struct bpf_map_ops { > void *(*map_lookup_elem)(struct bpf_map *map, void *key); > int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); > int (*map_delete_elem)(struct bpf_map *map, void *key); > + void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); > > /* funcs called by prog_array and perf_event_array map */ > void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index f9187b41dff6..3bb94aa2d408 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -103,6 +103,7 @@ enum bpf_cmd { > BPF_BTF_LOAD, > BPF_BTF_GET_FD_BY_ID, > BPF_TASK_FD_QUERY, > + BPF_MAP_LOOKUP_AND_DELETE_ELEM, > }; > > enum bpf_map_type { > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index eb75e8af73ff..50957e243bfb 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -975,6 +975,85 @@ static int map_get_next_key(union bpf_attr *attr) > return err; > } > > +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value > + > +static int map_lookup_and_delete_elem(union bpf_attr *attr) > +{ > + void __user *ukey = u64_to_user_ptr(attr->key); > + void __user *uvalue = u64_to_user_ptr(attr->value); > + int ufd = attr->map_fd; > + struct bpf_map *map; > + void *key, *value, *ptr; > + u32 value_size; > + struct fd f; > + int err; > + > + if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) copy-paste error: this should be CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM) > + return -EINVAL; > + > + f = fdget(ufd); > + map = __bpf_map_get(f); > + if (IS_ERR(map)) > + return PTR_ERR(map); > + > + if (!(f.file->f_mode & 
FMODE_CAN_WRITE)) { > + err = -EPERM; > + goto err_put; > + } > + > + if (!map->ops->map_lookup_and_delete_elem) { > + err = -ENOTSUPP; > + goto err_put; > + } > + > + key = __bpf_copy_key(ukey, map->key_size); > + if (IS_ERR(key)) { > + err = PTR_ERR(key); > + goto err_put; > + } > + > + value_size = map->value_size; > + > + err = -ENOMEM; > + value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); > + if (!value) > + goto free_key; > + > + err = -EFAULT; > + if (copy_from_user(value, uvalue, value_size) != 0) > + goto free_value; > + > + /* must increment bpf_prog_active to avoid kprobe+bpf triggering from > + * inside bpf map update or delete otherwise deadlocks are possible > + */ > + preempt_disable(); > + __this_cpu_inc(bpf_prog_active); > + rcu_read_lock(); > + ptr = map->ops->map_lookup_and_delete_elem(map, key); > + if (ptr) > + memcpy(value, ptr, value_size); > + rcu_read_unlock(); > + err = ptr ? 0 : -ENOENT; > + __this_cpu_dec(bpf_prog_active); > + preempt_enable(); messed up formatting > + > + if (err) > + goto free_value; > + > + if (copy_to_user(uvalue, value, value_size) != 0) > + goto free_value; > + > + err = 0; > + > +free_value: > + kfree(value); > +free_key: > + kfree(key); > +err_put: > + fdput(f); > + return err; > +} > + > static const struct bpf_prog_ops * const bpf_prog_types[] = { > #define BPF_PROG_TYPE(_id, _name) \ > [_id] = & _name ## _prog_ops, > @@ -2448,6 +2527,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz > case BPF_TASK_FD_QUERY: > err = bpf_task_fd_query(&attr, uattr); > break; > + case BPF_MAP_LOOKUP_AND_DELETE_ELEM: > + err = map_lookup_and_delete_elem(&attr); > + break; > default: > err = -EINVAL; > break; >