From mboxrd@z Thu Jan 1 00:00:00 1970 From: Daniel Borkmann Subject: Re: [PATCH bpf-next v6 2/4] bpf: sockmap, add hash map support Date: Tue, 15 May 2018 21:01:38 +0200 Message-ID: References: <1526317219-7752-1-git-send-email-john.fastabend@gmail.com> <1526317219-7752-3-git-send-email-john.fastabend@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Cc: netdev@vger.kernel.org, davem@davemloft.net To: John Fastabend , ast@kernel.org Return-path: Received: from www62.your-server.de ([213.133.104.62]:47768 "EHLO www62.your-server.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752272AbeEOTBn (ORCPT ); Tue, 15 May 2018 15:01:43 -0400 In-Reply-To: <1526317219-7752-3-git-send-email-john.fastabend@gmail.com> Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 05/14/2018 07:00 PM, John Fastabend wrote: > Sockmap is currently backed by an array and enforces keys to be > four bytes. This works well for many use cases and was originally > modeled after devmap which also uses four bytes keys. However, > this has become limiting in larger use cases where a hash would > be more appropriate. For example users may want to use the 5-tuple > of the socket as the lookup key. > > To support this add hash support. > > Signed-off-by: John Fastabend > Acked-by: David S. Miller > --- > include/linux/bpf.h | 8 + > include/linux/bpf_types.h | 1 + > include/uapi/linux/bpf.h | 52 ++++- > kernel/bpf/core.c | 1 + > kernel/bpf/sockmap.c | 494 ++++++++++++++++++++++++++++++++++++++++++++-- > kernel/bpf/verifier.c | 14 +- > net/core/filter.c | 58 ++++++ > 7 files changed, 610 insertions(+), 18 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index a38e474..ed0122b 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -668,6 +668,7 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) > > #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) > struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); > +struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); > int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); > #else > static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) > @@ -675,6 +676,12 @@ static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) > return NULL; > } > > +static inline struct sock *__sock_hash_lookup_elem(struct bpf_map *map, > + void *key) > +{ > + return NULL; > +} > + > static inline int sock_map_prog(struct bpf_map *map, > struct bpf_prog *prog, > u32 type) > @@ -724,6 +731,7 @@ static inline void __xsk_map_flush(struct bpf_map *map) > extern const struct bpf_func_proto bpf_get_stackid_proto; > extern const struct bpf_func_proto bpf_get_stack_proto; > extern const struct bpf_func_proto bpf_sock_map_update_proto; > +extern const struct bpf_func_proto bpf_sock_hash_update_proto; > > /* Shared helpers among cBPF and eBPF. */ > void bpf_user_rnd_init_once(void); > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index d7df1b32..b67f879 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -47,6 +47,7 @@ > BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) > #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET) > BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) > +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) > #endif > BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) > #if defined(CONFIG_XDP_SOCKETS) > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 02e4112..1205d86 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -118,6 +118,7 @@ enum bpf_map_type { > BPF_MAP_TYPE_SOCKMAP, > BPF_MAP_TYPE_CPUMAP, > BPF_MAP_TYPE_XSKMAP, > + BPF_MAP_TYPE_SOCKHASH, > }; > > enum bpf_prog_type { > @@ -1855,6 +1856,52 @@ struct bpf_stack_build_id { > * Egress device index on success, 0 if packet needs to continue > * up the stack for further processing or a negative error in case > * of failure. > + * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) When you rebase please fix this up properly next time and add a newline in between the helpers. I fixed this up while applying. > + * Description > + * Add an entry to, or update a sockhash *map* referencing sockets. > + * The *skops* is used as a new value for the entry associated to > + * *key*. *flags* is one of: > + * > + * **BPF_NOEXIST** > + * The entry for *key* must not exist in the map. > + * **BPF_EXIST** > + * The entry for *key* must already exist in the map. > + * **BPF_ANY** > + * No condition on the existence of the entry for *key*. > + * > + * If the *map* has eBPF programs (parser and verdict), those will > + * be inherited by the socket being added. If the socket is > + * already attached to eBPF programs, this results in an error. > + * Return > + * 0 on success, or a negative error in case of failure. [...] > +static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) > +{ > + struct bpf_htab *htab; > + int i, err; > + u64 cost; > + > + if (!capable(CAP_NET_ADMIN)) > + return ERR_PTR(-EPERM); > + > + /* check sanity of attributes */ > + if (attr->max_entries == 0 || attr->value_size != 4 || > + attr->map_flags & ~SOCK_CREATE_FLAG_MASK) > + return ERR_PTR(-EINVAL); > + > + err = bpf_tcp_ulp_register(); > + if (err && err != -EEXIST) > + return ERR_PTR(err); > + > + htab = kzalloc(sizeof(*htab), GFP_USER); > + if (!htab) > + return ERR_PTR(-ENOMEM); > + > + bpf_map_init_from_attr(&htab->map, attr); > + > + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); > + htab->elem_size = sizeof(struct htab_elem) + > + round_up(htab->map.key_size, 8); > + > + if (htab->n_buckets == 0 || > + htab->n_buckets > U32_MAX / sizeof(struct bucket)) > + goto free_htab; > + > + cost = (u64) htab->n_buckets * sizeof(struct bucket) + > + (u64) htab->elem_size * htab->map.max_entries; > + > + if (cost >= U32_MAX - PAGE_SIZE) > + goto free_htab; Also above two goto free_htab were buggy! Error after bpf_tcp_ulp_register() is either 0 or -EEXIST, if it's 0, then when you bail out here this will cause a NULL pointer dereference in subsequent map setup paths. Adapting the same logic from sock_map_alloc() would have been enough. I've added a err = -EINVAL to the first occasion above this time. > + htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; > + err = bpf_map_precharge_memlock(htab->map.pages); > + if (err) > + goto free_htab; > + > + err = -ENOMEM; > + htab->buckets = bpf_map_area_alloc( > + htab->n_buckets * sizeof(struct bucket), > + htab->map.numa_node); > + if (!htab->buckets) > + goto free_htab; > + > + for (i = 0; i < htab->n_buckets; i++) { > + INIT_HLIST_HEAD(&htab->buckets[i].head); > + raw_spin_lock_init(&htab->buckets[i].lock); > + } > + > + return &htab->map; > +free_htab: > + kfree(htab); > + return ERR_PTR(err); > +}