netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall
@ 2021-08-17 15:45 Stanislav Fomichev
  2021-08-17 15:45 ` [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls Stanislav Fomichev
  2021-08-17 16:19 ` [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall Song Liu
  0 siblings, 2 replies; 6+ messages in thread
From: Stanislav Fomichev @ 2021-08-17 15:45 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev

Use kvmalloc/kvfree for temporary value when manipulating a map via
syscall. kmalloc might not be sufficient for percpu maps where the value
is big (and further multiplied by hundreds of CPUs).

Can be reproduced with netcnt test on qemu with "-smp 255".

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 kernel/bpf/syscall.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7420e1334ab2..075f650d297a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1076,7 +1076,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -1091,7 +1091,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 	err = 0;
 
 free_value:
-	kfree(value);
+	kvfree(value);
 free_key:
 	kfree(key);
 err_put:
@@ -1137,16 +1137,10 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 		goto err_put;
 	}
 
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
-		value_size = round_up(map->value_size, 8) * num_possible_cpus();
-	else
-		value_size = map->value_size;
+	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -1157,7 +1151,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 	err = bpf_map_update_value(map, f, key, value, attr->flags);
 
 free_value:
-	kfree(value);
+	kvfree(value);
 free_key:
 	kfree(key);
 err_put:
@@ -1367,7 +1361,7 @@ int generic_map_update_batch(struct bpf_map *map,
 	if (!key)
 		return -ENOMEM;
 
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value) {
 		kfree(key);
 		return -ENOMEM;
@@ -1390,7 +1384,7 @@ int generic_map_update_batch(struct bpf_map *map,
 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
 		err = -EFAULT;
 
-	kfree(value);
+	kvfree(value);
 	kfree(key);
 	return err;
 }
@@ -1429,7 +1423,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	if (!buf_prevkey)
 		return -ENOMEM;
 
-	buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
 	if (!buf) {
 		kfree(buf_prevkey);
 		return -ENOMEM;
@@ -1492,7 +1486,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 
 free_buf:
 	kfree(buf_prevkey);
-	kfree(buf);
+	kvfree(buf);
 	return err;
 }
 
@@ -1547,7 +1541,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	value_size = bpf_map_value_size(map);
 
 	err = -ENOMEM;
-	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -1579,7 +1573,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	err = 0;
 
 free_value:
-	kfree(value);
+	kvfree(value);
 free_key:
 	kfree(key);
 err_put:
-- 
2.33.0.rc1.237.g0d66db33f3-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls
  2021-08-17 15:45 [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall Stanislav Fomichev
@ 2021-08-17 15:45 ` Stanislav Fomichev
  2021-08-17 16:25   ` Song Liu
  2021-08-18 22:24   ` Daniel Borkmann
  2021-08-17 16:19 ` [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall Song Liu
  1 sibling, 2 replies; 6+ messages in thread
From: Stanislav Fomichev @ 2021-08-17 15:45 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev

Same as previous patch but for the keys. memdup_bpfptr is renamed
to vmemdup_bpfptr (and converted to kvmalloc).

Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 include/linux/bpfptr.h | 12 ++++++++++--
 kernel/bpf/syscall.c   | 34 +++++++++++++++++-----------------
 2 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 5cdeab497cb3..84eeffb4316a 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
 	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
 }
 
-static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
+static inline void *vmemdup_bpfptr(bpfptr_t src, size_t len)
 {
-	return memdup_sockptr((sockptr_t) src, len);
+	void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
+
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+	if (copy_from_sockptr(p, (sockptr_t) src, len)) {
+		kvfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+	return p;
 }
 
 static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 075f650d297a..ac230f4234cd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1013,7 +1013,7 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 {
 	if (key_size)
-		return memdup_user(ukey, key_size);
+		return vmemdup_user(ukey, key_size);
 
 	if (ukey)
 		return ERR_PTR(-EINVAL);
@@ -1024,7 +1024,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
 {
 	if (key_size)
-		return memdup_bpfptr(ukey, key_size);
+		return vmemdup_bpfptr(ukey, key_size);
 
 	if (!bpfptr_is_null(ukey))
 		return ERR_PTR(-EINVAL);
@@ -1093,7 +1093,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 free_value:
 	kvfree(value);
 free_key:
-	kfree(key);
+	kvfree(key);
 err_put:
 	fdput(f);
 	return err;
@@ -1153,7 +1153,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 free_value:
 	kvfree(value);
 free_key:
-	kfree(key);
+	kvfree(key);
 err_put:
 	fdput(f);
 	return err;
@@ -1205,7 +1205,7 @@ static int map_delete_elem(union bpf_attr *attr)
 	bpf_enable_instrumentation();
 	maybe_wait_bpf_programs(map);
 out:
-	kfree(key);
+	kvfree(key);
 err_put:
 	fdput(f);
 	return err;
@@ -1247,7 +1247,7 @@ static int map_get_next_key(union bpf_attr *attr)
 	}
 
 	err = -ENOMEM;
-	next_key = kmalloc(map->key_size, GFP_USER);
+	next_key = kvmalloc(map->key_size, GFP_USER);
 	if (!next_key)
 		goto free_key;
 
@@ -1270,9 +1270,9 @@ static int map_get_next_key(union bpf_attr *attr)
 	err = 0;
 
 free_next_key:
-	kfree(next_key);
+	kvfree(next_key);
 free_key:
-	kfree(key);
+	kvfree(key);
 err_put:
 	fdput(f);
 	return err;
@@ -1299,7 +1299,7 @@ int generic_map_delete_batch(struct bpf_map *map,
 	if (!max_count)
 		return 0;
 
-	key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
 	if (!key)
 		return -ENOMEM;
 
@@ -1326,7 +1326,7 @@ int generic_map_delete_batch(struct bpf_map *map,
 	if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
 		err = -EFAULT;
 
-	kfree(key);
+	kvfree(key);
 	return err;
 }
 
@@ -1357,13 +1357,13 @@ int generic_map_update_batch(struct bpf_map *map,
 	if (!max_count)
 		return 0;
 
-	key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+	key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
 	if (!key)
 		return -ENOMEM;
 
 	value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
 	if (!value) {
-		kfree(key);
+		kvfree(key);
 		return -ENOMEM;
 	}
 
@@ -1385,7 +1385,7 @@ int generic_map_update_batch(struct bpf_map *map,
 		err = -EFAULT;
 
 	kvfree(value);
-	kfree(key);
+	kvfree(key);
 	return err;
 }
 
@@ -1419,13 +1419,13 @@ int generic_map_lookup_batch(struct bpf_map *map,
 	if (put_user(0, &uattr->batch.count))
 		return -EFAULT;
 
-	buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+	buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
 	if (!buf_prevkey)
 		return -ENOMEM;
 
 	buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
 	if (!buf) {
-		kfree(buf_prevkey);
+		kvfree(buf_prevkey);
 		return -ENOMEM;
 	}
 
@@ -1485,7 +1485,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 		err = -EFAULT;
 
 free_buf:
-	kfree(buf_prevkey);
+	kvfree(buf_prevkey);
 	kvfree(buf);
 	return err;
 }
@@ -1575,7 +1575,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 free_value:
 	kvfree(value);
 free_key:
-	kfree(key);
+	kvfree(key);
 err_put:
 	fdput(f);
 	return err;
-- 
2.33.0.rc1.237.g0d66db33f3-goog


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall
  2021-08-17 15:45 [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall Stanislav Fomichev
  2021-08-17 15:45 ` [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls Stanislav Fomichev
@ 2021-08-17 16:19 ` Song Liu
  1 sibling, 0 replies; 6+ messages in thread
From: Song Liu @ 2021-08-17 16:19 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko

On Tue, Aug 17, 2021 at 8:46 AM Stanislav Fomichev <sdf@google.com> wrote:
>
> Use kvmalloc/kvfree for temporary value when manipulating a map via
> syscall. kmalloc might not be sufficient for percpu maps where the value
> is big (and further multiplied by hundreds of CPUs).
>
> Can be reproduced with netcnt test on qemu with "-smp 255".
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>

Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls
  2021-08-17 15:45 ` [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls Stanislav Fomichev
@ 2021-08-17 16:25   ` Song Liu
  2021-08-18 22:24   ` Daniel Borkmann
  1 sibling, 0 replies; 6+ messages in thread
From: Song Liu @ 2021-08-17 16:25 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko

On Tue, Aug 17, 2021 at 8:46 AM Stanislav Fomichev <sdf@google.com> wrote:
>
> Same as previous patch but for the keys. memdup_bpfptr is renamed
> to vmemdup_bpfptr (and converted to kvmalloc).
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>

Acked-by: Song Liu <songliubraving@fb.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls
  2021-08-17 15:45 ` [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls Stanislav Fomichev
  2021-08-17 16:25   ` Song Liu
@ 2021-08-18 22:24   ` Daniel Borkmann
  2021-08-18 23:02     ` sdf
  1 sibling, 1 reply; 6+ messages in thread
From: Daniel Borkmann @ 2021-08-18 22:24 UTC (permalink / raw)
  To: Stanislav Fomichev, netdev, bpf; +Cc: ast, andrii

On 8/17/21 5:45 PM, Stanislav Fomichev wrote:
> Same as previous patch but for the keys. memdup_bpfptr is renamed
> to vmemdup_bpfptr (and converted to kvmalloc).
> 
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>   include/linux/bpfptr.h | 12 ++++++++++--
>   kernel/bpf/syscall.c   | 34 +++++++++++++++++-----------------
>   2 files changed, 27 insertions(+), 19 deletions(-)
> 
> diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
> index 5cdeab497cb3..84eeffb4316a 100644
> --- a/include/linux/bpfptr.h
> +++ b/include/linux/bpfptr.h
> @@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
>   	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
>   }
>   
> -static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
> +static inline void *vmemdup_bpfptr(bpfptr_t src, size_t len)

nit: should we just name it kvmemdup_bpfptr() in that case?

>   {
> -	return memdup_sockptr((sockptr_t) src, len);
> +	void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
> +
> +	if (!p)
> +		return ERR_PTR(-ENOMEM);
> +	if (copy_from_sockptr(p, (sockptr_t) src, len)) {

Also, I think this one should rather use copy_from_bpfptr() here.

> +		kvfree(p);
> +		return ERR_PTR(-EFAULT);
> +	}
> +	return p;
>   }
>   

Rest lgtm, thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls
  2021-08-18 22:24   ` Daniel Borkmann
@ 2021-08-18 23:02     ` sdf
  0 siblings, 0 replies; 6+ messages in thread
From: sdf @ 2021-08-18 23:02 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: netdev, bpf, ast, andrii

On 08/19, Daniel Borkmann wrote:
> On 8/17/21 5:45 PM, Stanislav Fomichev wrote:
> > Same as previous patch but for the keys. memdup_bpfptr is renamed
> > to vmemdup_bpfptr (and converted to kvmalloc).
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >   include/linux/bpfptr.h | 12 ++++++++++--
> >   kernel/bpf/syscall.c   | 34 +++++++++++++++++-----------------
> >   2 files changed, 27 insertions(+), 19 deletions(-)
> >
> > diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
> > index 5cdeab497cb3..84eeffb4316a 100644
> > --- a/include/linux/bpfptr.h
> > +++ b/include/linux/bpfptr.h
> > @@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t  
> dst, size_t offset,
> >   	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
> >   }
> > -static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
> > +static inline void *vmemdup_bpfptr(bpfptr_t src, size_t len)

> nit: should we just name it kvmemdup_bpfptr() in that case?
Sounds good!

> >   {
> > -	return memdup_sockptr((sockptr_t) src, len);
> > +	void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
> > +
> > +	if (!p)
> > +		return ERR_PTR(-ENOMEM);
> > +	if (copy_from_sockptr(p, (sockptr_t) src, len)) {

> Also, I think this one should rather use copy_from_bpfptr() here.
Ah, missed that one, thanks!

> > +		kvfree(p);
> > +		return ERR_PTR(-EFAULT);
> > +	}
> > +	return p;
> >   }

> Rest lgtm, thanks!

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-08-18 23:02 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-17 15:45 [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall Stanislav Fomichev
2021-08-17 15:45 ` [PATCH bpf-next v2 2/2] bpf: use kvmalloc for map keys in syscalls Stanislav Fomichev
2021-08-17 16:25   ` Song Liu
2021-08-18 22:24   ` Daniel Borkmann
2021-08-18 23:02     ` sdf
2021-08-17 16:19 ` [PATCH bpf-next v2 1/2] bpf: use kvmalloc for map values in syscall Song Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).