All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] bpf: make ringbuf available to modules
@ 2023-07-05  9:19 anton.ivanov
  2023-07-05 11:41 ` Daniel Borkmann
  0 siblings, 1 reply; 4+ messages in thread
From: anton.ivanov @ 2023-07-05  9:19 UTC (permalink / raw)
  To: bpf; +Cc: Anton Ivanov

From: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Ringbuf which was developed as a part of BPF infrastructure is
a very nice, clean, simple and concise API to relay information
from the kernel to userspace. It can be used in critical sections,
interrupt handlers, etc.

This patch exports ringbuf functionality to make it available to
kernel modules.

Demo: https://github.com/kot-begemot-uk/bpfnic-ng

The demo ships to userspace hardware offload notifications
without any mallocs, any workqueue and/or delayed work which
is normally needed to handle these. As a result it is ~ 150
lines of code instead of the 500+ usually needed to achieve the
same result.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
---
 include/linux/bpf.h  | 16 ++++++++++++
 kernel/bpf/inode.c   | 43 ++++++++++++++++++++++++++++++
 kernel/bpf/ringbuf.c | 62 ++++++++++++++++++++++++++++++++++++++------
 3 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..0882ba4a44dd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2004,6 +2004,20 @@ int  generic_map_delete_batch(struct bpf_map *map,
 struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 
+void *_bpf_ringbuf_reserve(struct bpf_map *map,
+				 u64 size, u64 flags);
+void _bpf_ringbuf_commit(void *sample, u64 flags);
+void _bpf_ringbuf_discard(void *sample, u64 flags);
+int _bpf_ringbuf_output(struct bpf_map *map,
+			      void *data, u64 size, u64 flags);
+int _bpf_ringbuf_query(struct bpf_map *map, u64 flags);
+int _bpf_user_ringbuf_drain(struct bpf_map *map,
+				  void *callback_fn,
+				  void *callback_ctx,
+				  u64 flags);
+
+
+
 #ifdef CONFIG_MEMCG_KMEM
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 			   int node);
@@ -2245,6 +2259,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 }
 
 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+struct bpf_map *bpf_map_get_path(const char *name, fmode_t fmode);
+
 int array_map_alloc_check(union bpf_attr *attr);
 
 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 4174f76133df..0518f122df68 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -597,6 +597,49 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ
 }
 EXPORT_SYMBOL(bpf_prog_get_type_path);
 
+/* Take a reference on the map behind @inode; ERR_PTR() on any failure. */
+static struct bpf_map *__get_map_inode(struct inode *inode, fmode_t fmode)
+{
+	struct bpf_map *map;
+	int ret;
+
+	/* Read is always required; write only when @fmode requests it. */
+	ret = inode_permission(&nop_mnt_idmap, inode,
+			       MAY_READ | ((fmode & FMODE_WRITE) ? MAY_WRITE : 0));
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (inode->i_op == &bpf_prog_iops || inode->i_op == &bpf_link_iops)
+		return ERR_PTR(-EINVAL);
+	if (inode->i_op != &bpf_map_iops)
+		return ERR_PTR(-EPERM);
+
+	map = inode->i_private;
+	ret = security_bpf_map(map, fmode);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	bpf_map_inc(map);
+	return map;
+}
+
+/* Resolve @name and return its map via __get_map_inode(), or an ERR_PTR(). */
+struct bpf_map *bpf_map_get_path(const char *name, fmode_t fmode)
+{
+	struct path path;
+	struct bpf_map *map;
+	int err = kern_path(name, LOOKUP_FOLLOW, &path);
+
+	if (err)
+		return ERR_PTR(err);
+	map = __get_map_inode(d_backing_inode(path.dentry), fmode);
+	if (!IS_ERR(map))
+		touch_atime(&path);
+	path_put(&path);
+	return map;
+}
+EXPORT_SYMBOL(bpf_map_get_path);
+
 /*
  * Display the mount options in /proc/mounts.
  */
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 875ac9b698d9..4d4750b9f963 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -452,15 +452,21 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 	return (void *)hdr + BPF_RINGBUF_HDR_SZ;
 }
 
-BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags)
+void *_bpf_ringbuf_reserve(struct bpf_map *map, u64 size, u64 flags)
 {
 	struct bpf_ringbuf_map *rb_map;
 
 	if (unlikely(flags))
-		return 0;
+		return NULL;
 
 	rb_map = container_of(map, struct bpf_ringbuf_map, map);
-	return (unsigned long)__bpf_ringbuf_reserve(rb_map->rb, size);
+	return __bpf_ringbuf_reserve(rb_map->rb, size);
+}
+EXPORT_SYMBOL(_bpf_ringbuf_reserve);
+
+BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags)
+{
+	return (unsigned long)_bpf_ringbuf_reserve(map, size, flags);
 }
 
 const struct bpf_func_proto bpf_ringbuf_reserve_proto = {
@@ -499,6 +505,12 @@ static void bpf_ringbuf_commit(void *sample, u64 flags, bool discard)
 		irq_work_queue(&rb->work);
 }
 
+void _bpf_ringbuf_commit(void *sample, u64 flags)
+{
+	bpf_ringbuf_commit(sample, flags, false);
+}
+EXPORT_SYMBOL(_bpf_ringbuf_commit);
+
 BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags)
 {
 	bpf_ringbuf_commit(sample, flags, false /* discard */);
@@ -512,6 +524,12 @@ const struct bpf_func_proto bpf_ringbuf_submit_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+void _bpf_ringbuf_discard(void *sample, u64 flags)
+{
+	bpf_ringbuf_commit(sample, flags, true);
+}
+EXPORT_SYMBOL(_bpf_ringbuf_discard);
+
 BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags)
 {
 	bpf_ringbuf_commit(sample, flags, true /* discard */);
@@ -525,8 +543,8 @@ const struct bpf_func_proto bpf_ringbuf_discard_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_ringbuf_output, struct bpf_map *, map, void *, data, u64, size,
-	   u64, flags)
+int _bpf_ringbuf_output(struct bpf_map *map, void *data, u64 size,
+	   u64 flags)
 {
 	struct bpf_ringbuf_map *rb_map;
 	void *rec;
@@ -543,6 +561,13 @@ BPF_CALL_4(bpf_ringbuf_output, struct bpf_map *, map, void *, data, u64, size,
 	bpf_ringbuf_commit(rec, flags, false /* discard */);
 	return 0;
 }
+EXPORT_SYMBOL(_bpf_ringbuf_output);
+
+BPF_CALL_4(bpf_ringbuf_output, struct bpf_map *, map, void *, data, u64, size,
+	   u64, flags)
+{
+	return _bpf_ringbuf_output(map, data, size, flags);
+}
 
 const struct bpf_func_proto bpf_ringbuf_output_proto = {
 	.func		= bpf_ringbuf_output,
@@ -553,7 +578,7 @@ const struct bpf_func_proto bpf_ringbuf_output_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
+int _bpf_ringbuf_query(struct bpf_map *map, u64 flags)
 {
 	struct bpf_ringbuf *rb;
 
@@ -572,6 +597,12 @@ BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
 		return 0;
 	}
 }
+EXPORT_SYMBOL(_bpf_ringbuf_query);
+
+BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
+{
+	return _bpf_ringbuf_query(map, flags);
+}
 
 const struct bpf_func_proto bpf_ringbuf_query_proto = {
 	.func		= bpf_ringbuf_query,
@@ -727,8 +758,8 @@ static void __bpf_user_ringbuf_sample_release(struct bpf_ringbuf *rb, size_t siz
 	smp_store_release(&rb->consumer_pos, consumer_pos + rounded_size);
 }
 
-BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
-	   void *, callback_fn, void *, callback_ctx, u64, flags)
+int __bpf_user_ringbuf_drain(struct bpf_map *map,
+	   void *callback_fn, void *callback_ctx, u64 flags)
 {
 	struct bpf_ringbuf *rb;
 	long samples, discarded_samples = 0, ret = 0;
@@ -784,6 +815,21 @@ BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
 	return ret;
 }
 
+BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
+	   void *, callback_fn, void *, callback_ctx, u64, flags)
+{
+	return __bpf_user_ringbuf_drain(map, callback_fn, callback_ctx, flags);
+}
+
+/* Exported for modules; forwards to __bpf_user_ringbuf_drain(). */
+int _bpf_user_ringbuf_drain(struct bpf_map *map,
+	   void *callback_fn, void *callback_ctx, u64 flags)
+{
+	return __bpf_user_ringbuf_drain(map, callback_fn, callback_ctx, flags);
+}
+EXPORT_SYMBOL(_bpf_user_ringbuf_drain);
+
+
 const struct bpf_func_proto bpf_user_ringbuf_drain_proto = {
 	.func		= bpf_user_ringbuf_drain,
 	.ret_type	= RET_INTEGER,
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] bpf: make ringbuf available to modules
  2023-07-05  9:19 [PATCH] bpf: make ringbuf available to modules anton.ivanov
@ 2023-07-05 11:41 ` Daniel Borkmann
  2023-07-05 13:20   ` Anton Ivanov
  0 siblings, 1 reply; 4+ messages in thread
From: Daniel Borkmann @ 2023-07-05 11:41 UTC (permalink / raw)
  To: anton.ivanov, bpf

On 7/5/23 11:19 AM, anton.ivanov@cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Ringbuf which was developed as a part of BPF infrastructure is
> a very nice, clean, simple and concise API to relay information
> from the kernel to userspace. It can be used in critical sections,
> interrupt handlers, etc.
> 
> This patch exports ringbuf functionality to make it available to
> kernel modules.
> 
> Demo: https://github.com/kot-begemot-uk/bpfnic-ng
> 
> The demo ships to userspace hardware offload notifications
> without any mallocs, any workqueue and/or delayed work which
> is normally needed to handle these. As a result it is ~ 150
> lines of code instead of the 500+ usually needed to achieve the
> same result.
> 
> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>

Given this is only for out-of-tree code, we cannot merge this patch.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] bpf: make ringbuf available to modules
  2023-07-05 11:41 ` Daniel Borkmann
@ 2023-07-05 13:20   ` Anton Ivanov
  2023-07-05 20:40     ` Daniel Borkmann
  0 siblings, 1 reply; 4+ messages in thread
From: Anton Ivanov @ 2023-07-05 13:20 UTC (permalink / raw)
  To: Daniel Borkmann, bpf



On 05/07/2023 12:41, Daniel Borkmann wrote:
> On 7/5/23 11:19 AM, anton.ivanov@cambridgegreys.com wrote:
>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Ringbuf which was developed as a part of BPF infrastructure is
>> a very nice, clean, simple and concise API to relay information
>> from the kernel to userspace. It can be used in critical sections,
>> interrupt handlers, etc.
>>
>> This patch exports ringbuf functionality to make it available to
>> kernel modules.
>>
>> Demo: https://github.com/kot-begemot-uk/bpfnic-ng
>>
>> The demo ships to userspace hardware offload notifications
>> without any mallocs, any workqueue and/or delayed work which
>> is normally needed to handle these. As a result it is ~ 150
>> lines of code instead of the 500+ usually needed to achieve the
>> same result.
>>
>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
> 
> Given this is only for out-of-tree code, we cannot merge this patch.

The out-of-tree code is simply a demo of what you can do with ringbuf outside BPF.

Ringbuf can save anyone writing a device driver quite a lot of work. It is a "ready made" IPC with userspace which requires half the code of any alternative (character device drivers, shared memory, etc).

What I am proposing is to make it generic and not BPF-only.

> 
> Thanks,
> Daniel
> 

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] bpf: make ringbuf available to modules
  2023-07-05 13:20   ` Anton Ivanov
@ 2023-07-05 20:40     ` Daniel Borkmann
  0 siblings, 0 replies; 4+ messages in thread
From: Daniel Borkmann @ 2023-07-05 20:40 UTC (permalink / raw)
  To: Anton Ivanov, bpf

On 7/5/23 3:20 PM, Anton Ivanov wrote:
> On 05/07/2023 12:41, Daniel Borkmann wrote:
>> On 7/5/23 11:19 AM, anton.ivanov@cambridgegreys.com wrote:
>>> From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>>
>>> Ringbuf which was developed as a part of BPF infrastructure is
>>> a very nice, clean, simple and concise API to relay information
>>> from the kernel to userspace. It can be used in critical sections,
>>> interrupt handlers, etc.
>>>
>>> This patch exports ringbuf functionality to make it available to
>>> kernel modules.
>>>
>>> Demo: https://github.com/kot-begemot-uk/bpfnic-ng
>>>
>>> The demo ships to userspace hardware offload notifications
>>> without any mallocs, any workqueue and/or delayed work which
>>> is normally needed to handle these. As a result it is ~ 150
>>> lines of code instead of the 500+ usually needed to achieve the
>>> same result.
>>>
>>> Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
>>
>> Given this is only for out-of-tree code, we cannot merge this patch.
> 
> The out-of-tree code is simply a demo of what you can do with ringbuf outside BPF.
> 
> Ringbuf can save anyone writing a device driver quite a lot of work. It is a "ready made" IPC with userspace which requires half the code of any alternative (character device drivers, shared memory, etc).
> 
> What I am proposing is to make it generic and not BPF-only.

That is probably an okay goal (depends on the concrete situation / proposal),
but still this needs to be in upstream tree given we cannot cater for anything
out-of-tree and exporting symbols without good reason for an in-tree user is
and always has been a red flag.

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-07-05 20:40 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-05  9:19 [PATCH] bpf: make ringbuf available to modules anton.ivanov
2023-07-05 11:41 ` Daniel Borkmann
2023-07-05 13:20   ` Anton Ivanov
2023-07-05 20:40     ` Daniel Borkmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.