* [PATCH RFC bpf-next 2/3] bpf: add delayed_work mechanism
  2022-07-11 21:48 [PATCH RFC bpf-next 0/3] Execution context callbacks Delyan Kratunov
@ 2022-07-11 21:48 ` Delyan Kratunov
  2022-07-11 21:48 ` [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields Delyan Kratunov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-11 21:48 UTC (permalink / raw)
  To: daniel, ast, andrii, bpf

Add a new helper function that can schedule a callback to execute in a
different context. Initially, only irq_work (i.e. hardirq) is supported.

A key consideration is that this needs to work from NMI context. To
that end, the llist node is pre-allocated inside bpf_delayed_work (i.e.
inside the map value) and submitted items are queued onto a per-program
llist that the program's irq_work callback drains. To avoid races on
the bpf_delayed_work items themselves, we implement a simple lock
scheme based on cmpxchg ordering.
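
The submit/execute handshake, condensed from bpf_delayed_work_submit()
and bpf_delayed_work_irq_work_cb() below (constant names abbreviated):

  /* submit path (any context, including NMI) */
  if (cmpxchg(&work->flags, FREE, CLAIMED) != FREE)
          return -EINVAL;                 /* item already in flight */
  work->data = data;
  work->cb = callback_fn;
  cmpxchg(&work->flags, CLAIMED, READY);  /* publish the filled-in item */
  llist_add(&work->item, &aux->irq_work->items);
  irq_work_queue(&aux->irq_work->work);

  /* irq_work callback (hardirq) */
  work_list = llist_del_all(&bpf_irq_work->items);
  llist_reverse_order(work_list);         /* preserve submission order */
  llist_for_each_entry_safe(work_item, next, work_list, item) {
          work_item->cb(work_item->data);
          xchg(&work_item->flags, FREE);  /* item is reusable again */
  }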

Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
 include/linux/bpf.h            |  13 ++++
 include/uapi/linux/bpf.h       |  28 ++++++++
 kernel/bpf/core.c              |   8 +++
 kernel/bpf/helpers.c           |  92 ++++++++++++++++++++++++
 kernel/bpf/verifier.c          | 123 ++++++++++++++++++++++++++++++++-
 scripts/bpf_doc.py             |   2 +
 tools/include/uapi/linux/bpf.h |  27 ++++++++
 7 files changed, 292 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ad9d2cfb0411..7325a9a2d10b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -27,6 +27,8 @@
 #include <linux/bpfptr.h>
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -460,6 +462,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
 	ARG_PTR_TO_KPTR,	/* pointer to referenced kptr */
 	ARG_PTR_TO_DYNPTR,      /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
+	ARG_PTR_TO_DELAYED_WORK,/* pointer to bpf_delayed_work */
 	__BPF_ARG_TYPE_MAX,
 
 	/* Extended arg_types. */
@@ -1101,6 +1104,9 @@ struct bpf_prog_aux {
 	u32 linfo_idx;
 	u32 num_exentries;
 	struct exception_table_entry *extable;
+
+	/* initialized at load time if program uses delayed work helpers */
+	struct bpf_delayed_irq_work *irq_work;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
@@ -2526,4 +2532,11 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
 void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
 int bpf_dynptr_check_size(u32 size);
 
+struct bpf_delayed_irq_work {
+	struct llist_head items;
+	struct irq_work work;
+	struct bpf_prog *prog;
+};
+void bpf_delayed_work_irq_work_cb(struct irq_work *work);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d68fc4f472f1..dc0587bbbe7c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5325,6 +5325,29 @@ union bpf_attr {
  *		**-EACCES** if the SYN cookie is not valid.
  *
  *		**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ *
+ * long bpf_delayed_work_submit(struct bpf_delayed_work *work, void *cb, void *data, int flags)
+ *     Description
+ *             Submit *cb* for execution in a different execution context.
+ *
+ *             *work* must be a member of a map value.
+ *
+ *             *cb* is the function to call.
+ *
+ *             *data* is the context passed as the sole argument to *cb*.
+ *             It must point into a map value or be NULL.
+ *
+ *             *flags* must be **BPF_DELAYED_WORK_IRQWORK**.
+ *     Return
+ *             0 when the work is successfully submitted.
+ *
+ *             **-EINVAL** if *cb* is NULL or if *work* is already in use.
+ *
+ *             **-EOPNOTSUPP** if *flags* is not **BPF_DELAYED_WORK_IRQWORK**,
+ *             or if called from an NMI handler on an architecture
+ *             without NMI-safe cmpxchg.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5535,6 +5558,7 @@ union bpf_attr {
 	FN(tcp_raw_gen_syncookie_ipv6),	\
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
+	FN(delayed_work_submit),        \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6699,6 +6723,10 @@ struct bpf_delayed_work {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+enum {
+	BPF_DELAYED_WORK_IRQWORK = (1UL << 0),
+};
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b5ffebcce6cc..1f5093f9442b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2567,6 +2567,14 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 	int i;
 
 	aux = container_of(work, struct bpf_prog_aux, work);
+
+	/* We have already waited for a grace period of the appropriate RCU
+	 * variety, so we can expect no further submissions of work. Just wait for
+	 * the currently scheduled work to finish before releasing anything.
+	 */
+	if (aux->irq_work)
+		irq_work_sync(&aux->irq_work->work);
+
 #ifdef CONFIG_BPF_SYSCALL
 	bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
 #endif
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a1c84d256f83..731547d34c35 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,8 @@
 #include <linux/proc_ns.h>
 #include <linux/security.h>
 #include <linux/btf_ids.h>
+#include <linux/irq_work.h>
+#include <linux/llist.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -1575,6 +1577,94 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
 	.arg3_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
 };
 
+struct bpf_delayed_work_kern {
+	struct llist_node item;
+	u64 flags; /* used as a lock field */
+	void (*cb)(void *);
+	void *data;
+} __aligned(8);
+
+#define BPF_DELAYED_WORK_FREE (0)
+#define BPF_DELAYED_WORK_CLAIMED (1)
+#define BPF_DELAYED_WORK_READY (2)
+
+void bpf_delayed_work_irq_work_cb(struct irq_work *work)
+{
+	struct bpf_delayed_irq_work *bpf_irq_work = container_of(work, struct bpf_delayed_irq_work, work);
+	struct bpf_delayed_work_kern *work_item, *next;
+	struct llist_node *work_list = llist_del_all(&bpf_irq_work->items);
+
+	/* Traverse in submission order to preserve ordering semantics */
+	llist_reverse_order(work_list);
+
+	llist_for_each_entry_safe(work_item, next, work_list, item) {
+		WARN_ONCE(work_item->flags != BPF_DELAYED_WORK_READY, "incomplete bpf_delayed_work found");
+
+		work_item->cb(work_item->data);
+
+		work_item->cb = work_item->data = NULL;
+		bpf_prog_put(bpf_irq_work->prog);
+		xchg(&work_item->flags, BPF_DELAYED_WORK_FREE);
+	}
+}
+
+BPF_CALL_5(bpf_delayed_work_submit, struct bpf_delayed_work_kern *, work,
+	   void *, callback_fn, void *, data, int, flags, struct bpf_prog_aux *, aux)
+{
+	u64 ret;
+	struct bpf_prog *prog;
+
+	BUILD_BUG_ON(sizeof(struct bpf_delayed_work_kern) > sizeof(struct bpf_delayed_work));
+	BUILD_BUG_ON(__alignof__(struct bpf_delayed_work_kern) != __alignof__(struct bpf_delayed_work));
+	BTF_TYPE_EMIT(struct bpf_delayed_work);
+
+	if (callback_fn == NULL)
+		return -EINVAL;
+
+	if (flags != BPF_DELAYED_WORK_IRQWORK)
+		return -EOPNOTSUPP;
+
+	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && in_nmi())
+		return -EOPNOTSUPP;
+
+	ret = cmpxchg(&work->flags, BPF_DELAYED_WORK_FREE, BPF_DELAYED_WORK_CLAIMED);
+	if (ret != 0)
+		return -EINVAL;
+
+	work->data = data;
+	work->cb = callback_fn;
+
+	ret = cmpxchg(&work->flags, BPF_DELAYED_WORK_CLAIMED, BPF_DELAYED_WORK_READY);
+	if (ret != BPF_DELAYED_WORK_CLAIMED) {
+		WARN_ONCE(ret != BPF_DELAYED_WORK_CLAIMED, "bpf_delayed_work item altered while claimed");
+		return -EINVAL;
+	}
+
+	/* Bump the ref count for every work item submitted by the program. */
+	prog = bpf_prog_inc_not_zero(aux->prog);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	llist_add(&work->item, &aux->irq_work->items);
+
+	/* It's okay if this prog's irq_work is already submitted,
+	 * it will walk the same list of callbacks anyway.
+	 */
+	(void) irq_work_queue(&aux->irq_work->work);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_delayed_work_submit_proto = {
+	.func		= bpf_delayed_work_submit,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_DELAYED_WORK,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_PTR_TO_MAP_VALUE, /* TODO: need ptr_to_map_value_mem */
+	.arg4_type	= ARG_ANYTHING,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1643,6 +1733,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_dynptr_write_proto;
 	case BPF_FUNC_dynptr_data:
 		return &bpf_dynptr_data_proto;
+	case BPF_FUNC_delayed_work_submit:
+		return &bpf_delayed_work_submit_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9fd311b7a1ff..212cbea5a382 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5490,6 +5490,55 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
 	return 0;
 }
 
+static int process_delayed_work_func(struct bpf_verifier_env *env, int regno,
+			      struct bpf_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	bool is_const = tnum_is_const(reg->var_off);
+	struct bpf_map *map = reg->map_ptr;
+	u64 val = reg->var_off.value;
+
+	if (!is_const) {
+		verbose(env,
+			"R%d doesn't have constant offset. bpf_delayed_work has to be at the constant offset\n",
+			regno);
+		return -EINVAL;
+	}
+	if (!map->btf) {
+		verbose(env, "map '%s' has to have BTF in order to use bpf_delayed_work\n",
+			map->name);
+		return -EINVAL;
+	}
+	if (!map_value_has_delayed_work(map)) {
+		if (map->delayed_work_off == -E2BIG)
+			verbose(env,
+				"map '%s' has more than one 'struct bpf_delayed_work'\n",
+				map->name);
+		else if (map->delayed_work_off == -ENOENT)
+			verbose(env,
+				"map '%s' doesn't have 'struct bpf_delayed_work'\n",
+				map->name);
+		else
+			verbose(env,
+				"map '%s' is not a struct type or bpf_delayed_work is mangled\n",
+				map->name);
+		return -EINVAL;
+	}
+	if (map->delayed_work_off != val + reg->off) {
+		verbose(env, "off %lld doesn't point to 'struct bpf_delayed_work' that is at %d\n",
+			val + reg->off, map->delayed_work_off);
+		return -EINVAL;
+	}
+	if (meta->map_ptr) {
+		verbose(env, "verifier bug. Two map pointers in a delayed work helper\n");
+		return -EFAULT;
+	}
+
+	meta->map_uid = reg->map_uid;
+	meta->map_ptr = map;
+	return 0;
+}
+
 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
 			     struct bpf_call_arg_meta *meta)
 {
@@ -5677,6 +5726,7 @@ static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK }
 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types delayed_work_types = { .types = { PTR_TO_MAP_VALUE } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -5704,6 +5754,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_TIMER]		= &timer_types,
 	[ARG_PTR_TO_KPTR]		= &kptr_types,
 	[ARG_PTR_TO_DYNPTR]		= &stack_ptr_types,
+	[ARG_PTR_TO_DELAYED_WORK]	= &delayed_work_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -6018,6 +6069,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 	} else if (arg_type == ARG_PTR_TO_TIMER) {
 		if (process_timer_func(env, regno, meta))
 			return -EACCES;
+	} else if (arg_type == ARG_PTR_TO_DELAYED_WORK) {
+		if (process_delayed_work_func(env, regno, meta))
+			return -EACCES;
 	} else if (arg_type == ARG_PTR_TO_FUNC) {
 		meta->subprogno = reg->subprogno;
 	} else if (base_type(arg_type) == ARG_PTR_TO_MEM) {
@@ -6670,7 +6724,8 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	if (insn->code == (BPF_JMP | BPF_CALL) &&
 	    insn->src_reg == 0 &&
-	    insn->imm == BPF_FUNC_timer_set_callback) {
+	    (insn->imm == BPF_FUNC_timer_set_callback ||
+	     insn->imm == BPF_FUNC_delayed_work_submit)) {
 		struct bpf_verifier_state *async_cb;
 
 		/* there is no real recursion here. timer callbacks are async */
@@ -6898,6 +6953,30 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_delayed_work_callback_state(struct bpf_verifier_env *env,
+					   struct bpf_func_state *caller,
+					   struct bpf_func_state *callee,
+					   int insn_idx)
+{
+	/* bpf_delayed_work_submit(struct bpf_delayed_work *work,
+	 *  void *callback_fn, void *data, int flags);
+	 *
+	 * callback_fn(void *callback_ctx);
+	 */
+	callee->regs[BPF_REG_1].type = PTR_TO_MAP_VALUE;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+	callee->regs[BPF_REG_1].map_ptr = caller->regs[BPF_REG_3].map_ptr;
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_2]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
@@ -7294,6 +7373,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 				reg_type_str(env, regs[BPF_REG_1].type));
 			return -EACCES;
 		}
+		break;
+	case BPF_FUNC_delayed_work_submit:
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_delayed_work_callback_state);
+		break;
 	}
 
 	if (err)
@@ -7468,6 +7552,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
 		env->prog->call_get_stack = true;
 
+	if (func_id == BPF_FUNC_delayed_work_submit) {
+		struct bpf_delayed_irq_work *irq_work = kmalloc(
+			sizeof(struct bpf_delayed_irq_work), GFP_KERNEL);
+		if (!irq_work) {
+			verbose(env, "could not allocate irq_work\n");
+			return -ENOMEM;
+		}
+
+		init_llist_head(&irq_work->items);
+		irq_work->work = IRQ_WORK_INIT_HARD(&bpf_delayed_work_irq_work_cb);
+		irq_work->prog = env->prog;
+		env->prog->aux->irq_work = irq_work;
+	}
+
+
 	if (func_id == BPF_FUNC_get_func_ip) {
 		if (check_get_func_ip(env))
 			return -ENOTSUPP;
@@ -14061,6 +14160,28 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
+		if (insn->imm == BPF_FUNC_delayed_work_submit) {
+			/* add prog->aux as the hidden 5th arg to delayed_work_submit */
+			struct bpf_insn ld_addrs[2] = {
+				BPF_LD_IMM64(BPF_REG_5, (long)prog->aux),
+			};
+
+			insn_buf[0] = ld_addrs[0];
+			insn_buf[1] = ld_addrs[1];
+			insn_buf[2] = *insn;
+			cnt = 3;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
+
 		if (insn->imm == BPF_FUNC_task_storage_get ||
 		    insn->imm == BPF_FUNC_sk_storage_get ||
 		    insn->imm == BPF_FUNC_inode_storage_get) {
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index a0ec321469bd..0dd43dc9f388 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -637,6 +637,7 @@ class PrinterHelpers(Printer):
             'struct bpf_dynptr',
             'struct iphdr',
             'struct ipv6hdr',
+            'struct bpf_delayed_work',
     ]
     known_types = {
             '...',
@@ -690,6 +691,7 @@ class PrinterHelpers(Printer):
             'struct bpf_dynptr',
             'struct iphdr',
             'struct ipv6hdr',
+            'struct bpf_delayed_work',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d68fc4f472f1..461417159106 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5325,6 +5325,28 @@ union bpf_attr {
  *		**-EACCES** if the SYN cookie is not valid.
  *
  *		**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * long bpf_delayed_work_submit(struct bpf_delayed_work *work, void *cb, void *data, int flags)
+ *     Description
+ *             Submit *cb* for execution in a different execution context.
+ *
+ *             *work* must be a member of a map value.
+ *
+ *             *cb* is the function to call.
+ *
+ *             *data* is the context passed as the sole argument to *cb*.
+ *             It must point into a map value or be NULL.
+ *
+ *             *flags* must be **BPF_DELAYED_WORK_IRQWORK**.
+ *     Return
+ *             0 when the work is successfully submitted.
+ *
+ *             **-EINVAL** if *cb* is NULL or if *work* is already in use.
+ *
+ *             **-EOPNOTSUPP** if *flags* is not **BPF_DELAYED_WORK_IRQWORK**,
+ *             or if called from an NMI handler on an architecture
+ *             without NMI-safe cmpxchg.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5535,6 +5557,7 @@ union bpf_attr {
 	FN(tcp_raw_gen_syncookie_ipv6),	\
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
+	FN(delayed_work_submit),        \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6699,6 +6722,10 @@ struct bpf_delayed_work {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+enum {
+	BPF_DELAYED_WORK_IRQWORK = (1UL << 0),
+};
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
-- 
2.36.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields
  2022-07-11 21:48 [PATCH RFC bpf-next 0/3] Execution context callbacks Delyan Kratunov
  2022-07-11 21:48 ` [PATCH RFC bpf-next 2/3] bpf: add delayed_work mechanism Delyan Kratunov
@ 2022-07-11 21:48 ` Delyan Kratunov
  2022-07-14  4:23   ` Andrii Nakryiko
  2022-07-11 21:48 ` [PATCH RFC bpf-next 3/3] selftests: delayed_work tests Delyan Kratunov
  2022-07-12 18:07 ` [PATCH RFC bpf-next 0/3] Execution context callbacks sdf
  3 siblings, 1 reply; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-11 21:48 UTC (permalink / raw)
  To: daniel, ast, andrii, bpf

Similarly to bpf_timer, bpf_delayed_work represents a callback that will
be executed at a later time, in a different execution context.

Its treatment in maps is practically the same as timers (to a degree
that perhaps calls for refactoring), except releasing the work does not
need to release any resources - we will wait for pending executions in
the program destruction path.
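
For illustration, a BPF-side map value carrying such a field looks
roughly like this (the layout is only an example; the field is located
via BTF, just like bpf_timer, and at most one is allowed per value):

  struct map_value {
          struct bpf_delayed_work work;
          __u64 payload;                    /* arbitrary program data */
  };

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY); /* hash, LRU hash and array maps allowed */
          __uint(max_entries, 1);
          __type(key, __u32);
          __type(value, struct map_value);
  } work_map SEC(".maps");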

Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
 include/linux/bpf.h            |  9 ++++++++-
 include/linux/btf.h            |  1 +
 include/uapi/linux/bpf.h       |  8 ++++++++
 kernel/bpf/btf.c               | 21 +++++++++++++++++++++
 kernel/bpf/syscall.c           | 24 ++++++++++++++++++++++--
 kernel/bpf/verifier.c          |  9 +++++++++
 tools/include/uapi/linux/bpf.h |  8 ++++++++
 7 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0edd7d2c0064..ad9d2cfb0411 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -164,7 +164,8 @@ enum {
 	BPF_MAP_VALUE_OFF_MAX = 8,
 	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
 				1 + /* for bpf_spin_lock */
-				1,  /* for bpf_timer */
+				1 + /* for bpf_timer */
+				1,  /* for bpf_delayed_work */
 };
 
 enum bpf_kptr_type {
@@ -212,6 +213,7 @@ struct bpf_map {
 	int spin_lock_off; /* >=0 valid offset, <0 error */
 	struct bpf_map_value_off *kptr_off_tab;
 	int timer_off; /* >=0 valid offset, <0 error */
+	int delayed_work_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
 	u32 btf_key_type_id;
@@ -256,6 +258,11 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
 	return map->timer_off >= 0;
 }
 
+static inline bool map_value_has_delayed_work(const struct bpf_map *map)
+{
+	return map->delayed_work_off >= 0;
+}
+
 static inline bool map_value_has_kptrs(const struct bpf_map *map)
 {
 	return !IS_ERR_OR_NULL(map->kptr_off_tab);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 1bfed7fa0428..2b8f473a6aa0 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -132,6 +132,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   u32 expected_offset, u32 expected_size);
 int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 int btf_find_timer(const struct btf *btf, const struct btf_type *t);
+int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t);
 struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 					  const struct btf_type *t);
 bool btf_type_is_void(const struct btf_type *t);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e81362891596..d68fc4f472f1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6691,6 +6691,14 @@ struct bpf_dynptr {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_delayed_work {
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f08037c31dd7..e4ab52cc25fe 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3196,6 +3196,7 @@ enum btf_field_type {
 	BTF_FIELD_SPIN_LOCK,
 	BTF_FIELD_TIMER,
 	BTF_FIELD_KPTR,
+	BTF_FIELD_DELAYED_WORK,
 };
 
 enum {
@@ -3283,6 +3284,7 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
 		switch (field_type) {
 		case BTF_FIELD_SPIN_LOCK:
 		case BTF_FIELD_TIMER:
+		case BTF_FIELD_DELAYED_WORK:
 			ret = btf_find_struct(btf, member_type, off, sz,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3333,6 +3335,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 		switch (field_type) {
 		case BTF_FIELD_SPIN_LOCK:
 		case BTF_FIELD_TIMER:
+		case BTF_FIELD_DELAYED_WORK:
 			ret = btf_find_struct(btf, var_type, off, sz,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3375,6 +3378,11 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
 		sz = sizeof(struct bpf_timer);
 		align = __alignof__(struct bpf_timer);
 		break;
+	case BTF_FIELD_DELAYED_WORK:
+		name = "bpf_delayed_work";
+		sz = sizeof(struct bpf_delayed_work);
+		align = __alignof__(struct bpf_delayed_work);
+		break;
 	case BTF_FIELD_KPTR:
 		name = NULL;
 		sz = sizeof(u64);
@@ -3421,6 +3429,19 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t)
 	return info.off;
 }
 
+int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t)
+{
+	struct btf_field_info info;
+	int ret;
+
+	ret = btf_find_field(btf, t, BTF_FIELD_DELAYED_WORK, &info, 1);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -ENOENT;
+	return info.off;
+}
+
 struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
 					  const struct btf_type *t)
 {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d5af5b99f0d..041972305344 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -914,10 +914,11 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
 	bool has_spin_lock = map_value_has_spin_lock(map);
 	bool has_timer = map_value_has_timer(map);
 	bool has_kptrs = map_value_has_kptrs(map);
+	bool has_delayed_work = map_value_has_delayed_work(map);
 	struct bpf_map_off_arr *off_arr;
 	u32 i;
 
-	if (!has_spin_lock && !has_timer && !has_kptrs) {
+	if (!has_spin_lock && !has_timer && !has_kptrs && !has_delayed_work) {
 		map->off_arr = NULL;
 		return 0;
 	}
@@ -953,6 +954,13 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
 		}
 		off_arr->cnt += tab->nr_off;
 	}
+	if (has_delayed_work) {
+		i = off_arr->cnt;
+
+		off_arr->field_off[i] = map->delayed_work_off;
+		off_arr->field_sz[i] = sizeof(struct bpf_delayed_work);
+		off_arr->cnt++;
+	}
 
 	if (off_arr->cnt == 1)
 		return 0;
@@ -1014,6 +1022,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			return -EOPNOTSUPP;
 	}
 
+	map->delayed_work_off = btf_find_delayed_work(btf, value_type);
+	if (map_value_has_delayed_work(map)) {
+		if (map->map_flags & BPF_F_RDONLY_PROG)
+			return -EACCES;
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY)
+			return -EOPNOTSUPP;
+	}
+
 	map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
 	if (map_value_has_kptrs(map)) {
 		if (!bpf_capable()) {
@@ -1095,6 +1113,7 @@ static int map_create(union bpf_attr *attr)
 
 	map->spin_lock_off = -EINVAL;
 	map->timer_off = -EINVAL;
+	map->delayed_work_off = -EINVAL;
 	if (attr->btf_key_type_id || attr->btf_value_type_id ||
 	    /* Even the map's value is a kernel's struct,
 	     * the bpf_prog.o must have BTF to begin with
@@ -1863,7 +1882,8 @@ static int map_freeze(const union bpf_attr *attr)
 		return PTR_ERR(map);
 
 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
-	    map_value_has_timer(map) || map_value_has_kptrs(map)) {
+	    map_value_has_timer(map) || map_value_has_kptrs(map) ||
+	    map_value_has_delayed_work(map)) {
 		fdput(f);
 		return -ENOTSUPP;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2859901ffbe3..9fd311b7a1ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3817,6 +3817,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 			return -EACCES;
 		}
 	}
+	if (map_value_has_delayed_work(map) && src == ACCESS_DIRECT) {
+		u32 t = map->delayed_work_off;
+
+		if (reg->smin_value + off < t + sizeof(struct bpf_delayed_work) &&
+		     t < reg->umax_value + off + size) {
+			verbose(env, "bpf_delayed_work cannot be accessed directly by load/store regno=%d off=%d\n", regno, off);
+			return -EACCES;
+		}
+	}
 	if (map_value_has_kptrs(map)) {
 		struct bpf_map_value_off *tab = map->kptr_off_tab;
 		int i;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e81362891596..d68fc4f472f1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6691,6 +6691,14 @@ struct bpf_dynptr {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_delayed_work {
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
-- 
2.36.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH RFC bpf-next 3/3] selftests: delayed_work tests
  2022-07-11 21:48 [PATCH RFC bpf-next 0/3] Execution context callbacks Delyan Kratunov
  2022-07-11 21:48 ` [PATCH RFC bpf-next 2/3] bpf: add delayed_work mechanism Delyan Kratunov
  2022-07-11 21:48 ` [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields Delyan Kratunov
@ 2022-07-11 21:48 ` Delyan Kratunov
  2022-07-12 18:07 ` [PATCH RFC bpf-next 0/3] Execution context callbacks sdf
  3 siblings, 0 replies; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-11 21:48 UTC (permalink / raw)
  To: daniel, ast, andrii, bpf

Basic tests; these will be developed further in later iterations.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
 .../selftests/bpf/prog_tests/delayed_work.c   | 29 +++++++++
 .../selftests/bpf/progs/delayed_irqwork.c     | 59 +++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/delayed_work.c
 create mode 100644 tools/testing/selftests/bpf/progs/delayed_irqwork.c

diff --git a/tools/testing/selftests/bpf/prog_tests/delayed_work.c b/tools/testing/selftests/bpf/prog_tests/delayed_work.c
new file mode 100644
index 000000000000..80ed52c8f34c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/delayed_work.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "delayed_irqwork.skel.h"
+
+void test_delayed_work(void)
+{
+	int err;
+	struct delayed_irqwork *skel;
+
+	skel = delayed_irqwork__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	err = delayed_irqwork__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = delayed_irqwork__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1000000);
+
+cleanup:
+	delayed_irqwork__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/delayed_irqwork.c b/tools/testing/selftests/bpf/progs/delayed_irqwork.c
new file mode 100644
index 000000000000..9fde66616681
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/delayed_irqwork.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+
+#include "bpf_misc.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct map_value_type {
+	struct bpf_delayed_work work;
+	struct {
+		__u32 arg1;
+	} data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, struct map_value_type);
+} map1 SEC(".maps");
+
+static __noinline int callback(void *args)
+{
+	struct map_value_type *val = args;
+
+	bpf_printk("callback: %p, %x", val, val->data.arg1);
+	return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int delayed_irqwork(void *ctx)
+{
+	int ret;
+	__u32 key = 0;
+	struct map_value_type *value = (struct map_value_type *) bpf_map_lookup_elem(&map1, &key);
+
+	if (!value) {
+		bpf_printk("Could not get map value");
+		return 0;
+	}
+
+	value->data.arg1 = 0xcafe;
+	ret = bpf_delayed_work_submit(&value->work, callback, value, BPF_DELAYED_WORK_IRQWORK);
+
+	key = 1;
+	value = (struct map_value_type *) bpf_map_lookup_elem(&map1, &key);
+	if (!value) {
+		bpf_printk("Could not get map value");
+		return 0;
+	}
+	value->data.arg1 = 0xbeef;
+	ret = bpf_delayed_work_submit(&value->work, callback, value, BPF_DELAYED_WORK_IRQWORK);
+
+	return 0;
+}
-- 
2.36.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH RFC bpf-next 0/3] Execution context callbacks
@ 2022-07-11 21:48 Delyan Kratunov
  2022-07-11 21:48 ` [PATCH RFC bpf-next 2/3] bpf: add delayed_work mechanism Delyan Kratunov
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-11 21:48 UTC (permalink / raw)
  To: daniel, ast, andrii, bpf

BPF developers are sometimes faced with surprising limitations of the execution context
their code runs in. NMI is particularly problematic though userspace data access as a whole
has come up as well (e.g. build id not being available).

This series adds bpf_delayed_work_submit which takes a callback function and a context pointer
and is able to execute the callback from (initially) a hardirq context.

This is an RFC to answer a few questions on direction:

1. Naming is intentionally bad and something I'd like to bikeshed a bit.
"bpf_(defer|submit)_work" was my first instinct but that has workqueue connotations in the kernel.

2. The callback arguments need to be in a map. We can currently express helper arguments taking a
pointer to a map value but not a pointer to _within_ a map value. Should we add a new argument
type or should we just pass the map value pointer to the callback?

3. A lot of the map handling code is verbatim from bpf_timer. This feels icky but I'm not sure if it
justifies a refactor quite yet. Opinions welcome.

4. This functionality is implemented as a single helper call (no matching bpf_delayed_work_init). In practice,
this means that we can't implement the map->usercnt check that bpf_timer_start performs to ensure the
map is referenced from userspace. However, given that a) we wait for pending work before releasing the
bpf_prog, b) the map will be in the bpf_prog's used_maps, and c) the map free path does not need to release
any external resources, and d) the bpf_delayed_work items bump the prog refcnt, I think we can keep this mechanism
a single call.

I'd like to get this right from the start, so do let me know if I'm missing potential execution
contexts that we can't really wait to drain from the bpf_prog free path.

5. This mechanism generalizes to other contexts (e.g., sleepable context on the way back to userspace
a-la set_thread_flag(TIF_UPROBE)), by means of adding the bpf_delayed_work items to other llist_heads.
E.g., we can keep the llist_heads in task_local_storage or in per-cpu structures. I can't think of
anything that requires a more complicated approach (or reserved space in the structs) but do let me
know if I'm wrong.

6. Lastly, the llist approach was dictated by the NMI constraints. RCU lists are out because they need
to synchronize_rcu when splicing from one head to another.

Thanks,
Delyan

Delyan Kratunov (3):
  bpf: allow maps to hold bpf_delayed_work fields
  bpf: add delayed_work mechanism
  selftests: delayed_work tests

 include/linux/bpf.h                           |  22 ++-
 include/linux/btf.h                           |   1 +
 include/uapi/linux/bpf.h                      |  36 +++++
 kernel/bpf/btf.c                              |  21 +++
 kernel/bpf/core.c                             |   8 ++
 kernel/bpf/helpers.c                          |  92 ++++++++++++
 kernel/bpf/syscall.c                          |  24 +++-
 kernel/bpf/verifier.c                         | 132 +++++++++++++++++-
 scripts/bpf_doc.py                            |   2 +
 tools/include/uapi/linux/bpf.h                |  35 +++++
 .../selftests/bpf/prog_tests/delayed_work.c   |  29 ++++
 .../selftests/bpf/progs/delayed_irqwork.c     |  59 ++++++++
 12 files changed, 457 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/delayed_work.c
 create mode 100644 tools/testing/selftests/bpf/progs/delayed_irqwork.c

--
2.36.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-11 21:48 [PATCH RFC bpf-next 0/3] Execution context callbacks Delyan Kratunov
                   ` (2 preceding siblings ...)
  2022-07-11 21:48 ` [PATCH RFC bpf-next 3/3] selftests: delayed_work tests Delyan Kratunov
@ 2022-07-12 18:07 ` sdf
  2022-07-12 18:42   ` Delyan Kratunov
  3 siblings, 1 reply; 13+ messages in thread
From: sdf @ 2022-07-12 18:07 UTC (permalink / raw)
  To: Delyan Kratunov; +Cc: daniel, ast, andrii, bpf

On 07/11, Delyan Kratunov wrote:
> BPF developers are sometimes faced with surprising limitations of the  
> execution context
> their code runs in. NMI is particularly problematic though userspace data  
> access as a whole
> has come up as well (e.g. build id not being available).

> This series adds bpf_delayed_work_submit which takes a callback function  
> and a context pointer
> and is able to execute the callback from (initially) a hardirq context.

> This is an RFC to answer a few questions on direction:

> 1. Naming is intentionally bad and something I'd like to bikeshed a bit.
> "bpf_(defer|submit)_work" was my first instinct but that has workqueue  
> connotations in the kernel.

> 2. The callback arguments need to be in a map. We can currently express  
> helper arguments taking a
> pointer to a map value but not a pointer to _within_ a map value. Should  
> we add a new argument
> type or should we just pass the map value pointer to the callback?

Passing map value pointer (as you do in the selftest) seems fine; do
you think we need more flexibility here?

> 3. A lot of the map handling code is verbatim from bpf_timer. This feels  
> icky but I'm not sure if it
> justifies a refactor quite yet. Opinions welcome.

+1, it does seem very close to a timer with expiry time == 0.

I don't know what's the exact usecase you're trying to solve exactly, but
have you though of maybe initially supporting something like:

bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
bpf_timer_set_callback(&timer, cg);
bpf_timer_start(&timer, 0, 0);

If you init a timer with that special flag, I'm assuming you can have
special cases in the existing helpers to simulate the delayed work?
Then, the verifier changes should be minimal it seems.

OTOH, having a separate set of helpers seems more clear API-wise :-/

> 4. This functionality is implemented as a single helper call (no matching  
> bpf_delayed_work_init). In practice,
> this means that we can't implement the map->usercnt check that  
> bpf_timer_start performs to ensure the
> map is referenced from userspace. However, given that a) we wait for  
> pending work before releasing the
> bpf_prog, b) the map will be in the bpf_prog's used_maps, and c) the map  
> free path does not need to release
> any external resources, and d) the bpf_delayed_work items bump the prog  
> refcnt, I think we can keep this mechanism
> a single call.

> I'd like to get this right from the start, so do let me know if I'm  
> missing potential execution
> contexts that we can't really wait to drain from the bpf_prog free path.

> 5. This mechanism generalizes to other contexts (e.g., sleepable context  
> on the way back to userspace
> a-la set_thread_flag(TIF_UPROBE)), by means of adding the  
> bpf_delayed_work items to other llist_heads.
> E.g., we can keep the llist_heads in task_local_storage or in per-cpu  
> structures. I can't think of
> anything that requires a more complicated approach (or reserved space in  
> the structs) but do let me
> know if I'm wrong.

> 6. Lastly, the llist approach was dictated by the NMI constraints. RCU  
> lists are out because they need
> to synchronize_rcu when splicing from one head to another.

> Thanks,
> Delyan

> Delyan Kratunov (3):
>    bpf: allow maps to hold bpf_delayed_work fields
>    bpf: add delayed_work mechanism
>    selftests: delayed_work tests

>   include/linux/bpf.h                           |  22 ++-
>   include/linux/btf.h                           |   1 +
>   include/uapi/linux/bpf.h                      |  36 +++++
>   kernel/bpf/btf.c                              |  21 +++
>   kernel/bpf/core.c                             |   8 ++
>   kernel/bpf/helpers.c                          |  92 ++++++++++++
>   kernel/bpf/syscall.c                          |  24 +++-
>   kernel/bpf/verifier.c                         | 132 +++++++++++++++++-
>   scripts/bpf_doc.py                            |   2 +
>   tools/include/uapi/linux/bpf.h                |  35 +++++
>   .../selftests/bpf/prog_tests/delayed_work.c   |  29 ++++
>   .../selftests/bpf/progs/delayed_irqwork.c     |  59 ++++++++
>   12 files changed, 457 insertions(+), 4 deletions(-)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/delayed_work.c
>   create mode 100644 tools/testing/selftests/bpf/progs/delayed_irqwork.c

> --
> 2.36.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-12 18:07 ` [PATCH RFC bpf-next 0/3] Execution context callbacks sdf
@ 2022-07-12 18:42   ` Delyan Kratunov
  2022-07-12 22:51     ` sdf
  2022-07-15  1:51     ` Alexei Starovoitov
  0 siblings, 2 replies; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-12 18:42 UTC (permalink / raw)
  To: sdf; +Cc: daniel, ast, andrii, bpf

Thanks for taking a look, Stanislav!

On Tue, 2022-07-12 at 11:07 -0700, sdf@google.com wrote:
> *snip*
> > 2. The callback arguments need to be in a map. We can currently express  
> > helper arguments taking a
> > pointer to a map value but not a pointer to _within_ a map value. Should  
> > we add a new argument
> > type or should we just pass the map value pointer to the callback?
> 
> Passing map value pointer (as you do in the selftest) seems fine; do
> you think we need more flexibility here?

I think it makes a cleaner and more familiar API - the pointer to my data that I give
to the submission function is the one I get in the callback. Requiring it to be a map
value is a little bit quirky (it's not really my data it's pointing to!). I don't
know if it's a lot of work in the verifier to iron out this quirk but if it's
reasonable, I'd be happy to make the developer experience a little more predictable.

> > 3. A lot of the map handling code is verbatim from bpf_timer. This feels  
> > icky but I'm not sure if it
> > justifies a refactor quite yet. Opinions welcome.
> 
> +1, it does seem very close to a timer with expiry time == 0.
> 
> I don't know what's the exact usecase you're trying to solve exactly,

The primary motivating examples are 1) GFP_ATOMIC usage is not safe in NMI aiui, so
switching allocations to hardirq helps and 2) copy_from_user in tracing programs (nmi
or softirq when using software clocks). The latter shows up in insidious ways like
build id not being reliable when retrieving stack traces ([1] is a thread from a
while ago about it).

> but have you though of maybe initially supporting something like:
> 
> bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> bpf_timer_set_callback(&timer, cg);
> bpf_timer_start(&timer, 0, 0);
> 
> If you init a timer with that special flag, I'm assuming you can have
> special cases in the existing helpers to simulate the delayed work?

Potentially but I have some reservations about drawing this equivalence.

> Then, the verifier changes should be minimal it seems.
> 
> OTOH, having a separate set of helpers seems more clear API-wise :-/

The primary way this differs from timers is that timers already specify an execution
context - the callback will be called from a softirq. 

It doesn't make sense to me to have some "timers" (but only 0-delay, super-special
timers) run in hardirq or, more confusingly, user context. At that point, there's
little in the API to express these differences, (e.g., bpf_copy_from_user_task is
accessible in *this* callback) and the verifier work will be far more challenging (if
at all possible since the init and the set_callback would be split).

I think it's worth thinking about how to unify the handling of timer-like map value
members but I don't think it's worth it trying to shoehorn this functionality into
existing infra.

> *snip*

  [1]: https://lore.kernel.org/bpf/CA+khW7gh=vO8m-_SVnwWwj7kv+EDeUPcuWFqebf2Zmi9T_oEAQ@mail.gmail.com/


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-12 18:42   ` Delyan Kratunov
@ 2022-07-12 22:51     ` sdf
  2022-07-15  1:51     ` Alexei Starovoitov
  1 sibling, 0 replies; 13+ messages in thread
From: sdf @ 2022-07-12 22:51 UTC (permalink / raw)
  To: Delyan Kratunov; +Cc: daniel, ast, andrii, bpf

On 07/12, Delyan Kratunov wrote:
> Thanks for taking a look, Stanislav!

> On Tue, 2022-07-12 at 11:07 -0700, sdf@google.com wrote:
> > *snip*
> > > 2. The callback arguments need to be in a map. We can currently  
> express
> > > helper arguments taking a
> > > pointer to a map value but not a pointer to _within_ a map value.  
> Should
> > > we add a new argument
> > > type or should we just pass the map value pointer to the callback?
> >
> > Passing map value pointer (as you do in the selftest) seems fine; do
> > you think we need more flexibility here?

> I think it makes a cleaner and more familiar API - the pointer to my data  
> that I give
> to the submission function is the one I get in the callback. Requiring it  
> to be a map
> value is a little bit quirky (it's not really my data it's pointing to!).  
> I don't
> know if it's a lot of work in the verifier to iron out this quirk but if  
> it's
> reasonable, I'd be happy to make the developer experience a little more  
> predictable.

> > > 3. A lot of the map handling code is verbatim from bpf_timer. This  
> feels
> > > icky but I'm not sure if it
> > > justifies a refactor quite yet. Opinions welcome.
> >
> > +1, it does seem very close to a timer with expiry time == 0.
> >
> > I don't know what's the exact usecase you're trying to solve exactly,

> The primary motivating examples are 1) GFP_ATOMIC usage is not safe in  
> NMI aiui, so
> switching allocations to hardirq helps and 2) copy_from_user in tracing  
> programs (nmi
> or softirq when using software clocks). The latter shows up in insidious  
> ways like
> build id not being reliable when retrieving stack traces ([1] is a thread  
> from a
> while ago about it).

> > but have you though of maybe initially supporting something like:
> >
> > bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> > bpf_timer_set_callback(&timer, cg);
> > bpf_timer_start(&timer, 0, 0);
> >
> > If you init a timer with that special flag, I'm assuming you can have
> > special cases in the existing helpers to simulate the delayed work?

> Potentially but I have some reservations about drawing this equivalence.

> > Then, the verifier changes should be minimal it seems.
> >
> > OTOH, having a separate set of helpers seems more clear API-wise :-/

> The primary way this differs from timers is that timers already specify  
> an execution
> context - the callback will be called from a softirq.�

> It doesn't make sense to me to have some "timers" (but only 0-delay,  
> super-special
> timers) run in hardirq or, more confusingly, user context. At that point,  
> there's
> little in the API to express these differences, (e.g.,  
> bpf_copy_from_user_task is
> accessible in *this* callback) and the verifier work will be far more  
> challenging (if
> at all possible since the init and the set_callback would be split).

> I think it's worth thinking about how to unify the handling of timer-like  
> map value
> members but I don't think it's worth it trying to shoehorn this  
> functionality into
> existing infra.

> > *snip*

>    [1]:  
> https://lore.kernel.org/bpf/CA+khW7gh=vO8m-_SVnwWwj7kv+EDeUPcuWFqebf2Zmi9T_oEAQ@mail.gmail.com/


All valid points. I'm assuming Alexei will take a closer look at this
eventually since I don't have a ton of context about timers :-(

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields
  2022-07-11 21:48 ` [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields Delyan Kratunov
@ 2022-07-14  4:23   ` Andrii Nakryiko
  0 siblings, 0 replies; 13+ messages in thread
From: Andrii Nakryiko @ 2022-07-14  4:23 UTC (permalink / raw)
  To: Delyan Kratunov; +Cc: daniel, ast, andrii, bpf

On Mon, Jul 11, 2022 at 2:48 PM Delyan Kratunov <delyank@fb.com> wrote:
>
> Similarly to bpf_timer, bpf_delayed_work represents a callback that will
> be executed at a later time, in a different execution context.
>
> Its treatment in maps is practically the same as timers (to a degree
> that perhaps calls for refactoring), except releasing the work does not
> need to release any resources - we will wait for pending executions in
> the program destruction path.
>
> Signed-off-by: Delyan Kratunov <delyank@fb.com>
> ---
>  include/linux/bpf.h            |  9 ++++++++-
>  include/linux/btf.h            |  1 +
>  include/uapi/linux/bpf.h       |  8 ++++++++
>  kernel/bpf/btf.c               | 21 +++++++++++++++++++++
>  kernel/bpf/syscall.c           | 24 ++++++++++++++++++++++--
>  kernel/bpf/verifier.c          |  9 +++++++++
>  tools/include/uapi/linux/bpf.h |  8 ++++++++
>  7 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 0edd7d2c0064..ad9d2cfb0411 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -164,7 +164,8 @@ enum {
>         BPF_MAP_VALUE_OFF_MAX = 8,
>         BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
>                                 1 + /* for bpf_spin_lock */
> -                               1,  /* for bpf_timer */
> +                               1 + /* for bpf_timer */
> +                               1,  /* for bpf_delayed_work */
>  };
>
>  enum bpf_kptr_type {
> @@ -212,6 +213,7 @@ struct bpf_map {
>         int spin_lock_off; /* >=0 valid offset, <0 error */
>         struct bpf_map_value_off *kptr_off_tab;
>         int timer_off; /* >=0 valid offset, <0 error */
> +       int delayed_work_off; /* >=0 valid offset, <0 error */
>         u32 id;
>         int numa_node;
>         u32 btf_key_type_id;
> @@ -256,6 +258,11 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
>         return map->timer_off >= 0;
>  }
>
> +static inline bool map_value_has_delayed_work(const struct bpf_map *map)
> +{
> +       return map->delayed_work_off >= 0;
> +}
> +
>  static inline bool map_value_has_kptrs(const struct bpf_map *map)
>  {
>         return !IS_ERR_OR_NULL(map->kptr_off_tab);
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 1bfed7fa0428..2b8f473a6aa0 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -132,6 +132,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
>                            u32 expected_offset, u32 expected_size);
>  int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
>  int btf_find_timer(const struct btf *btf, const struct btf_type *t);
> +int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t);
>  struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
>                                           const struct btf_type *t);
>  bool btf_type_is_void(const struct btf_type *t);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e81362891596..d68fc4f472f1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6691,6 +6691,14 @@ struct bpf_dynptr {
>         __u64 :64;
>  } __attribute__((aligned(8)));
>
> +struct bpf_delayed_work {
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +} __attribute__((aligned(8)));
> +
>  struct bpf_sysctl {
>         __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
>                                  * Allows 1,2,4-byte read, but no write.
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index f08037c31dd7..e4ab52cc25fe 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3196,6 +3196,7 @@ enum btf_field_type {
>         BTF_FIELD_SPIN_LOCK,
>         BTF_FIELD_TIMER,
>         BTF_FIELD_KPTR,
> +       BTF_FIELD_DELAYED_WORK,
>  };
>
>  enum {
> @@ -3283,6 +3284,7 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
>                 switch (field_type) {
>                 case BTF_FIELD_SPIN_LOCK:
>                 case BTF_FIELD_TIMER:
> +               case BTF_FIELD_DELAYED_WORK:
>                         ret = btf_find_struct(btf, member_type, off, sz,
>                                               idx < info_cnt ? &info[idx] : &tmp);
>                         if (ret < 0)
> @@ -3333,6 +3335,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
>                 switch (field_type) {
>                 case BTF_FIELD_SPIN_LOCK:
>                 case BTF_FIELD_TIMER:
> +               case BTF_FIELD_DELAYED_WORK:
>                         ret = btf_find_struct(btf, var_type, off, sz,
>                                               idx < info_cnt ? &info[idx] : &tmp);
>                         if (ret < 0)
> @@ -3375,6 +3378,11 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
>                 sz = sizeof(struct bpf_timer);
>                 align = __alignof__(struct bpf_timer);
>                 break;
> +       case BTF_FIELD_DELAYED_WORK:
> +               name = "bpf_delayed_work";
> +               sz = sizeof(struct bpf_delayed_work);
> +               align = __alignof__(struct bpf_delayed_work);
> +               break;
>         case BTF_FIELD_KPTR:
>                 name = NULL;
>                 sz = sizeof(u64);
> @@ -3421,6 +3429,19 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t)
>         return info.off;
>  }
>
> +int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t)
> +{
> +       struct btf_field_info info;
> +       int ret;
> +
> +       ret = btf_find_field(btf, t, BTF_FIELD_DELAYED_WORK, &info, 1);
> +       if (ret < 0)
> +               return ret;
> +       if (!ret)
> +               return -ENOENT;
> +       return info.off;
> +}
> +
>  struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
>                                           const struct btf_type *t)
>  {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 7d5af5b99f0d..041972305344 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -914,10 +914,11 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
>         bool has_spin_lock = map_value_has_spin_lock(map);
>         bool has_timer = map_value_has_timer(map);
>         bool has_kptrs = map_value_has_kptrs(map);
> +       bool has_delayed_work = map_value_has_delayed_work(map);
>         struct bpf_map_off_arr *off_arr;
>         u32 i;
>
> -       if (!has_spin_lock && !has_timer && !has_kptrs) {
> +       if (!has_spin_lock && !has_timer && !has_kptrs && !has_delayed_work) {
>                 map->off_arr = NULL;
>                 return 0;
>         }
> @@ -953,6 +954,13 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
>                 }
>                 off_arr->cnt += tab->nr_off;
>         }
> +       if (has_delayed_work) {
> +               i = off_arr->cnt;
> +
> +               off_arr->field_off[i] = map->delayed_work_off;
> +               off_arr->field_sz[i] = sizeof(struct bpf_delayed_work);
> +               off_arr->cnt++;
> +       }
>
>         if (off_arr->cnt == 1)
>                 return 0;
> @@ -1014,6 +1022,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
>                         return -EOPNOTSUPP;
>         }
>
> +       map->delayed_work_off = btf_find_delayed_work(btf, value_type);
> +       if (map_value_has_delayed_work(map)) {
> +               if (map->map_flags & BPF_F_RDONLY_PROG)
> +                       return -EACCES;
> +               if (map->map_type != BPF_MAP_TYPE_HASH &&
> +                   map->map_type != BPF_MAP_TYPE_LRU_HASH &&
> +                   map->map_type != BPF_MAP_TYPE_ARRAY)
> +                       return -EOPNOTSUPP;
> +       }
> +
>         map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
>         if (map_value_has_kptrs(map)) {
>                 if (!bpf_capable()) {
> @@ -1095,6 +1113,7 @@ static int map_create(union bpf_attr *attr)
>
>         map->spin_lock_off = -EINVAL;
>         map->timer_off = -EINVAL;
> +       map->delayed_work_off = -EINVAL;
>         if (attr->btf_key_type_id || attr->btf_value_type_id ||
>             /* Even the map's value is a kernel's struct,
>              * the bpf_prog.o must have BTF to begin with
> @@ -1863,7 +1882,8 @@ static int map_freeze(const union bpf_attr *attr)
>                 return PTR_ERR(map);
>
>         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
> -           map_value_has_timer(map) || map_value_has_kptrs(map)) {
> +           map_value_has_timer(map) || map_value_has_kptrs(map) ||
> +           map_value_has_delayed_work(map)) {

not introduced by you, but shouldn't this check also check
map_value_has_spinlock()?

>                 fdput(f);
>                 return -ENOTSUPP;
>         }

Also check if you need to modify bpf_map_mmap?


> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 2859901ffbe3..9fd311b7a1ff 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -3817,6 +3817,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
>                         return -EACCES;
>                 }
>         }
> +       if (map_value_has_delayed_work(map) && src == ACCESS_DIRECT) {
> +               u32 t = map->delayed_work_off;
> +
> +               if (reg->smin_value + off < t + sizeof(struct bpf_delayed_work) &&
> +                    t < reg->umax_value + off + size) {
> +                       verbose(env, "bpf_delayed_work cannot be accessed directly by load/store regno=%d off=%d\n", regno, off);
> +                       return -EACCES;
> +               }
> +       }
>         if (map_value_has_kptrs(map)) {
>                 struct bpf_map_value_off *tab = map->kptr_off_tab;
>                 int i;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index e81362891596..d68fc4f472f1 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -6691,6 +6691,14 @@ struct bpf_dynptr {
>         __u64 :64;
>  } __attribute__((aligned(8)));
>
> +struct bpf_delayed_work {
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +} __attribute__((aligned(8)));
> +
>  struct bpf_sysctl {
>         __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
>                                  * Allows 1,2,4-byte read, but no write.
> --
> 2.36.1

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-12 18:42   ` Delyan Kratunov
  2022-07-12 22:51     ` sdf
@ 2022-07-15  1:51     ` Alexei Starovoitov
  2022-07-15 18:28       ` Delyan Kratunov
  1 sibling, 1 reply; 13+ messages in thread
From: Alexei Starovoitov @ 2022-07-15  1:51 UTC (permalink / raw)
  To: Delyan Kratunov; +Cc: sdf, daniel, ast, andrii, bpf

On Tue, Jul 12, 2022 at 06:42:52PM +0000, Delyan Kratunov wrote:
> 
> > but have you though of maybe initially supporting something like:
> > 
> > bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> > bpf_timer_set_callback(&timer, cg);
> > bpf_timer_start(&timer, 0, 0);
> > 
> > If you init a timer with that special flag, I'm assuming you can have
> > special cases in the existing helpers to simulate the delayed work?
> 
> Potentially but I have some reservations about drawing this equivalence.

hrtimer api has various flags: soft vs hard irq, pinned and not.
So the suggestion to treat irq_work callback as special timer flag
actually fits well.

bpf_timer_init + set_callback + start can be a static inline function
named bpf_work_submit() in bpf_helpers.h
(or some new file that will mark the beginning libc-bpf library).
Reusing struct bpf_timer and adding zero-delay callback could probably be
easier for users to learn and consume.
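
Something along these lines as a sketch (BPF_F_TIMER_IRQ_WORK is a made-up
flag name just to illustrate; only the three existing timer helpers are
assumed, and re-init handling is ignored):

static inline int bpf_work_submit(struct bpf_timer *timer, void *map, void *cb)
{
	int err;

	/* hypothetical flag selecting the irq_work execution context */
	err = bpf_timer_init(timer, map, BPF_F_TIMER_IRQ_WORK);
	if (err)
		return err;
	err = bpf_timer_set_callback(timer, cb);
	if (err)
		return err;
	/* zero delay: run the callback as soon as the context allows */
	return bpf_timer_start(timer, 0, 0);
}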

Separately:
+struct bpf_delayed_work {
+       __u64 :64;
+       __u64 :64;
+       __u64 :64;
+       __u64 :64;
+       __u64 :64;
+} __attribute__((aligned(8)));
is not extensible.
It would be better to add indirection to allow kernel side to grow
independently from amount of space consumed in a map value.
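
I.e. keep only an opaque, pointer-sized handle in the map value and let the
kernel allocate the real state behind it on first use, roughly (the
kernel-side struct name and fields below are made up for illustration):

/* what the map value carries: fixed size, never has to change */
struct bpf_delayed_work {
	__u64 :64;	/* internally holds a kernel pointer */
} __attribute__((aligned(8)));

/* kernel-side state can then grow without touching the uapi struct */
struct bpf_delayed_work_kern {
	struct llist_node node;
	void *callback;
	void *data;
	/* future fields get appended here */
};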

Can you think of a way to make irq_work/sleepable callback independent of maps?
Assume bpf_mem_alloc is already available and NMI prog can allocate a typed object.
The usage could be:
struct my_work {
  int a;
  struct task_struct __kptr_ref *t;
};
void my_cb(struct my_work *w);

struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
w->t = ..;
bpf_submit_work(w, my_cb, SLEEPABLE | IRQ_WORK);

Am I day dreaming? :)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-15  1:51     ` Alexei Starovoitov
@ 2022-07-15 18:28       ` Delyan Kratunov
  2022-07-19 19:02         ` Alexei Starovoitov
  0 siblings, 1 reply; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-15 18:28 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: daniel, sdf, ast, andrii, bpf

On Thu, 2022-07-14 at 18:51 -0700, Alexei Starovoitov wrote:
> On Tue, Jul 12, 2022 at 06:42:52PM +0000, Delyan Kratunov wrote:
> > 
> > > but have you though of maybe initially supporting something like:
> > > 
> > > bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> > > bpf_timer_set_callback(&timer, cg);
> > > bpf_timer_start(&timer, 0, 0);
> > > 
> > > If you init a timer with that special flag, I'm assuming you can have
> > > special cases in the existing helpers to simulate the delayed work?
> > 
> > Potentially but I have some reservations about drawing this equivalence.
> 
> hrtimer api has various flags: soft vs hard irq, pinned and not.
> So the suggestion to treat irq_work callback as special timer flag
> actually fits well.
> 
> bpf_timer_init + set_callback + start can be a static inline function
> named bpf_work_submit() in bpf_helpers.h
> (or some new file that will mark the beginning libc-bpf library).
> Reusing struct bpf_timer and adding zero-delay callback could probably be
> easier for users to learn and consume.

To clarify, we're talking about 1) making bpf_timer nmi-safe for _some_ but not all
combinations of parameters and 2) adding new flags to specify an execution context?
It's achievable but it's hard to see how it's the superior solution here.

> 
> Separately:
> +struct bpf_delayed_work {
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +       __u64 :64;
> +} __attribute__((aligned(8)));
> is not extensible.
> It would be better to add indirection to allow kernel side to grow
> independently from amount of space consumed in a map value.

Fair point, I was wondering what to do with it - storing just a pointer sounds
reasonable.

> Can you think of a way to make irq_work/sleepable callback independent of maps?
> Assume bpf_mem_alloc is already available and NMI prog can allocate a typed object.
> The usage could be:
> struct my_work {
>   int a;
>   struct task_struct __kptr_ref *t;
> };
> void my_cb(struct my_work *w);
> 
> struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
> w->t = ..;
> bpf_submit_work(w, my_cb, SLEEPABLE | IRQ_WORK);
> 
> Am I day dreaming? :)

Nothing wrong with dreaming of a better future :) 

(I'm assuming you're thinking of bpf_mem_alloc being fronted by the allocator you
recently sent to the list.)

On a first pass, here are my concerns:

A program and its maps can guarantee a certain amount of storage for work items.
Sizing that storage is difficult but it is yours alone to use. The freelist allocator
can be transiently drained by other programs and starve you of this utility. This is
a new failure mode, so it's worth talking about.

With a generic allocator mechanism, we'll have a hard time enforcing the can't-load-
or-store-into-special-fields logic. I like that guardrail and I'm not sure how we'd
achieve the same guarantees. (In your snippet, we don't have the llist_node on the
work item - do we wrap my_work into something else internally? That would hide the
fields that need protecting at the expense of an extra bpf_mem_alloc allocation.)

Managing the storage returned from bpf_mem_alloc is of course also a concern. We'd
need to treat bpf_submit_work as "releasing" it (really, taking ownership). This path
means more lifecycle analysis in the verifier and explicit and implicit free()s.

I'm not opposed to it overall - the developer experience is very familiar - but I am
primarily worried that allocator failures will be in the same category of issues as
the hash map collisions for stacks. If you want reliability, you just don't use that
type of map - what's the alternative in this hypothetical bpf_mem_alloc future?

-- Delyan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-15 18:28       ` Delyan Kratunov
@ 2022-07-19 19:02         ` Alexei Starovoitov
  2022-07-19 22:12           ` Delyan Kratunov
  0 siblings, 1 reply; 13+ messages in thread
From: Alexei Starovoitov @ 2022-07-19 19:02 UTC (permalink / raw)
  To: Delyan Kratunov; +Cc: daniel, sdf, ast, andrii, bpf

On Fri, Jul 15, 2022 at 06:28:20PM +0000, Delyan Kratunov wrote:
> On Thu, 2022-07-14 at 18:51 -0700, Alexei Starovoitov wrote:
> > On Tue, Jul 12, 2022 at 06:42:52PM +0000, Delyan Kratunov wrote:
> > > 
> > > > but have you though of maybe initially supporting something like:
> > > > 
> > > > bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> > > > bpf_timer_set_callback(&timer, cg);
> > > > bpf_timer_start(&timer, 0, 0);
> > > > 
> > > > If you init a timer with that special flag, I'm assuming you can have
> > > > special cases in the existing helpers to simulate the delayed work?
> > > 
> > > Potentially but I have some reservations about drawing this equivalence.
> > 
> > hrtimer api has various flags: soft vs hard irq, pinned and not.
> > So the suggestion to treat irq_work callback as special timer flag
> > actually fits well.
> > 
> > bpf_timer_init + set_callback + start can be a static inline function
> > named bpf_work_submit() in bpf_helpers.h
> > (or some new file that will mark the beginning libc-bpf library).
> > Reusing struct bpf_timer and adding zero-delay callback could probably be
> > easier for users to learn and consume.
> 
> To clarify, we're talking about 1) making bpf_timer nmi-safe for _some_ but not all
> combinations of parameters and 2) adding new flags to specify an execution context?
> It's achievable but it's hard to see how it's the superior solution here.
> 
> > 
> > Separately:
> > +struct bpf_delayed_work {
> > +       __u64 :64;
> > +       __u64 :64;
> > +       __u64 :64;
> > +       __u64 :64;
> > +       __u64 :64;
> > +} __attribute__((aligned(8)));
> > is not extensible.
> > It would be better to add indirection to allow kernel side to grow
> > independently from amount of space consumed in a map value.
> 
> Fair point, I was wondering what to do with it - storing just a pointer sounds
> reasonable.
> 
> > Can you think of a way to make irq_work/sleepable callback independent of maps?
> > Assume bpf_mem_alloc is already available and NMI prog can allocate a typed object.
> > The usage could be:
> > struct my_work {
> >   int a;
> >   struct task_struct __kptr_ref *t;
> > };
> > void my_cb(struct my_work *w);
> > 
> > struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
> > w->t = ..;
> > bpf_submit_work(w, my_cb, SLEEPABLE | IRQ_WORK);
> > 
> > Am I day dreaming? :)
> 
> Nothing wrong with dreaming of a better future :) 
> 
> (I'm assuming you're thinking of bpf_mem_alloc being fronted by the allocator you
> recently sent to the list.)
> 
> On a first pass, here are my concerns:
> 
> A program and its maps can guarantee a certain amount of storage for work items.
> Sizing that storage is difficult but it is yours alone to use. The freelist allocator
> can be transiently drained by other programs and starve you of this utility. This is
> a new failure mode, so it's worth talking about.

That would be the issue only when progs deliberately share the allocator.
In this stmt:
struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
The 'allocator' can be unique for each prog or shared across few progs in the same .c file.
I wasn't planning to support one global allocator.
Just like one global hash map doesn't quite make sense.
The user has to create an allocator first, get it connected with memcg,
and use the explicit one in their bpf progs/maps.

> With a generic allocator mechanism, we'll have a hard time enforcing the can't-load-
> or-store-into-special-fields logic. I like that guardrail and I'm not sure how we'd
> achieve the same guarantees. (In your snippet, we don't have the llist_node on the
> work item - do we wrap my_work into something else internally? That would hide the
> fields that need protecting at the expense of an extra bpf_mem_alloc allocation.)

bpf_mem_alloc will return referenced PTR_TO_BTF_ID.
Every field in this structure is typed. So it's trivial for the verifier to make
some of them read only or not accessible at all.
'struct my_work' can have an explicit struct bpf_delayed_work field. Example:
struct my_work {
  struct bpf_delayed_work work; // not accessible by prog
  int a; // scalar read/write
  struct task_struct __kptr_ref *t;  // kptr semantics
};

> Managing the storage returned from bpf_mem_alloc is of course also a concern. We'd
> need to treat bpf_submit_work as "releasing" it (really, taking ownership). This path
> means more lifecycle analysis in the verifier and explicit and implicit free()s.

What is the actual concern?
bpf_submit_work will have clear "release" semantics. The verifier already supports it.
The 'my_cb' callback will receive a referenced PTR_TO_BTF_ID as well and would
have to release it with bpf_mem_free(ma, w).
Here is more complete proposal:

struct {
        __uint(type, BPF_MEM_ALLOC);
} allocator SEC(".maps");

struct my_work {
  struct bpf_delayed_work work;
  int a;
  struct task_struct __kptr_ref *t;
};

void my_cb(struct my_work *w)
{
  // access w
  bpf_mem_free(&allocator, w);
}

void bpf_prog(...)
{
  struct my_work *w = bpf_mem_alloc(&allocator, bpf_core_type_id_local(*w));
  w->t = ..;
  bpf_submit_work(w, my_cb, USE_IRQ_WORK);
}

> I'm not opposed to it overall - the developer experience is very familiar - but I am
> primarily worried that allocator failures will be in the same category of issues as
> the hash map collisions for stacks. If you want reliability, you just don't use that
> type of map - what's the alternative in this hypothetical bpf_mem_alloc future?

Reliability of allocation is certainly necessary.
bpf_mem_alloc will have an ability to _synchronously_ preallocate into freelist
from sleepable context, so bpf prog will have full control of that free list.
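
A rough sketch of how a prog could drive that (bpf_mem_prealloc is a
hypothetical helper name and its signature is invented; 'allocator' is the
BPF_MEM_ALLOC map from the snippet above):

SEC("fentry.s/some_sleepable_hook")
int keep_freelist_full(void *ctx)
{
	/* keep at least 64 objects of this size in the freelist so that
	 * later NMI-context allocations don't have to hit the page allocator */
	bpf_mem_prealloc(&allocator, sizeof(struct my_work), 64);
	return 0;
}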

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-19 19:02         ` Alexei Starovoitov
@ 2022-07-19 22:12           ` Delyan Kratunov
  2022-07-20  0:54             ` Alexei Starovoitov
  0 siblings, 1 reply; 13+ messages in thread
From: Delyan Kratunov @ 2022-07-19 22:12 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: daniel, sdf, ast, andrii, bpf

On Tue, 2022-07-19 at 12:02 -0700, Alexei Starovoitov wrote:
> On Fri, Jul 15, 2022 at 06:28:20PM +0000, Delyan Kratunov wrote:
> > On Thu, 2022-07-14 at 18:51 -0700, Alexei Starovoitov wrote:
> > > On Tue, Jul 12, 2022 at 06:42:52PM +0000, Delyan Kratunov wrote:
> > > > 
> > > > > but have you though of maybe initially supporting something like:
> > > > > 
> > > > > bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> > > > > bpf_timer_set_callback(&timer, cg);
> > > > > bpf_timer_start(&timer, 0, 0);
> > > > > 
> > > > > If you init a timer with that special flag, I'm assuming you can have
> > > > > special cases in the existing helpers to simulate the delayed work?
> > > > 
> > > > Potentially but I have some reservations about drawing this equivalence.
> > > 
> > > hrtimer api has various flags: soft vs hard irq, pinned and not.
> > > So the suggestion to treat irq_work callback as special timer flag
> > > actually fits well.
> > > 
> > > bpf_timer_init + set_callback + start can be a static inline function
> > > named bpf_work_submit() in bpf_helpers.h
> > > (or some new file that will mark the beginning libc-bpf library).
> > > Reusing struct bpf_timer and adding zero-delay callback could probably be
> > > easier for users to learn and consume.
> > 
> > To clarify, we're talking about 1) making bpf_timer nmi-safe for _some_ but not all
> > combinations of parameters and 2) adding new flags to specify an execution context?
> > It's achievable but it's hard to see how it's the superior solution here.
> > 
> > > 
> > > Separately:
> > > +struct bpf_delayed_work {
> > > +       __u64 :64;
> > > +       __u64 :64;
> > > +       __u64 :64;
> > > +       __u64 :64;
> > > +       __u64 :64;
> > > +} __attribute__((aligned(8)));
> > > is not extensible.
> > > It would be better to add indirection to allow kernel side to grow
> > > independently from amount of space consumed in a map value.
> > 
> > Fair point, I was wondering what to do with it - storing just a pointer sounds
> > reasonable.
> > 
> > > Can you think of a way to make irq_work/sleepable callback independent of maps?
> > > Assume bpf_mem_alloc is already available and NMI prog can allocate a typed object.
> > > The usage could be:
> > > struct my_work {
> > >   int a;
> > >   struct task_struct __kptr_ref *t;
> > > };
> > > void my_cb(struct my_work *w);
> > > 
> > > struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
> > > w->t = ..;
> > > bpf_submit_work(w, my_cb, SLEEPABLE | IRQ_WORK);
> > > 
> > > Am I day dreaming? :)
> > 
> > Nothing wrong with dreaming of a better future :) 
> > 
> > (I'm assuming you're thinking of bpf_mem_alloc being fronted by the allocator you
> > recently sent to the list.)
> > 
> > On a first pass, here are my concerns:
> > 
> > A program and its maps can guarantee a certain amount of storage for work items.
> > Sizing that storage is difficult but it is yours alone to use. The freelist allocator
> > can be transiently drained by other programs and starve you of this utility. This is
> > a new failure mode, so it's worth talking about.
> 
> That would be the issue only when progs deliberately share the allocator.
> In this stmt:
> struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
> The 'allocator' can be unique for each prog or shared across few progs in the same .c file.
> I wasn't planning to support one global allocator.
> Just like one global hash map doesn't quite make sense.
> The user has to create an allocator first, get it connected with memcg,
> and use the explicit one in their bpf progs/maps.
> 
> > With a generic allocator mechanism, we'll have a hard time enforcing the can't-load-
> > or-store-into-special-fields logic. I like that guardrail and I'm not sure how we'd
> > achieve the same guarantees. (In your snippet, we don't have the llist_node on the
> > work item - do we wrap my_work into something else internally? That would hide the
> > fields that need protecting at the expense of an extra bpf_mem_alloc allocation.)
> 
> bpf_mem_alloc will return referenced PTR_TO_BTF_ID.
> Every field in this structure is typed. So it's trivial for the verifier to make
> some of them read only or not accessible at all.
> 'struct my_work' can have an explicit struct bpf_delayed_work field. Example:
> struct my_work {
>   struct bpf_delayed_work work; // not accessible by prog
>   int a; // scalar read/write
>   struct task_struct __kptr_ref *t;  // kptr semantics
> };

Sure, anything is possible, it's just more complexity and these checks are not
exactly easy to follow right now. 

Alternatively, we could do the classic allocator thing and allocate accounting space
before the pointer we return. Some magic flag could then expand the space enough to
use for submit_work. Some allocations would be bumped to a higher bucket but that's
okay because it would be consistent overhead for those allocation sites.
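
Roughly this layout, as a sketch (the header struct and macro below are just
for illustration, not a concrete proposal):

/*   [ struct work_hdr ][ user object, e.g. struct my_work ... ]
 *                       ^ pointer returned to the program
 *
 * The program only ever sees the user object; bpf_submit_work() steps back
 * to the hidden header to find its llist node and callback.
 */
struct work_hdr {
	struct llist_node node;
	void *cb;
};

#define work_hdr_of(p)	((struct work_hdr *)(p) - 1)	/* kernel-internal */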

> 
> > Managing the storage returned from bpf_mem_alloc is of course also a concern. We'd
> > need to treat bpf_submit_work as "releasing" it (really, taking ownership). This path
> > means more lifecycle analysis in the verifier and explicit and implicit free()s.
> 
> What is the actual concern?
> bpf_submit_work will have clear "release" semantics. The verifier already supports it.
> The 'my_cb' callback will receive reference PTR_TO_BTF_ID as well and would
> have to release it with bpf_mem_free(ma, w).
> Here is more complete proposal:
> 
> struct {
>         __uint(type, BPF_MEM_ALLOC);
> } allocator SEC(".maps");

I like this, so long as we pre-allocate enough to submit more sleepable work
immediately - the first work item the program submits could then prefill more items.

For an even better experience, it would be great if we could specify in the map
definition the number of items of size X we'll need. If we give that lever to the
developer, they can then use it so they never have to orchestrate sleepable work to
call bpf_mem_prealloc explicitly.
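
Something like this in the map definition (both extra __uint fields are
invented here to show the shape of the idea):

struct {
	__uint(type, BPF_MEM_ALLOC);
	__uint(prefill_size, sizeof(struct my_work));
	__uint(prefill_count, 128);
} allocator SEC(".maps");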

> 
> struct my_work {
>   struct bpf_delayed_work work;
>   int a;
>   struct task_struct __kptr_ref *t;
> };
> 
> void my_cb(struct my_work *w)
> {
>   // access w
>   bpf_mem_free(&allocator, w);
> }
> 
> void bpf_prog(...)
> {
>   struct my_work *w = bpf_mem_alloc(&allocator, bpf_core_type_id_local(*w));
>   w->t = ..;
>   bpf_submit_work(w, my_cb, USE_IRQ_WORK);
> }
> 
> > I'm not opposed to it overall - the developer experience is very familiar - but I am
> > primarily worried that allocator failures will be in the same category of issues as
> > the hash map collisions for stacks. If you want reliability, you just don't use that
> > type of map - what's the alternative in this hypothetical bpf_mem_alloc future?
> 
> Reliability of allocation is certianly necessary.
> bpf_mem_alloc will have an ability to _synchronously_ preallocate into freelist
> from sleepable context, so bpf prog will have full control of that free list.

I think having the map initialized and prefilled on load and having sleepable work
from the first version of this mechanism becomes a requirement of this design. Having
the prefill requirements (number of items and size) on the map definition removes the
requirement to have sleepable work from day one.

How do you want to sequence this? Do you plan to do the work to expose bpf_mem_alloc
to programs as part of the initial series or as a later followup? 

-- Delyan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH RFC bpf-next 0/3] Execution context callbacks
  2022-07-19 22:12           ` Delyan Kratunov
@ 2022-07-20  0:54             ` Alexei Starovoitov
  0 siblings, 0 replies; 13+ messages in thread
From: Alexei Starovoitov @ 2022-07-20  0:54 UTC (permalink / raw)
  To: Delyan Kratunov; +Cc: daniel, sdf, ast, andrii, bpf

On Tue, Jul 19, 2022 at 10:12:57PM +0000, Delyan Kratunov wrote:
> On Tue, 2022-07-19 at 12:02 -0700, Alexei Starovoitov wrote:
> > On Fri, Jul 15, 2022 at 06:28:20PM +0000, Delyan Kratunov wrote:
> > > On Thu, 2022-07-14 at 18:51 -0700, Alexei Starovoitov wrote:
> > > > On Tue, Jul 12, 2022 at 06:42:52PM +0000, Delyan Kratunov wrote:
> > > > > 
> > > > > > but have you though of maybe initially supporting something like:
> > > > > > 
> > > > > > bpf_timer_init(&timer, map, SOME_NEW_DEFERRED_NMI_ONLY_FLAG);
> > > > > > bpf_timer_set_callback(&timer, cg);
> > > > > > bpf_timer_start(&timer, 0, 0);
> > > > > > 
> > > > > > If you init a timer with that special flag, I'm assuming you can have
> > > > > > special cases in the existing helpers to simulate the delayed work?
> > > > > 
> > > > > Potentially but I have some reservations about drawing this equivalence.
> > > > 
> > > > hrtimer api has various flags: soft vs hard irq, pinned and not.
> > > > So the suggestion to treat irq_work callback as special timer flag
> > > > actually fits well.
> > > > 
> > > > bpf_timer_init + set_callback + start can be a static inline function
> > > > named bpf_work_submit() in bpf_helpers.h
> > > > (or some new file that will mark the beginning libc-bpf library).
> > > > Reusing struct bpf_timer and adding zero-delay callback could probably be
> > > > easier for users to learn and consume.
> > > 
> > > To clarify, we're talking about 1) making bpf_timer nmi-safe for _some_ but not all
> > > combinations of parameters and 2) adding new flags to specify an execution context?
> > > It's achievable but it's hard to see how it's the superior solution here.
> > > 
> > > > 
> > > > Separately:
> > > > +struct bpf_delayed_work {
> > > > +       __u64 :64;
> > > > +       __u64 :64;
> > > > +       __u64 :64;
> > > > +       __u64 :64;
> > > > +       __u64 :64;
> > > > +} __attribute__((aligned(8)));
> > > > is not extensible.
> > > > It would be better to add indirection to allow kernel side to grow
> > > > independently from amount of space consumed in a map value.
> > > 
> > > Fair point, I was wondering what to do with it - storing just a pointer sounds
> > > reasonable.
> > > 
> > > > Can you think of a way to make irq_work/sleepable callback independent of maps?
> > > > Assume bpf_mem_alloc is already available and NMI prog can allocate a typed object.
> > > > The usage could be:
> > > > struct my_work {
> > > >   int a;
> > > >   struct task_struct __kptr_ref *t;
> > > > };
> > > > void my_cb(struct my_work *w);
> > > > 
> > > > struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
> > > > w->t = ..;
> > > > bpf_submit_work(w, my_cb, SLEEPABLE | IRQ_WORK);
> > > > 
> > > > Am I day dreaming? :)
> > > 
> > > Nothing wrong with dreaming of a better future :) 
> > > 
> > > (I'm assuming you're thinking of bpf_mem_alloc being fronted by the allocator you
> > > recently sent to the list.)
> > > 
> > > On a first pass, here are my concerns:
> > > 
> > > A program and its maps can guarantee a certain amount of storage for work items.
> > > Sizing that storage is difficult but it is yours alone to use. The freelist allocator
> > > can be transiently drained by other programs and starve you of this utility. This is
> > > a new failure mode, so it's worth talking about.
> > 
> > That would be the issue only when progs deliberately share the allocator.
> > In this stmt:
> > struct my_work *w = bpf_mem_alloc(allocator, bpf_core_type_id_local(*w));
> > The 'allocator' can be unique for each prog or shared across few progs in the same .c file.
> > I wasn't planning to support one global allocator.
> > Just like one global hash map doesn't quite make sense.
> > The user has to create an allocator first, get it connected with memcg,
> > and use the explicit one in their bpf progs/maps.
> > 
> > > With a generic allocator mechanism, we'll have a hard time enforcing the can't-load-
> > > or-store-into-special-fields logic. I like that guardrail and I'm not sure how we'd
> > > achieve the same guarantees. (In your snippet, we don't have the llist_node on the
> > > work item - do we wrap my_work into something else internally? That would hide the
> > > fields that need protecting at the expense of an extra bpf_mem_alloc allocation.)
> > 
> > bpf_mem_alloc will return referenced PTR_TO_BTF_ID.
> > Every field in this structure is typed. So it's trivial for the verifier to make
> > some of them read only or not accessible at all.
> > 'struct my_work' can have an explicit struct bpf_delayed_work field. Example:
> > struct my_work {
> >   struct bpf_delayed_work work; // not accessible by prog
> >   int a; // scalar read/write
> >   struct task_struct __kptr_ref *t;  // kptr semantics
> > };
> 
> Sure, anything is possible, it's just more complexity and these checks are not
> exactly easy to follow right now. 
> 
> Alternatively, we could do the classic allocator thing and allocate accounting space
> before the pointer we return. Some magic flag could then expand the space enough to
> use for submit_work. Some allocations would be bumped to a higher bucket but that's
> okay because it would be consistent overhead for those allocation sites.

Technically we can, but that would be a departure from what we already do.
bpf_spin_lock, bpf_timer, __kptr are normal part of struct-s with different access
restrictions. 'struct bpf_delayed_work' shouldn't be any different.

Another approach would be to let bpf prog allocate 'struct my_work' without
any special fields. Then use nmi-safe allocator inside bpf_submit_work, hide
it completely from bpf side and auto-free after callback is done.
But extra alloc is a performance hit and overall it will be an unusual hack.

Maybe we can allow bpf_submit_work() to work with referenced ptr_to_btf_id
like above and with normal map value similar to what you've implemented?
We would need to somehow make sure that container_of() operation to cast from
&work either to allocated ptr_to_btf_id or to map value works in both cases.
That would be the most flexible solution and will resemble kernel programming
style the best.
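
From the prog's point of view that could look roughly like this
(bpf_submit_work and the USE_IRQ_WORK flag are still hypothetical; the
callback takes &work and uses the usual container_of() to get back to the
enclosing object in both cases; my_map/key are whatever ordinary map and key
the prog already uses):

struct my_work {
	struct bpf_delayed_work work;
	int a;
};

void my_cb(struct bpf_delayed_work *work)
{
	struct my_work *w = container_of(work, struct my_work, work);

	/* use w->a; free w only if it came from the allocator */
}

void bpf_prog(...)
{
	int key = 0;

	/* case 1: allocated object (referenced ptr_to_btf_id) */
	struct my_work *w = bpf_mem_alloc(&allocator, bpf_core_type_id_local(*w));

	if (w)
		bpf_submit_work(&w->work, my_cb, USE_IRQ_WORK);

	/* case 2: the same field embedded in a map value, as in the RFC */
	struct my_work *v = bpf_map_lookup_elem(&my_map, &key);

	if (v)
		bpf_submit_work(&v->work, my_cb, USE_IRQ_WORK);
}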

> > 
> > > Managing the storage returned from bpf_mem_alloc is of course also a concern. We'd
> > > need to treat bpf_submit_work as "releasing" it (really, taking ownership). This path
> > > means more lifecycle analysis in the verifier and explicit and implicit free()s.
> > 
> > What is the actual concern?
> > bpf_submit_work will have clear "release" semantics. The verifier already supports it.
> > The 'my_cb' callback will receive a referenced PTR_TO_BTF_ID as well and would
> > have to release it with bpf_mem_free(ma, w).
> > Here is more complete proposal:
> > 
> > struct {
> >         __uint(type, BPF_MEM_ALLOC);
> > } allocator SEC(".maps");
> 
> I like this, so long as we pre-allocate enough to submit more sleepable work
> immediately - the first work item the program submits could then prefill more items.
> 
> For an even better experience, it would be great if we could specify in the map
> definition the number of items of size X we'll need. If we give that lever to the
> developer, they can then use it so they never have to orchestrate sleepable work to
> call bpf_mem_prealloc explicitly.

Agree. That's the idea. Will work on it.

> 
> > 
> > struct my_work {
> >   struct bpf_delayed_work work;
> >   int a;
> >   struct task_struct __kptr_ref *t;
> > };
> > 
> > void my_cb(struct my_work *w)
> > {
> >   // access w
> >   bpf_mem_free(&allocator, w);
> > }
> > 
> > void bpf_prog(...)
> > {
> >   struct my_work *w = bpf_mem_alloc(&allocator, bpf_core_type_id_local(*w));
> >   w->t = ..;
> >   bpf_submit_work(w, my_cb, USE_IRQ_WORK);
> > }
> > 
> > > I'm not opposed to it overall - the developer experience is very familiar - but I am
> > > primarily worried that allocator failures will be in the same category of issues as
> > > the hash map collisions for stacks. If you want reliability, you just don't use that
> > > type of map - what's the alternative in this hypothetical bpf_mem_alloc future?
> > 
> > Reliability of allocation is certainly necessary.
> > bpf_mem_alloc will have an ability to _synchronously_ preallocate into freelist
> > from sleepable context, so bpf prog will have full control of that free list.
> 
> I think having the map initialized and prefilled on load and having sleepable work
> from the first version of this mechanism becomes a requirement of this design. Having
> the prefill requirements (number of items and size) on the map definition removes the
> requirement to have sleepable work from day one.

I'm not sure why 'sleepable' is a requirement. irq_work will be able to do
synchronous prefill with GFP_NOWAIT. A sleepable callback will be able to do
synchronous prefill with GFP_KERNEL. There is a difference, of course,
but it's not a blocker.
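
Kernel-side the prefill path just picks gfp flags for the calling context,
roughly (the function below and the ma->unit_size / ma->free_llist fields
are assumptions for illustration, not the actual bpf_mem_alloc code):

static int bpf_mem_prefill(struct bpf_mem_alloc *ma, unsigned int cnt, gfp_t gfp)
{
	/* callers pass GFP_NOWAIT from the irq_work callback and
	 * GFP_KERNEL from a sleepable callback */
	unsigned int i;

	for (i = 0; i < cnt; i++) {
		struct llist_node *n = kmalloc(ma->unit_size, gfp);

		if (!n)
			return -ENOMEM;
		llist_add(n, &ma->free_llist);
	}
	return 0;
}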

> How do you want to sequence this? Do you plan to do the work to expose bpf_mem_alloc
> to programs as part of the initial series or as a later followup? 

Currently thinking of it as a follow-up.
If you have cycles, maybe you can help?
bpf_mem_alloc/free internals are tested and usable already. Prefill is not implemented yet.
But the work to add the bpf_mem_alloc helper and to expose the allocator as a special kind of map
can start already.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-07-20  0:54 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-11 21:48 [PATCH RFC bpf-next 0/3] Execution context callbacks Delyan Kratunov
2022-07-11 21:48 ` [PATCH RFC bpf-next 2/3] bpf: add delayed_work mechanism Delyan Kratunov
2022-07-11 21:48 ` [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields Delyan Kratunov
2022-07-14  4:23   ` Andrii Nakryiko
2022-07-11 21:48 ` [PATCH RFC bpf-next 3/3] selftests: delayed_work tests Delyan Kratunov
2022-07-12 18:07 ` [PATCH RFC bpf-next 0/3] Execution context callbacks sdf
2022-07-12 18:42   ` Delyan Kratunov
2022-07-12 22:51     ` sdf
2022-07-15  1:51     ` Alexei Starovoitov
2022-07-15 18:28       ` Delyan Kratunov
2022-07-19 19:02         ` Alexei Starovoitov
2022-07-19 22:12           ` Delyan Kratunov
2022-07-20  0:54             ` Alexei Starovoitov
