* [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
@ 2022-04-13 18:32 Stanislav Fomichev
  2022-04-13 19:23 ` Andrii Nakryiko
  0 siblings, 1 reply; 10+ messages in thread
From: Stanislav Fomichev @ 2022-04-13 18:32 UTC (permalink / raw)
  To: netdev, bpf; +Cc: ast, daniel, andrii, Stanislav Fomichev, Martin KaFai Lau

Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros
into functions") switched a bunch of BPF_PROG_RUN macros to inline
routines. This changed the semantics a bit. Due to macro argument
expansion, it used to be:

	rcu_read_lock();
	array = rcu_dereference(cgrp->bpf.effective[atype]);
	...

Now, with inline routines, we have:
	array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
	/* array_rcu can be kfree'd here */
	rcu_read_lock();
	array = rcu_dereference(array_rcu);

I'm assuming that in practice the RCU subsystem isn't fast enough to
trigger this, but let's use the RCU API properly: ask the callers of
BPF_PROG_RUN to manage RCU themselves.
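
For illustration (this mirrors the cgroup callers in the diff below,
with ctx standing in for the per-hook context), the dereference now
stays under the caller's own lock:

	rcu_read_lock();
	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
				    ctx, bpf_prog_run, 0);
	rcu_read_unlock();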

Also, rename the helpers to lowercase so they are not confused with the
macros. Additionally, drop BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY and
expand it inline at its only call site.

See [1] for more context.

  [1] https://lore.kernel.org/bpf/CAKH8qBs60fOinFdxiiQikK_q0EcVxGvNTQoWvHLEUGbgcj1UYg@mail.gmail.com/T/#u

Cc: Martin KaFai Lau <kafai@fb.com>
Fixes: 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros into functions")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 drivers/media/rc/bpf-lirc.c |  8 +++-
 include/linux/bpf.h         | 70 ++++++-------------------------
 kernel/bpf/cgroup.c         | 84 +++++++++++++++++++++++++++++--------
 kernel/trace/bpf_trace.c    |  5 ++-
 4 files changed, 90 insertions(+), 77 deletions(-)

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 3eff08d7b8e5..fe17c7f98e81 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -216,8 +216,12 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
 
 	raw->bpf_sample = sample;
 
-	if (raw->progs)
-		BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run);
+	if (raw->progs) {
+		rcu_read_lock();
+		bpf_prog_run_array(rcu_dereference(raw->progs),
+				   &raw->bpf_sample, bpf_prog_run);
+		rcu_read_unlock();
+	}
 }
 
 /*
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bdb5298735ce..16111924fa3e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1221,7 +1221,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 /* an array of programs to be executed under rcu_lock.
  *
  * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
+ * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run);
  *
  * the structure returned by bpf_prog_array_alloc() should be populated
  * with program pointers and the last pointer must be NULL.
@@ -1316,21 +1316,20 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 
 static __always_inline int
-BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array_cg_flags(const struct bpf_prog_array *array,
 			    const void *ctx, bpf_prog_run_fn run_prog,
 			    int retval, u32 *ret_flags)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_cg_run_ctx run_ctx;
 	u32 func_ret;
 
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
 	run_ctx.retval = retval;
 	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(array_rcu);
 	item = &array->items[0];
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	while ((prog = READ_ONCE(item->prog))) {
@@ -1342,26 +1341,24 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
-	rcu_read_unlock();
 	migrate_enable();
 	return run_ctx.retval;
 }
 
 static __always_inline int
-BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array_cg(const struct bpf_prog_array *array,
 		      const void *ctx, bpf_prog_run_fn run_prog,
 		      int retval)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_cg_run_ctx run_ctx;
 
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
 	run_ctx.retval = retval;
 	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(array_rcu);
 	item = &array->items[0];
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	while ((prog = READ_ONCE(item->prog))) {
@@ -1371,27 +1368,26 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
-	rcu_read_unlock();
 	migrate_enable();
 	return run_ctx.retval;
 }
 
 static __always_inline u32
-BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array(const struct bpf_prog_array *array,
 		   const void *ctx, bpf_prog_run_fn run_prog)
 {
 	const struct bpf_prog_array_item *item;
 	const struct bpf_prog *prog;
-	const struct bpf_prog_array *array;
 	struct bpf_run_ctx *old_run_ctx;
 	struct bpf_trace_run_ctx run_ctx;
 	u32 ret = 1;
 
-	migrate_disable();
-	rcu_read_lock();
-	array = rcu_dereference(array_rcu);
+	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
 	if (unlikely(!array))
-		goto out;
+		return ret;
+
+	migrate_disable();
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	item = &array->items[0];
 	while ((prog = READ_ONCE(item->prog))) {
@@ -1400,50 +1396,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
-out:
-	rcu_read_unlock();
 	migrate_enable();
 	return ret;
 }
 
-/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
- * so BPF programs can request cwr for TCP packets.
- *
- * Current cgroup skb programs can only return 0 or 1 (0 to drop the
- * packet. This macro changes the behavior so the low order bit
- * indicates whether the packet should be dropped (0) or not (1)
- * and the next bit is a congestion notification bit. This could be
- * used by TCP to call tcp_enter_cwr()
- *
- * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
- *   0: drop packet
- *   1: keep packet
- *   2: drop packet and cn
- *   3: keep packet and cn
- *
- * This macro then converts it to one of the NET_XMIT or an error
- * code that is then interpreted as drop packet (and no cn):
- *   0: NET_XMIT_SUCCESS  skb should be transmitted
- *   1: NET_XMIT_DROP     skb should be dropped and cn
- *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -err              skb should be dropped
- */
-#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)		\
-	({						\
-		u32 _flags = 0;				\
-		bool _cn;				\
-		u32 _ret;				\
-		_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
-		_cn = _flags & BPF_RET_SET_CN;		\
-		if (_ret && !IS_ERR_VALUE((long)_ret))	\
-			_ret = -EFAULT;			\
-		if (!_ret)				\
-			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
-		else					\
-			_ret = (_cn ? NET_XMIT_DROP : _ret);		\
-		_ret;					\
-	})
-
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 extern struct mutex bpf_stats_enabled_mutex;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 128028efda64..f5babdac245c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1074,15 +1074,45 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	/* compute pointers for the bpf prog */
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
+	rcu_read_lock();
 	if (atype == CGROUP_INET_EGRESS) {
-		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
-			cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
+		u32 flags = 0;
+		bool cn;
+
+		ret = bpf_prog_run_array_cg_flags(
+			rcu_dereference(cgrp->bpf.effective[atype]),
+			skb, __bpf_prog_run_save_cb, 0, &flags);
+
+		/* Return values of CGROUP EGRESS BPF programs are:
+		 *   0: drop packet
+		 *   1: keep packet
+		 *   2: drop packet and cn
+		 *   3: keep packet and cn
+		 *
+		 * The returned value is then converted to one of the NET_XMIT
+		 * or an error code that is then interpreted as drop packet
+		 * (and no cn):
+		 *   0: NET_XMIT_SUCCESS  skb should be transmitted
+		 *   1: NET_XMIT_DROP     skb should be dropped and cn
+		 *   2: NET_XMIT_CN       skb should be transmitted and cn
+		 *   3: -err              skb should be dropped
+		 */
+
+		cn = flags & BPF_RET_SET_CN;
+		if (ret && !IS_ERR_VALUE((long)ret))
+			ret = -EFAULT;
+		if (!ret)
+			ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
+		else
+			ret = (cn ? NET_XMIT_DROP : ret);
 	} else {
-		ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-					    __bpf_prog_run_save_cb, 0);
+		ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
+					    skb, __bpf_prog_run_save_cb, 0);
 		if (ret && !IS_ERR_VALUE((long)ret))
 			ret = -EFAULT;
 	}
+	rcu_read_unlock();
+
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
@@ -1108,9 +1138,14 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
 			       enum cgroup_bpf_attach_type atype)
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	int ret;
+
+	rcu_read_lock();
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
+				    sk, bpf_prog_run, 0);
+	rcu_read_unlock();
 
-	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
-				     bpf_prog_run, 0);
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1142,6 +1177,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 	};
 	struct sockaddr_storage unspec;
 	struct cgroup *cgrp;
+	int ret;
 
 	/* Check socket family since not all sockets represent network
 	 * endpoint (e.g. AF_UNIX).
@@ -1154,9 +1190,12 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 		ctx.uaddr = (struct sockaddr *)&unspec;
 	}
 
+	rcu_read_lock();
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-					   bpf_prog_run, 0, flags);
+	ret = bpf_prog_run_array_cg_flags(rcu_dereference(cgrp->bpf.effective[atype]),
+					  &ctx, bpf_prog_run, 0, flags);
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1181,9 +1220,14 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 				     enum cgroup_bpf_attach_type atype)
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	int ret;
+
+	rcu_read_lock();
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
+				    sock_ops, bpf_prog_run, 0);
+	rcu_read_unlock();
 
-	return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-				     bpf_prog_run, 0);
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1200,8 +1244,8 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-				    bpf_prog_run, 0);
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
+				    &ctx, bpf_prog_run, 0);
 	rcu_read_unlock();
 
 	return ret;
@@ -1366,8 +1410,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	rcu_read_lock();
 	cgrp = task_dfl_cgroup(current);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-				    bpf_prog_run, 0);
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
+				    &ctx, bpf_prog_run, 0);
 	rcu_read_unlock();
 
 	kfree(ctx.cur_val);
@@ -1459,8 +1503,10 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+	rcu_read_lock();
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[CGROUP_SETSOCKOPT]),
 				    &ctx, bpf_prog_run, 0);
+	rcu_read_unlock();
 	release_sock(sk);
 
 	if (ret)
@@ -1559,8 +1605,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	}
 
 	lock_sock(sk);
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+	rcu_read_lock();
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[CGROUP_GETSOCKOPT]),
 				    &ctx, bpf_prog_run, retval);
+	rcu_read_unlock();
 	release_sock(sk);
 
 	if (ret < 0)
@@ -1608,8 +1656,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
 	 * be called if that data shouldn't be "exported".
 	 */
 
-	ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+	rcu_read_lock();
+	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[CGROUP_GETSOCKOPT]),
 				    &ctx, bpf_prog_run, retval);
+	rcu_read_unlock();
 	if (ret < 0)
 		return ret;
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b26f3da943de..f15b826f9899 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -129,7 +129,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 	 * out of events when it was updated in between this and the
 	 * rcu_dereference() which is accepted risk.
 	 */
-	ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
+	rcu_read_lock();
+	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
+				 ctx, bpf_prog_run);
+	rcu_read_unlock();
 
  out:
 	__this_cpu_dec(bpf_prog_active);
-- 
2.36.0.rc0.470.gd361397f0d-goog



* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 18:32 [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines Stanislav Fomichev
@ 2022-04-13 19:23 ` Andrii Nakryiko
  2022-04-13 19:39   ` sdf
  0 siblings, 1 reply; 10+ messages in thread
From: Andrii Nakryiko @ 2022-04-13 19:23 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau

On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com> wrote:
>
> Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros
> into functions") switched a bunch of BPF_PROG_RUN macros to inline
> routines. This changed the semantic a bit. Due to arguments expansion
> of macros, it used to be:
>
>         rcu_read_lock();
>         array = rcu_dereference(cgrp->bpf.effective[atype]);
>         ...
>
> Now, with with inline routines, we have:
>         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
>         /* array_rcu can be kfree'd here */
>         rcu_read_lock();
>         array = rcu_dereference(array_rcu);
>

So subtle difference, wow...

But this open-coding of rcu_read_lock() seems very unfortunate as
well. Would turning BPF_PROG_RUN_ARRAY back into a macro which only does
the rcu lock/unlock, grabs the effective array, and then calls a static
inline function be a viable solution?

#define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog, ret_flags) \
  ({
      int ret;

      rcu_read_lock();
      ret = __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
      rcu_read_unlock();
      ret;
  })


where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation dropped
(and no internal rcu stuff)?
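
(Call sites would then keep their current shape, e.g. roughly like
today's sock_addr one:

     ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
                                       bpf_prog_run, 0, flags);

just without an open-coded rcu_read_lock()/rcu_read_unlock() around it.)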


> I'm assuming in practice rcu subsystem isn't fast enough to trigger
> this but let's use rcu API properly: ask callers of BPF_PROG_RUN
> to manage rcu themselves.
>
> Also, rename to lower caps to not confuse with macros. Additionally,
> drop and expand BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY.
>
> See [1] for more context.
>
>   [1] https://lore.kernel.org/bpf/CAKH8qBs60fOinFdxiiQikK_q0EcVxGvNTQoWvHLEUGbgcj1UYg@mail.gmail.com/T/#u
>
> Cc: Martin KaFai Lau <kafai@fb.com>
> Fixes: 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros into functions")
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>  drivers/media/rc/bpf-lirc.c |  8 +++-
>  include/linux/bpf.h         | 70 ++++++-------------------------
>  kernel/bpf/cgroup.c         | 84 +++++++++++++++++++++++++++++--------
>  kernel/trace/bpf_trace.c    |  5 ++-
>  4 files changed, 90 insertions(+), 77 deletions(-)
>

[...]


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 19:23 ` Andrii Nakryiko
@ 2022-04-13 19:39   ` sdf
  2022-04-13 19:52     ` Andrii Nakryiko
  0 siblings, 1 reply; 10+ messages in thread
From: sdf @ 2022-04-13 19:39 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau

On 04/13, Andrii Nakryiko wrote:
> On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>  
> wrote:
> >
> > Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros
> > into functions") switched a bunch of BPF_PROG_RUN macros to inline
> > routines. This changed the semantic a bit. Due to arguments expansion
> > of macros, it used to be:
> >
> >         rcu_read_lock();
> >         array = rcu_dereference(cgrp->bpf.effective[atype]);
> >         ...
> >
> > Now, with with inline routines, we have:
> >         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
> >         /* array_rcu can be kfree'd here */
> >         rcu_read_lock();
> >         array = rcu_dereference(array_rcu);
> >

> So subtle difference, wow...

> But this open-coding of rcu_read_lock() seems very unfortunate as
> well. Would making BPF_PROG_RUN_ARRAY back to a macro which only does
> rcu lock/unlock and grabs effective array and then calls static inline
> function be a viable solution?

> #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog, ret_flags) \
>    ({
>        int ret;

>        rcu_read_lock();
>        ret =  
> __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
>        rcu_read_unlock();
>        ret;
>    })


> where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
> BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation dropped
> (and no internal rcu stuff)?

Yeah, that should work. But why do you think it's better to hide them?
I find those automatic rcu locks deep in the call stack a bit obscure
(when reasoning about sleepable vs non-sleepable contexts/bpf).

I, as the caller, know that the effective array is rcu-managed (it
has the __rcu annotation) and it seems natural for me to grab the rcu
lock while working with it; I might grab it for other things, like the
cgroup, anyway.
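
For example, the dev/sysctl paths in this patch already take the rcu
lock for task_dfl_cgroup() anyway and simply reuse it for the run:

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = bpf_prog_run_array_cg(rcu_dereference(cgrp->bpf.effective[atype]),
				    &ctx, bpf_prog_run, 0);
	rcu_read_unlock();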


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 19:39   ` sdf
@ 2022-04-13 19:52     ` Andrii Nakryiko
  2022-04-13 22:31       ` Daniel Borkmann
  2022-04-13 22:32       ` Martin KaFai Lau
  0 siblings, 2 replies; 10+ messages in thread
From: Andrii Nakryiko @ 2022-04-13 19:52 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau

On Wed, Apr 13, 2022 at 12:39 PM <sdf@google.com> wrote:
>
> On 04/13, Andrii Nakryiko wrote:
> > On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>
> > wrote:
> > >
> > > Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros
> > > into functions") switched a bunch of BPF_PROG_RUN macros to inline
> > > routines. This changed the semantic a bit. Due to arguments expansion
> > > of macros, it used to be:
> > >
> > >         rcu_read_lock();
> > >         array = rcu_dereference(cgrp->bpf.effective[atype]);
> > >         ...
> > >
> > > Now, with with inline routines, we have:
> > >         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
> > >         /* array_rcu can be kfree'd here */
> > >         rcu_read_lock();
> > >         array = rcu_dereference(array_rcu);
> > >
>
> > So subtle difference, wow...
>
> > But this open-coding of rcu_read_lock() seems very unfortunate as
> > well. Would making BPF_PROG_RUN_ARRAY back to a macro which only does
> > rcu lock/unlock and grabs effective array and then calls static inline
> > function be a viable solution?
>
> > #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog, ret_flags) \
> >    ({
> >        int ret;
>
> >        rcu_read_lock();
> >        ret =
> > __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
> >        rcu_read_unlock();
> >        ret;
> >    })
>
>
> > where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
> > BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation dropped
> > (and no internal rcu stuff)?
>
> Yeah, that should work. But why do you think it's better to hide them?
> I find those automatic rcu locks deep in the call stack a bit obscure
> (when reasoning about sleepable vs non-sleepable contexts/bpf).
>
> I, as the caller, know that the effective array is rcu-managed (it
> has __rcu annotation) and it seems natural for me to grab rcu lock
> while work with it; I might grab it for some other things like cgroup
> anyway.

If you think that having this spelled out explicitly is better, I'm fine
with that as well. I thought a simpler invocation pattern would be good,
given that we call the bpf_prog_run_array variants in quite a lot of
places. So count me indifferent. I'm curious what others think.


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 19:52     ` Andrii Nakryiko
@ 2022-04-13 22:31       ` Daniel Borkmann
  2022-04-13 22:32       ` Martin KaFai Lau
  1 sibling, 0 replies; 10+ messages in thread
From: Daniel Borkmann @ 2022-04-13 22:31 UTC (permalink / raw)
  To: Andrii Nakryiko, Stanislav Fomichev
  Cc: Networking, bpf, Alexei Starovoitov, Andrii Nakryiko, Martin KaFai Lau

On 4/13/22 9:52 PM, Andrii Nakryiko wrote:
> On Wed, Apr 13, 2022 at 12:39 PM <sdf@google.com> wrote:
>> On 04/13, Andrii Nakryiko wrote:
>>> On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>
>>> wrote:
>>>>
>>>> Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros
>>>> into functions") switched a bunch of BPF_PROG_RUN macros to inline
>>>> routines. This changed the semantic a bit. Due to arguments expansion
>>>> of macros, it used to be:
>>>>
>>>>          rcu_read_lock();
>>>>          array = rcu_dereference(cgrp->bpf.effective[atype]);
>>>>          ...
>>>>
>>>> Now, with with inline routines, we have:
>>>>          array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
>>>>          /* array_rcu can be kfree'd here */
>>>>          rcu_read_lock();
>>>>          array = rcu_dereference(array_rcu);
>>>>
>>
>>> So subtle difference, wow...
>>
>>> But this open-coding of rcu_read_lock() seems very unfortunate as
>>> well. Would making BPF_PROG_RUN_ARRAY back to a macro which only does
>>> rcu lock/unlock and grabs effective array and then calls static inline
>>> function be a viable solution?
>>
>>> #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog, ret_flags) \
>>>     ({
>>>         int ret;
>>
>>>         rcu_read_lock();
>>>         ret =
>>> __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
>>>         rcu_read_unlock();
>>>         ret;
>>>     })
>>
>>
>>> where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
>>> BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation dropped
>>> (and no internal rcu stuff)?
>>
>> Yeah, that should work. But why do you think it's better to hide them?
>> I find those automatic rcu locks deep in the call stack a bit obscure
>> (when reasoning about sleepable vs non-sleepable contexts/bpf).
>>
>> I, as the caller, know that the effective array is rcu-managed (it
>> has __rcu annotation) and it seems natural for me to grab rcu lock
>> while work with it; I might grab it for some other things like cgroup
>> anyway.
> 
> If you think that having this more explicitly is better, I'm fine with
> that as well. I thought a simpler invocation pattern would be good,
> given we call bpf_prog_run_array variants in quite a lot of places. So
> count me indifferent. I'm curious what others think.

+1 for explicit; it might also be easier to review/audit compared to
hiding it in a macro.


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 19:52     ` Andrii Nakryiko
  2022-04-13 22:31       ` Daniel Borkmann
@ 2022-04-13 22:32       ` Martin KaFai Lau
  2022-04-13 22:52         ` sdf
  1 sibling, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2022-04-13 22:32 UTC (permalink / raw)
  To: Stanislav Fomichev, Andrii Nakryiko
  Cc: Networking, bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko

On Wed, Apr 13, 2022 at 12:52:53PM -0700, Andrii Nakryiko wrote:
> On Wed, Apr 13, 2022 at 12:39 PM <sdf@google.com> wrote:
> >
> > On 04/13, Andrii Nakryiko wrote:
> > > On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>
> > > wrote:
> > > >
> > > > Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of macros
> > > > into functions") switched a bunch of BPF_PROG_RUN macros to inline
> > > > routines. This changed the semantic a bit. Due to arguments expansion
> > > > of macros, it used to be:
> > > >
> > > >         rcu_read_lock();
> > > >         array = rcu_dereference(cgrp->bpf.effective[atype]);
> > > >         ...
> > > >
> > > > Now, with with inline routines, we have:
> > > >         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
> > > >         /* array_rcu can be kfree'd here */
> > > >         rcu_read_lock();
> > > >         array = rcu_dereference(array_rcu);
> > > >
> >
> > > So subtle difference, wow...
> >
> > > But this open-coding of rcu_read_lock() seems very unfortunate as
> > > well. Would making BPF_PROG_RUN_ARRAY back to a macro which only does
> > > rcu lock/unlock and grabs effective array and then calls static inline
> > > function be a viable solution?
> >
> > > #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog, ret_flags) \
> > >    ({
> > >        int ret;
> >
> > >        rcu_read_lock();
> > >        ret =
> > > __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
> > >        rcu_read_unlock();
> > >        ret;
> > >    })
> >
> >
> > > where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
> > > BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation dropped
> > > (and no internal rcu stuff)?
> >
> > Yeah, that should work. But why do you think it's better to hide them?
> > I find those automatic rcu locks deep in the call stack a bit obscure
> > (when reasoning about sleepable vs non-sleepable contexts/bpf).
> >
> > I, as the caller, know that the effective array is rcu-managed (it
> > has __rcu annotation) and it seems natural for me to grab rcu lock
> > while work with it; I might grab it for some other things like cgroup
> > anyway.
> 
> If you think that having this more explicitly is better, I'm fine with
> that as well. I thought a simpler invocation pattern would be good,
> given we call bpf_prog_run_array variants in quite a lot of places. So
> count me indifferent. I'm curious what others think.

Would it work if bpf_prog_run_array_cg() directly took a
'struct cgroup *cgrp' argument instead of the array (rough sketch
below)?  bpf_prog_run_array_cg() should know what protection is needed
to get a member from the cgrp ptr.  The sk call path should be able
to provide a cgrp ptr.  For the current task's cgrp, pass NULL as the
cgrp pointer and then current will be used in bpf_prog_run_array_cg().
An rcu_read_lock() is needed anyway to get current's cgrp, and that
can be done together in bpf_prog_run_array_cg().

That leaves only two remaining bpf_prog_run_array() usages, in lirc
and bpf_trace, which are not too bad to have directly do
rcu_read_lock() on their own struct, no?
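
Roughly something like this untested sketch (the NULL-cgrp handling and
the per-prog loop are just my assumption of how it could look, modeled
on today's bpf_prog_run_array_cg()):

static __always_inline int
bpf_prog_run_array_cg(struct cgroup *cgrp,
		      enum cgroup_bpf_attach_type atype,
		      const void *ctx, bpf_prog_run_fn run_prog,
		      int retval)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog_array *array;
	const struct bpf_prog *prog;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;

	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	/* NULL means "run the programs of the current task's cgroup" */
	if (!cgrp)
		cgrp = task_dfl_cgroup(current);
	array = rcu_dereference(cgrp->bpf.effective[atype]);
	item = &array->items[0];
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
			run_ctx.retval = -EPERM;
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
	return run_ctx.retval;
}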


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 22:32       ` Martin KaFai Lau
@ 2022-04-13 22:52         ` sdf
  2022-04-13 23:56           ` Martin KaFai Lau
  2022-04-14  9:30           ` Jakub Kicinski
  0 siblings, 2 replies; 10+ messages in thread
From: sdf @ 2022-04-13 22:52 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Andrii Nakryiko, Networking, bpf, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko

On 04/13, Martin KaFai Lau wrote:
> On Wed, Apr 13, 2022 at 12:52:53PM -0700, Andrii Nakryiko wrote:
> > On Wed, Apr 13, 2022 at 12:39 PM <sdf@google.com> wrote:
> > >
> > > On 04/13, Andrii Nakryiko wrote:
> > > > On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>
> > > > wrote:
> > > > >
> > > > > Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of  
> macros
> > > > > into functions") switched a bunch of BPF_PROG_RUN macros to inline
> > > > > routines. This changed the semantic a bit. Due to arguments  
> expansion
> > > > > of macros, it used to be:
> > > > >
> > > > >         rcu_read_lock();
> > > > >         array = rcu_dereference(cgrp->bpf.effective[atype]);
> > > > >         ...
> > > > >
> > > > > Now, with with inline routines, we have:
> > > > >         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
> > > > >         /* array_rcu can be kfree'd here */
> > > > >         rcu_read_lock();
> > > > >         array = rcu_dereference(array_rcu);
> > > > >
> > >
> > > > So subtle difference, wow...
> > >
> > > > But this open-coding of rcu_read_lock() seems very unfortunate as
> > > > well. Would making BPF_PROG_RUN_ARRAY back to a macro which only  
> does
> > > > rcu lock/unlock and grabs effective array and then calls static  
> inline
> > > > function be a viable solution?
> > >
> > > > #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog,  
> ret_flags) \
> > > >    ({
> > > >        int ret;
> > >
> > > >        rcu_read_lock();
> > > >        ret =
> > > > __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
> > > >        rcu_read_unlock();
> > > >        ret;
> > > >    })
> > >
> > >
> > > > where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
> > > > BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation  
> dropped
> > > > (and no internal rcu stuff)?
> > >
> > > Yeah, that should work. But why do you think it's better to hide them?
> > > I find those automatic rcu locks deep in the call stack a bit obscure
> > > (when reasoning about sleepable vs non-sleepable contexts/bpf).
> > >
> > > I, as the caller, know that the effective array is rcu-managed (it
> > > has __rcu annotation) and it seems natural for me to grab rcu lock
> > > while work with it; I might grab it for some other things like cgroup
> > > anyway.
> >
> > If you think that having this more explicitly is better, I'm fine with
> > that as well. I thought a simpler invocation pattern would be good,
> > given we call bpf_prog_run_array variants in quite a lot of places. So
> > count me indifferent. I'm curious what others think.

> Would it work if the bpf_prog_run_array_cg() directly takes the
> 'struct cgroup *cgrp' argument instead of the array ?
> bpf_prog_run_array_cg() should know what protection is needed
> to get member from the cgrp ptr.  The sk call path should be able
> to provide a cgrp ptr.  For current cgrp, pass NULL as the cgrp
> pointer and then current will be used in bpf_prog_run_array_cg().
> A rcu_read_lock() is needed anyway to get the current's cgrp
> and can be done together in bpf_prog_run_array_cg().

> That there are only two remaining bpf_prog_run_array() usages
> from lirc and bpf_trace which are not too bad to have them
> directly do rcu_read_lock on their own struct ?

From Andrii's original commit message:

    I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to
    accept struct cgroup and enum bpf_attach_type instead of bpf_prog_array,
    fetching cgrp->bpf.effective[type] and RCU-dereferencing it internally.
    But that required including include/linux/cgroup-defs.h, which I wasn't
    sure is ok with everyone.

I guess including cgroup-defs.h/bpf-cgroup-defs.h into bpf.h might still
be somewhat problematic?

But even if we pass the cgroup pointer, I'm assuming that this cgroup
pointer is still rcu-managed, right? So the callers still have to
rcu-lock. However, in most places we don't care and just do
"cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);", but it seems like that
depends on the fact that sockets can't (yet?) change their cgroup
association, so it's fine to not rcu-lock that cgroup. Seems fragile,
but ok. It always trips me up when I see:

cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
bpf_prog_run_array_cg_flags(cgrp->bpf.effective[atype], ...)

But then, with current, it becomes:

rcu_read_lock();
cgrp = task_dfl_cgroup(current);
bpf_prog_run_array_cg_flags(cgrp->bpf.effective[atype], ...)
rcu_read_unlock();

Idk, I might be overthinking it. I'll try to see if including
bpf-cgroup-defs.h and passing cgroup_bpf is workable.


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 22:52         ` sdf
@ 2022-04-13 23:56           ` Martin KaFai Lau
  2022-04-14 21:41             ` Andrii Nakryiko
  2022-04-14  9:30           ` Jakub Kicinski
  1 sibling, 1 reply; 10+ messages in thread
From: Martin KaFai Lau @ 2022-04-13 23:56 UTC (permalink / raw)
  To: sdf
  Cc: Andrii Nakryiko, Networking, bpf, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko

On Wed, Apr 13, 2022 at 03:52:18PM -0700, sdf@google.com wrote:
> On 04/13, Martin KaFai Lau wrote:
> > On Wed, Apr 13, 2022 at 12:52:53PM -0700, Andrii Nakryiko wrote:
> > > On Wed, Apr 13, 2022 at 12:39 PM <sdf@google.com> wrote:
> > > >
> > > > On 04/13, Andrii Nakryiko wrote:
> > > > > On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>
> > > > > wrote:
> > > > > >
> > > > > > Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of
> > macros
> > > > > > into functions") switched a bunch of BPF_PROG_RUN macros to inline
> > > > > > routines. This changed the semantic a bit. Due to arguments
> > expansion
> > > > > > of macros, it used to be:
> > > > > >
> > > > > >         rcu_read_lock();
> > > > > >         array = rcu_dereference(cgrp->bpf.effective[atype]);
> > > > > >         ...
> > > > > >
> > > > > > Now, with with inline routines, we have:
> > > > > >         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
> > > > > >         /* array_rcu can be kfree'd here */
> > > > > >         rcu_read_lock();
> > > > > >         array = rcu_dereference(array_rcu);
> > > > > >
> > > >
> > > > > So subtle difference, wow...
> > > >
> > > > > But this open-coding of rcu_read_lock() seems very unfortunate as
> > > > > well. Would making BPF_PROG_RUN_ARRAY back to a macro which only
> > does
> > > > > rcu lock/unlock and grabs effective array and then calls static
> > inline
> > > > > function be a viable solution?
> > > >
> > > > > #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog,
> > ret_flags) \
> > > > >    ({
> > > > >        int ret;
> > > >
> > > > >        rcu_read_lock();
> > > > >        ret =
> > > > > __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
> > > > >        rcu_read_unlock();
> > > > >        ret;
> > > > >    })
> > > >
> > > >
> > > > > where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
> > > > > BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation
> > dropped
> > > > > (and no internal rcu stuff)?
> > > >
> > > > Yeah, that should work. But why do you think it's better to hide them?
> > > > I find those automatic rcu locks deep in the call stack a bit obscure
> > > > (when reasoning about sleepable vs non-sleepable contexts/bpf).
> > > >
> > > > I, as the caller, know that the effective array is rcu-managed (it
> > > > has __rcu annotation) and it seems natural for me to grab rcu lock
> > > > while work with it; I might grab it for some other things like cgroup
> > > > anyway.
> > >
> > > If you think that having this more explicitly is better, I'm fine with
> > > that as well. I thought a simpler invocation pattern would be good,
> > > given we call bpf_prog_run_array variants in quite a lot of places. So
> > > count me indifferent. I'm curious what others think.
> 
> > Would it work if the bpf_prog_run_array_cg() directly takes the
> > 'struct cgroup *cgrp' argument instead of the array ?
> > bpf_prog_run_array_cg() should know what protection is needed
> > to get member from the cgrp ptr.  The sk call path should be able
> > to provide a cgrp ptr.  For current cgrp, pass NULL as the cgrp
> > pointer and then current will be used in bpf_prog_run_array_cg().
> > A rcu_read_lock() is needed anyway to get the current's cgrp
> > and can be done together in bpf_prog_run_array_cg().
> 
> > That there are only two remaining bpf_prog_run_array() usages
> > from lirc and bpf_trace which are not too bad to have them
> > directly do rcu_read_lock on their own struct ?
> 
> From Andrii's original commit message:
> 
>     I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to
> accept
>     struct cgroup and enum bpf_attach_type instead of bpf_prog_array,
> fetching
>     cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that
>     required including include/linux/cgroup-defs.h, which I wasn't sure is
> ok with
>     everyone.
> 
> I guess including cgroup-defs.h/bpf-cgroup-defs.h into bpf.h might still
> be somewhat problematic?
> 
> But even if we pass the cgroup pointer, I'm assuming that this cgroup
> pointer
> is still rcu-managed, right? So the callers still have to rcu-lock.
> However, in most places we don't care and do "cgrp =
> sock_cgroup_ptr(&sk->sk_cgrp_data);"
> but seems like it depends on the fact that sockets can't (yet?)
> change their cgroup association and it's fine to not rcu-lock that
> cgroup. Seems fragile, but ok.
There is no __rcu tag in struct sock_cgroup_data, so presumably either
it won't change or a lock is needed?  Seems to be the former.

> It always stumbles me when I see:
> 
> cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> bpf_prog_run_array_cg_flags(cgrp.bpf->effective[atype], ...)
> 
> But then, with current, it becomes:
> 
> rcu_read_lock();
> cgrp = task_dfl_cgroup(current);
> bpf_prog_run_array_cg_flags(cgrp.bpf->effective[atype], ...)
> rcu_read_unlock();
> 
> Idk, I might be overthinking it. I'll try to see if including
> bpf-cgroup-defs.h and passing cgroup_bpf is workable.
yeah, passing cgroup_bpf and including bpf-cgroup-defs.h is a better option.


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 22:52         ` sdf
  2022-04-13 23:56           ` Martin KaFai Lau
@ 2022-04-14  9:30           ` Jakub Kicinski
  1 sibling, 0 replies; 10+ messages in thread
From: Jakub Kicinski @ 2022-04-14  9:30 UTC (permalink / raw)
  To: sdf
  Cc: Martin KaFai Lau, Andrii Nakryiko, Networking, bpf,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko

On Wed, 13 Apr 2022 15:52:18 -0700 sdf@google.com wrote:
> I guess including cgroup-defs.h/bpf-cgroup-defs.h into bpf.h might still
> be somewhat problematic?

FWIW, including -defs.h into bpf.h should be fine.  Obviously, the fewer
cross-header deps the better. But the main point is that we don't
want bpf.h to be included in too many places, so it's the opposite
direction to what you're asking, IIUC.


* Re: [PATCH bpf-next] bpf: move rcu lock management out of BPF_PROG_RUN routines
  2022-04-13 23:56           ` Martin KaFai Lau
@ 2022-04-14 21:41             ` Andrii Nakryiko
  0 siblings, 0 replies; 10+ messages in thread
From: Andrii Nakryiko @ 2022-04-14 21:41 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Stanislav Fomichev, Networking, bpf, Alexei Starovoitov,
	Daniel Borkmann, Andrii Nakryiko

On Wed, Apr 13, 2022 at 4:56 PM Martin KaFai Lau <kafai@fb.com> wrote:
>
> On Wed, Apr 13, 2022 at 03:52:18PM -0700, sdf@google.com wrote:
> > On 04/13, Martin KaFai Lau wrote:
> > > On Wed, Apr 13, 2022 at 12:52:53PM -0700, Andrii Nakryiko wrote:
> > > > On Wed, Apr 13, 2022 at 12:39 PM <sdf@google.com> wrote:
> > > > >
> > > > > On 04/13, Andrii Nakryiko wrote:
> > > > > > On Wed, Apr 13, 2022 at 11:33 AM Stanislav Fomichev <sdf@google.com>
> > > > > > wrote:
> > > > > > >
> > > > > > > Commit 7d08c2c91171 ("bpf: Refactor BPF_PROG_RUN_ARRAY family of
> > > macros
> > > > > > > into functions") switched a bunch of BPF_PROG_RUN macros to inline
> > > > > > > routines. This changed the semantic a bit. Due to arguments
> > > expansion
> > > > > > > of macros, it used to be:
> > > > > > >
> > > > > > >         rcu_read_lock();
> > > > > > >         array = rcu_dereference(cgrp->bpf.effective[atype]);
> > > > > > >         ...
> > > > > > >
> > > > > > > Now, with with inline routines, we have:
> > > > > > >         array_rcu = rcu_dereference(cgrp->bpf.effective[atype]);
> > > > > > >         /* array_rcu can be kfree'd here */
> > > > > > >         rcu_read_lock();
> > > > > > >         array = rcu_dereference(array_rcu);
> > > > > > >
> > > > >
> > > > > > So subtle difference, wow...
> > > > >
> > > > > > But this open-coding of rcu_read_lock() seems very unfortunate as
> > > > > > well. Would making BPF_PROG_RUN_ARRAY back to a macro which only
> > > does
> > > > > > rcu lock/unlock and grabs effective array and then calls static
> > > inline
> > > > > > function be a viable solution?
> > > > >
> > > > > > #define BPF_PROG_RUN_ARRAY_CG_FLAGS(array_rcu, ctx, run_prog,
> > > ret_flags) \
> > > > > >    ({
> > > > > >        int ret;
> > > > >
> > > > > >        rcu_read_lock();
> > > > > >        ret =
> > > > > > __BPF_PROG_RUN_ARRAY_CG_FLAGS(rcu_dereference(array_rcu), ....);
> > > > > >        rcu_read_unlock();
> > > > > >        ret;
> > > > > >    })
> > > > >
> > > > >
> > > > > > where __BPF_PROG_RUN_ARRAY_CG_FLAGS is what
> > > > > > BPF_PROG_RUN_ARRAY_CG_FLAGS is today but with __rcu annotation
> > > dropped
> > > > > > (and no internal rcu stuff)?
> > > > >
> > > > > Yeah, that should work. But why do you think it's better to hide them?
> > > > > I find those automatic rcu locks deep in the call stack a bit obscure
> > > > > (when reasoning about sleepable vs non-sleepable contexts/bpf).
> > > > >
> > > > > I, as the caller, know that the effective array is rcu-managed (it
> > > > > has __rcu annotation) and it seems natural for me to grab rcu lock
> > > > > while work with it; I might grab it for some other things like cgroup
> > > > > anyway.
> > > >
> > > > If you think that having this more explicitly is better, I'm fine with
> > > > that as well. I thought a simpler invocation pattern would be good,
> > > > given we call bpf_prog_run_array variants in quite a lot of places. So
> > > > count me indifferent. I'm curious what others think.
> >
> > > Would it work if the bpf_prog_run_array_cg() directly takes the
> > > 'struct cgroup *cgrp' argument instead of the array ?
> > > bpf_prog_run_array_cg() should know what protection is needed
> > > to get member from the cgrp ptr.  The sk call path should be able
> > > to provide a cgrp ptr.  For current cgrp, pass NULL as the cgrp
> > > pointer and then current will be used in bpf_prog_run_array_cg().
> > > A rcu_read_lock() is needed anyway to get the current's cgrp
> > > and can be done together in bpf_prog_run_array_cg().
> >
> > > That there are only two remaining bpf_prog_run_array() usages
> > > from lirc and bpf_trace which are not too bad to have them
> > > directly do rcu_read_lock on their own struct ?
> >
> > From Andrii's original commit message:
> >
> >     I think BPF_PROG_RUN_ARRAY_CG would benefit from further refactoring to
> > accept
> >     struct cgroup and enum bpf_attach_type instead of bpf_prog_array,
> > fetching
> >     cgrp->bpf.effective[type] and RCU-dereferencing it internally. But that
> >     required including include/linux/cgroup-defs.h, which I wasn't sure is
> > ok with
> >     everyone.
> >
> > I guess including cgroup-defs.h/bpf-cgroup-defs.h into bpf.h might still
> > be somewhat problematic?
> >
> > But even if we pass the cgroup pointer, I'm assuming that this cgroup
> > pointer
> > is still rcu-managed, right? So the callers still have to rcu-lock.
> > However, in most places we don't care and do "cgrp =
> > sock_cgroup_ptr(&sk->sk_cgrp_data);"
> > but seems like it depends on the fact that sockets can't (yet?)
> > change their cgroup association and it's fine to not rcu-lock that
> > cgroup. Seems fragile, but ok.
> There is no __rcu tag in struct sock_cgroup_data, so presumably it
> won't change or a lock is needed ?  seems to be the former.
>
> > It always stumbles me when I see:
> >
> > cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> > bpf_prog_run_array_cg_flags(cgrp.bpf->effective[atype], ...)
> >
> > But then, with current, it becomes:
> >
> > rcu_read_lock();
> > cgrp = task_dfl_cgroup(current);
> > bpf_prog_run_array_cg_flags(cgrp.bpf->effective[atype], ...)
> > rcu_read_unlock();
> >
> > Idk, I might be overthinking it. I'll try to see if including
> > bpf-cgroup-defs.h and passing cgroup_bpf is workable.
> yeah, passing cgroup_bpf and bpf-cgroup-defs.h is a better option.

+1. Daniel, would you be fine with this instead of explicit
rcu_read_lock()/rcu_read_unlock() everywhere?

