* [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
@ 2022-01-26  0:22 Pavel Begunkov
  2022-01-26  0:28 ` Stanislav Fomichev
  2022-01-26 20:30 ` Martin KaFai Lau
  0 siblings, 2 replies; 7+ messages in thread
From: Pavel Begunkov @ 2022-01-26  0:22 UTC (permalink / raw)
  To: netdev, bpf
  Cc: Alexei Starovoitov, Andrii Nakryiko, Martin KaFai Lau, Song Liu,
	linux-kernel, Stanislav Fomichev, Pavel Begunkov

Even though there is a static key protecting against cgroup-bpf skb
filtering overhead when nothing is attached, in many cases it's not
enough, as registering a filter for one type ruins the fast path for
all others. This is observed on production servers I've looked at,
but also on laptops, where registration is done during init by
systemd or something else.

Add a per-socket fast path check guarding against such overhead. This
affects both receive and transmit paths of TCP, UDP and other
protocols. It showed a ~1% tx/s improvement in small-payload UDP send
benchmarks using a real NIC in a server environment, and the number
jumps to 2-3% for preemptible kernels.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---

v2: replace bitmask approach with empty_prog_array
v3: add "bpf_" prefix to empty_prog_array
v4: replace macros with inline functions
    use cgroup_bpf_sock_enabled for set/getsockopt() filters
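
For illustration only (this helper is not part of the patch and the
name is made up), the fast path added here amounts to the existing
static key plus a pointer comparison against the shared empty array:

/* Condensed, illustrative sketch of the new per-socket fast path.
 * Cgroups with no programs attached share &bpf_empty_prog_array.hdr,
 * so comparing the pointer value is enough to skip the hook; only the
 * pointer itself is read, hence rcu_access_pointer().
 */
static inline bool cgroup_skb_bpf_needed(struct sock *sk,
					 enum cgroup_bpf_attach_type type)
{
	struct cgroup *cgrp;

	if (!cgroup_bpf_enabled(type))	/* global static key, as before */
		return false;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	return rcu_access_pointer(cgrp->bpf.effective[type]) !=
	       &bpf_empty_prog_array.hdr;
}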

 include/linux/bpf-cgroup.h | 26 +++++++++++++++++++++-----
 include/linux/bpf.h        | 13 +++++++++++++
 kernel/bpf/cgroup.c        | 30 ------------------------------
 kernel/bpf/core.c          | 16 ++++------------
 4 files changed, 38 insertions(+), 47 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index b525d8cdc25b..165b0ba3d6c3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -8,6 +8,7 @@
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
 #include <linux/rbtree.h>
+#include <net/sock.h>
 #include <uapi/linux/bpf.h>
 
 struct sock;
@@ -165,11 +166,23 @@ int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 				     void *value, u64 flags);
 
+/* Opportunistic check to see whether we have any BPF program attached */
+static inline bool cgroup_bpf_sock_enabled(struct sock *sk,
+					   enum cgroup_bpf_attach_type type)
+{
+	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	struct bpf_prog_array *array;
+
+	array = rcu_access_pointer(cgrp->bpf.effective[type]);
+	return array != &bpf_empty_prog_array.hdr;
+}
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
 	int __ret = 0;							      \
-	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))		      \
+	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk &&		      \
+	    cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) 	      \
 		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
 						    CGROUP_INET_INGRESS); \
 									      \
@@ -181,9 +194,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
 		typeof(sk) __sk = sk_to_full_sk(sk);			       \
-		if (sk_fullsock(__sk))					       \
+		if (sk_fullsock(__sk) &&				       \
+		    cgroup_bpf_sock_enabled(__sk, CGROUP_INET_EGRESS))	       \
 			__ret = __cgroup_bpf_run_filter_skb(__sk, skb,	       \
-						      CGROUP_INET_EGRESS); \
+						      CGROUP_INET_EGRESS);     \
 	}								       \
 	__ret;								       \
 })
@@ -347,7 +361,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 				       kernel_optval)			       \
 ({									       \
 	int __ret = 0;							       \
-	if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT))			       \
+	if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT) &&			       \
+	    cgroup_bpf_sock_enabled(sock, CGROUP_SETSOCKOPT))		       \
 		__ret = __cgroup_bpf_run_filter_setsockopt(sock, level,	       \
 							   optname, optval,    \
 							   optlen,	       \
@@ -367,7 +382,8 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 				       max_optlen, retval)		       \
 ({									       \
 	int __ret = retval;						       \
-	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT))			       \
+	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT) &&			       \
+	    cgroup_bpf_sock_enabled(sock, CGROUP_GETSOCKOPT))		       \
 		if (!(sock)->sk_prot->bpf_bypass_getsockopt ||		       \
 		    !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
 					tcp_bpf_bypass_getsockopt,	       \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 394305a5e02f..dcfe2de59b59 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1233,6 +1233,19 @@ struct bpf_prog_array {
 	struct bpf_prog_array_item items[];
 };
 
+struct bpf_empty_prog_array {
+	struct bpf_prog_array hdr;
+	struct bpf_prog *null_prog;
+};
+
+/* To avoid allocating an empty bpf_prog_array for cgroups that
+ * don't have a bpf program attached, use one global 'bpf_empty_prog_array'.
+ * It will not be modified by the caller of bpf_prog_array_alloc()
+ * (since the caller requested prog_cnt == 0);
+ * that pointer should be 'freed' by bpf_prog_array_free().
+ */
+extern struct bpf_empty_prog_array bpf_empty_prog_array;
+
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
 void bpf_prog_array_free(struct bpf_prog_array *progs);
 int bpf_prog_array_length(struct bpf_prog_array *progs);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 279ebbed75a5..098632fdbc45 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1384,20 +1384,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 }
 
 #ifdef CONFIG_NET
-static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
-					     enum cgroup_bpf_attach_type attach_type)
-{
-	struct bpf_prog_array *prog_array;
-	bool empty;
-
-	rcu_read_lock();
-	prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
-	empty = bpf_prog_array_is_empty(prog_array);
-	rcu_read_unlock();
-
-	return empty;
-}
-
 static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
 			     struct bpf_sockopt_buf *buf)
 {
@@ -1456,19 +1442,11 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 	};
 	int ret, max_optlen;
 
-	/* Opportunistic check to see whether we have any BPF program
-	 * attached to the hook so we don't waste time allocating
-	 * memory and locking the socket.
-	 */
-	if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
-		return 0;
-
 	/* Allocate a bit more than the initial user buffer for
 	 * BPF program. The canonical use case is overriding
 	 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
 	 */
 	max_optlen = max_t(int, 16, *optlen);
-
 	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
 	if (max_optlen < 0)
 		return max_optlen;
@@ -1550,15 +1528,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 	};
 	int ret;
 
-	/* Opportunistic check to see whether we have any BPF program
-	 * attached to the hook so we don't waste time allocating
-	 * memory and locking the socket.
-	 */
-	if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
-		return retval;
-
 	ctx.optlen = max_optlen;
-
 	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
 	if (max_optlen < 0)
 		return max_optlen;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 0a1cfd8544b9..04a8d5bea552 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1968,18 +1968,10 @@ static struct bpf_prog_dummy {
 	},
 };
 
-/* to avoid allocating empty bpf_prog_array for cgroups that
- * don't have bpf program attached use one global 'empty_prog_array'
- * It will not be modified the caller of bpf_prog_array_alloc()
- * (since caller requested prog_cnt == 0)
- * that pointer should be 'freed' by bpf_prog_array_free()
- */
-static struct {
-	struct bpf_prog_array hdr;
-	struct bpf_prog *null_prog;
-} empty_prog_array = {
+struct bpf_empty_prog_array bpf_empty_prog_array = {
 	.null_prog = NULL,
 };
+EXPORT_SYMBOL(bpf_empty_prog_array);
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 {
@@ -1989,12 +1981,12 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 			       (prog_cnt + 1),
 			       flags);
 
-	return &empty_prog_array.hdr;
+	return &bpf_empty_prog_array.hdr;
 }
 
 void bpf_prog_array_free(struct bpf_prog_array *progs)
 {
-	if (!progs || progs == &empty_prog_array.hdr)
+	if (!progs || progs == &bpf_empty_prog_array.hdr)
 		return;
 	kfree_rcu(progs, rcu);
 }
-- 
2.34.1



* Re: [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
  2022-01-26  0:22 [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering Pavel Begunkov
@ 2022-01-26  0:28 ` Stanislav Fomichev
  2022-01-26 20:30 ` Martin KaFai Lau
  1 sibling, 0 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2022-01-26  0:28 UTC (permalink / raw)
  To: Pavel Begunkov
  Cc: netdev, bpf, Alexei Starovoitov, Andrii Nakryiko,
	Martin KaFai Lau, Song Liu, linux-kernel

On Tue, Jan 25, 2022 at 4:22 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
>
> Even though there is a static key protecting against cgroup-bpf skb
> filtering overhead when nothing is attached, in many cases it's not
> enough, as registering a filter for one type ruins the fast path for
> all others. This is observed on production servers I've looked at,
> but also on laptops, where registration is done during init by
> systemd or something else.
>
> Add a per-socket fast path check guarding against such overhead. This
> affects both receive and transmit paths of TCP, UDP and other
> protocols. It showed a ~1% tx/s improvement in small-payload UDP send
> benchmarks using a real NIC in a server environment, and the number
> jumps to 2-3% for preemptible kernels.
>
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> ---
>
> v2: replace bitmask approach with empty_prog_array
> v3: add "bpf_" prefix to empty_prog_array
> v4: replace macros with inline functions
>     use cgroup_bpf_sock_enabled for set/getsockopt() filters

LGTM, thank you for following up!

Reviewed-by: Stanislav Fomichev <sdf@google.com>


* Re: [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
  2022-01-26  0:22 [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering Pavel Begunkov
  2022-01-26  0:28 ` Stanislav Fomichev
@ 2022-01-26 20:30 ` Martin KaFai Lau
  2022-01-26 21:28   ` Pavel Begunkov
  1 sibling, 1 reply; 7+ messages in thread
From: Martin KaFai Lau @ 2022-01-26 20:30 UTC (permalink / raw)
  To: Pavel Begunkov
  Cc: netdev, bpf, Alexei Starovoitov, Andrii Nakryiko, Song Liu,
	linux-kernel, Stanislav Fomichev

On Wed, Jan 26, 2022 at 12:22:13AM +0000, Pavel Begunkov wrote:
>  #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
>  ({									      \
>  	int __ret = 0;							      \
> -	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))		      \
> +	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk &&		      \
From reading sk_filter_trim_cap() where this will be called, sk cannot be NULL.
If yes, the new sk test is not needed.

Others lgtm.

> +	    cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) 	      \
>  		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
>  						    CGROUP_INET_INGRESS); \
>  									      \
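
For reference, the macro's only call site, sk_filter_trim_cap() in
net/core/filter.c, already uses sk before the hook runs; a rough,
paraphrased sketch of that path (from memory, not an exact copy of the
source) looks like:

/* Paraphrased sketch of sk_filter_trim_cap(); error handling and the
 * actual filter run are elided. sk is dereferenced (sock_flag()) before
 * the cgroup hook, so it cannot be NULL by the time
 * BPF_CGROUP_RUN_PROG_INET_INGRESS() is evaluated.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;

	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	/* ... run sk->sk_filter and trim the skb, elided ... */
	return 0;
}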


* Re: [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
  2022-01-26 20:30 ` Martin KaFai Lau
@ 2022-01-26 21:28   ` Pavel Begunkov
  2022-01-27  5:08     ` Alexei Starovoitov
  0 siblings, 1 reply; 7+ messages in thread
From: Pavel Begunkov @ 2022-01-26 21:28 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: netdev, bpf, Alexei Starovoitov, Andrii Nakryiko, Song Liu,
	linux-kernel, Stanislav Fomichev

On 1/26/22 20:30, Martin KaFai Lau wrote:
> On Wed, Jan 26, 2022 at 12:22:13AM +0000, Pavel Begunkov wrote:
>>   #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
>>   ({									      \
>>   	int __ret = 0;							      \
>> -	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))		      \
>> +	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk &&		      \
>  From reading sk_filter_trim_cap() where this will be called, sk cannot be NULL.
> If yes, the new sk test is not needed.

Well, there is no sane way to verify how it's used considering

EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);

but can remove the check if nobody cares


> Others lgtm.
> 
>> +	    cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS)) 	      \
>>   		__ret = __cgroup_bpf_run_filter_skb(sk, skb,		      \
>>   						    CGROUP_INET_INGRESS); \
>>   									      \

-- 
Pavel Begunkov


* Re: [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
  2022-01-26 21:28   ` Pavel Begunkov
@ 2022-01-27  5:08     ` Alexei Starovoitov
  2022-01-27  7:01       ` Martin KaFai Lau
  2022-01-27 13:53       ` Pavel Begunkov
  0 siblings, 2 replies; 7+ messages in thread
From: Alexei Starovoitov @ 2022-01-27  5:08 UTC (permalink / raw)
  To: Pavel Begunkov
  Cc: Martin KaFai Lau, Network Development, bpf, Alexei Starovoitov,
	Andrii Nakryiko, Song Liu, LKML, Stanislav Fomichev

On Wed, Jan 26, 2022 at 1:29 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
>
> On 1/26/22 20:30, Martin KaFai Lau wrote:
> > On Wed, Jan 26, 2022 at 12:22:13AM +0000, Pavel Begunkov wrote:
> >>   #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                        \
> >>   ({                                                                       \
> >>      int __ret = 0;                                                        \
> >> -    if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))                  \
> >> +    if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk &&                  \
> >  From reading sk_filter_trim_cap() where this will be called, sk cannot be NULL.
> > If yes, the new sk test is not needed.
>
> Well, there is no sane way to verify how it's used considering
>
> EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);

BPF_CGROUP_RUN_PROG_INET_INGRESS() is used in one place.
Are you folks saying that you want to remove !sk check
from __cgroup_bpf_run_filter_skb()?
Seems like micro optimization, but sure why not.

EXPORT_SYMBOL() is there because of the "ipv6 as a module" mess.
So it's not a concern.

Pls tag the subj of the patch with [PATCH bpf-next]
instead of "for-next".


* Re: [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
  2022-01-27  5:08     ` Alexei Starovoitov
@ 2022-01-27  7:01       ` Martin KaFai Lau
  2022-01-27 13:53       ` Pavel Begunkov
  1 sibling, 0 replies; 7+ messages in thread
From: Martin KaFai Lau @ 2022-01-27  7:01 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Pavel Begunkov, Network Development, bpf, Alexei Starovoitov,
	Andrii Nakryiko, Song Liu, LKML, Stanislav Fomichev

On Wed, Jan 26, 2022 at 09:08:24PM -0800, Alexei Starovoitov wrote:
> On Wed, Jan 26, 2022 at 1:29 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
> >
> > On 1/26/22 20:30, Martin KaFai Lau wrote:
> > > On Wed, Jan 26, 2022 at 12:22:13AM +0000, Pavel Begunkov wrote:
> > >>   #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                        \
> > >>   ({                                                                       \
> > >>      int __ret = 0;                                                        \
> > >> -    if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))                  \
> > >> +    if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk &&                  \
> > >  From reading sk_filter_trim_cap() where this will be called, sk cannot be NULL.
> > > If yes, the new sk test is not needed.
> >
> > Well, there is no sane way to verify how it's used considering
> >
> > EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
> 
> BPF_CGROUP_RUN_PROG_INET_INGRESS() is used in one place.
> Are you folks saying that you want to remove !sk check
> from __cgroup_bpf_run_filter_skb()?
I meant to remove the newly added "&& sk" test in this patch
instead of the existing "!sk" test in __cgroup_bpf_run_filter_skb().
I think it may be where the confusion is.

There is little reason to add a new unnecessary test.

> Seems like micro optimization, but sure why not.
The whole "if (!sk || !sk_fullsock(sk))" test can probably be
removed from __cgroup_bpf_run_filter_skb().  It would be
a nice clean up but I don't think it has to be done together
with this no-bpf-prog to run optimization-patch.
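
(Concretely, that would mean dropping only the new "&& sk" from this
patch's ingress hunk while keeping the sentinel check, so it would
read roughly:

	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) &&		      \
	    cgroup_bpf_sock_enabled(sk, CGROUP_INET_INGRESS))	      \
		__ret = __cgroup_bpf_run_filter_skb(sk, skb,	      \
						    CGROUP_INET_INGRESS); \

with the existing checks inside __cgroup_bpf_run_filter_skb() left
as they are for now.)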


* Re: [PATCH for-next v4] cgroup/bpf: fast path skb BPF filtering
  2022-01-27  5:08     ` Alexei Starovoitov
  2022-01-27  7:01       ` Martin KaFai Lau
@ 2022-01-27 13:53       ` Pavel Begunkov
  1 sibling, 0 replies; 7+ messages in thread
From: Pavel Begunkov @ 2022-01-27 13:53 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Martin KaFai Lau, Network Development, bpf, Alexei Starovoitov,
	Andrii Nakryiko, Song Liu, LKML, Stanislav Fomichev

On 1/27/22 05:08, Alexei Starovoitov wrote:
> On Wed, Jan 26, 2022 at 1:29 PM Pavel Begunkov <asml.silence@gmail.com> wrote:
>>
>> On 1/26/22 20:30, Martin KaFai Lau wrote:
>>> On Wed, Jan 26, 2022 at 12:22:13AM +0000, Pavel Begunkov wrote:
>>>>    #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                        \
>>>>    ({                                                                       \
>>>>       int __ret = 0;                                                        \
>>>> -    if (cgroup_bpf_enabled(CGROUP_INET_INGRESS))                  \
>>>> +    if (cgroup_bpf_enabled(CGROUP_INET_INGRESS) && sk &&                  \
>>>   From reading sk_filter_trim_cap() where this will be called, sk cannot be NULL.
>>> If yes, the new sk test is not needed.
>>
>> Well, there is no sane way to verify how it's used considering
>>
>> EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
> 
> BPF_CGROUP_RUN_PROG_INET_INGRESS() is used in one place.
> Are you folks saying that you want to remove !sk check
> from __cgroup_bpf_run_filter_skb()?
> Seems like micro optimization, but sure why not.
> 
> EXPORT_SYMBOL() is there because of the "ipv6 as a module" mess.
> So it's not a concern.

Ok. I'd usually assume symbols for internal use only to be GPL.


> Pls tag the subj of the patch with [PATCH bpf-next]
> instead of "for-next".

-- 
Pavel Begunkov


