netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
@ 2019-02-19  5:38 brakmo
  2019-02-19 10:52 ` Daniel Borkmann
  2019-02-19 18:30 ` Eric Dumazet
  0 siblings, 2 replies; 6+ messages in thread
From: brakmo @ 2019-02-19  5:38 UTC (permalink / raw)
  To: netdev; +Cc: Martin Lau, Alexei Starovoitov, Daniel Borkmann --cc=Kernel Team

This patch adds a new bpf helper BPF_FUNC_skb_set_ecn
"int bpf_skb_set_ecn(struct sk_buff *skb, u32 val)". It is added to
BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
be attached to the ingress and egress path. This type of
bpf_prog cannot modify the skb directly.

This helper is used to set the ECN bits (2) of the IPv6 or IPv4
header in skb. It can be used by a bpf_prog to manage egress
network bandwidth limit per cgroupv2 by inducing an ECN
response in the TCP sender (when the packet is ECN enabled).
This works best when using DCTCP.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/uapi/linux/bpf.h | 10 +++++++++-
 net/core/filter.c        | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9e9f4f1a0370..5daf404511f7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2365,6 +2365,13 @@ union bpf_attr {
  *        Make a tcp_sock enter CWR state.
  *    Return
  *        0
+ *
+ * int bpf_skb_set_ecn(struct sk_buff *skb, int val)
+ *	Description
+ *		Sets ECN bits (2) of IP header. Works with IPv6 and IPv4.
+ *		val should be one of 0, 1, 2, 3.
+ *	Return
+ *		-EINVAL on error (e.g. val > 3), 0 otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2464,7 +2471,8 @@ union bpf_attr {
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
 	FN(tcp_sock),			\
-	FN(tcp_enter_cwr),
+	FN(tcp_enter_cwr),		\
+	FN(skb_set_ecn),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index f51c4a781844..275acfb2117d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5438,6 +5438,33 @@ static const struct bpf_func_proto bpf_tcp_enter_cwr_proto = {
 	.ret_type    = RET_INTEGER,
 	.arg1_type    = ARG_PTR_TO_TCP_SOCK,
 };
+
+BPF_CALL_2(bpf_skb_set_ecn, struct sk_buff *, skb, u32, val)
+{
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+	if ((val & ~0x3) != 0)
+		return -EINVAL;
+
+	if (ip6h->version == 6) {
+		ip6h->flow_lbl[0] = (ip6h->flow_lbl[0] & ~0x30) | (val << 4);
+		return 0;
+	} else if (ip6h->version == 4) {
+		struct iphdr *ip4h = (struct iphdr *)ip6h;
+
+		ip4h->tos = (ip4h->tos & ~0x3) | val;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_skb_set_ecn_proto = {
+	.func		= bpf_skb_set_ecn,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5599,6 +5626,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_sock_proto;
 	case BPF_FUNC_tcp_enter_cwr:
 		return &bpf_tcp_enter_cwr_proto;
+	case BPF_FUNC_skb_set_ecn:
+		return &bpf_skb_set_ecn_proto;
 #endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
  2019-02-19  5:38 [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn brakmo
@ 2019-02-19 10:52 ` Daniel Borkmann
  2019-02-19 21:53   ` Daniel Borkmann
  2019-02-19 23:53   ` Lawrence Brakmo
  2019-02-19 18:30 ` Eric Dumazet
  1 sibling, 2 replies; 6+ messages in thread
From: Daniel Borkmann @ 2019-02-19 10:52 UTC (permalink / raw)
  To: brakmo, netdev
  Cc: Martin Lau, Alexei Starovoitov, Daniel Borkmann --cc=Kernel Team

On 02/19/2019 06:38 AM, brakmo wrote:
> This patch adds a new bpf helper BPF_FUNC_skb_set_ecn
> "int bpf_skb_set_Ecn(struct sk_buff *skb)". It is added to
> BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
> be attached to the ingress and egress path. This type of
> bpf_prog cannot modify the skb directly.
> 
> This helper is used to set the ECN bits (2) of the IPv6 or IPv4
> header in skb. It can be used by a bpf_prog to manage egress
> network bandwdith limit per cgroupv2 by inducing an ECN
> response in the TCP sender (when the packet is ECN enabled).
> This works best when using DCTCP.
> 
> Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
> ---
>  include/uapi/linux/bpf.h | 10 +++++++++-
>  net/core/filter.c        | 29 +++++++++++++++++++++++++++++
>  2 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 9e9f4f1a0370..5daf404511f7 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2365,6 +2365,13 @@ union bpf_attr {
>   *        Make a tcp_sock enter CWR state.
>   *    Return
>   *        0
> + *
> + * int bpf_skb_set_ecn(struct sk_buf *skb, int val)

Nit: BPF_CALL_2() has u32 val

> + *	Description
> + *		Sets ECN bits (2) of IP header. Works with IPv6 and IPv4.
> + *		val should be one of 0, 1, 2, 3.
> + *	Return
> + *		-EINVAL on error (e.g. val > 3), 0 otherwise.
>   */
>  #define __BPF_FUNC_MAPPER(FN)		\
>  	FN(unspec),			\
> @@ -2464,7 +2471,8 @@ union bpf_attr {
>  	FN(spin_unlock),		\
>  	FN(sk_fullsock),		\
>  	FN(tcp_sock),			\
> -	FN(tcp_enter_cwr),
> +	FN(tcp_enter_cwr),		\
> +	FN(skb_set_ecn),
>  
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
> diff --git a/net/core/filter.c b/net/core/filter.c
> index f51c4a781844..275acfb2117d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5438,6 +5438,33 @@ static const struct bpf_func_proto bpf_tcp_enter_cwr_proto = {
>  	.ret_type    = RET_INTEGER,
>  	.arg1_type    = ARG_PTR_TO_TCP_SOCK,
>  };
> +
> +BPF_CALL_2(bpf_skb_set_ecn, struct sk_buff *, skb, u32, val)
> +{
> +	struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +
> +	if ((val & ~0x3) != 0)

Nit: INET_ECN_MASK

> +		return -EINVAL;
> +
> +	if (ip6h->version == 6) {
> +		ip6h->flow_lbl[0] = (ip6h->flow_lbl[0] & ~0x30) | (val << 4);
> +		return 0;
> +	} else if (ip6h->version == 4) {
> +		struct iphdr *ip4h = (struct iphdr *)ip6h;
> +
> +		ip4h->tos = (ip4h->tos & ~0x3) | val;
> +		return 0;
> +	}

Couldn't this be done as native BPF code via direct packet access instead?
Afaik, skb->data should most likely point to the network header for the hooks
and skb->protocol should be one of ETH_P_IP{,V6}, no?

Aside from this, don't we also have cloned skbs here (in particular from
TCP side)?

Looking at cg_skb_verifier_ops ... it seems there is also a bug in the current
code, namely that if we have a direct packet write, we don't make the skb
writable; at that point skb->data is not private. The cg_skb_is_valid_access()
allows fetching PTR_TO_PACKET{,_END}, so we need a fix like the below for -bpf:

diff --git a/net/core/filter.c b/net/core/filter.c
index f7d0004fc160..34fe6da0a236 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5796,6 +5796,12 @@ static bool sk_filter_is_valid_access(int off, int size,
        return bpf_skb_is_valid_access(off, size, type, prog, info);
 }

+static int cg_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
+                          const struct bpf_prog *prog)
+{
+       return bpf_unclone_prologue(insn_buf, direct_write, prog, 0);
+}
+
 static bool cg_skb_is_valid_access(int off, int size,
                                   enum bpf_access_type type,
                                   const struct bpf_prog *prog,
@@ -7595,6 +7601,7 @@ const struct bpf_verifier_ops cg_skb_verifier_ops = {
        .get_func_proto         = cg_skb_func_proto,
        .is_valid_access        = cg_skb_is_valid_access,
        .convert_ctx_access     = bpf_convert_ctx_access,
+       .gen_prologue           = cg_skb_prologue,
 };

 const struct bpf_prog_ops cg_skb_prog_ops = {

> +	return -EINVAL;
> +}
> +
> +static const struct bpf_func_proto bpf_skb_set_ecn_proto = {
> +	.func		= bpf_skb_set_ecn,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_ANYTHING,
> +};
>  #endif /* CONFIG_INET */
>  
>  bool bpf_helper_changes_pkt_data(void *func)
> @@ -5599,6 +5626,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>  		return &bpf_tcp_sock_proto;
>  	case BPF_FUNC_tcp_enter_cwr:
>  		return &bpf_tcp_enter_cwr_proto;
> +	case BPF_FUNC_skb_set_ecn:
> +		return &bpf_skb_set_ecn_proto;
>  #endif
>  	default:
>  		return sk_filter_func_proto(func_id, prog);
> 


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
  2019-02-19  5:38 [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn brakmo
  2019-02-19 10:52 ` Daniel Borkmann
@ 2019-02-19 18:30 ` Eric Dumazet
  2019-02-21  3:43   ` Lawrence Brakmo
  1 sibling, 1 reply; 6+ messages in thread
From: Eric Dumazet @ 2019-02-19 18:30 UTC (permalink / raw)
  To: brakmo, netdev; +Cc: Martin Lau, Alexei Starovoitov



On 02/18/2019 09:38 PM, brakmo wrote:
> This patch adds a new bpf helper BPF_FUNC_skb_set_ecn
> "int bpf_skb_set_Ecn(struct sk_buff *skb)". It is added to
> BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
> be attached to the ingress and egress path. This type of
> bpf_prog cannot modify the skb directly.
> 
> This helper is used to set the ECN bits (2) of the IPv6 or IPv4
> header in skb. It can be used by a bpf_prog to manage egress
> network bandwdith limit per cgroupv2 by inducing an ECN
> response in the TCP sender (when the packet is ECN enabled).
> This works best when using DCTCP.


> +
> +BPF_CALL_2(bpf_skb_set_ecn, struct sk_buff *, skb, u32, val)
> +{
> +	struct ipv6hdr *ip6h = ipv6_hdr(skb);
> +
> +	if ((val & ~0x3) != 0)
> +		return -EINVAL;
> +
> +	if (ip6h->version == 6) {
> +		ip6h->flow_lbl[0] = (ip6h->flow_lbl[0] & ~0x30) | (val << 4);
> +		return 0;
> +	} else if (ip6h->version == 4) {
> +		struct iphdr *ip4h = (struct iphdr *)ip6h;
> +
> +		ip4h->tos = (ip4h->tos & ~0x3) | val;

Why is the IPv4 checksum not recomputed here?

If you leave this task to the caller, this should be documented.

These hard coded constants are not really nice.

Why not simply use INET_ECN_set_ce(), which is IPv4/IPv6 ready?

Do you really need to set anything other than CE?



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
  2019-02-19 10:52 ` Daniel Borkmann
@ 2019-02-19 21:53   ` Daniel Borkmann
  2019-02-19 23:53   ` Lawrence Brakmo
  1 sibling, 0 replies; 6+ messages in thread
From: Daniel Borkmann @ 2019-02-19 21:53 UTC (permalink / raw)
  To: brakmo, netdev
  Cc: Martin Lau, Alexei Starovoitov, Daniel Borkmann --cc=Kernel Team

On 02/19/2019 11:52 AM, Daniel Borkmann wrote:
[...]
> Looking at cg_skb_verifier_ops ... it seems there also a bug in the current
> code, namely that if we have a direct packet write, we don't make the skb
> writable; at that point skb->data is not private. The cg_skb_is_valid_access()
> allows to fetch PTR_TO_PACKET{,_END}, so we need a fix like the below for -bpf:

Ah, scratch that thought: I overlooked that may_access_direct_pkt_data() prevents
writes for this prog type, so this is not an issue.

> diff --git a/net/core/filter.c b/net/core/filter.c
> index f7d0004fc160..34fe6da0a236 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -5796,6 +5796,12 @@ static bool sk_filter_is_valid_access(int off, int size,
>         return bpf_skb_is_valid_access(off, size, type, prog, info);
>  }
> 
> +static int cg_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
> +                          const struct bpf_prog *prog)
> +{
> +       return bpf_unclone_prologue(insn_buf, direct_write, prog, 0);
> +}
> +
>  static bool cg_skb_is_valid_access(int off, int size,
>                                    enum bpf_access_type type,
>                                    const struct bpf_prog *prog,
> @@ -7595,6 +7601,7 @@ const struct bpf_verifier_ops cg_skb_verifier_ops = {
>         .get_func_proto         = cg_skb_func_proto,
>         .is_valid_access        = cg_skb_is_valid_access,
>         .convert_ctx_access     = bpf_convert_ctx_access,
> +       .gen_prologue           = cg_skb_prologue,
>  };
> 
>  const struct bpf_prog_ops cg_skb_prog_ops = {

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
  2019-02-19 10:52 ` Daniel Borkmann
  2019-02-19 21:53   ` Daniel Borkmann
@ 2019-02-19 23:53   ` Lawrence Brakmo
  1 sibling, 0 replies; 6+ messages in thread
From: Lawrence Brakmo @ 2019-02-19 23:53 UTC (permalink / raw)
  To: Daniel Borkmann, netdev
  Cc: Martin Lau, Alexei Starovoitov, Daniel Borkmann --cc=Kernel Team


On 2/19/19, 2:52 AM, "netdev-owner@vger.kernel.org on behalf of Daniel Borkmann" <netdev-owner@vger.kernel.org on behalf of daniel@iogearbox.net> wrote:

    On 02/19/2019 06:38 AM, brakmo wrote:
    > This patch adds a new bpf helper BPF_FUNC_skb_set_ecn
    > "int bpf_skb_set_Ecn(struct sk_buff *skb)". It is added to
    > BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
    > be attached to the ingress and egress path. This type of
    > bpf_prog cannot modify the skb directly.
    > 
    > This helper is used to set the ECN bits (2) of the IPv6 or IPv4
    > header in skb. It can be used by a bpf_prog to manage egress
    > network bandwdith limit per cgroupv2 by inducing an ECN
    > response in the TCP sender (when the packet is ECN enabled).
    > This works best when using DCTCP.
    > 
    > Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
    > ---
    >  include/uapi/linux/bpf.h | 10 +++++++++-
    >  net/core/filter.c        | 29 +++++++++++++++++++++++++++++
    >  2 files changed, 38 insertions(+), 1 deletion(-)
    > 
    > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
    > index 9e9f4f1a0370..5daf404511f7 100644
    > --- a/include/uapi/linux/bpf.h
    > +++ b/include/uapi/linux/bpf.h
    > @@ -2365,6 +2365,13 @@ union bpf_attr {
    >   *        Make a tcp_sock enter CWR state.
    >   *    Return
    >   *        0
    > + *
    > + * int bpf_skb_set_ecn(struct sk_buf *skb, int val)
    
    Nit: BPF_CALL_2() has u32 val

Thanks!
    
    > + *	Description
    > + *		Sets ECN bits (2) of IP header. Works with IPv6 and IPv4.
    > + *		val should be one of 0, 1, 2, 3.
    > + *	Return
    > + *		-EINVAL on error (e.g. val > 3), 0 otherwise.
    >   */
    >  #define __BPF_FUNC_MAPPER(FN)		\
    >  	FN(unspec),			\
    > @@ -2464,7 +2471,8 @@ union bpf_attr {
    >  	FN(spin_unlock),		\
    >  	FN(sk_fullsock),		\
    >  	FN(tcp_sock),			\
    > -	FN(tcp_enter_cwr),
    > +	FN(tcp_enter_cwr),		\
    > +	FN(skb_set_ecn),
    >  
    >  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
    >   * function eBPF program intends to call
    > diff --git a/net/core/filter.c b/net/core/filter.c
    > index f51c4a781844..275acfb2117d 100644
    > --- a/net/core/filter.c
    > +++ b/net/core/filter.c
    > @@ -5438,6 +5438,33 @@ static const struct bpf_func_proto bpf_tcp_enter_cwr_proto = {
    >  	.ret_type    = RET_INTEGER,
    >  	.arg1_type    = ARG_PTR_TO_TCP_SOCK,
    >  };
    > +
    > +BPF_CALL_2(bpf_skb_set_ecn, struct sk_buff *, skb, u32, val)
    > +{
    > +	struct ipv6hdr *ip6h = ipv6_hdr(skb);
    > +
    > +	if ((val & ~0x3) != 0)
    
    Nit: INET_ECN_MASK

Thanks!
    
    > +		return -EINVAL;
    > +
    > +	if (ip6h->version == 6) {
    > +		ip6h->flow_lbl[0] = (ip6h->flow_lbl[0] & ~0x30) | (val << 4);
    > +		return 0;
    > +	} else if (ip6h->version == 4) {
    > +		struct iphdr *ip4h = (struct iphdr *)ip6h;
    > +
    > +		ip4h->tos = (ip4h->tos & ~0x3) | val;
    > +		return 0;
    > +	}
    
    Couldn't this be done as native BPF code via direct packet access instead?
    Afaik, skb->data should most likely points to network header for the hooks
    and skb->protocol should be one of ETH_P_IP{,V6}, no?

Cgroup skb bpf programs do not have write access to packet data. I originally was doing what you propose, by adding write support and changing the ecn value in the bpf program, but Alexei felt that could create problems. Hence this approach.
    
    Aside from this, don't we also have cloned skbs here (in particular from
    TCP side)?
    
    Looking at cg_skb_verifier_ops ... it seems there also a bug in the current
    code, namely that if we have a direct packet write, we don't make the skb
    writable; at that point skb->data is not private. The cg_skb_is_valid_access()
    allows to fetch PTR_TO_PACKET{,_END}, so we need a fix like the below for -bpf:
    
    diff --git a/net/core/filter.c b/net/core/filter.c
    index f7d0004fc160..34fe6da0a236 100644
    --- a/net/core/filter.c
    +++ b/net/core/filter.c
    @@ -5796,6 +5796,12 @@ static bool sk_filter_is_valid_access(int off, int size,
            return bpf_skb_is_valid_access(off, size, type, prog, info);
     }
    
    +static int cg_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
    +                          const struct bpf_prog *prog)
    +{
    +       return bpf_unclone_prologue(insn_buf, direct_write, prog, 0);
    +}
    +
     static bool cg_skb_is_valid_access(int off, int size,
                                       enum bpf_access_type type,
                                       const struct bpf_prog *prog,
    @@ -7595,6 +7601,7 @@ const struct bpf_verifier_ops cg_skb_verifier_ops = {
            .get_func_proto         = cg_skb_func_proto,
            .is_valid_access        = cg_skb_is_valid_access,
            .convert_ctx_access     = bpf_convert_ctx_access,
    +       .gen_prologue           = cg_skb_prologue,
     };
    
     const struct bpf_prog_ops cg_skb_prog_ops = {
    
    > +	return -EINVAL;
    > +}
    > +
    > +static const struct bpf_func_proto bpf_skb_set_ecn_proto = {
    > +	.func		= bpf_skb_set_ecn,
    > +	.gpl_only	= false,
    > +	.ret_type	= RET_INTEGER,
    > +	.arg1_type	= ARG_PTR_TO_CTX,
    > +	.arg2_type	= ARG_ANYTHING,
    > +};
    >  #endif /* CONFIG_INET */
    >  
    >  bool bpf_helper_changes_pkt_data(void *func)
    > @@ -5599,6 +5626,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
    >  		return &bpf_tcp_sock_proto;
    >  	case BPF_FUNC_tcp_enter_cwr:
    >  		return &bpf_tcp_enter_cwr_proto;
    > +	case BPF_FUNC_skb_set_ecn:
    > +		return &bpf_skb_set_ecn_proto;
    >  #endif
    >  	default:
    >  		return sk_filter_func_proto(func_id, prog);
    > 

    
    


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
  2019-02-19 18:30 ` Eric Dumazet
@ 2019-02-21  3:43   ` Lawrence Brakmo
  0 siblings, 0 replies; 6+ messages in thread
From: Lawrence Brakmo @ 2019-02-21  3:43 UTC (permalink / raw)
  To: Eric Dumazet, netdev; +Cc: Martin Lau, Alexei Starovoitov, Daniel Borkmann

On 2/19/19, 10:30 AM, "Eric Dumazet" <eric.dumazet@gmail.com> wrote:

    
    
    On 02/18/2019 09:38 PM, brakmo wrote:
    > This patch adds a new bpf helper BPF_FUNC_skb_set_ecn
    > "int bpf_skb_set_Ecn(struct sk_buff *skb)". It is added to
    > BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
    > be attached to the ingress and egress path. This type of
    > bpf_prog cannot modify the skb directly.
    > 
    > This helper is used to set the ECN bits (2) of the IPv6 or IPv4
    > header in skb. It can be used by a bpf_prog to manage egress
    > network bandwdith limit per cgroupv2 by inducing an ECN
    > response in the TCP sender (when the packet is ECN enabled).
    > This works best when using DCTCP.
    
    
    > +
    > +BPF_CALL_2(bpf_skb_set_ecn, struct sk_buff *, skb, u32, val)
    > +{
    > +	struct ipv6hdr *ip6h = ipv6_hdr(skb);
    > +
    > +	if ((val & ~0x3) != 0)
    > +		return -EINVAL;
    > +
    > +	if (ip6h->version == 6) {
    > +		ip6h->flow_lbl[0] = (ip6h->flow_lbl[0] & ~0x30) | (val << 4);
    > +		return 0;
    > +	} else if (ip6h->version == 4) {
    > +		struct iphdr *ip4h = (struct iphdr *)ip6h;
    > +
    > +		ip4h->tos = (ip4h->tos & ~0x3) | val;
    
    Why is not the IPv4 checksum recomputed here ?
    
    If you leave this task to the caller, this should be documented.
    
    These hard coded constants are not really nice.
    
    Why not simply using INET_ECN_set_ce() which is IPv4/IPv6 ready ?
    
    Do you really need to set anything else than CE ?

Good point, thank you. I will use it.
    
    
    


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-02-21  3:43 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-02-19  5:38 [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn brakmo
2019-02-19 10:52 ` Daniel Borkmann
2019-02-19 21:53   ` Daniel Borkmann
2019-02-19 23:53   ` Lawrence Brakmo
2019-02-19 18:30 ` Eric Dumazet
2019-02-21  3:43   ` Lawrence Brakmo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).