[PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn

* [PATCH bpf-next 3/9] bpf: add bpf helper bpf_skb_set_ecn
@ 2019-02-19  5:38 brakmo
  2019-02-19 10:52 ` Daniel Borkmann
  2019-02-19 18:30 ` Eric Dumazet
  0 siblings, 2 replies; 6+ messages in thread
From: brakmo @ 2019-02-19  5:38 UTC (permalink / raw)
  To: netdev; +Cc: Martin Lau, Alexei Starovoitov, Daniel Borkmann --cc=Kernel Team

This patch adds a new bpf helper BPF_FUNC_skb_set_ecn
"int bpf_skb_set_ecn(struct sk_buff *skb, int val)". It is added to
BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
be attached to the ingress and egress path. This type of
bpf_prog cannot modify the skb directly.

This helper is used to set the two ECN bits of the IPv6 or IPv4
header in skb. It can be used by a bpf_prog to manage the egress
network bandwidth limit per cgroupv2 by inducing an ECN
response in the TCP sender (when the packet is ECN enabled).
This works best when using DCTCP.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
---
 include/uapi/linux/bpf.h | 10 +++++++++-
 net/core/filter.c        | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9e9f4f1a0370..5daf404511f7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2365,6 +2365,13 @@ union bpf_attr {
  *        Make a tcp_sock enter CWR state.
  *    Return
  *        0
+ *
+ * int bpf_skb_set_ecn(struct sk_buff *skb, int val)
+ *	Description
+ *		Sets ECN bits (2) of IP header. Works with IPv6 and IPv4.
+ *		val should be one of 0, 1, 2, 3.
+ *	Return
+ *		-EINVAL on error (e.g. val > 3), 0 otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2464,7 +2471,8 @@ union bpf_attr {
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
 	FN(tcp_sock),			\
-	FN(tcp_enter_cwr),
+	FN(tcp_enter_cwr),		\
+	FN(skb_set_ecn),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index f51c4a781844..275acfb2117d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5438,6 +5438,73 @@ static const struct bpf_func_proto bpf_tcp_enter_cwr_proto = {
 	.ret_type    = RET_INTEGER,
 	.arg1_type    = ARG_PTR_TO_TCP_SOCK,
 };
+
+/* bpf_skb_set_ecn - overwrite the ECN field of the IPv4/IPv6 header in @skb
+ * @skb: packet whose network header is modified in place
+ * @val: new ECN codepoint; only the values 0-3 are accepted
+ *
+ * The IP version is read from the first nibble of the network header. For
+ * IPv4 the header checksum is adjusted together with the TOS byte; for IPv6
+ * the two ECN bits of the traffic class are replaced.
+ *
+ * The helper never reallocates or pulls packet data: when the IP header is
+ * not fully present in the linear area, or the skb is a clone whose header
+ * may not be written, it fails instead of touching shared data.
+ *
+ * Return: 0 on success, -EINVAL if @val is out of range, the packet is
+ * neither IPv4 nor IPv6, or the header cannot be safely modified in place.
+ */
+BPF_CALL_2(bpf_skb_set_ecn, struct sk_buff *, skb, u32, val)
+{
+	int nhoff = skb_network_offset(skb);
+	unsigned int hdr_len;
+	u8 version;
+
+	if (val & ~INET_ECN_MASK)
+		return -EINVAL;
+
+	/* The version nibble sits in the first byte of both header formats,
+	 * so a minimal IPv4 header in the linear area is enough to read it.
+	 */
+	if (nhoff < 0 || skb_headlen(skb) < nhoff + sizeof(struct iphdr))
+		return -EINVAL;
+
+	version = ip_hdr(skb)->version;
+	if (version == 4)
+		hdr_len = sizeof(struct iphdr);
+	else if (version == 6)
+		hdr_len = sizeof(struct ipv6hdr);
+	else
+		return -EINVAL;
+
+	if (skb_headlen(skb) < nhoff + hdr_len)
+		return -EINVAL;
+
+	/* A cloned skb shares its data with other users; only write if the
+	 * header region is private to this clone.
+	 */
+	if (skb_cloned(skb) && !skb_clone_writable(skb, nhoff + hdr_len))
+		return -EINVAL;
+
+	/* Both helpers keep the DSCP bits selected by the mask; the IPv4
+	 * variant also fixes up iph->check for the modified TOS byte.
+	 */
+	if (version == 4)
+		ipv4_change_dsfield(ip_hdr(skb), (__u8)~INET_ECN_MASK, val);
+	else
+		ipv6_change_dsfield(ipv6_hdr(skb), (__u8)~INET_ECN_MASK, val);
+
+	return 0;
+}
+
+/* Verifier prototype for bpf_skb_set_ecn(): (ctx, scalar) -> integer. */
+static const struct bpf_func_proto bpf_skb_set_ecn_proto = {
+	.func		= bpf_skb_set_ecn,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5599,6 +5626,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_sock_proto;
 	case BPF_FUNC_tcp_enter_cwr:
 		return &bpf_tcp_enter_cwr_proto;
+	case BPF_FUNC_skb_set_ecn:
+		return &bpf_skb_set_ecn_proto;
 #endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread