All of lore.kernel.org
 help / color / mirror / Atom feed
From: brakmo <brakmo@fb.com>
To: netdev <netdev@vger.kernel.org>
Cc: Martin Lau <kafai@fb.com>, Alexei Starovoitov <ast@fb.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Eric Dumazet <eric.dumazet@gmail.com>,
	Kernel Team <Kernel-team@fb.com>
Subject: [PATCH v2 bpf-next 2/9] bpf: Add bpf helper bpf_tcp_enter_cwr
Date: Fri, 22 Feb 2019 17:06:56 -0800	[thread overview]
Message-ID: <20190223010703.678070-3-brakmo@fb.com> (raw)
In-Reply-To: <20190223010703.678070-1-brakmo@fb.com>

From: Martin KaFai Lau <kafai@fb.com>

This patch adds a new bpf helper BPF_FUNC_tcp_enter_cwr
"int bpf_tcp_enter_cwr(struct bpf_tcp_sock *tp)".
It is added to BPF_PROG_TYPE_CGROUP_SKB which can be attached
to the egress path where the bpf prog is called by
ip_finish_output() or ip6_finish_output().  The verifier
ensures that the parameter must be a tcp_sock.

This helper makes a tcp_sock enter CWR state.  It can be used
by a bpf_prog to manage egress network bandwidth limit per
cgroupv2.  A later patch will have a sample program to
show how it can be used to limit bandwidth usage per cgroupv2.

To ensure it is only called from BPF_CGROUP_INET_EGRESS, the
attr->expected_attach_type must be specified as BPF_CGROUP_INET_EGRESS
during load time if the prog uses this new helper.
The newly added prog->enforce_expected_attach_type bit will also be set
if this new helper is used.  This bit is for backward compatibility reason
because currently prog->expected_attach_type has been ignored in
BPF_PROG_TYPE_CGROUP_SKB.  During attach time,
prog->expected_attach_type is only enforced if the
prog->enforce_expected_attach_type bit is set.
i.e. prog->expected_attach_type is only enforced if this new helper
is used by the prog.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/linux/bpf.h      |  1 +
 include/linux/filter.h   |  3 ++-
 include/uapi/linux/bpf.h |  9 ++++++++-
 kernel/bpf/syscall.c     | 12 ++++++++++++
 kernel/bpf/verifier.c    |  4 ++++
 net/core/filter.c        | 25 +++++++++++++++++++++++++
 6 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d5ba2fc01af3..2d54ba7cf9dd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -195,6 +195,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
+	ARG_PTR_TO_TCP_SOCK,    /* pointer to tcp_sock */
 };
 
 /* type of values returned from helper functions */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f32b3eca5a04..c6e878bdc5a6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -510,7 +510,8 @@ struct bpf_prog {
 				blinded:1,	/* Was blinded */
 				is_func:1,	/* program is a bpf function */
 				kprobe_override:1, /* Do we override a kprobe? */
-				has_callchain_buf:1; /* callchain buffer allocated? */
+				has_callchain_buf:1, /* callchain buffer allocated? */
+				enforce_expected_attach_type:1; /* Enforce expected_attach_type checking at attach time */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bcdd2474eee7..95b5058fa945 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2359,6 +2359,12 @@ union bpf_attr {
  *	Return
  *		A **struct bpf_tcp_sock** pointer on success, or NULL in
  *		case of failure.
+ *
+ * int bpf_tcp_enter_cwr(struct bpf_tcp_sock *tp)
+ *	Description
+ *		Make a tcp_sock enter CWR state.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2457,7 +2463,8 @@ union bpf_attr {
 	FN(spin_lock),			\
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
-	FN(tcp_sock),
+	FN(tcp_sock),			\
+	FN(tcp_enter_cwr),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ec7c552af76b..9a478f2875cd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1482,6 +1482,14 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 		default:
 			return -EINVAL;
 		}
+	case BPF_PROG_TYPE_CGROUP_SKB:
+		switch (expected_attach_type) {
+		case BPF_CGROUP_INET_INGRESS:
+		case BPF_CGROUP_INET_EGRESS:
+			return 0;
+		default:
+			return -EINVAL;
+		}
 	default:
 		return 0;
 	}
@@ -1725,6 +1733,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 	case BPF_PROG_TYPE_CGROUP_SOCK:
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+	case BPF_PROG_TYPE_CGROUP_SKB:
+		return prog->enforce_expected_attach_type &&
+			prog->expected_attach_type != attach_type ?
+			-EINVAL : 0;
 	default:
 		return 0;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1b9496c41383..95fb385c6f3c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2424,6 +2424,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EFAULT;
 		}
 		meta->ptr_id = reg->id;
+	} else if (arg_type == ARG_PTR_TO_TCP_SOCK) {
+		expected_type = PTR_TO_TCP_SOCK;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
 		if (meta->func_id == BPF_FUNC_spin_lock) {
 			if (process_spin_lock(env, regno, true))
diff --git a/net/core/filter.c b/net/core/filter.c
index 97916eedfe69..ca57ef25279c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5426,6 +5426,24 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
 };
 
+BPF_CALL_1(bpf_tcp_enter_cwr, struct tcp_sock *, tp)
+{
+	struct sock *sk = (struct sock *)tp;
+
+	if (sk->sk_state == TCP_ESTABLISHED) {
+		tcp_enter_cwr(sk);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_tcp_enter_cwr_proto = {
+	.func        = bpf_tcp_enter_cwr,
+	.gpl_only    = false,
+	.ret_type    = RET_INTEGER,
+	.arg1_type    = ARG_PTR_TO_TCP_SOCK,
+};
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5585,6 +5603,13 @@ cg_skb_func_proto(enum bpf_func_id func_id, struct bpf_prog *prog)
 #ifdef CONFIG_INET
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_tcp_enter_cwr:
+		if (prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
+			prog->enforce_expected_attach_type = 1;
+			return &bpf_tcp_enter_cwr_proto;
+		} else {
+			return NULL;
+		}
 #endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
-- 
2.17.1


  parent reply	other threads:[~2019-02-23  1:07 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-23  1:06 [PATCH v2 bpf-next 0/9] bpf: Network Resource Manager (NRM) brakmo
2019-02-23  1:06 ` [PATCH v2 bpf-next 1/9] bpf: Remove const from get_func_proto brakmo
2019-02-23  1:06 ` brakmo [this message]
2019-02-24  1:32   ` [PATCH v2 bpf-next 2/9] bpf: Add bpf helper bpf_tcp_enter_cwr Eric Dumazet
2019-02-24  3:08     ` Martin Lau
2019-02-24  4:44       ` Alexei Starovoitov
2019-02-24 18:00       ` Eric Dumazet
2019-02-25 23:14   ` Stanislav Fomichev
2019-02-26  1:30     ` Martin Lau
2019-02-26  3:32       ` Stanislav Fomichev
2019-02-23  1:06 ` [PATCH v2 bpf-next 3/9] bpf: Test bpf_tcp_enter_cwr in test_verifier brakmo
2019-02-23  1:06 ` [PATCH v2 bpf-next 4/9] bpf: add bpf helper bpf_skb_ecn_set_ce brakmo
2019-02-23  1:14   ` Daniel Borkmann
2019-02-23  7:30     ` Martin Lau
2019-02-25 10:10       ` Daniel Borkmann
2019-02-25 16:52         ` Eric Dumazet
2019-02-23  1:06 ` [PATCH v2 bpf-next 5/9] bpf: Add bpf helper bpf_tcp_check_probe_timer brakmo
2019-02-23  1:07 ` [PATCH v2 bpf-next 6/9] bpf: sync bpf.h to tools and update bpf_helpers.h brakmo
2019-02-23  1:07 ` [PATCH v2 bpf-next 7/9] bpf: Sample NRM BPF program to limit egress bw brakmo
2019-02-23  1:07 ` [PATCH v2 bpf-next 8/9] bpf: User program for testing NRM brakmo
2019-02-23  1:07 ` [PATCH v2 bpf-next 9/9] bpf: NRM test script brakmo
2019-02-23  3:03 ` [PATCH v2 bpf-next 0/9] bpf: Network Resource Manager (NRM) David Ahern
2019-02-23 18:39   ` Eric Dumazet
2019-02-23 20:40     ` Alexei Starovoitov
2019-02-23 20:43       ` Eric Dumazet
2019-02-23 23:25         ` Alexei Starovoitov
2019-02-24  2:58           ` David Ahern
2019-02-24  4:48             ` Alexei Starovoitov
2019-02-25  1:38               ` David Ahern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190223010703.678070-3-brakmo@fb.com \
    --to=brakmo@fb.com \
    --cc=Kernel-team@fb.com \
    --cc=ast@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=eric.dumazet@gmail.com \
    --cc=kafai@fb.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.