All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH mptcp-next 0/5] BPF packet scheduler
@ 2022-03-17 11:04 Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 1/5] mptcp: add a new sysctl scheduler Geliang Tang
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Geliang Tang @ 2022-03-17 11:04 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

Addressed the comments on the RFC version:
https://patchwork.kernel.org/project/mptcp/cover/cover.1631011068.git.geliangtang@xiaomi.com/

Depends on the bpf patches queued in the patchwork:

7541fe09cf06 selftests: bpf: add mptcp sched test
1fa592f2770e selftests: bpf: implement bpf_first scheduler
5bbd1d25b994 mptcp: add bpf_mptcp_sched_ops
ade0194842d1 mptcp: add struct mptcp_sched_ops
23bdbd0af9f4 mptcp: add a new sysctl scheduler
772471a34280 selftests: bpf: verify first subflow of mptcp_sock
f3484ecf1bce Squash to "selftests: bpf: verify ca_name of struct mptcp_sock"
0cd058c7c489 selftests: bpf: verify ca_name of struct mptcp_sock
07a66e354126 Squash to "selftests: bpf: test bpf_skc_to_mptcp_sock"
168ee1d1a118 selftests: bpf: test bpf_skc_to_mptcp_sock
7a773e6af914 Squash to "selftests: bpf: add MPTCP test base"
a1933407b912 bpf: add bpf_skc_to_mptcp_sock_proto
0095d8ca9a67 Revert "bpf: add 'bpf_mptcp_sock' structure and helper"
2856d8c8020a Revert "selftests: bpf: add bpf_mptcp_sock() verifier tests"
df0e2a4c394f (origin/export, origin/HEAD) DO-NOT-MERGE: mptcp: enabled by default

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/75

Geliang Tang (5):
  mptcp: add a new sysctl scheduler
  mptcp: add struct mptcp_sched_ops
  mptcp: add bpf_mptcp_sched_ops
  selftests: bpf: implement bpf_first scheduler
  selftests: bpf: add mptcp sched test

 Documentation/networking/mptcp-sysctl.rst     |   8 +
 include/net/mptcp.h                           |  15 ++
 kernel/bpf/bpf_struct_ops_types.h             |   4 +
 net/mptcp/Makefile                            |   2 +-
 net/mptcp/bpf.c                               | 102 +++++++++++
 net/mptcp/ctrl.c                              |  14 ++
 net/mptcp/protocol.c                          |  12 +-
 net/mptcp/protocol.h                          |   9 +
 net/mptcp/sched.c                             |  89 ++++++++++
 tools/testing/selftests/bpf/bpf_tcp_helpers.h |  14 ++
 .../bpf/prog_tests/bpf_mptcp_sched.c          | 158 ++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_first.c |  30 ++++
 12 files changed, 452 insertions(+), 5 deletions(-)
 create mode 100644 net/mptcp/sched.c
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_mptcp_sched.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_first.c

-- 
2.34.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 1/5] mptcp: add a new sysctl scheduler
  2022-03-17 11:04 [PATCH mptcp-next 0/5] BPF packet scheduler Geliang Tang
@ 2022-03-17 11:04 ` Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 2/5] mptcp: add struct mptcp_sched_ops Geliang Tang
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2022-03-17 11:04 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds a new sysctl, named scheduler, to support the selection
of different schedulers.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 Documentation/networking/mptcp-sysctl.rst |  8 ++++++++
 include/net/mptcp.h                       |  2 ++
 net/mptcp/ctrl.c                          | 14 ++++++++++++++
 net/mptcp/protocol.h                      |  1 +
 4 files changed, 25 insertions(+)

diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst
index e263dfcc4b40..d9e69fdc7ea3 100644
--- a/Documentation/networking/mptcp-sysctl.rst
+++ b/Documentation/networking/mptcp-sysctl.rst
@@ -75,3 +75,11 @@ stale_loss_cnt - INTEGER
 	This is a per-namespace sysctl.
 
 	Default: 4
+
+scheduler - STRING
+	Select the scheduler of your choice.
+
+	Support for selection of different schedulers. This is a per-namespace
+	sysctl.
+
+	Default: "default"
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 8b1afd6f5cc4..2dde95412fc3 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -284,4 +284,6 @@ static inline int mptcpv6_init(void) { return 0; }
 static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
 #endif
 
+#define MPTCP_SCHED_NAME_MAX 16
+
 #endif /* __NET_MPTCP_H */
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index ae20b7d92e28..4e0811969d8c 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -32,6 +32,7 @@ struct mptcp_pernet {
 	u8 checksum_enabled;
 	u8 allow_join_initial_addr_port;
 	u8 pm_type;
+	char scheduler[MPTCP_SCHED_NAME_MAX];
 };
 
 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net)
 	return mptcp_get_pernet(net)->pm_type;
 }
 
+char *mptcp_get_scheduler(struct net *net)
+{
+	return mptcp_get_pernet(net)->scheduler;
+}
+
 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 {
 	pernet->mptcp_enabled = 1;
@@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
 	pernet->allow_join_initial_addr_port = 1;
 	pernet->stale_loss_cnt = 4;
 	pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
+	strcpy(pernet->scheduler, "default");
 }
 
 #ifdef CONFIG_SYSCTL
@@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = {
 		.extra1       = SYSCTL_ZERO,
 		.extra2       = &mptcp_pm_type_max
 	},
+	{
+		.procname = "scheduler",
+		.maxlen	= MPTCP_SCHED_NAME_MAX,
+		.mode = 0644,
+		.proc_handler = proc_dostring,
+	},
 	{}
 };
 
@@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
 	table[3].data = &pernet->allow_join_initial_addr_port;
 	table[4].data = &pernet->stale_loss_cnt;
 	table[5].data = &pernet->pm_type;
+	table[6].data = &pernet->scheduler;
 
 	hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
 	if (!hdr)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c8bada4537e2..c36b9c40c314 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -583,6 +583,7 @@ int mptcp_is_checksum_enabled(const struct net *net);
 int mptcp_allow_join_id0(const struct net *net);
 unsigned int mptcp_stale_loss_cnt(const struct net *net);
 int mptcp_get_pm_type(const struct net *net);
+char *mptcp_get_scheduler(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool __mptcp_retransmit_pending_data(struct sock *sk);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 2/5] mptcp: add struct mptcp_sched_ops
  2022-03-17 11:04 [PATCH mptcp-next 0/5] BPF packet scheduler Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 1/5] mptcp: add a new sysctl scheduler Geliang Tang
@ 2022-03-17 11:04 ` Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 3/5] mptcp: add bpf_mptcp_sched_ops Geliang Tang
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2022-03-17 11:04 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds struct mptcp_sched_ops and defines the scheduler
init, register, unregister and find functions.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 include/net/mptcp.h  | 13 +++++++
 net/mptcp/Makefile   |  2 +-
 net/mptcp/protocol.c | 12 ++++--
 net/mptcp/protocol.h |  8 ++++
 net/mptcp/sched.c    | 89 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 119 insertions(+), 5 deletions(-)
 create mode 100644 net/mptcp/sched.c

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 2dde95412fc3..bf7f4ba00424 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -286,4 +286,17 @@ static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { }
 
 #define MPTCP_SCHED_NAME_MAX 16
 
+struct mptcp_sched_ops {
+	struct sock *	(*get_subflow)(struct mptcp_sock *msk);
+
+	char			name[MPTCP_SCHED_NAME_MAX];
+	struct module		*owner;
+	struct list_head	list;
+
+	/* initialize private data (optional) */
+	void (*init)(struct sock *sk);
+	/* cleanup private data  (optional) */
+	void (*release)(struct sock *sk);
+} ____cacheline_aligned_in_smp;
+
 #endif /* __NET_MPTCP_H */
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index 0a0608b6b4b4..aa5c10d1b80a 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o
 ccflags-y += -DDEBUG
 
 mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
-	   mib.o pm_netlink.o sockopt.o
+	   mib.o pm_netlink.o sockopt.o sched.o
 
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
 obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index fbb14dfe62b3..2250b1c11e9b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1424,7 +1424,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
  * returns the subflow that will transmit the next DSS
  * additionally updates the rtx timeout
  */
-static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 {
 	struct subflow_send_info send_info[SSK_MODE_MAX];
 	struct mptcp_subflow_context *subflow;
@@ -1567,7 +1567,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			ssk = mptcp_subflow_get_send(msk);
+			ssk = mptcp_get_subflow(msk);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1634,7 +1634,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 			 * check for a different subflow usage only after
 			 * spooling the first chunk of data
 			 */
-			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+			xmit_ssk = first ? ssk : mptcp_get_subflow(mptcp_sk(sk));
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
@@ -2609,6 +2609,7 @@ static int mptcp_init_sock(struct sock *sk)
 	 * propagate the correct value
 	 */
 	mptcp_ca_reset(sk);
+	mptcp_sched_data_init(sk);
 
 	sk_sockets_allocated_inc(sk);
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
@@ -2760,6 +2761,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
 	sk_stop_timer(sk, &sk->sk_timer);
 	msk->pm.status = 0;
+	msk->sched = NULL;
 
 	/* clears msk->subflow, allowing the following loop to close
 	 * even the initial subflow
@@ -2933,6 +2935,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->snd_una = msk->write_seq;
 	msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
 	msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+	msk->sched = mptcp_sk(sk)->sched;
 
 	if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
 		msk->can_ack = true;
@@ -3070,7 +3073,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_get_subflow(mptcp_sk(sk));
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
@@ -3743,6 +3746,7 @@ void __init mptcp_proto_init(void)
 
 	mptcp_subflow_init();
 	mptcp_pm_init();
+	mptcp_sched_init();
 	mptcp_token_init();
 
 	if (proto_register(&mptcp_prot, MPTCP_USE_SLAB) != 0)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index c36b9c40c314..33720856bfed 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,6 +286,7 @@ struct mptcp_sock {
 	struct socket	*subflow; /* outgoing connect/listener/!mp_capable */
 	struct sock	*first;
 	struct mptcp_pm_data	pm;
+	struct mptcp_sched_ops	*sched;
 	struct {
 		u32	space;	/* bytes copied in last measurement window */
 		u32	copied; /* bytes copied in this measurement window */
@@ -584,6 +585,13 @@ int mptcp_allow_join_id0(const struct net *net);
 unsigned int mptcp_stale_loss_cnt(const struct net *net);
 int mptcp_get_pm_type(const struct net *net);
 char *mptcp_get_scheduler(struct net *net);
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
+struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+void mptcp_sched_data_init(struct sock *sk);
+void mptcp_sched_init(void);
+struct sock *mptcp_get_subflow(struct mptcp_sock *msk);
+struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
 				     struct mptcp_options_received *mp_opt);
 bool __mptcp_retransmit_pending_data(struct sock *sk);
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
new file mode 100644
index 000000000000..f1bb3adeb71b
--- /dev/null
+++ b/net/mptcp/sched.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, SUSE.
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/indirect_call_wrapper.h>
+#include "protocol.h"
+
+static DEFINE_SPINLOCK(mptcp_sched_list_lock);
+static LIST_HEAD(mptcp_sched_list);
+
+struct mptcp_sched_ops *mptcp_sched_find(const char *name)
+{
+	struct mptcp_sched_ops *ops;
+
+	list_for_each_entry_rcu(ops, &mptcp_sched_list, list) {
+		if (!strcmp(ops->name, name))
+			return ops;
+	}
+
+	return NULL;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+	int ret = 0;
+
+	if (!sched->get_subflow)
+		return -EINVAL;
+
+	spin_lock(&mptcp_sched_list_lock);
+	if (mptcp_sched_find(sched->name)) {
+		pr_debug("%s already registered", sched->name);
+		ret = -EEXIST;
+	} else {
+		list_add_tail_rcu(&sched->list, &mptcp_sched_list);
+		pr_debug("%s registered", sched->name);
+	}
+	spin_unlock(&mptcp_sched_list_lock);
+
+	return ret;
+}
+
+void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched)
+{
+	spin_lock(&mptcp_sched_list_lock);
+	list_del_rcu(&sched->list);
+	spin_unlock(&mptcp_sched_list_lock);
+
+	synchronize_rcu();
+}
+
+void mptcp_sched_data_init(struct sock *sk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct net *net = sock_net(sk);
+
+	msk->sched = mptcp_sched_find(mptcp_get_scheduler(net));
+	if (!msk->sched) {
+		pr_debug("sched %s not found", mptcp_get_scheduler(net));
+		return;
+	}
+
+	pr_debug("sched=%s", msk->sched->name);
+	if (msk->sched->init)
+		msk->sched->init(sk);
+}
+
+static struct mptcp_sched_ops mptcp_sched_default = {
+	.get_subflow    = mptcp_subflow_get_send,
+	.name           = "default",
+	.owner          = THIS_MODULE,
+};
+
+void mptcp_sched_init(void)
+{
+	mptcp_register_scheduler(&mptcp_sched_default);
+}
+
+struct sock *mptcp_get_subflow(struct mptcp_sock *msk)
+{
+	return INDIRECT_CALL_INET_1(msk->sched->get_subflow,
+				    mptcp_subflow_get_send, msk);
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 3/5] mptcp: add bpf_mptcp_sched_ops
  2022-03-17 11:04 [PATCH mptcp-next 0/5] BPF packet scheduler Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 1/5] mptcp: add a new sysctl scheduler Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 2/5] mptcp: add struct mptcp_sched_ops Geliang Tang
@ 2022-03-17 11:04 ` Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 4/5] selftests: bpf: implement bpf_first scheduler Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 5/5] selftests: bpf: add mptcp sched test Geliang Tang
  4 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2022-03-17 11:04 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch implements a new struct bpf_struct_ops, bpf_mptcp_sched_ops.
The MPTCP scheduler is registered and unregistered in .reg and .unreg.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 kernel/bpf/bpf_struct_ops_types.h |   4 ++
 net/mptcp/bpf.c                   | 102 ++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 5678a9ddf817..5a6b0c0d8d3d 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -8,5 +8,9 @@ BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
+#ifdef CONFIG_MPTCP
+#include <net/mptcp.h>
+BPF_STRUCT_OPS_TYPE(mptcp_sched_ops)
+#endif
 #endif
 #endif
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index da79dae559b2..5f7f9d30ecd4 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -8,8 +8,110 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include "protocol.h"
 
+extern struct bpf_struct_ops bpf_mptcp_sched_ops;
+extern struct btf *btf_vmlinux;
+
+static u32 optional_ops[] = {
+	offsetof(struct mptcp_sched_ops, init),
+	offsetof(struct mptcp_sched_ops, release),
+	offsetof(struct mptcp_sched_ops, get_subflow),
+};
+
+static const struct bpf_func_proto *
+bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id,
+			       const struct bpf_prog *prog)
+{
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_verifier_ops bpf_mptcp_sched_verifier_ops = {
+	.get_func_proto		= bpf_mptcp_sched_get_func_proto,
+	.is_valid_access	= btf_ctx_access,
+	.btf_struct_access	= btf_struct_access,
+};
+
+static int bpf_mptcp_sched_reg(void *kdata)
+{
+	return mptcp_register_scheduler(kdata);
+}
+
+static void bpf_mptcp_sched_unreg(void *kdata)
+{
+	mptcp_unregister_scheduler(kdata);
+}
+
+static int bpf_mptcp_sched_check_member(const struct btf_type *t,
+					const struct btf_member *member)
+{
+	return 0;
+}
+
+static bool is_optional(u32 member_offset)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
+		if (member_offset == optional_ops[i])
+			return true;
+	}
+
+	return false;
+}
+
+static int bpf_mptcp_sched_init_member(const struct btf_type *t,
+				       const struct btf_member *member,
+				       void *kdata, const void *udata)
+{
+	const struct mptcp_sched_ops *usched;
+	struct mptcp_sched_ops *sched;
+	int prog_fd;
+	u32 moff;
+
+	usched = (const struct mptcp_sched_ops *)udata;
+	sched = (struct mptcp_sched_ops *)kdata;
+
+	moff = __btf_member_bit_offset(t, member) / 8;
+	switch (moff) {
+	case offsetof(struct mptcp_sched_ops, name):
+		if (bpf_obj_name_cpy(sched->name, usched->name,
+				     sizeof(sched->name)) <= 0)
+			return -EINVAL;
+		if (mptcp_sched_find(usched->name))
+			return -EEXIST;
+		return 1;
+	}
+
+	if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
+		return 0;
+
+	/* Ensure bpf_prog is provided for compulsory func ptr */
+	prog_fd = (int)(*(unsigned long *)(udata + moff));
+	if (!prog_fd && !is_optional(moff))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int bpf_mptcp_sched_init(struct btf *btf)
+{
+	return 0;
+}
+
+struct bpf_struct_ops bpf_mptcp_sched_ops = {
+	.verifier_ops	= &bpf_mptcp_sched_verifier_ops,
+	.reg		= bpf_mptcp_sched_reg,
+	.unreg		= bpf_mptcp_sched_unreg,
+	.check_member	= bpf_mptcp_sched_check_member,
+	.init_member	= bpf_mptcp_sched_init_member,
+	.init		= bpf_mptcp_sched_init,
+	.name		= "mptcp_sched_ops",
+};
+
 struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
 {
 	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 4/5] selftests: bpf: implement bpf_first scheduler
  2022-03-17 11:04 [PATCH mptcp-next 0/5] BPF packet scheduler Geliang Tang
                   ` (2 preceding siblings ...)
  2022-03-17 11:04 ` [PATCH mptcp-next 3/5] mptcp: add bpf_mptcp_sched_ops Geliang Tang
@ 2022-03-17 11:04 ` Geliang Tang
  2022-03-17 11:04 ` [PATCH mptcp-next 5/5] selftests: bpf: add mptcp sched test Geliang Tang
  4 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2022-03-17 11:04 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch implements the simplest MPTCP scheduler, named bpf_first,
which always picks the first subflow to send data on.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 14 +++++++++
 tools/testing/selftests/bpf/progs/bpf_first.c | 30 +++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_first.c

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index f92357597e63..886d6fa100ce 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -197,6 +197,20 @@ struct tcp_congestion_ops {
 	void *owner;
 };
 
+#define MPTCP_SCHED_NAME_MAX 16
+
+struct mptcp_sched_ops {
+	char name[MPTCP_SCHED_NAME_MAX];
+
+	/* initialize private data (optional) */
+	void (*init)(struct sock *sk);
+	/* cleanup private data  (optional) */
+	void (*release)(struct sock *sk);
+
+	struct sock *(*get_subflow)(struct sock *sk);
+	void *owner;
+};
+
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min_not_zero(x, y) ({			\
diff --git a/tools/testing/selftests/bpf/progs/bpf_first.c b/tools/testing/selftests/bpf/progs/bpf_first.c
new file mode 100644
index 000000000000..c30a7cd3b932
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_first.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+char fmt[] = "%s";
+
+/* "struct_ops/" prefix is a requirement */
+SEC("struct_ops/mptcp_sched_first_init")
+void BPF_PROG(mptcp_sched_first_init, struct sock *sk)
+{
+	bpf_trace_printk(fmt, sizeof(fmt), __func__);
+}
+
+struct sock *BPF_STRUCT_OPS(bpf_first_get_subflow, struct sock *sk)
+{
+	struct mptcp_sock *msk = (struct mptcp_sock *)sk;
+
+	return msk->first;
+}
+
+SEC(".struct_ops")
+struct mptcp_sched_ops first = {
+	.init 		= (void *)mptcp_sched_first_init,
+	.get_subflow	= (void *)bpf_first_get_subflow,
+	.name		= "bpf_first",
+};
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH mptcp-next 5/5] selftests: bpf: add mptcp sched test
  2022-03-17 11:04 [PATCH mptcp-next 0/5] BPF packet scheduler Geliang Tang
                   ` (3 preceding siblings ...)
  2022-03-17 11:04 ` [PATCH mptcp-next 4/5] selftests: bpf: implement bpf_first scheduler Geliang Tang
@ 2022-03-17 11:04 ` Geliang Tang
  4 siblings, 0 replies; 6+ messages in thread
From: Geliang Tang @ 2022-03-17 11:04 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

This patch adds the MPTCP scheduler test cases. The tests use sysctl to
set net.mptcp.scheduler.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 .../bpf/prog_tests/bpf_mptcp_sched.c          | 158 ++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_mptcp_sched.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mptcp_sched.c b/tools/testing/selftests/bpf/prog_tests/bpf_mptcp_sched.c
new file mode 100644
index 000000000000..b351ae260903
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mptcp_sched.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/err.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_first.skel.h"
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+static int stop, duration;
+
+static void *server(void *arg)
+{
+	int lfd = (int)(long)arg, err = 0, fd;
+	ssize_t nr_sent = 0, bytes = 0;
+	char batch[1500];
+
+	fd = accept(lfd, NULL, NULL);
+	while (fd == -1) {
+		if (errno == EINTR)
+			continue;
+		err = -errno;
+		goto done;
+	}
+
+	if (settimeo(fd, 0)) {
+		err = -errno;
+		goto done;
+	}
+
+	while (bytes < total_bytes && !READ_ONCE(stop)) {
+		nr_sent = send(fd, &batch,
+			       min(total_bytes - bytes, sizeof(batch)), 0);
+		if (nr_sent == -1 && errno == EINTR)
+			continue;
+		if (nr_sent == -1) {
+			err = -errno;
+			break;
+		}
+		bytes += nr_sent;
+	}
+
+	CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
+	      bytes, total_bytes, nr_sent, errno);
+
+done:
+	if (fd >= 0)
+		close(fd);
+	if (err) {
+		WRITE_ONCE(stop, 1);
+		return ERR_PTR(err);
+	}
+	return NULL;
+}
+
+static void do_test(const char *mptcp_sched)
+{
+	struct sockaddr_in6 sa6 = {};
+	ssize_t nr_recv = 0, bytes = 0;
+	int lfd = -1, fd = -1;
+	pthread_t srv_thread;
+	socklen_t addrlen = sizeof(sa6);
+	void *thread_ret;
+	char batch[1500];
+	char cmd[256];
+	int err;
+
+	WRITE_ONCE(stop, 0);
+
+	snprintf(cmd, sizeof(cmd), "sysctl -q net.mptcp.scheduler=%s", mptcp_sched);
+	system(cmd);
+
+	lfd = socket(AF_INET6, SOCK_STREAM, IPPROTO_MPTCP);
+	if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+		return;
+	fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_MPTCP);
+	if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+		close(lfd);
+		return;
+	}
+
+	/* bind, listen and start server thread to accept */
+	sa6.sin6_family = AF_INET6;
+	sa6.sin6_addr = in6addr_loopback;
+	err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
+	if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+		goto done;
+	err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
+	if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+		goto done;
+	err = listen(lfd, 1);
+	if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+		goto done;
+
+	/* connect to server */
+	err = connect(fd, (struct sockaddr *)&sa6, addrlen);
+	if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+		goto done;
+
+	err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
+	if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+		goto done;
+
+	/* recv total_bytes */
+	while (bytes < total_bytes && !READ_ONCE(stop)) {
+		nr_recv = recv(fd, &batch,
+			       min(total_bytes - bytes, sizeof(batch)), 0);
+		if (nr_recv == -1 && errno == EINTR)
+			continue;
+		if (nr_recv == -1)
+			break;
+		bytes += nr_recv;
+	}
+
+	CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
+	      bytes, total_bytes, nr_recv, errno);
+
+	WRITE_ONCE(stop, 1);
+	pthread_join(srv_thread, &thread_ret);
+	CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
+	      PTR_ERR(thread_ret));
+done:
+	close(lfd);
+	close(fd);
+}
+
+static void test_first(void)
+{
+	struct bpf_first *first_skel;
+	struct bpf_link *link;
+
+	first_skel = bpf_first__open_and_load();
+	if (CHECK(!first_skel, "bpf_first__open_and_load", "failed\n"))
+		return;
+
+	link = bpf_map__attach_struct_ops(first_skel->maps.first);
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+		bpf_first__destroy(first_skel);
+		return;
+	}
+
+	do_test("bpf_first");
+
+	bpf_link__destroy(link);
+	bpf_first__destroy(first_skel);
+}
+
+void test_bpf_mptcp_sched(void)
+{
+	if (test__start_subtest("first"))
+		test_first();
+}
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-03-17 11:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-17 11:04 [PATCH mptcp-next 0/5] BPF packet scheduler Geliang Tang
2022-03-17 11:04 ` [PATCH mptcp-next 1/5] mptcp: add a new sysctl scheduler Geliang Tang
2022-03-17 11:04 ` [PATCH mptcp-next 2/5] mptcp: add struct mptcp_sched_ops Geliang Tang
2022-03-17 11:04 ` [PATCH mptcp-next 3/5] mptcp: add bpf_mptcp_sched_ops Geliang Tang
2022-03-17 11:04 ` [PATCH mptcp-next 4/5] selftests: bpf: implement bpf_first scheduler Geliang Tang
2022-03-17 11:04 ` [PATCH mptcp-next 5/5] selftests: bpf: add mptcp sched test Geliang Tang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.