* [PATCH mptcp-next 0/4] BPF redundant scheduler
@ 2022-05-09 15:01 Geliang Tang
  2022-05-09 15:01 ` [PATCH mptcp-next 1/4] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Geliang Tang @ 2022-05-09 15:01 UTC
  To: mptcp; +Cc: Geliang Tang

This series implements a redundant BPF MPTCP scheduler, which sends
all packets redundantly on all available subflows.

Depends on:
 - BPF round-robin scheduler, v13

Geliang Tang (4):
  Squash to "mptcp: add get_subflow wrappers"
  mptcp: add redundant subflows support
  selftests/bpf: add bpf_red scheduler
  selftests/bpf: add bpf_red test

 include/net/mptcp.h                           |  1 +
 net/mptcp/protocol.c                          | 26 ++++++----
 net/mptcp/protocol.h                          |  4 +-
 net/mptcp/sched.c                             | 11 +++-
 .../testing/selftests/bpf/bpf_mptcp_helpers.h |  1 +
 .../testing/selftests/bpf/prog_tests/mptcp.c  | 38 ++++++++++++++
 .../selftests/bpf/progs/mptcp_bpf_red.c       | 52 +++++++++++++++++++
 7 files changed, 120 insertions(+), 13 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_red.c

-- 
2.34.1


* [PATCH mptcp-next 1/4] Squash to "mptcp: add get_subflow wrappers"
  2022-05-09 15:01 [PATCH mptcp-next 0/4] BPF redundant scheduler Geliang Tang
@ 2022-05-09 15:01 ` Geliang Tang
  2022-05-11  0:18   ` Mat Martineau
  2022-05-09 15:01 ` [PATCH mptcp-next 2/4] mptcp: add redundant subflows support Geliang Tang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Geliang Tang @ 2022-05-09 15:01 UTC
  To: mptcp; +Cc: Geliang Tang

Add a call_again parameter to mptcp_sched_get_send() and
mptcp_sched_get_retrans(), so a scheduler can ask to be invoked again
for the same data.
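
The intended call pattern is for the caller to keep invoking the
wrapper while the scheduler leaves call_again set, handing the same
data to each returned subflow. A minimal sketch of that pattern
(illustrative only, not part of this patch):

	bool call_again;
	struct sock *ssk;

	do {
		ssk = mptcp_sched_get_send(msk, &call_again);
		if (!ssk)
			break;
		/* transmit the current dfrag on ssk */
	} while (call_again);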

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 13 +++++++++----
 net/mptcp/protocol.h |  4 ++--
 net/mptcp/sched.c    | 10 ++++++++--
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3c55e7f45aef..4a55a6e89ed5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1558,6 +1558,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 	};
 	struct mptcp_data_frag *dfrag;
 	int len, copied = 0;
+	bool call_again;
 
 	while ((dfrag = mptcp_send_head(sk))) {
 		info.sent = dfrag->already_sent;
@@ -1567,7 +1568,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 			int ret = 0;
 
 			prev_ssk = ssk;
-			ssk = mptcp_sched_get_send(msk);
+			ssk = mptcp_sched_get_send(msk, &call_again);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1621,6 +1622,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 	struct sock *xmit_ssk;
 	int len, copied = 0;
 	bool first = true;
+	bool call_again;
 
 	info.flags = 0;
 	while ((dfrag = mptcp_send_head(sk))) {
@@ -1634,7 +1636,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 			 * check for a different subflow usage only after
 			 * spooling the first chunk of data
 			 */
-			xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk));
+			xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk), &call_again);
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
@@ -2461,12 +2463,13 @@ static void __mptcp_retrans(struct sock *sk)
 	struct mptcp_data_frag *dfrag;
 	size_t copied = 0;
 	struct sock *ssk;
+	bool call_again;
 	int ret;
 
 	mptcp_clean_una_wakeup(sk);
 
 	/* first check ssk: need to kick "stale" logic */
-	ssk = mptcp_sched_get_retrans(msk);
+	ssk = mptcp_sched_get_retrans(msk, &call_again);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag) {
 		if (mptcp_data_fin_enabled(msk)) {
@@ -3117,11 +3120,13 @@ void __mptcp_data_acked(struct sock *sk)
 
 void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 {
+	bool call_again;
+
 	if (!mptcp_send_head(sk))
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk), &call_again);
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 59a23838782f..2fe0021a678e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -631,8 +631,8 @@ int mptcp_init_sched(struct mptcp_sock *msk,
 void mptcp_release_sched(struct mptcp_sock *msk);
 struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
 struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
-struct sock *mptcp_sched_get_send(struct mptcp_sock *msk);
-struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk);
+struct sock *mptcp_sched_get_send(struct mptcp_sock *msk, bool *call_again);
+struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk, bool *call_again);
 
 static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
 {
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 9ee2d30a6f19..83377cd1a4de 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -111,12 +111,14 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
 	return 0;
 }
 
-struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
+struct sock *mptcp_sched_get_send(struct mptcp_sock *msk, bool *call_again)
 {
 	struct mptcp_sched_data data;
 
 	sock_owned_by_me((struct sock *)msk);
 
+	*call_again = 0;
+
 	/* the following check is moved out of mptcp_subflow_get_send */
 	if (__mptcp_check_fallback(msk)) {
 		if (!msk->first)
@@ -131,15 +133,18 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 	msk->sched->get_subflow(msk, false, &data);
 
 	msk->last_snd = data.sock;
+	*call_again = data.call_again;
 	return data.sock;
 }
 
-struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
+struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk, bool *call_again)
 {
 	struct mptcp_sched_data data;
 
 	sock_owned_by_me((const struct sock *)msk);
 
+	*call_again = 0;
+
 	/* the following check is moved out of mptcp_subflow_get_retrans */
 	if (__mptcp_check_fallback(msk))
 		return NULL;
@@ -151,5 +156,6 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 	msk->sched->get_subflow(msk, true, &data);
 
 	msk->last_snd = data.sock;
+	*call_again = data.call_again;
 	return data.sock;
 }
-- 
2.34.1


* [PATCH mptcp-next 2/4] mptcp: add redundant subflows support
  2022-05-09 15:01 [PATCH mptcp-next 0/4] BPF redundant scheduler Geliang Tang
  2022-05-09 15:01 ` [PATCH mptcp-next 1/4] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
@ 2022-05-09 15:01 ` Geliang Tang
  2022-05-11  0:56   ` Mat Martineau
  2022-05-09 15:01 ` [PATCH mptcp-next 3/4] selftests/bpf: add bpf_red scheduler Geliang Tang
  2022-05-09 15:01 ` [PATCH mptcp-next 4/4] selftests/bpf: add bpf_red test Geliang Tang
  3 siblings, 1 reply; 7+ messages in thread
From: Geliang Tang @ 2022-05-09 15:01 UTC
  To: mptcp; +Cc: Geliang Tang

This patch adds redundant subflows support, sending all packets
redundantly on all available subflows.
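
With call_again in place, the push loop hands the same dfrag to each
subflow the scheduler returns, and only advances the send state once
the scheduler stops asking to be called again. A heavily simplified
sketch of the resulting flow (the real changes are in the diff below;
locking and error handling omitted):

	while ((dfrag = mptcp_send_head(sk))) {
		do {
			ssk = mptcp_sched_get_send(msk, &call_again);
			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
			/* info.sent/copied/len are not advanced
			 * while call_again is set
			 */
		} while (call_again);
		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
	}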

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 include/net/mptcp.h  |  1 +
 net/mptcp/protocol.c | 13 ++++++++-----
 net/mptcp/sched.c    |  1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 345f27a68eaa..d48c66de8466 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -101,6 +101,7 @@ struct mptcp_out_options {
 struct mptcp_sched_data {
 	struct sock	*sock;
 	bool		call_again;
+	u8		subflows;
 	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
 };
 
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4a55a6e89ed5..e3e1026aad97 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1591,13 +1591,16 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 				goto out;
 			}
 
-			info.sent += ret;
-			copied += ret;
-			len -= ret;
+			if (!call_again) {
+				info.sent += ret;
+				copied += ret;
+				len -= ret;
 
-			mptcp_update_post_push(msk, dfrag, ret);
+				mptcp_update_post_push(msk, dfrag, ret);
+			}
 		}
-		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+		if (!call_again)
+			WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
 	}
 
 	/* at this point we held the socket lock for the last subflow we used */
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 83377cd1a4de..0d5fc96a2ce0 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -104,6 +104,7 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
 		}
 		data->contexts[i++] = subflow;
 	}
+	data->subflows = i;
 
 	for (; i < MPTCP_SUBFLOWS_MAX; i++)
 		data->contexts[i] = NULL;
-- 
2.34.1


* [PATCH mptcp-next 3/4] selftests/bpf: add bpf_red scheduler
  2022-05-09 15:01 [PATCH mptcp-next 0/4] BPF redundant scheduler Geliang Tang
  2022-05-09 15:01 ` [PATCH mptcp-next 1/4] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
  2022-05-09 15:01 ` [PATCH mptcp-next 2/4] mptcp: add redundant subflows support Geliang Tang
@ 2022-05-09 15:01 ` Geliang Tang
  2022-05-09 15:01 ` [PATCH mptcp-next 4/4] selftests/bpf: add bpf_red test Geliang Tang
  3 siblings, 0 replies; 7+ messages in thread
From: Geliang Tang @ 2022-05-09 15:01 UTC
  To: mptcp; +Cc: Geliang Tang

This patch implements the redundant BPF MPTCP scheduler, named bpf_red,
which sends all packets redundantly on all available subflows.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 .../testing/selftests/bpf/bpf_mptcp_helpers.h |  1 +
 .../selftests/bpf/progs/mptcp_bpf_red.c       | 52 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_red.c

diff --git a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
index 2d5109f459b4..9004947ee4e4 100644
--- a/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_mptcp_helpers.h
@@ -12,6 +12,7 @@
 struct mptcp_sched_data {
 	struct sock	*sock;
 	bool		call_again;
+	__u8		subflows;
 	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
 };
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c
new file mode 100644
index 000000000000..74c928e8154f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_red.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include "bpf_mptcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+char fmt[] = "ssk=%p call_again=%u";
+
+SEC("struct_ops/mptcp_sched_red_init")
+void BPF_PROG(mptcp_sched_red_init, const struct mptcp_sock *msk)
+{
+}
+
+SEC("struct_ops/mptcp_sched_red_release")
+void BPF_PROG(mptcp_sched_red_release, const struct mptcp_sock *msk)
+{
+}
+
+void BPF_STRUCT_OPS(bpf_red_get_subflow, const struct mptcp_sock *msk,
+		    bool reinject, struct mptcp_sched_data *data)
+{
+	struct sock *ssk = data->contexts[0]->tcp_sock;
+	int i;
+
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (!msk->last_snd || !data->contexts[i])
+			break;
+
+		if (data->contexts[i]->tcp_sock == msk->last_snd) {
+			if (i + 1 == MPTCP_SUBFLOWS_MAX || !data->contexts[i + 1])
+				break;
+
+			ssk = data->contexts[i + 1]->tcp_sock;
+			break;
+		}
+	}
+
+	data->sock = ssk;
+	if (i < data->subflows - 1)
+		data->call_again = 1;
+	else
+		data->call_again = 0;
+}
+
+SEC(".struct_ops")
+struct mptcp_sched_ops red = {
+	.init		= (void *)mptcp_sched_red_init,
+	.release	= (void *)mptcp_sched_red_release,
+	.get_subflow	= (void *)bpf_red_get_subflow,
+	.name		= "bpf_red",
+};
-- 
2.34.1


* [PATCH mptcp-next 4/4] selftests/bpf: add bpf_red test
  2022-05-09 15:01 [PATCH mptcp-next 0/4] BPF redundant scheduler Geliang Tang
                   ` (2 preceding siblings ...)
  2022-05-09 15:01 ` [PATCH mptcp-next 3/4] selftests/bpf: add bpf_red scheduler Geliang Tang
@ 2022-05-09 15:01 ` Geliang Tang
  3 siblings, 0 replies; 7+ messages in thread
From: Geliang Tang @ 2022-05-09 15:01 UTC
  To: mptcp; +Cc: Geliang Tang

This patch adds the redundant BPF MPTCP scheduler test. Use sysctl to
set net.mptcp.scheduler to select this scheduler.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 .../testing/selftests/bpf/prog_tests/mptcp.c  | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 5058daf15ce5..7dd36e92c0b1 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -7,6 +7,7 @@
 #include "network_helpers.h"
 #include "mptcp_bpf_first.skel.h"
 #include "mptcp_bpf_rr.skel.h"
+#include "mptcp_bpf_red.skel.h"
 
 #ifndef TCP_CA_NAME_MAX
 #define TCP_CA_NAME_MAX	16
@@ -401,6 +402,41 @@ static void test_rr(void)
 	mptcp_bpf_rr__destroy(rr_skel);
 }
 
+static void test_red(void)
+{
+	struct mptcp_bpf_red *red_skel;
+	int server_fd, client_fd;
+	struct bpf_link *link;
+
+	red_skel = mptcp_bpf_red__open_and_load();
+	if (!ASSERT_OK_PTR(red_skel, "bpf_red__open_and_load"))
+		return;
+
+	link = bpf_map__attach_struct_ops(red_skel->maps.red);
+	if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+		mptcp_bpf_red__destroy(red_skel);
+		return;
+	}
+
+	system("ip link add veth1 type veth");
+	system("ip addr add 10.0.1.1/24 dev veth1");
+	system("ip link set veth1 up");
+	system("ip mptcp endpoint add 10.0.1.1 subflow");
+	system("sysctl -qw net.mptcp.scheduler=bpf_red");
+	server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+	client_fd = connect_to_mptcp_fd(server_fd, 0);
+
+	send_data(server_fd, client_fd);
+
+	close(client_fd);
+	close(server_fd);
+	system("sysctl -qw net.mptcp.scheduler=default");
+	system("ip mptcp endpoint flush");
+	system("ip link del veth1");
+	bpf_link__destroy(link);
+	mptcp_bpf_red__destroy(red_skel);
+}
+
 void test_mptcp(void)
 {
 	if (test__start_subtest("base"))
@@ -409,4 +445,6 @@ void test_mptcp(void)
 		test_first();
 	if (test__start_subtest("rr"))
 		test_rr();
+	if (test__start_subtest("red"))
+		test_red();
 }
-- 
2.34.1


* Re: [PATCH mptcp-next 1/4] Squash to "mptcp: add get_subflow wrappers"
  2022-05-09 15:01 ` [PATCH mptcp-next 1/4] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
@ 2022-05-11  0:18   ` Mat Martineau
  0 siblings, 0 replies; 7+ messages in thread
From: Mat Martineau @ 2022-05-11  0:18 UTC
  To: Geliang Tang; +Cc: mptcp, Paolo Abeni

On Mon, 9 May 2022, Geliang Tang wrote:

> Add a call_again parameter to mptcp_sched_get_send() and
> mptcp_sched_get_retrans(), so a scheduler can ask to be invoked again
> for the same data.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 13 +++++++++----
> net/mptcp/protocol.h |  4 ++--
> net/mptcp/sched.c    | 10 ++++++++--
> 3 files changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 3c55e7f45aef..4a55a6e89ed5 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1558,6 +1558,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> 	};
> 	struct mptcp_data_frag *dfrag;
> 	int len, copied = 0;
> +	bool call_again;
>
> 	while ((dfrag = mptcp_send_head(sk))) {
> 		info.sent = dfrag->already_sent;
> @@ -1567,7 +1568,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> 			int ret = 0;
>
> 			prev_ssk = ssk;
> -			ssk = mptcp_sched_get_send(msk);
> +			ssk = mptcp_sched_get_send(msk, &call_again);

One thing to check everywhere call_again is used: we need to make sure
the BPF code can't set call_again = true every time it returns, which
would get this code stuck in an infinite loop. Could we limit the number
of scheduler calls per fragment to MPTCP_SUBFLOWS_MAX? The BPF verifier
would not help with this because the BPF program is called repeatedly.
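
Something like a per-fragment cap in the callers would be enough.
Rough sketch (untested):

	bool call_again;
	int calls = 0;

	do {
		ssk = mptcp_sched_get_send(msk, &call_again);
		if (!ssk)
			break;
		/* push on ssk */
	} while (call_again && ++calls < MPTCP_SUBFLOWS_MAX);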

It's also not good if the scheduler returns the same subflow multiple 
times for the same fragment!

These problems (and my comments on the next patch) make me question 
whether the "call_again" design is the right way to go. For example, the 
BPF scheduler could be called once and return a bitmap of which subflows 
to send on, then a flag could be set in each subflow context that is 
"scheduled" for sending. The send loops (whether in __mptcp_push_pending() 
or the subflow delegate chain) could then check the subflow flags to 
figure out which ones to send on. What do you think?
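
Roughly like this; snd_bitmap and the per-subflow 'scheduled' flag are
made-up names, just to illustrate the idea:

	/* scheduler reports all chosen subflows in one call */
	struct mptcp_sched_data {
		u16	snd_bitmap;	/* bit i set: send on contexts[i] */
		struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
	};

	/* wrapper turns the bitmap into hypothetical per-subflow flags */
	for (i = 0; i < MPTCP_SUBFLOWS_MAX && data.contexts[i]; i++)
		data.contexts[i]->scheduled = !!(data.snd_bitmap & BIT(i));

	/* send loops then only use the flagged subflows */
	mptcp_for_each_subflow(msk, subflow) {
		if (subflow->scheduled) {
			/* push pending data on this subflow */
		}
	}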



- Mat

>
> 			/* First check. If the ssk has changed since
> 			 * the last round, release prev_ssk
> @@ -1621,6 +1622,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
> 	struct sock *xmit_ssk;
> 	int len, copied = 0;
> 	bool first = true;
> +	bool call_again;
>
> 	info.flags = 0;
> 	while ((dfrag = mptcp_send_head(sk))) {
> @@ -1634,7 +1636,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
> 			 * check for a different subflow usage only after
> 			 * spooling the first chunk of data
> 			 */
> -			xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk));
> +			xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk), &call_again);
> 			if (!xmit_ssk)
> 				goto out;
> 			if (xmit_ssk != ssk) {
> @@ -2461,12 +2463,13 @@ static void __mptcp_retrans(struct sock *sk)
> 	struct mptcp_data_frag *dfrag;
> 	size_t copied = 0;
> 	struct sock *ssk;
> +	bool call_again;
> 	int ret;
>
> 	mptcp_clean_una_wakeup(sk);
>
> 	/* first check ssk: need to kick "stale" logic */
> -	ssk = mptcp_sched_get_retrans(msk);
> +	ssk = mptcp_sched_get_retrans(msk, &call_again);
> 	dfrag = mptcp_rtx_head(sk);
> 	if (!dfrag) {
> 		if (mptcp_data_fin_enabled(msk)) {
> @@ -3117,11 +3120,13 @@ void __mptcp_data_acked(struct sock *sk)
>
> void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> {
> +	bool call_again;
> +
> 	if (!mptcp_send_head(sk))
> 		return;
>
> 	if (!sock_owned_by_user(sk)) {
> -		struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk));
> +		struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk), &call_again);
>
> 		if (xmit_ssk == ssk)
> 			__mptcp_subflow_push_pending(sk, ssk);
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 59a23838782f..2fe0021a678e 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -631,8 +631,8 @@ int mptcp_init_sched(struct mptcp_sock *msk,
> void mptcp_release_sched(struct mptcp_sock *msk);
> struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
> struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
> -struct sock *mptcp_sched_get_send(struct mptcp_sock *msk);
> -struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk);
> +struct sock *mptcp_sched_get_send(struct mptcp_sock *msk, bool *call_again);
> +struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk, bool *call_again);
>
> static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
> {
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 9ee2d30a6f19..83377cd1a4de 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -111,12 +111,14 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> 	return 0;
> }
>
> -struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> +struct sock *mptcp_sched_get_send(struct mptcp_sock *msk, bool *call_again)
> {
> 	struct mptcp_sched_data data;
>
> 	sock_owned_by_me((struct sock *)msk);
>
> +	*call_again = 0;
> +
> 	/* the following check is moved out of mptcp_subflow_get_send */
> 	if (__mptcp_check_fallback(msk)) {
> 		if (!msk->first)
> @@ -131,15 +133,18 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> 	msk->sched->get_subflow(msk, false, &data);
>
> 	msk->last_snd = data.sock;
> +	*call_again = data.call_again;
> 	return data.sock;
> }
>
> -struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> +struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk, bool *call_again)
> {
> 	struct mptcp_sched_data data;
>
> 	sock_owned_by_me((const struct sock *)msk);
>
> +	*call_again = 0;
> +
> 	/* the following check is moved out of mptcp_subflow_get_retrans */
> 	if (__mptcp_check_fallback(msk))
> 		return NULL;
> @@ -151,5 +156,6 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> 	msk->sched->get_subflow(msk, true, &data);
>
> 	msk->last_snd = data.sock;
> +	*call_again = data.call_again;
> 	return data.sock;
> }
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel

* Re: [PATCH mptcp-next 2/4] mptcp: add redundant subflows support
  2022-05-09 15:01 ` [PATCH mptcp-next 2/4] mptcp: add redundant subflows support Geliang Tang
@ 2022-05-11  0:56   ` Mat Martineau
  0 siblings, 0 replies; 7+ messages in thread
From: Mat Martineau @ 2022-05-11  0:56 UTC
  To: Geliang Tang; +Cc: mptcp

On Mon, 9 May 2022, Geliang Tang wrote:

> This patch adds redundant subflows support, sending all packets
> redundantly on all available subflows.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> include/net/mptcp.h  |  1 +
> net/mptcp/protocol.c | 13 ++++++++-----
> net/mptcp/sched.c    |  1 +
> 3 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> index 345f27a68eaa..d48c66de8466 100644
> --- a/include/net/mptcp.h
> +++ b/include/net/mptcp.h
> @@ -101,6 +101,7 @@ struct mptcp_out_options {
> struct mptcp_sched_data {
> 	struct sock	*sock;
> 	bool		call_again;
> +	u8		subflows;
> 	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
> };
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 4a55a6e89ed5..e3e1026aad97 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1591,13 +1591,16 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> 				goto out;

If the updates below were skipped on a previous iteration of the loop, 
they would need to be handled before the 'goto' here.

> 			}
>
> -			info.sent += ret;
> -			copied += ret;
> -			len -= ret;
> +			if (!call_again) {
> +				info.sent += ret;
> +				copied += ret;
> +				len -= ret;
>
> -			mptcp_update_post_push(msk, dfrag, ret);
> +				mptcp_update_post_push(msk, dfrag, ret);

I'm not sure this works: 'ret' is the number of bytes sent on the final
scheduled subflow, and a different amount of data could have been sent
on the other subflows.

The simple fix is to keep track of the largest value of 'ret' when
repeating this loop and use that to track the amount of data sent. But
I'm not sure how that will behave when multiple subflows have different
latencies, speeds, and window sizes.

Do we want to send the latest unsent data on every scheduled subflow? Or 
is it better to keep the data on the slower subflows contiguous until the 
received DATA_ACKs move msk->snd_una forward? The former case means that 
the slow subflows skip unacked data and increase the possible need for 
reinjecting data. The latter would only skip acked data, leading to fewer 
MPTCP reinjections.

The best thing for now may be to try the simple approach (track the 
largest copied amount) and see how it behaves, then we can discuss based 
on that data.
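
In __mptcp_push_pending() that could look something like this (sketch
only, skipping locking and the error paths):

	int ret, max_sent = 0;

	do {
		ssk = mptcp_sched_get_send(msk, &call_again);
		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
		if (ret > max_sent)
			max_sent = ret;
	} while (call_again);

	/* advance the send state by the largest amount sent */
	info.sent += max_sent;
	copied += max_sent;
	len -= max_sent;
	mptcp_update_post_push(msk, dfrag, max_sent);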

> +			}
> 		}
> -		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> +		if (!call_again)
> +			WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> 	}
>
> 	/* at this point we held the socket lock for the last subflow we used */
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 83377cd1a4de..0d5fc96a2ce0 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -104,6 +104,7 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> 		}
> 		data->contexts[i++] = subflow;
> 	}
> +	data->subflows = i;
>
> 	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> 		data->contexts[i] = NULL;


There's also the more complex __mptcp_subflow_push_pending() path, the
chaining of further sends through mptcp_subflow_delegate(), and the
retransmit loop to consider.



--
Mat Martineau
Intel
