All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH mptcp-next v2 0/5] BPF packet scheduler
@ 2022-05-23 11:33 Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 1/5] Squash to "mptcp: add struct mptcp_sched_ops" Geliang Tang
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Geliang Tang @ 2022-05-23 11:33 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

- Use new BPF scheduler API:
 unsigned long (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
                              struct mptcp_sched_data *data);
- base-commit: export/20220521T072520

Geliang Tang (5):
  Squash to "mptcp: add struct mptcp_sched_ops"
  Squash to "mptcp: add sched in mptcp_sock"
  Squash to "mptcp: add get_subflow wrappers"
  Squash to "mptcp: add bpf_mptcp_sched_ops"
  Squash to "selftests/bpf: add bpf_first scheduler"

 include/net/mptcp.h                           |  8 ++--
 net/mptcp/bpf.c                               | 37 +--------------
 net/mptcp/sched.c                             | 47 +++++++++++++++----
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 24 ++++++++--
 .../selftests/bpf/progs/mptcp_bpf_first.c     | 10 ++--
 5 files changed, 69 insertions(+), 57 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH mptcp-next v2 1/5] Squash to "mptcp: add struct mptcp_sched_ops"
  2022-05-23 11:33 [PATCH mptcp-next v2 0/5] BPF packet scheduler Geliang Tang
@ 2022-05-23 11:33 ` Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 2/5] Squash to "mptcp: add sched in mptcp_sock" Geliang Tang
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Geliang Tang @ 2022-05-23 11:33 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

Use bitmap instead of sock in struct mptcp_sched_data.

Please update the commit log:

'''
This patch defines struct mptcp_sched_ops, which has three struct members,
name, owner and list, and three function pointers, init, release and
get_subflow.

Add the scheduler registering, unregistering and finding functions to add,
delete and find a packet scheduler on the global list mptcp_sched_list.

The BPF scheduler function get_subflow() has a struct mptcp_sched_data
parameter, which contains a subflow pointers array. It returns a bitmap of
which subflow or subflows in the array are picked by the scheduler to
send data.
'''

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 include/net/mptcp.h                           |  8 ++++----
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 12 ++++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 6456ea26e4c7..24a9eb32c1dd 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -97,15 +97,15 @@ struct mptcp_out_options {
 };
 
 #define MPTCP_SCHED_NAME_MAX	16
+#define MPTCP_SUBFLOWS_MAX	8
 
 struct mptcp_sched_data {
-	struct sock	*sock;
-	bool		call_again;
+	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
 };
 
 struct mptcp_sched_ops {
-	void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
-			    struct mptcp_sched_data *data);
+	unsigned long (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
+				     struct mptcp_sched_data *data);
 
 	char			name[MPTCP_SCHED_NAME_MAX];
 	struct module		*owner;
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index aca4e3c6ac48..17d97e21b1ea 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -231,10 +231,14 @@ extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
 #define MPTCP_SCHED_NAME_MAX	16
+#define MPTCP_SUBFLOWS_MAX	8
+
+struct mptcp_subflow_context {
+	struct	sock *tcp_sock;	    /* tcp sk backpointer */
+} __attribute__((preserve_access_index));
 
 struct mptcp_sched_data {
-	struct sock	*sock;
-	bool		call_again;
+	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
 };
 
 struct mptcp_sched_ops {
@@ -243,8 +247,8 @@ struct mptcp_sched_ops {
 	void (*init)(const struct mptcp_sock *msk);
 	void (*release)(const struct mptcp_sock *msk);
 
-	void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
-			    struct mptcp_sched_data *data);
+	unsigned long (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
+				     struct mptcp_sched_data *data);
 	void *owner;
 };
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH mptcp-next v2 2/5] Squash to "mptcp: add sched in mptcp_sock"
  2022-05-23 11:33 [PATCH mptcp-next v2 0/5] BPF packet scheduler Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 1/5] Squash to "mptcp: add struct mptcp_sched_ops" Geliang Tang
@ 2022-05-23 11:33 ` Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Geliang Tang @ 2022-05-23 11:33 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

No need to export sched in bpf_tcp_helpers.h, drop it.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 17d97e21b1ea..cb3db7ea36b9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -257,7 +257,6 @@ struct mptcp_sock {
 
 	__u32		token;
 	struct sock	*first;
-	struct mptcp_sched_ops	*sched;
 	char		ca_name[TCP_CA_NAME_MAX];
 } __attribute__((preserve_access_index));
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers"
  2022-05-23 11:33 [PATCH mptcp-next v2 0/5] BPF packet scheduler Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 1/5] Squash to "mptcp: add struct mptcp_sched_ops" Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 2/5] Squash to "mptcp: add sched in mptcp_sock" Geliang Tang
@ 2022-05-23 11:33 ` Geliang Tang
  2022-05-24  1:01   ` Mat Martineau
  2022-05-23 11:33 ` [PATCH mptcp-next v2 4/5] Squash to "mptcp: add bpf_mptcp_sched_ops" Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Geliang Tang
  4 siblings, 1 reply; 14+ messages in thread
From: Geliang Tang @ 2022-05-23 11:33 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

Please update the commit log:

'''
This patch defines two new wrappers mptcp_sched_get_send() and
mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
Use them instead of using mptcp_subflow_get_send() or
mptcp_subflow_get_retrans() directly.

Set the subflow pointers array in struct mptcp_sched_data before invoking
get_subflow(), then it can be used in get_subflow() in the BPF contexts.

Get the return bitmap of get_subflow() and test which subflow or subflows
are picked by the scheduler.
'''

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 3ceb721e6489..0ef805c489ab 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
 static int mptcp_sched_data_init(struct mptcp_sock *msk,
 				 struct mptcp_sched_data *data)
 {
-	data->sock = NULL;
-	data->call_again = 0;
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		if (i == MPTCP_SUBFLOWS_MAX) {
+			pr_warn_once("too many subflows");
+			break;
+		}
+		data->contexts[i++] = subflow;
+	}
+
+	for (; i < MPTCP_SUBFLOWS_MAX; i++)
+		data->contexts[i++] = NULL;
 
 	return 0;
 }
@@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
 struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 {
 	struct mptcp_sched_data data;
+	struct sock *ssk = NULL;
+	unsigned long bitmap;
+	int i;
 
 	sock_owned_by_me((struct sock *)msk);
 
@@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 		return mptcp_subflow_get_send(msk);
 
 	mptcp_sched_data_init(msk, &data);
-	msk->sched->get_subflow(msk, false, &data);
+	bitmap = msk->sched->get_subflow(msk, false, &data);
 
-	msk->last_snd = data.sock;
-	return data.sock;
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (test_bit(i, &bitmap) && data.contexts[i]) {
+			ssk = data.contexts[i]->tcp_sock;
+			msk->last_snd = ssk;
+			break;
+		}
+	}
+
+	return ssk;
 }
 
 struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 {
 	struct mptcp_sched_data data;
+	struct sock *ssk = NULL;
+	unsigned long bitmap;
+	int i;
 
 	sock_owned_by_me((const struct sock *)msk);
 
@@ -134,8 +158,15 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 		return mptcp_subflow_get_retrans(msk);
 
 	mptcp_sched_data_init(msk, &data);
-	msk->sched->get_subflow(msk, true, &data);
+	bitmap = msk->sched->get_subflow(msk, true, &data);
+
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (test_bit(i, &bitmap) && data.contexts[i]) {
+			ssk = data.contexts[i]->tcp_sock;
+			msk->last_snd = ssk;
+			break;
+		}
+	}
 
-	msk->last_snd = data.sock;
-	return data.sock;
+	return ssk;
 }
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH mptcp-next v2 4/5] Squash to "mptcp: add bpf_mptcp_sched_ops"
  2022-05-23 11:33 [PATCH mptcp-next v2 0/5] BPF packet scheduler Geliang Tang
                   ` (2 preceding siblings ...)
  2022-05-23 11:33 ` [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
@ 2022-05-23 11:33 ` Geliang Tang
  2022-05-23 11:33 ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Geliang Tang
  4 siblings, 0 replies; 14+ messages in thread
From: Geliang Tang @ 2022-05-23 11:33 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

Drop the access code for mptcp_sched_data.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/bpf.c | 37 +------------------------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 338146d173f4..218f78514bdf 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -18,8 +18,6 @@
 #ifdef CONFIG_BPF_JIT
 extern struct bpf_struct_ops bpf_mptcp_sched_ops;
 extern struct btf *btf_vmlinux;
-static const struct btf_type *mptcp_sched_type __read_mostly;
-static u32 mptcp_sched_id;
 
 static u32 optional_ops[] = {
 	offsetof(struct mptcp_sched_ops, init),
@@ -40,33 +38,9 @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log,
 					     u32 *next_btf_id,
 					     enum bpf_type_flag *flag)
 {
-	size_t end;
-
-	if (atype == BPF_READ)
+	if (atype == BPF_READ) {
 		return btf_struct_access(log, btf, t, off, size, atype,
 					 next_btf_id, flag);
-
-	if (t != mptcp_sched_type) {
-		bpf_log(log, "only access to mptcp_sched_data is supported\n");
-		return -EACCES;
-	}
-
-	switch (off) {
-	case offsetof(struct mptcp_sched_data, sock):
-		end = offsetofend(struct mptcp_sched_data, sock);
-		break;
-	case offsetof(struct mptcp_sched_data, call_again):
-		end = offsetofend(struct mptcp_sched_data, call_again);
-		break;
-	default:
-		bpf_log(log, "no write support to mptcp_sched_data at off %d\n", off);
-		return -EACCES;
-	}
-
-	if (off + size > end) {
-		bpf_log(log, "access beyond mptcp_sched_data at off %u size %u ended at %zu",
-			off, size, end);
-		return -EACCES;
 	}
 
 	return NOT_INIT;
@@ -142,15 +116,6 @@ static int bpf_mptcp_sched_init_member(const struct btf_type *t,
 
 static int bpf_mptcp_sched_init(struct btf *btf)
 {
-	s32 type_id;
-
-	type_id = btf_find_by_name_kind(btf, "mptcp_sched_data",
-					BTF_KIND_STRUCT);
-	if (type_id < 0)
-		return -EINVAL;
-	mptcp_sched_id = type_id;
-	mptcp_sched_type = btf_type_by_id(btf, mptcp_sched_id);
-
 	return 0;
 }
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler"
  2022-05-23 11:33 [PATCH mptcp-next v2 0/5] BPF packet scheduler Geliang Tang
                   ` (3 preceding siblings ...)
  2022-05-23 11:33 ` [PATCH mptcp-next v2 4/5] Squash to "mptcp: add bpf_mptcp_sched_ops" Geliang Tang
@ 2022-05-23 11:33 ` Geliang Tang
  2022-05-23 11:43   ` Squash to "selftests/bpf: add bpf_first scheduler": Build Failure MPTCP CI
                     ` (2 more replies)
  4 siblings, 3 replies; 14+ messages in thread
From: Geliang Tang @ 2022-05-23 11:33 UTC (permalink / raw)
  To: mptcp; +Cc: Geliang Tang

Add set_bit() helper and use new get_subflow API.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h       | 11 +++++++++++
 tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 10 ++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index cb3db7ea36b9..9c7d33e106a4 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -260,4 +260,15 @@ struct mptcp_sock {
 	char		ca_name[TCP_CA_NAME_MAX];
 } __attribute__((preserve_access_index));
 
+#define _AC(X,Y)	(X##Y)
+#define UL(x)		(_AC(x, UL))
+
+static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
+{
+        unsigned long *p = ((unsigned long *)addr) + (nr / sizeof(unsigned long));
+        unsigned long mask = UL(1) << (nr % sizeof(unsigned long));
+
+        *p  |= mask;
+}
+
 #endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
index fd67b5f42964..e5dc53965642 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
@@ -16,11 +16,13 @@ void BPF_PROG(mptcp_sched_first_release, const struct mptcp_sock *msk)
 {
 }
 
-void BPF_STRUCT_OPS(bpf_first_get_subflow, const struct mptcp_sock *msk,
-		    bool reinject, struct mptcp_sched_data *data)
+unsigned long BPF_STRUCT_OPS(bpf_first_get_subflow, const struct mptcp_sock *msk,
+			     bool reinject, struct mptcp_sched_data *data)
 {
-	data->sock = msk->first;
-	data->call_again = 0;
+	unsigned long bitmap = 0;
+
+	set_bit(0, &bitmap);
+	return bitmap;
 }
 
 SEC(".struct_ops")
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: Squash to "selftests/bpf: add bpf_first scheduler": Build Failure
  2022-05-23 11:33 ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Geliang Tang
@ 2022-05-23 11:43   ` MPTCP CI
  2022-05-23 13:33   ` Squash to "selftests/bpf: add bpf_first scheduler": Tests Results MPTCP CI
  2022-05-24  1:02   ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Mat Martineau
  2 siblings, 0 replies; 14+ messages in thread
From: MPTCP CI @ 2022-05-23 11:43 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

Hi Geliang,

Thank you for your modifications, that's great!

But sadly, our CI spotted some issues with it when trying to build it.

You can find more details there:

  https://patchwork.kernel.org/project/mptcp/patch/94abb5c4207ee3fc0827c77816648ad37dccf5d0.1653305364.git.geliang.tang@suse.com/
  https://github.com/multipath-tcp/mptcp_net-next/actions/runs/2370983932

Status: failure
Initiator: MPTCPimporter
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/06c3387f9972

Feel free to reply to this email if you cannot access logs, if you need
some support to fix the error, if this doesn't seem to be caused by your
modifications or if the error is a false positive one.

Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (Tessares)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Squash to "selftests/bpf: add bpf_first scheduler": Tests Results
  2022-05-23 11:33 ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Geliang Tang
  2022-05-23 11:43   ` Squash to "selftests/bpf: add bpf_first scheduler": Build Failure MPTCP CI
@ 2022-05-23 13:33   ` MPTCP CI
  2022-05-24  1:02   ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Mat Martineau
  2 siblings, 0 replies; 14+ messages in thread
From: MPTCP CI @ 2022-05-23 13:33 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

Hi Geliang,

Thank you for your modifications, that's great!

Our CI did some validations and here is its report:

- KVM Validation: normal:
  - Unstable: 1 failed test(s): selftest_mptcp_join 🔴:
  - Task: https://cirrus-ci.com/task/4735571141591040
  - Summary: https://api.cirrus-ci.com/v1/artifact/task/4735571141591040/summary/summary.txt

- {"code":404,"message":
  - "HTTP 404 Not Found"}:
  - Task: https://cirrus-ci.com/task/5861471048433664
  - Summary: https://api.cirrus-ci.com/v1/artifact/task/5861471048433664/summary/summary.txt

Initiator: Patchew Applier
Commits: https://github.com/multipath-tcp/mptcp_net-next/commits/06c3387f9972


If there are some issues, you can reproduce them using the same environment as
the one used by the CI thanks to a docker image, e.g.:

    $ cd [kernel source code]
    $ docker run -v "${PWD}:${PWD}:rw" -w "${PWD}" --privileged --rm -it \
        --pull always mptcp/mptcp-upstream-virtme-docker:latest \
        auto-debug

For more details:

    https://github.com/multipath-tcp/mptcp-upstream-virtme-docker


Please note that despite all the efforts that have been already done to have a
stable tests suite when executed on a public CI like here, it is possible some
reported issues are not due to your modifications. Still, do not hesitate to
help us improve that ;-)

Cheers,
MPTCP GH Action bot
Bot operated by Matthieu Baerts (Tessares)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers"
  2022-05-23 11:33 ` [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
@ 2022-05-24  1:01   ` Mat Martineau
  2022-05-26 12:18     ` Geliang Tang
  0 siblings, 1 reply; 14+ messages in thread
From: Mat Martineau @ 2022-05-24  1:01 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Mon, 23 May 2022, Geliang Tang wrote:

> Please update the commit log:
>
> '''
> This patch defines two new wrappers mptcp_sched_get_send() and
> mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
> Use them instead of using mptcp_subflow_get_send() or
> mptcp_subflow_get_retrans() directly.
>
> Set the subflow pointers array in struct mptcp_sched_data before invoking
> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>
> Get the return bitmap of get_subflow() and test which subflow or subflows
> are picked by the scheduler.
> '''
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 39 insertions(+), 8 deletions(-)
>
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 3ceb721e6489..0ef805c489ab 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> static int mptcp_sched_data_init(struct mptcp_sock *msk,
> 				 struct mptcp_sched_data *data)
> {
> -	data->sock = NULL;
> -	data->call_again = 0;
> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		if (i == MPTCP_SUBFLOWS_MAX) {
> +			pr_warn_once("too many subflows");
> +			break;
> +		}
> +		data->contexts[i++] = subflow;
> +	}
> +
> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> +		data->contexts[i++] = NULL;
>
> 	return 0;
> }
> @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
> 	struct mptcp_sched_data data;
> +	struct sock *ssk = NULL;
> +	unsigned long bitmap;
> +	int i;
>
> 	sock_owned_by_me((struct sock *)msk);
>
> @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> 		return mptcp_subflow_get_send(msk);
>
> 	mptcp_sched_data_init(msk, &data);
> -	msk->sched->get_subflow(msk, false, &data);
> +	bitmap = msk->sched->get_subflow(msk, false, &data);
>
> -	msk->last_snd = data.sock;
> -	return data.sock;
> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> +			ssk = data.contexts[i]->tcp_sock;
> +			msk->last_snd = ssk;
> +			break;
> +		}
> +	}
> +
> +	return ssk;

The commit that this gets squashed to also ignores call_again, so is this 
code that just returns the ssk for the first bit in the bitmap also 
placeholder code?


It also seems like correlating the bitmap bits with the data.contexts array 
requires extra bookkeeping for the bitmap. What do you think about using an 
array instead, like:

struct mptcp_sched_data {
        struct mptcp_subflow_context *context;
        bool is_scheduled;
};

And passing an array of that struct to the BPF code? Then the is_scheduled 
flag could be set for the corresponding subflow.

Do you think that array-based API would be clearer than the bitmap to 
someone writing a BPF scheduler?


- Mat


> }
>
> struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> 	struct mptcp_sched_data data;
> +	struct sock *ssk = NULL;
> +	unsigned long bitmap;
> +	int i;
>
> 	sock_owned_by_me((const struct sock *)msk);
>
> @@ -134,8 +158,15 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> 		return mptcp_subflow_get_retrans(msk);
>
> 	mptcp_sched_data_init(msk, &data);
> -	msk->sched->get_subflow(msk, true, &data);
> +	bitmap = msk->sched->get_subflow(msk, true, &data);
> +
> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> +			ssk = data.contexts[i]->tcp_sock;
> +			msk->last_snd = ssk;
> +			break;
> +		}
> +	}
>
> -	msk->last_snd = data.sock;
> -	return data.sock;
> +	return ssk;
> }
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler"
  2022-05-23 11:33 ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Geliang Tang
  2022-05-23 11:43   ` Squash to "selftests/bpf: add bpf_first scheduler": Build Failure MPTCP CI
  2022-05-23 13:33   ` Squash to "selftests/bpf: add bpf_first scheduler": Tests Results MPTCP CI
@ 2022-05-24  1:02   ` Mat Martineau
  2 siblings, 0 replies; 14+ messages in thread
From: Mat Martineau @ 2022-05-24  1:02 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Mon, 23 May 2022, Geliang Tang wrote:

> Add set_bit() helper and use new get_subflow API.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> tools/testing/selftests/bpf/bpf_tcp_helpers.h       | 11 +++++++++++
> tools/testing/selftests/bpf/progs/mptcp_bpf_first.c | 10 ++++++----
> 2 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> index cb3db7ea36b9..9c7d33e106a4 100644
> --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> @@ -260,4 +260,15 @@ struct mptcp_sock {
> 	char		ca_name[TCP_CA_NAME_MAX];
> } __attribute__((preserve_access_index));
>
> +#define _AC(X,Y)	(X##Y)
> +#define UL(x)		(_AC(x, UL))
> +
> +static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
> +{
> +        unsigned long *p = ((unsigned long *)addr) + (nr / sizeof(unsigned long));
> +        unsigned long mask = UL(1) << (nr % sizeof(unsigned long));
> +
> +        *p  |= mask;
> +}
> +
> #endif
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
> index fd67b5f42964..e5dc53965642 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
> @@ -16,11 +16,13 @@ void BPF_PROG(mptcp_sched_first_release, const struct mptcp_sock *msk)
> {
> }
>
> -void BPF_STRUCT_OPS(bpf_first_get_subflow, const struct mptcp_sock *msk,
> -		    bool reinject, struct mptcp_sched_data *data)
> +unsigned long BPF_STRUCT_OPS(bpf_first_get_subflow, const struct mptcp_sock *msk,
> +			     bool reinject, struct mptcp_sched_data *data)
> {
> -	data->sock = msk->first;
> -	data->call_again = 0;
> +	unsigned long bitmap = 0;
> +
> +	set_bit(0, &bitmap);
> +	return bitmap;

It might be more realistic to return the first non-backup subflow (or 
first backup if there are only backup subflows). Do you think that would 
be better as a replacement for this test or as an additional test?

- Mat

> }
>
> SEC(".struct_ops")
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers"
  2022-05-24  1:01   ` Mat Martineau
@ 2022-05-26 12:18     ` Geliang Tang
  2022-05-26 23:48       ` Mat Martineau
  0 siblings, 1 reply; 14+ messages in thread
From: Geliang Tang @ 2022-05-26 12:18 UTC (permalink / raw)
  To: Mat Martineau; +Cc: mptcp

[-- Attachment #1: Type: text/plain, Size: 4710 bytes --]

Hi Mat,

On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
> On Mon, 23 May 2022, Geliang Tang wrote:
> 
> > Please update the commit log:
> > 
> > '''
> > This patch defines two new wrappers mptcp_sched_get_send() and
> > mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
> > Use them instead of using mptcp_subflow_get_send() or
> > mptcp_subflow_get_retrans() directly.
> > 
> > Set the subflow pointers array in struct mptcp_sched_data before invoking
> > get_subflow(), then it can be used in get_subflow() in the BPF contexts.
> > 
> > Get the return bitmap of get_subflow() and test which subflow or subflows
> > are picked by the scheduler.
> > '''
> > 
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> > net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 39 insertions(+), 8 deletions(-)
> > 
> > diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> > index 3ceb721e6489..0ef805c489ab 100644
> > --- a/net/mptcp/sched.c
> > +++ b/net/mptcp/sched.c
> > @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> > static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > 				 struct mptcp_sched_data *data)
> > {
> > -	data->sock = NULL;
> > -	data->call_again = 0;
> > +	struct mptcp_subflow_context *subflow;
> > +	int i = 0;
> > +
> > +	mptcp_for_each_subflow(msk, subflow) {
> > +		if (i == MPTCP_SUBFLOWS_MAX) {
> > +			pr_warn_once("too many subflows");
> > +			break;
> > +		}
> > +		data->contexts[i++] = subflow;
> > +	}
> > +
> > +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> > +		data->contexts[i++] = NULL;
> > 
> > 	return 0;
> > }
> > @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > {
> > 	struct mptcp_sched_data data;
> > +	struct sock *ssk = NULL;
> > +	unsigned long bitmap;
> > +	int i;
> > 
> > 	sock_owned_by_me((struct sock *)msk);
> > 
> > @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > 		return mptcp_subflow_get_send(msk);
> > 
> > 	mptcp_sched_data_init(msk, &data);
> > -	msk->sched->get_subflow(msk, false, &data);
> > +	bitmap = msk->sched->get_subflow(msk, false, &data);
> > 
> > -	msk->last_snd = data.sock;
> > -	return data.sock;
> > +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> > +			ssk = data.contexts[i]->tcp_sock;
> > +			msk->last_snd = ssk;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return ssk;
> 
> The commit that this gets squashed to also ignores call_again, so is this
> code that just returns the ssk for the first bit in the bitmap also
> placeholder code?

Yes. Since the redundant scheduler is still under development and there's
still a lot of work to be done, I plan to support single-subflow schedulers
in this series first. The multiple-subflow schedulers will be added later.

> 
> 
> It also seems like correlate the bitmap bits with the data.contexts array
> makes the bitmap require extra work. What do you think about using an array
> instead, like:
> 
> struct mptcp_sched_data {
>        struct mptcp_subflow_context *context;
>        bool is_scheduled;
> };
> 
> And passing an array of that struct to the BPF code? Then the is_scheduled
> flag could be set for the corresponding subflow.
> 
> Do you think that array-based API would be clearer than the bitmap to
> someone writing a BPF scheduler?

I tried to implement this array-based API, but it's not going well. Array
parameters are not easily supported in BPF functions. And the write access
permissions of array members are not easy to allow in BPF. I haven't found
a solution to these two issues yet. Here are the code and error logs in the
attachment.

Thanks,
-Geliang

> 
> 
> - Mat
> 
> 
> > }
> > 
> > struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> > {
> > 	struct mptcp_sched_data data;
> > +	struct sock *ssk = NULL;
> > +	unsigned long bitmap;
> > +	int i;
> > 
> > 	sock_owned_by_me((const struct sock *)msk);
> > 
> > @@ -134,8 +158,15 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> > 		return mptcp_subflow_get_retrans(msk);
> > 
> > 	mptcp_sched_data_init(msk, &data);
> > -	msk->sched->get_subflow(msk, true, &data);
> > +	bitmap = msk->sched->get_subflow(msk, true, &data);
> > +
> > +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> > +			ssk = data.contexts[i]->tcp_sock;
> > +			msk->last_snd = ssk;
> > +			break;
> > +		}
> > +	}
> > 
> > -	msk->last_snd = data.sock;
> > -	return data.sock;
> > +	return ssk;
> > }
> > -- 
> > 2.34.1
> > 
> > 
> > 
> 
> --
> Mat Martineau
> Intel
> 

[-- Attachment #2: 0001-new-api.patch --]
[-- Type: text/x-patch, Size: 9173 bytes --]

From 6b45c73f0621a3ba001712615b53bc89c47e9c80 Mon Sep 17 00:00:00 2001
Message-Id: <6b45c73f0621a3ba001712615b53bc89c47e9c80.1653401254.git.geliang.tang@suse.com>
From: Geliang Tang <geliang.tang@suse.com>
Date: Tue, 24 May 2022 16:57:53 +0800
Subject: [PATCH] new api

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 include/net/mptcp.h                           |  7 ++--
 net/mptcp/bpf.c                               | 33 +++++++++++++++++++
 net/mptcp/sched.c                             | 33 ++++++++++---------
 tools/testing/selftests/bpf/bpf_tcp_helpers.h | 18 +++-------
 .../selftests/bpf/progs/mptcp_bpf_first.c     |  9 ++---
 .../selftests/bpf/progs/mptcp_bpf_rr.c        | 13 ++++----
 6 files changed, 67 insertions(+), 46 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 24a9eb32c1dd..279b46536b64 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -100,12 +100,13 @@ struct mptcp_out_options {
 #define MPTCP_SUBFLOWS_MAX	8
 
 struct mptcp_sched_data {
-	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
+	struct mptcp_subflow_context *context;
+	bool is_scheduled;
 };
 
 struct mptcp_sched_ops {
-	unsigned long (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
-				     struct mptcp_sched_data *data);
+	void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
+			    struct mptcp_sched_data contexts[]);
 
 	char			name[MPTCP_SCHED_NAME_MAX];
 	struct module		*owner;
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 218f78514bdf..89b7fa68e363 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -18,6 +18,8 @@
 #ifdef CONFIG_BPF_JIT
 extern struct bpf_struct_ops bpf_mptcp_sched_ops;
 extern struct btf *btf_vmlinux;
+static const struct btf_type *mptcp_sched_type __read_mostly;
+static u32 mptcp_sched_id;
 
 static u32 optional_ops[] = {
 	offsetof(struct mptcp_sched_ops, init),
@@ -38,11 +40,33 @@ static int bpf_mptcp_sched_btf_struct_access(struct bpf_verifier_log *log,
 					     u32 *next_btf_id,
 					     enum bpf_type_flag *flag)
 {
+	size_t end;
+
 	if (atype == BPF_READ) {
 		return btf_struct_access(log, btf, t, off, size, atype,
 					 next_btf_id, flag);
 	}
 
+	if (t != mptcp_sched_type) {
+		bpf_log(log, "only access to mptcp_sched_data is supported\n");
+		return -EACCES;
+	}
+
+	switch (off) {
+	case offsetof(struct mptcp_sched_data, is_scheduled):
+		end = offsetofend(struct mptcp_sched_data, is_scheduled);
+		break;
+	default:
+		bpf_log(log, "no write support to mptcp_sched_data at off %d\n", off);
+		return -EACCES;
+	}
+
+	if (off + size > end) {
+		bpf_log(log, "access beyond mptcp_sched_data at off %u size %u ended at %zu",
+			off, size, end);
+		return -EACCES;
+	}
+
 	return NOT_INIT;
 }
 
@@ -116,6 +140,15 @@ static int bpf_mptcp_sched_init_member(const struct btf_type *t,
 
 static int bpf_mptcp_sched_init(struct btf *btf)
 {
+	s32 type_id;
+
+	type_id = btf_find_by_name_kind(btf, "mptcp_sched_data",
+					BTF_KIND_STRUCT);
+	if (type_id < 0)
+		return -EINVAL;
+	mptcp_sched_id = type_id;
+	mptcp_sched_type = btf_type_by_id(btf, mptcp_sched_id);
+
 	return 0;
 }
 
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 0ef805c489ab..83ea956cdb9d 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -89,7 +89,7 @@ void mptcp_release_sched(struct mptcp_sock *msk)
 }
 
 static int mptcp_sched_data_init(struct mptcp_sock *msk,
-				 struct mptcp_sched_data *data)
+				 struct mptcp_sched_data contexts[])
 {
 	struct mptcp_subflow_context *subflow;
 	int i = 0;
@@ -99,20 +99,22 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
 			pr_warn_once("too many subflows");
 			break;
 		}
-		data->contexts[i++] = subflow;
+		contexts[i++].context = subflow;
+		contexts[i++].is_scheduled = 0;
 	}
 
-	for (; i < MPTCP_SUBFLOWS_MAX; i++)
-		data->contexts[i++] = NULL;
+	for (; i < MPTCP_SUBFLOWS_MAX; i++) {
+		contexts[i++].context = NULL;
+		contexts[i++].is_scheduled = 0;
+	}
 
 	return 0;
 }
 
 struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 {
-	struct mptcp_sched_data data;
+	struct mptcp_sched_data contexts[MPTCP_SUBFLOWS_MAX];
 	struct sock *ssk = NULL;
-	unsigned long bitmap;
 	int i;
 
 	sock_owned_by_me((struct sock *)msk);
@@ -127,12 +129,12 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 	if (!msk->sched)
 		return mptcp_subflow_get_send(msk);
 
-	mptcp_sched_data_init(msk, &data);
-	bitmap = msk->sched->get_subflow(msk, false, &data);
+	mptcp_sched_data_init(msk, contexts);
+	msk->sched->get_subflow(msk, false, contexts);
 
 	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
-		if (test_bit(i, &bitmap) && data.contexts[i]) {
-			ssk = data.contexts[i]->tcp_sock;
+		if (contexts[i].is_scheduled) {
+			ssk = contexts[i].context->tcp_sock;
 			msk->last_snd = ssk;
 			break;
 		}
@@ -143,9 +145,8 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 
 struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 {
-	struct mptcp_sched_data data;
+	struct mptcp_sched_data contexts[MPTCP_SUBFLOWS_MAX];
 	struct sock *ssk = NULL;
-	unsigned long bitmap;
 	int i;
 
 	sock_owned_by_me((const struct sock *)msk);
@@ -157,12 +158,12 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 	if (!msk->sched)
 		return mptcp_subflow_get_retrans(msk);
 
-	mptcp_sched_data_init(msk, &data);
-	bitmap = msk->sched->get_subflow(msk, true, &data);
+	mptcp_sched_data_init(msk, contexts);
+	msk->sched->get_subflow(msk, true, contexts);
 
 	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
-		if (test_bit(i, &bitmap) && data.contexts[i]) {
-			ssk = data.contexts[i]->tcp_sock;
+		if (contexts[i].is_scheduled) {
+			ssk = contexts[i].context->tcp_sock;
 			msk->last_snd = ssk;
 			break;
 		}
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 0f29b47260ff..cfa1fada3793 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -238,7 +238,8 @@ struct mptcp_subflow_context {
 } __attribute__((preserve_access_index));
 
 struct mptcp_sched_data {
-	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
+	struct mptcp_subflow_context *context;
+	bool is_scheduled;
 };
 
 struct mptcp_sched_ops {
@@ -247,8 +248,8 @@ struct mptcp_sched_ops {
 	void (*init)(const struct mptcp_sock *msk);
 	void (*release)(const struct mptcp_sock *msk);
 
-	unsigned long (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
-				     struct mptcp_sched_data *data);
+	void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
+			    struct mptcp_sched_data contexts[]);
 	void *owner;
 };
 
@@ -261,15 +262,4 @@ struct mptcp_sock {
 	char		ca_name[TCP_CA_NAME_MAX];
 } __attribute__((preserve_access_index));
 
-#define _AC(X,Y)	(X##Y)
-#define UL(x)		(_AC(x, UL))
-
-static inline void set_bit(unsigned int nr, volatile unsigned long *addr)
-{
-        unsigned long *p = ((unsigned long *)addr) + (nr / sizeof(unsigned long));
-        unsigned long mask = UL(1) << (nr % sizeof(unsigned long));
-
-        *p  |= mask;
-}
-
 #endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
index e5dc53965642..739f34768664 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
@@ -16,13 +16,10 @@ void BPF_PROG(mptcp_sched_first_release, const struct mptcp_sock *msk)
 {
 }
 
-unsigned long BPF_STRUCT_OPS(bpf_first_get_subflow, const struct mptcp_sock *msk,
-			     bool reinject, struct mptcp_sched_data *data)
+void BPF_STRUCT_OPS(bpf_first_get_subflow, const struct mptcp_sock *msk,
+		    bool reinject, struct mptcp_sched_data contexts[])
 {
-	unsigned long bitmap = 0;
-
-	set_bit(0, &bitmap);
-	return bitmap;
+	contexts[0].is_scheduled = 1;
 }
 
 SEC(".struct_ops")
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
index ef475bd33dd7..c2635fd14b0e 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
@@ -16,18 +16,18 @@ void BPF_PROG(mptcp_sched_rr_release, const struct mptcp_sock *msk)
 {
 }
 
-unsigned long BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
-			     bool reinject, struct mptcp_sched_data *data)
+void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
+		    bool reinject, struct mptcp_sched_data contexts[])
 {
 	unsigned long bitmap = 0;
 	int nr = 0;
 
 	for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
-		if (!msk->last_snd || !data->contexts[i])
+		if (!msk->last_snd || !contexts[i].context)
 			break;
 
-		if (data->contexts[i]->tcp_sock == msk->last_snd) {
-			if (i + 1 == MPTCP_SUBFLOWS_MAX || !data->contexts[i + 1])
+		if (contexts[i].context->tcp_sock == msk->last_snd) {
+			if (i + 1 == MPTCP_SUBFLOWS_MAX || !contexts[i + 1].context)
 				break;
 
 			nr = i + 1;
@@ -35,8 +35,7 @@ unsigned long BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
 		}
 	}
 
-	set_bit(nr, &bitmap);
-	return bitmap;
+	contexts[nr].is_scheduled = 1;
 }
 
 SEC(".struct_ops")
-- 
2.34.1


[-- Attachment #3: 0002-new-api.log --]
[-- Type: text/plain, Size: 7890 bytes --]

#106 mptcp:FAIL
test_base:PASS:test__join_cgroup 0 nsec
test_base:PASS:start_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_tsk:PASS:bpf_map_lookup_elem 0 nsec
verify_tsk:PASS:unexpected invoked count 0 nsec
verify_tsk:PASS:unexpected is_mptcp 0 nsec
test_base:PASS:run_test tcp 0 nsec
test_base:PASS:start_mptcp_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_msk:PASS:invalid token 0 nsec
get_msk_ca_name:PASS:failed to open tcp_congestion_control 0 nsec
get_msk_ca_name:PASS:failed to read ca_name 0 nsec
verify_msk:PASS:bpf_map_lookup_elem 0 nsec
verify_msk:PASS:unexpected invoked count 0 nsec
verify_msk:PASS:unexpected is_mptcp 0 nsec
verify_msk:PASS:unexpected token 0 nsec
verify_msk:PASS:unexpected first 0 nsec
verify_msk:PASS:unexpected ca_name 0 nsec
test_base:PASS:run_test mptcp 0 nsec
#106/1 mptcp/base:OK
test_first:PASS:bpf_first__open_and_load 0 nsec
test_first:PASS:bpf_map__attach_struct_ops 0 nsec
send_data:PASS:pthread_create 0 nsec
server:PASS:send 0 nsec
send_data:PASS:recv 0 nsec
send_data:PASS:pthread_join 0 nsec
#106/2 mptcp/first:OK
libbpf: prog 'bpf_rr_get_subflow': BPF program load failed: Permission denied
libbpf: prog 'bpf_rr_get_subflow': -- BEGIN PROG LOAD LOG --
R1 type=ctx expected=fp
0: R1=ctx(off=0,imm=0) R10=fp0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
0: (b7) r3 = 0                        ; R3_w=0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
1: (79) r2 = *(u64 *)(r1 +16)
func 'get_subflow' arg2 has btf_id 27881 type STRUCT 'mptcp_sched_data'
2: R1=ctx(off=0,imm=0) R2_w=ptr_mptcp_sched_data(off=0,imm=0)
2: (79) r1 = *(u64 *)(r1 +0)
func 'get_subflow' arg0 has btf_id 137236 type STRUCT 'mptcp_sock'
3: R1_w=ptr_mptcp_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
3: (79) r4 = *(u64 *)(r1 +1448)       ; R1_w=ptr_mptcp_sock(off=0,imm=0) R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
4: (15) if r4 == 0x0 goto pc+44       ; R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
5: (79) r5 = *(u64 *)(r2 +0)          ; R2_w=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
6: (15) if r5 == 0x0 goto pc+42       ; R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
7: (b7) r4 = 1                        ; R4_w=1
; if (contexts[i].context->tcp_sock == msk->last_snd) {
8: (79) r1 = *(u64 *)(r1 +1448)       ; R1_w=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
9: (79) r5 = *(u64 *)(r5 +176)        ; R5=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
10: (5d) if r5 != r1 goto pc+8        ; R1=ptr_sock(off=0,imm=0) R5=ptr_sock(off=0,imm=0)
; if (i + 1 == MPTCP_SUBFLOWS_MAX || !contexts[i + 1].context)
11: (bf) r1 = r4                      ; R1_w=1 R4=1
12: (67) r1 <<= 4                     ; R1_w=16
13: (bf) r5 = r2                      ; R2=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_sched_data(off=0,imm=0)
14: (0f) r5 += r1                     ; R1_w=P16 R5_w=ptr_mptcp_sched_data(off=16,imm=0)
15: (79) r1 = *(u64 *)(r5 +0)
access beyond struct mptcp_sched_data at off 16 size 8
processed 16 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
-- END PROG LOAD LOG --
libbpf: failed to load program 'bpf_rr_get_subflow'
libbpf: failed to load object 'mptcp_bpf_rr'
libbpf: failed to load BPF skeleton 'mptcp_bpf_rr': -13
test_rr:FAIL:bpf_rr__open_and_load unexpected error: -13
#106/3 mptcp/rr:FAIL

All error logs:
#106 mptcp:FAIL
test_base:PASS:test__join_cgroup 0 nsec
test_base:PASS:start_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_tsk:PASS:bpf_map_lookup_elem 0 nsec
verify_tsk:PASS:unexpected invoked count 0 nsec
verify_tsk:PASS:unexpected is_mptcp 0 nsec
test_base:PASS:run_test tcp 0 nsec
test_base:PASS:start_mptcp_server 0 nsec
run_test:PASS:skel_open_load 0 nsec
run_test:PASS:skel_attach 0 nsec
run_test:PASS:bpf_program__fd 0 nsec
run_test:PASS:bpf_map__fd 0 nsec
run_test:PASS:bpf_prog_attach 0 nsec
run_test:PASS:connect to fd 0 nsec
verify_msk:PASS:invalid token 0 nsec
get_msk_ca_name:PASS:failed to open tcp_congestion_control 0 nsec
get_msk_ca_name:PASS:failed to read ca_name 0 nsec
verify_msk:PASS:bpf_map_lookup_elem 0 nsec
verify_msk:PASS:unexpected invoked count 0 nsec
verify_msk:PASS:unexpected is_mptcp 0 nsec
verify_msk:PASS:unexpected token 0 nsec
verify_msk:PASS:unexpected first 0 nsec
verify_msk:PASS:unexpected ca_name 0 nsec
test_base:PASS:run_test mptcp 0 nsec
#106/1 mptcp/base:OK
test_first:PASS:bpf_first__open_and_load 0 nsec
test_first:PASS:bpf_map__attach_struct_ops 0 nsec
send_data:PASS:pthread_create 0 nsec
server:PASS:send 0 nsec
send_data:PASS:recv 0 nsec
send_data:PASS:pthread_join 0 nsec
#106/2 mptcp/first:OK
libbpf: prog 'bpf_rr_get_subflow': BPF program load failed: Permission denied
libbpf: prog 'bpf_rr_get_subflow': -- BEGIN PROG LOAD LOG --
R1 type=ctx expected=fp
0: R1=ctx(off=0,imm=0) R10=fp0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
0: (b7) r3 = 0                        ; R3_w=0
; void BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
1: (79) r2 = *(u64 *)(r1 +16)
func 'get_subflow' arg2 has btf_id 27881 type STRUCT 'mptcp_sched_data'
2: R1=ctx(off=0,imm=0) R2_w=ptr_mptcp_sched_data(off=0,imm=0)
2: (79) r1 = *(u64 *)(r1 +0)
func 'get_subflow' arg0 has btf_id 137236 type STRUCT 'mptcp_sock'
3: R1_w=ptr_mptcp_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
3: (79) r4 = *(u64 *)(r1 +1448)       ; R1_w=ptr_mptcp_sock(off=0,imm=0) R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
4: (15) if r4 == 0x0 goto pc+44       ; R4_w=ptr_sock(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
5: (79) r5 = *(u64 *)(r2 +0)          ; R2_w=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
; if (!msk->last_snd || !contexts[i].context)
6: (15) if r5 == 0x0 goto pc+42       ; R5_w=ptr_mptcp_subflow_context(off=0,imm=0)
7: (b7) r4 = 1                        ; R4_w=1
; if (contexts[i].context->tcp_sock == msk->last_snd) {
8: (79) r1 = *(u64 *)(r1 +1448)       ; R1_w=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
9: (79) r5 = *(u64 *)(r5 +176)        ; R5=ptr_sock(off=0,imm=0)
; if (contexts[i].context->tcp_sock == msk->last_snd) {
10: (5d) if r5 != r1 goto pc+8        ; R1=ptr_sock(off=0,imm=0) R5=ptr_sock(off=0,imm=0)
; if (i + 1 == MPTCP_SUBFLOWS_MAX || !contexts[i + 1].context)
11: (bf) r1 = r4                      ; R1_w=1 R4=1
12: (67) r1 <<= 4                     ; R1_w=16
13: (bf) r5 = r2                      ; R2=ptr_mptcp_sched_data(off=0,imm=0) R5_w=ptr_mptcp_sched_data(off=0,imm=0)
14: (0f) r5 += r1                     ; R1_w=P16 R5_w=ptr_mptcp_sched_data(off=16,imm=0)
15: (79) r1 = *(u64 *)(r5 +0)
access beyond struct mptcp_sched_data at off 16 size 8
processed 16 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1
-- END PROG LOAD LOG --
libbpf: failed to load program 'bpf_rr_get_subflow'
libbpf: failed to load object 'mptcp_bpf_rr'
libbpf: failed to load BPF skeleton 'mptcp_bpf_rr': -13
test_rr:FAIL:bpf_rr__open_and_load unexpected error: -13
#106/3 mptcp/rr:FAIL
Summary: 0/2 PASSED, 0 SKIPPED, 1 FAILED

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers"
  2022-05-26 12:18     ` Geliang Tang
@ 2022-05-26 23:48       ` Mat Martineau
  2022-05-27 15:27         ` Geliang Tang
  0 siblings, 1 reply; 14+ messages in thread
From: Mat Martineau @ 2022-05-26 23:48 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Thu, 26 May 2022, Geliang Tang wrote:

> Hi Mat,
>
> On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
>> On Mon, 23 May 2022, Geliang Tang wrote:
>>
>>> Please update the commit log:
>>>
>>> '''
>>> This patch defines two new wrappers mptcp_sched_get_send() and
>>> mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
>>> Use them instead of using mptcp_subflow_get_send() or
>>> mptcp_subflow_get_retrans() directly.
>>>
>>> Set the subflow pointers array in struct mptcp_sched_data before invoking
>>> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>>>
>>> Get the return bitmap of get_subflow() and test which subflow or subflows
>>> are picked by the scheduler.
>>> '''
>>>
>>> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
>>> ---
>>> net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
>>> 1 file changed, 39 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
>>> index 3ceb721e6489..0ef805c489ab 100644
>>> --- a/net/mptcp/sched.c
>>> +++ b/net/mptcp/sched.c
>>> @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
>>> static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>> 				 struct mptcp_sched_data *data)
>>> {
>>> -	data->sock = NULL;
>>> -	data->call_again = 0;
>>> +	struct mptcp_subflow_context *subflow;
>>> +	int i = 0;
>>> +
>>> +	mptcp_for_each_subflow(msk, subflow) {
>>> +		if (i == MPTCP_SUBFLOWS_MAX) {
>>> +			pr_warn_once("too many subflows");
>>> +			break;
>>> +		}
>>> +		data->contexts[i++] = subflow;
>>> +	}
>>> +
>>> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
>>> +		data->contexts[i++] = NULL;
>>>
>>> 	return 0;
>>> }
>>> @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>> {
>>> 	struct mptcp_sched_data data;
>>> +	struct sock *ssk = NULL;
>>> +	unsigned long bitmap;
>>> +	int i;
>>>
>>> 	sock_owned_by_me((struct sock *)msk);
>>>
>>> @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>> 		return mptcp_subflow_get_send(msk);
>>>
>>> 	mptcp_sched_data_init(msk, &data);
>>> -	msk->sched->get_subflow(msk, false, &data);
>>> +	bitmap = msk->sched->get_subflow(msk, false, &data);
>>>
>>> -	msk->last_snd = data.sock;
>>> -	return data.sock;
>>> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
>>> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
>>> +			ssk = data.contexts[i]->tcp_sock;
>>> +			msk->last_snd = ssk;
>>> +			break;
>>> +		}
>>> +	}
>>> +
>>> +	return ssk;
>>
>> The commit that this gets squashed too also ignores call_again, so is this
>> code that just returns the ssk for the first bit in the bitmap also
>> placeholder code?
>
> Yes. Since the redundant scheduler is still under development and there's
> still a lot of work to be done, I plan to support single subflow schedulers
> in this series first. The multiple subflows schedulers will be added later.
>
>>
>>
>> It also seems like correlating the bitmap bits with the data.contexts array
>> makes the bitmap require extra work. What do you think about using an array
>> instead, like:
>>
>> struct mptcp_sched_data {
>>        struct mptcp_subflow_context *context;
>>        bool is_scheduled;
>> };
>>
>> And passing an array of that struct to the BPF code? Then the is_scheduled
>> flag could be set for the corresponding subflow.
>>
>> Do you think that array-based API would be clearer than the bitmap to
>> someone writing a BPF scheduler?
>
> I tried to implement this array-based API, but it's not going well. Array
> parameters are not easily supported in BPF functions. And the write access
> permissions of array members is not easy to allow in BPF. I haven't found
> a solution to these two issues yet. Here are codes and error logs in the
> attachment.
>

Yeah, after looking at your logs and trying a few experiments, I 
definitely agree that array parameters are not well supported by the BPF 
verifier.

It looks like the bpf verifier was inspecting the args for get_subflow in 
mptcp_sched_ops:

void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
 		    struct mptcp_sched_data contexts[]);

and thinking 'contexts' was a pointer to a single struct mptcp_sched_data 
instance, instead of an array. The verifier can't guarantee safe access 
for a variable-length array so that does make some sense.

I tried changing the code to:

void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
                     struct mptcp_sched_data (*contexts)[MPTCP_SUBFLOWS_MAX]);

so the third arg was a "pointer to array of structs, with 
MPTCP_SUBFLOWS_MAX elements in the array". The verifier didn't like that 
either:

"""
func 'get_subflow' arg2 type ARRAY is not a struct
"""

That error message is printed by btf_ctx_access(). It might be possible to 
customize bpf_mptcp_sched_verifier_ops to handle that, but it seems 
complicated.


We could instead use mptcp_sched_data to contain all the parameters 
(including an array of structs):

struct mptcp_sched_subflow {
 	struct mptcp_subflow_context *context;
 	bool is_scheduled;
};

struct mptcp_sched_data {
 	/* Moving the msk and reinject args here is optional, but
 	 * it seemed like a good way to group all of the data
 	 * for a bpf scheduler to use */
 	const struct mptcp_sock *msk;
 	bool reinject;
 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
};

struct mptcp_sched_ops {
 	void (*get_subflow)(struct mptcp_sched_data *data);

 	char			name[MPTCP_SCHED_NAME_MAX];
 	struct module		*owner;
 	struct list_head	list;

 	void (*init)(const struct mptcp_sock *msk);
 	void (*release)(const struct mptcp_sock *msk);
} ____cacheline_aligned_in_smp;

It looks like btf_struct_access() and btf_struct_walk() know how to handle 
an array *inside* a struct, so MPTCP would not need as much custom 
verifier code. This seems like a better fit than my array idea - hopefully 
it's more workable.

Do you think this seems like a reasonable interface for BPF 
scheduler code?


--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers"
  2022-05-26 23:48       ` Mat Martineau
@ 2022-05-27 15:27         ` Geliang Tang
  2022-05-27 20:03           ` Mat Martineau
  0 siblings, 1 reply; 14+ messages in thread
From: Geliang Tang @ 2022-05-27 15:27 UTC (permalink / raw)
  To: Mat Martineau; +Cc: mptcp

Hi Mat,

On Thu, May 26, 2022 at 04:48:41PM -0700, Mat Martineau wrote:
> On Thu, 26 May 2022, Geliang Tang wrote:
> 
> > Hi Mat,
> > 
> > On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
> > > On Mon, 23 May 2022, Geliang Tang wrote:
> > > 
> > > > Please update the commit log:
> > > > 
> > > > '''
> > > > This patch defines two new wrappers mptcp_sched_get_send() and
> > > > mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
> > > > Use them instead of using mptcp_subflow_get_send() or
> > > > mptcp_subflow_get_retrans() directly.
> > > > 
> > > > Set the subflow pointers array in struct mptcp_sched_data before invoking
> > > > get_subflow(), then it can be used in get_subflow() in the BPF contexts.
> > > > 
> > > > Get the return bitmap of get_subflow() and test which subflow or subflows
> > > > are picked by the scheduler.
> > > > '''
> > > > 
> > > > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > > > ---
> > > > net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
> > > > 1 file changed, 39 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> > > > index 3ceb721e6489..0ef805c489ab 100644
> > > > --- a/net/mptcp/sched.c
> > > > +++ b/net/mptcp/sched.c
> > > > @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> > > > static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > > > 				 struct mptcp_sched_data *data)
> > > > {
> > > > -	data->sock = NULL;
> > > > -	data->call_again = 0;
> > > > +	struct mptcp_subflow_context *subflow;
> > > > +	int i = 0;
> > > > +
> > > > +	mptcp_for_each_subflow(msk, subflow) {
> > > > +		if (i == MPTCP_SUBFLOWS_MAX) {
> > > > +			pr_warn_once("too many subflows");
> > > > +			break;
> > > > +		}
> > > > +		data->contexts[i++] = subflow;
> > > > +	}
> > > > +
> > > > +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> > > > +		data->contexts[i++] = NULL;
> > > > 
> > > > 	return 0;
> > > > }
> > > > @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> > > > struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > > > {
> > > > 	struct mptcp_sched_data data;
> > > > +	struct sock *ssk = NULL;
> > > > +	unsigned long bitmap;
> > > > +	int i;
> > > > 
> > > > 	sock_owned_by_me((struct sock *)msk);
> > > > 
> > > > @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> > > > 		return mptcp_subflow_get_send(msk);
> > > > 
> > > > 	mptcp_sched_data_init(msk, &data);
> > > > -	msk->sched->get_subflow(msk, false, &data);
> > > > +	bitmap = msk->sched->get_subflow(msk, false, &data);
> > > > 
> > > > -	msk->last_snd = data.sock;
> > > > -	return data.sock;
> > > > +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > > > +		if (test_bit(i, &bitmap) && data.contexts[i]) {
> > > > +			ssk = data.contexts[i]->tcp_sock;
> > > > +			msk->last_snd = ssk;
> > > > +			break;
> > > > +		}
> > > > +	}
> > > > +
> > > > +	return ssk;
> > > 
> > > The commit that this gets squashed too also ignores call_again, so is this
> > > code that just returns the ssk for the first bit in the bitmap also
> > > placeholder code?
> > 
> > Yes. Since the redundant scheduler is still under development and there's
> > still a lot of work to be done, I plan to support single subflow schedulers
> > in this series first. The multiple subflows schedulers will be added later.
> > 
> > > 
> > > 
> > > It also seems like correlating the bitmap bits with the data.contexts array
> > > makes the bitmap require extra work. What do you think about using an array
> > > instead, like:
> > > 
> > > struct mptcp_sched_data {
> > >        struct mptcp_subflow_context *context;
> > >        bool is_scheduled;
> > > };
> > > 
> > > And passing an array of that struct to the BPF code? Then the is_scheduled
> > > flag could be set for the corresponding subflow.
> > > 
> > > Do you think that array-based API would be clearer than the bitmap to
> > > someone writing a BPF scheduler?
> > 
> > I tried to implement this array-based API, but it's not going well. Array
> > parameters are not easily supported in BPF functions. And the write access
> > permissions of array members is not easy to allow in BPF. I haven't found
> > a solution to these two issues yet. Here are codes and error logs in the
> > attachment.
> > 
> 
> Yeah, after looking at your logs and trying a few experiments, I definitely
> agree that array parameters are not well supported by the BPF verifier.
> 
> It looks like the bpf verifier was inspecting the args for get_subflow in
> mptcp_sched_ops:
> 
> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
> 		    struct mptcp_sched_data contexts[]);
> 
> and thinking 'contexts' was a pointer to a single struct mptcp_sched_data
> instance, instead of an array. The verifier can't guarantee safe access for
> a variable-length array so that does make some sense.
> 
> I tried changing the code to:
> 
> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
>                     struct mptcp_sched_data (*contexts)[MPTCP_SUBFLOWS_MAX]);
> 
> so the third arg was a "pointer to array of structs, with MPTCP_SUBFLOWS_MAX
> elements in the array". The verifier didn't like that either:
> 
> """
> func 'get_subflow' arg2 type ARRAY is not a struct
> """
> 
> That error message is printed by btf_ctx_access(). It might be possible to
> customize bpf_mptcp_sched_verifier_ops to handle that, but it seems
> complicated.
> 
> 
> We could instead use mptcp_sched_data to contain all the parameters
> (including an array of structs):
> 
> struct mptcp_sched_subflow {
> 	struct mptcp_subflow_context *context;
> 	bool is_scheduled;
> };
> 
> struct mptcp_sched_data {
> 	/* Moving the msk and reinject args here is optional, but
> 	 * it seemed like a good way to group all of the data
> 	 * for a bpf scheduler to use */
> 	const struct mptcp_sock *msk;
> 	bool reinject;
> 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
> };
> 
> struct mptcp_sched_ops {
> 	void (*get_subflow)(struct mptcp_sched_data *data);
> 
> 	char			name[MPTCP_SCHED_NAME_MAX];
> 	struct module		*owner;
> 	struct list_head	list;
> 
> 	void (*init)(const struct mptcp_sock *msk);
> 	void (*release)(const struct mptcp_sock *msk);
> } ____cacheline_aligned_in_smp;
> 
> It looks like btf_struct_access() and btf_struct_walk() know how to handle
> an array *inside* a struct, so MPTCP would not need as much custom verifier
> code. This seems like a better fit than my array idea - hopefully it's more
> workable.

It's hard to get the write access to is_scheduled in this case.

If we add another member bitmap in mptcp_sched_data like this:

struct mptcp_sched_subflow {
	struct mptcp_subflow_context *context;
	bool is_scheduled;
};

struct mptcp_sched_data {
	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
	const struct mptcp_sock *msk;
	bool reinject;
	unsigned bitmap;
};

It's easy to calculate the offset in bpf_mptcp_sched_btf_struct_access():

	switch (off) {
	case offsetof(struct mptcp_sched_data, bitmap):
		end = offsetofend(struct mptcp_sched_data, bitmap);
		break;

But it's hard to calculate the offsets of is_scheduled, we need to have
write access for 8 different offsets.

We may calculate them like this:

data->contexts[0].is_scheduled	offset = 0 + sizeof(struct mptcp_subflow_context *)
data->contexts[1].is_scheduled  offset = 1 * sizeof(mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)
data->contexts[2].is_scheduled  ...
data->contexts[3].is_scheduled  ...
data->contexts[4].is_scheduled  ...
data->contexts[5].is_scheduled  ...
data->contexts[6].is_scheduled  ...
data->contexts[7].is_scheduled  offset = 7 * sizeof(mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)

But it doesn't work. I haven't found a solution yet.

Maybe we also need to consider the actual number of subflows, which makes
it more complicated.


If we make the array read-only and just write the bitmap member (or return a
bitmap), we can avoid dealing with these complex offsets.


Anyway, I will continue to solve this write access issue, but I also want
to hear your opinion.

Thanks,
-Geliang

> 
> Do you think this seems like a reasonable interface for BPF scheduler code?
> 
> 
> --
> Mat Martineau
> Intel
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers"
  2022-05-27 15:27         ` Geliang Tang
@ 2022-05-27 20:03           ` Mat Martineau
  0 siblings, 0 replies; 14+ messages in thread
From: Mat Martineau @ 2022-05-27 20:03 UTC (permalink / raw)
  To: Geliang Tang; +Cc: mptcp

On Fri, 27 May 2022, Geliang Tang wrote:

> Hi Mat,
>
> On Thu, May 26, 2022 at 04:48:41PM -0700, Mat Martineau wrote:
>> On Thu, 26 May 2022, Geliang Tang wrote:
>>
>>> Hi Mat,
>>>
>>> On Mon, May 23, 2022 at 06:01:03PM -0700, Mat Martineau wrote:
>>>> On Mon, 23 May 2022, Geliang Tang wrote:
>>>>
>>>>> Please update the commit log:
>>>>>
>>>>> '''
>>>>> This patch defines two new wrappers mptcp_sched_get_send() and
>>>>> mptcp_sched_get_retrans(), invoke get_subflow() of msk->sched in them.
>>>>> Use them instead of using mptcp_subflow_get_send() or
>>>>> mptcp_subflow_get_retrans() directly.
>>>>>
>>>>> Set the subflow pointers array in struct mptcp_sched_data before invoking
>>>>> get_subflow(), then it can be used in get_subflow() in the BPF contexts.
>>>>>
>>>>> Get the return bitmap of get_subflow() and test which subflow or subflows
>>>>> are picked by the scheduler.
>>>>> '''
>>>>>
>>>>> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
>>>>> ---
>>>>> net/mptcp/sched.c | 47 +++++++++++++++++++++++++++++++++++++++--------
>>>>> 1 file changed, 39 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
>>>>> index 3ceb721e6489..0ef805c489ab 100644
>>>>> --- a/net/mptcp/sched.c
>>>>> +++ b/net/mptcp/sched.c
>>>>> @@ -91,8 +91,19 @@ void mptcp_release_sched(struct mptcp_sock *msk)
>>>>> static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>>>> 				 struct mptcp_sched_data *data)
>>>>> {
>>>>> -	data->sock = NULL;
>>>>> -	data->call_again = 0;
>>>>> +	struct mptcp_subflow_context *subflow;
>>>>> +	int i = 0;
>>>>> +
>>>>> +	mptcp_for_each_subflow(msk, subflow) {
>>>>> +		if (i == MPTCP_SUBFLOWS_MAX) {
>>>>> +			pr_warn_once("too many subflows");
>>>>> +			break;
>>>>> +		}
>>>>> +		data->contexts[i++] = subflow;
>>>>> +	}
>>>>> +
>>>>> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
>>>>> +		data->contexts[i] = NULL;
>>>>>
>>>>> 	return 0;
>>>>> }
>>>>> @@ -100,6 +111,9 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
>>>>> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>>>> {
>>>>> 	struct mptcp_sched_data data;
>>>>> +	struct sock *ssk = NULL;
>>>>> +	unsigned long bitmap;
>>>>> +	int i;
>>>>>
>>>>> 	sock_owned_by_me((struct sock *)msk);
>>>>>
>>>>> @@ -114,15 +128,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
>>>>> 		return mptcp_subflow_get_send(msk);
>>>>>
>>>>> 	mptcp_sched_data_init(msk, &data);
>>>>> -	msk->sched->get_subflow(msk, false, &data);
>>>>> +	bitmap = msk->sched->get_subflow(msk, false, &data);
>>>>>
>>>>> -	msk->last_snd = data.sock;
>>>>> -	return data.sock;
>>>>> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
>>>>> +		if (test_bit(i, &bitmap) && data.contexts[i]) {
>>>>> +			ssk = data.contexts[i]->tcp_sock;
>>>>> +			msk->last_snd = ssk;
>>>>> +			break;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +	return ssk;
>>>>
>>>> The commit that this gets squashed to also ignores call_again, so is this
>>>> code that just returns the ssk for the first bit in the bitmap also
>>>> placeholder code?
>>>
>>> Yes. Since the redundant scheduler is still under development and there's
>>> still a lot of work to be done, I plan to support single subflow schedulers
>>> in this series first. The multiple subflows schedulers will be added later.
>>>
>>>>
>>>>
>>>> It also seems like correlating the bitmap bits with the data.contexts array
>>>> makes the bitmap require extra work. What do you think about using an array
>>>> instead, like:
>>>>
>>>> struct mptcp_sched_data {
>>>>        struct mptcp_subflow_context *context;
>>>>        bool is_scheduled;
>>>> };
>>>>
>>>> And passing an array of that struct to the BPF code? Then the is_scheduled
>>>> flag could be set for the corresponding subflow.
>>>>
>>>> Do you think that array-based API would be clearer than the bitmap to
>>>> someone writing a BPF scheduler?
>>>
>>> I tried to implement this array-based API, but it's not going well. Array
>>> parameters are not easily supported in BPF functions. And the write access
>>> permissions of array members is not easy to allow in BPF. I haven't found
>>> a solution to these two issues yet. Here are codes and error logs in the
>>> attachment.
>>>
>>
>> Yeah, after looking at your logs and trying a few experiments, I definitely
>> agree that array parameters are not well supported by the BPF verifier.
>>
>> It looks like the bpf verifier was inspecting the args for get_subflow in
>> mptcp_sched_ops:
>>
>> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
>> 		    struct mptcp_sched_data contexts[]);
>>
>> and thinking 'contexts' was a pointer to a single struct mptcp_sched_data
>> instance, instead of an array. The verifier can't guarantee safe access for
>> a variable-length array so that does make some sense.
>>
>> I tried changing the code to:
>>
>> void (*get_subflow)(const struct mptcp_sock *msk, bool reinject,
>>                     struct mptcp_sched_data (*contexts)[MPTCP_SUBFLOWS_MAX]);
>>
>> so the third arg was a "pointer to array of structs, with MPTCP_SUBFLOWS_MAX
>> elements in the array". The verifier didn't like that either:
>>
>> """
>> func 'get_subflow' arg2 type ARRAY is not a struct
>> """
>>
>> That error message is printed by btf_ctx_access(). It might be possible to
>> customize bpf_mptcp_sched_verifier_ops to handle that, but it seems
>> complicated.
>>
>>
>> We could instead use mptcp_sched_data to contain all the parameters
>> (including an array of structs):
>>
>> struct mptcp_sched_subflow {
>> 	struct mptcp_subflow_context *context;
>> 	bool is_scheduled;
>> };
>>
>> struct mptcp_sched_data {
>> 	/* Moving the msk and reinject args here is optional, but
>> 	 * it seemed like a good way to group all of the data
>> 	 * for a bpf scheduler to use */
>> 	const struct mptcp_sock *msk;
>> 	bool reinject;
>> 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
>> };
>>
>> struct mptcp_sched_ops {
>> 	void (*get_subflow)(struct mptcp_sched_data *data);
>>
>> 	char			name[MPTCP_SCHED_NAME_MAX];
>> 	struct module		*owner;
>> 	struct list_head	list;
>>
>> 	void (*init)(const struct mptcp_sock *msk);
>> 	void (*release)(const struct mptcp_sock *msk);
>> } ____cacheline_aligned_in_smp;
>>
>> It looks like btf_struct_access() and btf_struct_walk() know how to handle
>> an array *inside* a struct, so MPTCP would not need as much custom verifier
>> code. This seems like a better fit than my array idea - hopefully it's more
>> workable.
>
> It's hard to get the write access to is_scheduled in this case.
>
> If we add another member bitmap in mptcp_sched_data like this:
>
> struct mptcp_sched_subflow {
> 	struct mptcp_subflow_context *context;
> 	bool is_scheduled;
> };
>
> struct mptcp_sched_data {
> 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
> 	const struct mptcp_sock *msk;
> 	bool reinject;
> 	unsigned bitmap;
> };
>
> It's easy to calculate the offset in bpf_mptcp_sched_btf_struct_access():
>
> 	switch (off) {
> 	case offsetof(struct mptcp_sched_data, bitmap):
> 		end = offsetofend(struct mptcp_sched_data, bitmap);
> 		break;
>
> But it's hard to calculate the offsets of is_scheduled, we need to have
> write access for 8 different offsets.
>
> We may calculate them like this:
>
> data->contexts[0].is_scheduled	offset = 0 + sizeof(struct mptcp_subflow_context *)
> data->contexts[1].is_scheduled  offset = 1 * sizeof(struct mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)
> data->contexts[2].is_scheduled  ...
> data->contexts[3].is_scheduled  ...
> data->contexts[4].is_scheduled  ...
> data->contexts[5].is_scheduled  ...
> data->contexts[6].is_scheduled  ...
> data->contexts[7].is_scheduled  offset = 7 * sizeof(struct mptcp_sched_subflow) + sizeof(struct mptcp_subflow_context *)
>
> But it doesn't work. I haven't found a solution yet.
>
> Maybe we also need to consider the actual number of subflows, which makes
> it more complicated.
>
>
> If we make the array read only, just write the bitmap member or return a
> bitmap, we can avoid dealing with these complex offsets.
>
>
> Anyway, I will continue to solve this write access issue, but I also want
> to hear your opinion.


I think using a loop to evaluate possible offsets within the array is not 
too complex.


Going back to this layout:

struct mptcp_sched_data {
 	const struct mptcp_sock *msk;
 	bool reinject;
 	struct mptcp_sched_subflow subflows[MPTCP_SUBFLOWS_MAX];
};

The offset limits of is_scheduled within each mptcp_sched_subflow are:

nested_offset = offsetof(struct mptcp_sched_subflow, is_scheduled);
nested_offset_end = offsetofend(struct mptcp_sched_subflow, is_scheduled);

and the starting offset of each element in the subflows[] array is:

static size_t subflow_offset(int i)
{
 	return offsetof(struct mptcp_sched_data, subflows) + i * sizeof(struct mptcp_sched_subflow);
}


Then I think the offset 'off' can be checked for write access in a loop:

for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
 	size_t soff = subflow_offset(i);
 	if (off == soff + nested_offset && off + size <= soff + nested_offset_end)
 		return NOT_INIT; /* offsets match up with is_scheduled */
}


--
Mat Martineau
Intel

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2022-05-27 20:03 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-23 11:33 [PATCH mptcp-next v2 0/5] BPF packet scheduler Geliang Tang
2022-05-23 11:33 ` [PATCH mptcp-next v2 1/5] Squash to "mptcp: add struct mptcp_sched_ops" Geliang Tang
2022-05-23 11:33 ` [PATCH mptcp-next v2 2/5] Squash to "mptcp: add sched in mptcp_sock" Geliang Tang
2022-05-23 11:33 ` [PATCH mptcp-next v2 3/5] Squash to "mptcp: add get_subflow wrappers" Geliang Tang
2022-05-24  1:01   ` Mat Martineau
2022-05-26 12:18     ` Geliang Tang
2022-05-26 23:48       ` Mat Martineau
2022-05-27 15:27         ` Geliang Tang
2022-05-27 20:03           ` Mat Martineau
2022-05-23 11:33 ` [PATCH mptcp-next v2 4/5] Squash to "mptcp: add bpf_mptcp_sched_ops" Geliang Tang
2022-05-23 11:33 ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Geliang Tang
2022-05-23 11:43   ` Squash to "selftests/bpf: add bpf_first scheduler": Build Failure MPTCP CI
2022-05-23 13:33   ` Squash to "selftests/bpf: add bpf_first scheduler": Tests Results MPTCP CI
2022-05-24  1:02   ` [PATCH mptcp-next v2 5/5] Squash to "selftests/bpf: add bpf_first scheduler" Mat Martineau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.