All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt
@ 2021-12-09  9:02 Tony Lu
  2021-12-09  9:02 ` [PATCH bpf-next 1/2] bpf: Use switch statement in _bpf_setsockopt Tony Lu
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Tony Lu @ 2021-12-09  9:02 UTC (permalink / raw)
  To: ast, daniel, andrii; +Cc: bpf, netdev

This patch set introduces a new option TCP_ULP for bpf_{set,get}sockopt
helper. The bpf prog can set and get TCP_ULP sock option on demand.

With this, a bpf prog can set TCP_ULP according to its own policy at
socket creation or at other socket hook points. For example, the bpf
prog can control which sockets should use the tls or smc (WIP) ULP
modules without modifying the applications.

Patch 1 replaces if statement with switch to make it easy to extend.

Patch 2 introduces TCP_ULP sock option.

Tony Lu (2):
  bpf: Use switch statement in _bpf_setsockopt
  bpf: Introduce TCP_ULP option for bpf_{set,get}sockopt

 include/uapi/linux/bpf.h       |   3 +-
 net/core/filter.c              | 180 ++++++++++++++++++---------------
 tools/include/uapi/linux/bpf.h |   3 +-
 3 files changed, 104 insertions(+), 82 deletions(-)

-- 
2.32.0.3.g01195cf9f


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH bpf-next 1/2] bpf: Use switch statement in _bpf_setsockopt
  2021-12-09  9:02 [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
@ 2021-12-09  9:02 ` Tony Lu
  2021-12-09  9:02 ` [PATCH bpf-next 2/2] bpf: Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
  2021-12-09 19:27 ` [PATCH bpf-next 0/2] " John Fastabend
  2 siblings, 0 replies; 5+ messages in thread
From: Tony Lu @ 2021-12-09  9:02 UTC (permalink / raw)
  To: ast, daniel, andrii; +Cc: bpf, netdev

This replaces the if/else in _bpf_setsockopt() with a switch statement
to make it easier to add more options.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Reviewed-by: Qiao Ma <mqaio@linux.alibaba.com>
---
 net/core/filter.c | 164 ++++++++++++++++++++++++----------------------
 1 file changed, 84 insertions(+), 80 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index fe27c91e3758..1e6b68ff13db 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4857,95 +4857,99 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 #endif
 	} else if (level == SOL_TCP &&
 		   sk->sk_prot->setsockopt == tcp_setsockopt) {
-		if (optname == TCP_CONGESTION) {
+		struct inet_connection_sock *icsk = inet_csk(sk);
+		struct tcp_sock *tp = tcp_sk(sk);
+		unsigned long timeout;
+
+		switch (optname) {
+		case TCP_CONGESTION: {
 			char name[TCP_CA_NAME_MAX];
 
 			strncpy(name, optval, min_t(long, optlen,
 						    TCP_CA_NAME_MAX-1));
 			name[TCP_CA_NAME_MAX-1] = 0;
-			ret = tcp_set_congestion_control(sk, name, false, true);
-		} else {
-			struct inet_connection_sock *icsk = inet_csk(sk);
-			struct tcp_sock *tp = tcp_sk(sk);
-			unsigned long timeout;
+			return tcp_set_congestion_control(sk, name, false, true);
+		}
+		default:
+			break;
+		}
 
-			if (optlen != sizeof(int))
-				return -EINVAL;
+		if (optlen != sizeof(int))
+			return -EINVAL;
 
-			val = *((int *)optval);
-			/* Only some options are supported */
-			switch (optname) {
-			case TCP_BPF_IW:
-				if (val <= 0 || tp->data_segs_out > tp->syn_data)
-					ret = -EINVAL;
-				else
-					tp->snd_cwnd = val;
-				break;
-			case TCP_BPF_SNDCWND_CLAMP:
-				if (val <= 0) {
-					ret = -EINVAL;
-				} else {
-					tp->snd_cwnd_clamp = val;
-					tp->snd_ssthresh = val;
-				}
-				break;
-			case TCP_BPF_DELACK_MAX:
-				timeout = usecs_to_jiffies(val);
-				if (timeout > TCP_DELACK_MAX ||
-				    timeout < TCP_TIMEOUT_MIN)
-					return -EINVAL;
-				inet_csk(sk)->icsk_delack_max = timeout;
-				break;
-			case TCP_BPF_RTO_MIN:
-				timeout = usecs_to_jiffies(val);
-				if (timeout > TCP_RTO_MIN ||
-				    timeout < TCP_TIMEOUT_MIN)
-					return -EINVAL;
-				inet_csk(sk)->icsk_rto_min = timeout;
-				break;
-			case TCP_SAVE_SYN:
-				if (val < 0 || val > 1)
-					ret = -EINVAL;
-				else
-					tp->save_syn = val;
-				break;
-			case TCP_KEEPIDLE:
-				ret = tcp_sock_set_keepidle_locked(sk, val);
-				break;
-			case TCP_KEEPINTVL:
-				if (val < 1 || val > MAX_TCP_KEEPINTVL)
-					ret = -EINVAL;
-				else
-					tp->keepalive_intvl = val * HZ;
-				break;
-			case TCP_KEEPCNT:
-				if (val < 1 || val > MAX_TCP_KEEPCNT)
-					ret = -EINVAL;
-				else
-					tp->keepalive_probes = val;
-				break;
-			case TCP_SYNCNT:
-				if (val < 1 || val > MAX_TCP_SYNCNT)
-					ret = -EINVAL;
-				else
-					icsk->icsk_syn_retries = val;
-				break;
-			case TCP_USER_TIMEOUT:
-				if (val < 0)
-					ret = -EINVAL;
-				else
-					icsk->icsk_user_timeout = val;
-				break;
-			case TCP_NOTSENT_LOWAT:
-				tp->notsent_lowat = val;
-				sk->sk_write_space(sk);
-				break;
-			case TCP_WINDOW_CLAMP:
-				ret = tcp_set_window_clamp(sk, val);
-				break;
-			default:
+		val = *((int *)optval);
+		/* Only some options are supported */
+		switch (optname) {
+		case TCP_BPF_IW:
+			if (val <= 0 || tp->data_segs_out > tp->syn_data)
+				ret = -EINVAL;
+			else
+				tp->snd_cwnd = val;
+			break;
+		case TCP_BPF_SNDCWND_CLAMP:
+			if (val <= 0) {
 				ret = -EINVAL;
+			} else {
+				tp->snd_cwnd_clamp = val;
+				tp->snd_ssthresh = val;
 			}
+			break;
+		case TCP_BPF_DELACK_MAX:
+			timeout = usecs_to_jiffies(val);
+			if (timeout > TCP_DELACK_MAX ||
+			    timeout < TCP_TIMEOUT_MIN)
+				return -EINVAL;
+			inet_csk(sk)->icsk_delack_max = timeout;
+			break;
+		case TCP_BPF_RTO_MIN:
+			timeout = usecs_to_jiffies(val);
+			if (timeout > TCP_RTO_MIN ||
+			    timeout < TCP_TIMEOUT_MIN)
+				return -EINVAL;
+			inet_csk(sk)->icsk_rto_min = timeout;
+			break;
+		case TCP_SAVE_SYN:
+			if (val < 0 || val > 1)
+				ret = -EINVAL;
+			else
+				tp->save_syn = val;
+			break;
+		case TCP_KEEPIDLE:
+			ret = tcp_sock_set_keepidle_locked(sk, val);
+			break;
+		case TCP_KEEPINTVL:
+			if (val < 1 || val > MAX_TCP_KEEPINTVL)
+				ret = -EINVAL;
+			else
+				tp->keepalive_intvl = val * HZ;
+			break;
+		case TCP_KEEPCNT:
+			if (val < 1 || val > MAX_TCP_KEEPCNT)
+				ret = -EINVAL;
+			else
+				tp->keepalive_probes = val;
+			break;
+		case TCP_SYNCNT:
+			if (val < 1 || val > MAX_TCP_SYNCNT)
+				ret = -EINVAL;
+			else
+				icsk->icsk_syn_retries = val;
+			break;
+		case TCP_USER_TIMEOUT:
+			if (val < 0)
+				ret = -EINVAL;
+			else
+				icsk->icsk_user_timeout = val;
+			break;
+		case TCP_NOTSENT_LOWAT:
+			tp->notsent_lowat = val;
+			sk->sk_write_space(sk);
+			break;
+		case TCP_WINDOW_CLAMP:
+			ret = tcp_set_window_clamp(sk, val);
+			break;
+		default:
+			ret = -EINVAL;
 		}
 #endif
 	} else {
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH bpf-next 2/2] bpf: Introduce TCP_ULP option for bpf_{set,get}sockopt
  2021-12-09  9:02 [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
  2021-12-09  9:02 ` [PATCH bpf-next 1/2] bpf: Use switch statement in _bpf_setsockopt Tony Lu
@ 2021-12-09  9:02 ` Tony Lu
  2021-12-09 19:27 ` [PATCH bpf-next 0/2] " John Fastabend
  2 siblings, 0 replies; 5+ messages in thread
From: Tony Lu @ 2021-12-09  9:02 UTC (permalink / raw)
  To: ast, daniel, andrii; +Cc: bpf, netdev

This introduces a new option, TCP_ULP, for the bpf_{set,get}sockopt
helpers. It lets a bpf prog set and get the TCP_ULP sockopt on demand.

People who want to set the ULP according to their own policy at socket
creation or at other hook points can attach a prog to
BPF_CGROUP_INET_SOCK_CREATE or other attach types, and have it call
bpf_setsockopt(.., IPPROTO_TCP, TCP_ULP) based on user-defined rules to
set the socket's ULP. For example, the bpf prog can control which
sockets should use the tls ULP module without intrusively modifying the
applications.

This makes it flexible to control ULP policy from a BPF prog.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Reviewed-by: Qiao Ma <mqaio@linux.alibaba.com>
---
 include/uapi/linux/bpf.h       |  3 ++-
 net/core/filter.c              | 16 ++++++++++++++++
 tools/include/uapi/linux/bpf.h |  3 ++-
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c26871263f1f..7372283f92be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2505,7 +2505,8 @@ union bpf_attr {
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
+ *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
+ *		  **TCP_ULP**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
diff --git a/net/core/filter.c b/net/core/filter.c
index 1e6b68ff13db..88d7f047f9c0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4870,6 +4870,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 			name[TCP_CA_NAME_MAX-1] = 0;
 			return tcp_set_congestion_control(sk, name, false, true);
 		}
+		case TCP_ULP: {
+			char name[TCP_ULP_NAME_MAX];
+
+			strncpy(name, optval, min_t(long, optlen,
+						    TCP_ULP_NAME_MAX - 1));
+			name[TCP_ULP_NAME_MAX - 1] = 0;
+			return tcp_set_ulp(sk, name);
+		}
 		default:
 			break;
 		}
@@ -5000,6 +5008,14 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
 			strncpy(optval, icsk->icsk_ca_ops->name, optlen);
 			optval[optlen - 1] = 0;
 			break;
+		case TCP_ULP:
+			icsk = inet_csk(sk);
+
+			if (!icsk->icsk_ulp_ops || optlen <= 1)
+				goto err_clear;
+			strncpy(optval, icsk->icsk_ulp_ops->name, optlen);
+			optval[optlen - 1] = 0;
+			break;
 		case TCP_SAVED_SYN:
 			tp = tcp_sk(sk);
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c26871263f1f..7372283f92be 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2505,7 +2505,8 @@ union bpf_attr {
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
  * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
  * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
- *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**.
+ *		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
+ *		  **TCP_ULP**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
-- 
2.32.0.3.g01195cf9f


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt
  2021-12-09  9:02 [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
  2021-12-09  9:02 ` [PATCH bpf-next 1/2] bpf: Use switch statement in _bpf_setsockopt Tony Lu
  2021-12-09  9:02 ` [PATCH bpf-next 2/2] bpf: Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
@ 2021-12-09 19:27 ` John Fastabend
  2021-12-10  2:54   ` Tony Lu
  2 siblings, 1 reply; 5+ messages in thread
From: John Fastabend @ 2021-12-09 19:27 UTC (permalink / raw)
  To: Tony Lu, ast, daniel, andrii; +Cc: bpf, netdev

Tony Lu wrote:
> This patch set introduces a new option TCP_ULP for bpf_{set,get}sockopt
> helper. The bpf prog can set and get TCP_ULP sock option on demand.
> 
> With this, the bpf prog can set TCP_ULP based on strategies when socket
> create or other's socket hook point. For example, the bpf prog can
> control which socket should use tls or smc (WIP) ULP modules without
> modifying the applications.
> 
> Patch 1 replaces if statement with switch to make it easy to extend.
> 
> Patch 2 introduces TCP_ULP sock option.

Can you be a bit more specific on what ULP you are going to load on
demand here and how that would work? For TLS I can't see how this will
work, please elaborate. Because the user space side (e.g. openssl) behaves
differently if running in kTLS vs uTLS modes I don't think you can
from kernel side just flip it on? I'm a bit intrigued though on what
might happen if we did do this on an active socket, but it seems it
wouldn't be normal TLS with handshake and keys at that point? I'm
not sure we need to block it from happening, but struggling to see
how it's useful at the moment.

The smc case looks promising, but for that we need to get the order
correct and merge smc first and then this series.

Also this will need selftests.

Thanks,
John

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt
  2021-12-09 19:27 ` [PATCH bpf-next 0/2] " John Fastabend
@ 2021-12-10  2:54   ` Tony Lu
  0 siblings, 0 replies; 5+ messages in thread
From: Tony Lu @ 2021-12-10  2:54 UTC (permalink / raw)
  To: John Fastabend; +Cc: ast, daniel, andrii, bpf, netdev

On Thu, Dec 09, 2021 at 11:27:41AM -0800, John Fastabend wrote:
> Tony Lu wrote:
> > This patch set introduces a new option TCP_ULP for bpf_{set,get}sockopt
> > helper. The bpf prog can set and get TCP_ULP sock option on demand.
> > 
> > With this, the bpf prog can set TCP_ULP based on strategies when socket
> > create or other's socket hook point. For example, the bpf prog can
> > control which socket should use tls or smc (WIP) ULP modules without
> > modifying the applications.
> > 
> > Patch 1 replaces if statement with switch to make it easy to extend.
> > 
> > Patch 2 introduces TCP_ULP sock option.
> 
> Can you be a bit more specific on what ULP you are going to load on
> demand here and how that would work? For TLS I can't see how this will
> work, please elaborate. Because the user space side (e.g. openssl) behaves
> differently if running in kTLS vs uTLS modes I don't think you can
> from kernel side just flip it on? I'm a bit intrigued though on what
> might happen if we do did do this on an active socket, but seems it
> wouldn't be normal TLS with handshake and keys at that point? I'm
> not sure we need to block it from happening, but struggling to see
> how its useful at the moment.
> 
> The smc case looks promising, but for that we need to get the order
> correct and merge smc first and then this series.

Yep, we are developing a set of patches that use smc for transparent
replacement. smc is able to stay compatible with TCP, so applications
can be switched over to smc without side effects.
In most cases, it is impossible to modify the compiled application
binary or inject into applications' containers with LD_PRELOAD. So we
are using the smc ULP to replace TCP with smc at socket creation.

These patches will be sent out soon. I will send them after smc's
patches. Thank you.

> 
> Also this will need a selftests.

I will fix it.

> 
> Thanks,
> John

Thanks,
Tony Lu

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-12-10  2:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-09  9:02 [PATCH bpf-next 0/2] Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
2021-12-09  9:02 ` [PATCH bpf-next 1/2] bpf: Use switch statement in _bpf_setsockopt Tony Lu
2021-12-09  9:02 ` [PATCH bpf-next 2/2] bpf: Introduce TCP_ULP option for bpf_{set,get}sockopt Tony Lu
2021-12-09 19:27 ` [PATCH bpf-next 0/2] " John Fastabend
2021-12-10  2:54   ` Tony Lu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.