All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cong Wang <xiyou.wangcong@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, duanxiongchun@bytedance.com,
	wangdongdong.6@bytedance.com, jiang.wang@bytedance.com,
	Cong Wang <cong.wang@bytedance.com>,
	John Fastabend <john.fastabend@gmail.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Jakub Sitnicki <jakub@cloudflare.com>,
	Lorenz Bauer <lmb@cloudflare.com>
Subject: [Patch bpf-next 06/19] sock: introduce sk_prot->update_proto()
Date: Tue,  2 Feb 2021 20:16:23 -0800	[thread overview]
Message-ID: <20210203041636.38555-7-xiyou.wangcong@gmail.com> (raw)
In-Reply-To: <20210203041636.38555-1-xiyou.wangcong@gmail.com>

From: Cong Wang <cong.wang@bytedance.com>

Currently sockmap calls into each protocol to update the struct
proto and replace it. This certainly won't work when the protocol
is implemented as a module, for example, AF_UNIX.

Introduce a new ops sk->sk_prot->update_proto(), so each protocol
can implement its own way to replace the struct proto.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/linux/skmsg.h | 18 +++---------------
 include/net/sock.h    |  3 +++
 include/net/tcp.h     |  2 +-
 include/net/udp.h     |  2 +-
 net/core/sock_map.c   | 22 +++-------------------
 net/ipv4/tcp_bpf.c    | 20 +++++++++++++++++---
 net/ipv4/tcp_ipv4.c   |  3 +++
 net/ipv4/udp.c        |  3 +++
 net/ipv4/udp_bpf.c    | 14 ++++++++++++--
 net/ipv6/tcp_ipv6.c   |  3 +++
 net/ipv6/udp.c        |  3 +++
 11 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index cb79b1afa556..cb94d0f89c08 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -97,6 +97,7 @@ struct sk_psock {
 	void (*saved_close)(struct sock *sk, long timeout);
 	void (*saved_write_space)(struct sock *sk);
 	void (*saved_data_ready)(struct sock *sk);
+	int  (*saved_update_proto)(struct sock *sk, bool restore);
 	struct proto			*sk_proto;
 	struct sk_psock_work_state	work_state;
 	struct work_struct		work;
@@ -335,25 +336,12 @@ static inline void sk_psock_cork_free(struct sk_psock *psock)
 	}
 }
 
-static inline void sk_psock_update_proto(struct sock *sk,
-					 struct sk_psock *psock,
-					 struct proto *ops)
-{
-	/* Pairs with lockless read in sk_clone_lock() */
-	WRITE_ONCE(sk->sk_prot, ops);
-}
-
 static inline void sk_psock_restore_proto(struct sock *sk,
 					  struct sk_psock *psock)
 {
 	sk->sk_prot->unhash = psock->saved_unhash;
-	if (inet_csk_has_ulp(sk)) {
-		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
-	} else {
-		sk->sk_write_space = psock->saved_write_space;
-		/* Pairs with lockless read in sk_clone_lock() */
-		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
-	}
+	if (psock->saved_update_proto)
+		psock->saved_update_proto(sk, true);
 }
 
 static inline void sk_psock_set_state(struct sk_psock *psock,
diff --git a/include/net/sock.h b/include/net/sock.h
index 7644ea64a376..e474a9202be8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1184,6 +1184,9 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
+#ifdef CONFIG_BPF_SOCK_MAP
+	int			(*update_proto)(struct sock *sk, bool restore);
+#endif
 
 	/* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f7591768525d..c2fff35859b6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2183,7 +2183,7 @@ struct sk_msg;
 struct sk_psock;
 
 #ifdef CONFIG_BPF_SOCK_MAP
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int tcp_bpf_update_proto(struct sock *sk, bool restore);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
 #else
 static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
diff --git a/include/net/udp.h b/include/net/udp.h
index 0ff921e6b866..e3e5dfc8e0f0 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -513,7 +513,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 
 #ifdef CONFIG_BPF_SOCK_MAP
 struct sk_psock;
-struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+int udp_bpf_update_proto(struct sock *sk, bool restore);
 #endif /* CONFIG_BPF_SOCK_MAP */
 
 #endif	/* _UDP_H */
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index f827f1ecefcc..255067e5c73a 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -181,26 +181,10 @@ static void sock_map_unref(struct sock *sk, void *link_raw)
 
 static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
 {
-	struct proto *prot;
-
-	switch (sk->sk_type) {
-	case SOCK_STREAM:
-		prot = tcp_bpf_get_proto(sk, psock);
-		break;
-
-	case SOCK_DGRAM:
-		prot = udp_bpf_get_proto(sk, psock);
-		break;
-
-	default:
+	if (!sk->sk_prot->update_proto)
 		return -EINVAL;
-	}
-
-	if (IS_ERR(prot))
-		return PTR_ERR(prot);
-
-	sk_psock_update_proto(sk, psock, prot);
-	return 0;
+	psock->saved_update_proto = sk->sk_prot->update_proto;
+	return sk->sk_prot->update_proto(sk, false);
 }
 
 static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 2252f1d90676..16e00802ccba 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -601,19 +601,33 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
 	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
 }
 
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+int tcp_bpf_update_proto(struct sock *sk, bool restore)
 {
+	struct sk_psock *psock = sk_psock(sk);
 	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
 	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
 
+	if (restore) {
+		if (inet_csk_has_ulp(sk)) {
+			tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+		} else {
+			sk->sk_write_space = psock->saved_write_space;
+			/* Pairs with lockless read in sk_clone_lock() */
+			WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+		}
+		return 0;
+	}
+
 	if (sk->sk_family == AF_INET6) {
 		if (tcp_bpf_assert_proto_ops(psock->sk_proto))
-			return ERR_PTR(-EINVAL);
+			return -EINVAL;
 
 		tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
 	}
 
-	return &tcp_bpf_prots[family][config];
+	/* Pairs with lockless read in sk_clone_lock() */
+	WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
+	return 0;
 }
 
 /* If a child got cloned from a listening socket that had tcp_bpf
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 62b6fd385a47..d7c30b762cc3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2803,6 +2803,9 @@ struct proto tcp_prot = {
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
+#ifdef CONFIG_BPF_SOCK_MAP
+	.update_proto		= tcp_bpf_update_proto,
+#endif
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
 	.leave_memory_pressure	= tcp_leave_memory_pressure,
 	.stream_memory_free	= tcp_stream_memory_free,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c67e483fce41..84ab4f2e874a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2843,6 +2843,9 @@ struct proto udp_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v4_rehash,
 	.get_port		= udp_v4_get_port,
+#ifdef CONFIG_BPF_SOCK_MAP
+	.update_proto		= udp_bpf_update_proto,
+#endif
 	.memory_allocated	= &udp_memory_allocated,
 	.sysctl_mem		= sysctl_udp_mem,
 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a94791efc1a..595836088e85 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -41,12 +41,22 @@ static int __init udp_bpf_v4_build_proto(void)
 }
 core_initcall(udp_bpf_v4_build_proto);
 
-struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+int udp_bpf_update_proto(struct sock *sk, bool restore)
 {
 	int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
+	struct sk_psock *psock = sk_psock(sk);
+
+	if (restore) {
+		sk->sk_write_space = psock->saved_write_space;
+		/* Pairs with lockless read in sk_clone_lock() */
+		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+		return 0;
+	}
 
 	if (sk->sk_family == AF_INET6)
 		udp_bpf_check_v6_needs_rebuild(psock->sk_proto);
 
-	return &udp_bpf_prots[family];
+	/* Pairs with lockless read in sk_clone_lock() */
+	WRITE_ONCE(sk->sk_prot, &udp_bpf_prots[family]);
+	return 0;
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8539715ff035..77b11799a3fe 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2131,6 +2131,9 @@ struct proto tcpv6_prot = {
 	.hash			= inet6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
+#ifdef CONFIG_BPF_SOCK_MAP
+	.update_proto		= tcp_bpf_update_proto,
+#endif
 	.enter_memory_pressure	= tcp_enter_memory_pressure,
 	.leave_memory_pressure	= tcp_leave_memory_pressure,
 	.stream_memory_free	= tcp_stream_memory_free,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a02ac875a923..66ebdfc83c95 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1711,6 +1711,9 @@ struct proto udpv6_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v6_rehash,
 	.get_port		= udp_v6_get_port,
+#ifdef CONFIG_BPF_SOCK_MAP
+	.update_proto		= udp_bpf_update_proto,
+#endif
 	.memory_allocated	= &udp_memory_allocated,
 	.sysctl_mem		= sysctl_udp_mem,
 	.sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
-- 
2.25.1


  parent reply	other threads:[~2021-02-03  4:20 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-03  4:16 [Patch bpf-next 00/19] sock_map: add non-TCP and cross-protocol support Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 01/19] bpf: rename BPF_STREAM_PARSER to BPF_SOCK_MAP Cong Wang
2021-02-05 10:32   ` Jakub Sitnicki
2021-02-09  1:40     ` Cong Wang
2021-02-08  8:21   ` John Fastabend
2021-02-08  9:50     ` Lorenz Bauer
2021-02-09  1:45     ` Cong Wang
2021-02-09  6:48       ` John Fastabend
2021-02-03  4:16 ` [Patch bpf-next 02/19] skmsg: get rid of struct sk_psock_parser Cong Wang
2021-02-05 11:25   ` Jakub Sitnicki
2021-02-08  8:39     ` John Fastabend
2021-02-09  0:19       ` Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 03/19] skmsg: use skb ext instead of TCP_SKB_CB Cong Wang
2021-02-05 22:09   ` Jakub Sitnicki
2021-02-08 18:56     ` Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 04/19] sock_map: rename skb_parser and skb_verdict Cong Wang
2021-02-08  8:27   ` John Fastabend
2021-02-03  4:16 ` [Patch bpf-next 05/19] sock_map: introduce BPF_SK_SKB_VERDICT Cong Wang
2021-02-08  8:31   ` John Fastabend
2021-02-03  4:16 ` Cong Wang [this message]
2021-02-03  4:16 ` [Patch bpf-next 07/19] udp: implement ->sendmsg_locked() Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 08/19] udp: implement ->read_sock() for sockmap Cong Wang
2021-02-08  9:48   ` Lorenz Bauer
2021-02-09  1:35     ` Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 09/19] udp: add ->read_sock() and ->sendmsg_locked() to ipv6 Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 10/19] af_unix: implement ->sendmsg_locked for dgram socket Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 11/19] af_unix: implement ->read_sock() for sockmap Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 12/19] af_unix: implement ->update_proto() Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 13/19] af_unix: set TCP_ESTABLISHED for datagram sockets too Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 14/19] skmsg: extract __tcp_bpf_recvmsg() and tcp_bpf_wait_data() Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 15/19] udp: implement udp_bpf_recvmsg() for sockmap Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 16/19] af_unix: implement unix_dgram_bpf_recvmsg() Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 17/19] sock_map: update sock type checks Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 18/19] selftests/bpf: add test cases for unix and udp sockmap Cong Wang
2021-02-05 10:53   ` Jakub Sitnicki
2021-02-08 18:43     ` Cong Wang
2021-02-03  4:16 ` [Patch bpf-next 19/19] selftests/bpf: add test case for redirection between udp and unix Cong Wang
2021-02-03 17:48 ` [Patch bpf-next 00/19] sock_map: add non-TCP and cross-protocol support Alexei Starovoitov
2021-02-03 19:22   ` Cong Wang
2021-02-03 20:29     ` John Fastabend

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210203041636.38555-7-xiyou.wangcong@gmail.com \
    --to=xiyou.wangcong@gmail.com \
    --cc=bpf@vger.kernel.org \
    --cc=cong.wang@bytedance.com \
    --cc=daniel@iogearbox.net \
    --cc=duanxiongchun@bytedance.com \
    --cc=jakub@cloudflare.com \
    --cc=jiang.wang@bytedance.com \
    --cc=john.fastabend@gmail.com \
    --cc=lmb@cloudflare.com \
    --cc=netdev@vger.kernel.org \
    --cc=wangdongdong.6@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.