linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] Introduce IPPROTO_SMC
@ 2024-05-10  4:12 D. Wythe
  2024-05-10  4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10  4:12 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch allows to create smc socket via AF_INET,
similar to the following code,

/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);

There are several reasons why we believe it is appropriate here:

1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
address. There is no AF_SMC address at all.

2. Create smc socket in the AF_INET(6) path, which allows us to reuse
the infrastructure of AF_INET(6) path, such as common ebpf hooks.
Otherwise, smc have to implement it again in AF_SMC path. Such as:
  1. Replace IPPROTO_TCP with IPPROTO_SMC in the socket() syscall
     initiated by the user, without the use of LD-PRELOAD.
  2. Select whether immediate fallback is required based on peer's port/ip
     before connect().

A very significant result is that we can now use eBPF to implement smc_run
instead of LD_PRELOAD, who is completely ineffective in scenarios of static
linking.

Another potential value is that we are attempting to optimize the performance of
fallback socks, where merging socks is an important part, and it relies on the
creation of SMC sockets under the AF_INET path. (More information :
https://lore.kernel.org/netdev/1699442703-25015-1-git-send-email-alibuda@linux.alibaba.com/T/)

D. Wythe (2):
  net/smc: refatoring initialization of smc sock
  net/smc: Introduce IPPROTO_SMC

 include/uapi/linux/in.h |   2 +
 net/smc/af_smc.c        | 222 +++++++++++++++++++++++++++++++++++++++---------
 net/smc/inet_smc.h      |  32 +++++++
 3 files changed, 214 insertions(+), 42 deletions(-)
 create mode 100644 net/smc/inet_smc.h

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
  2024-05-10  4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
@ 2024-05-10  4:12 ` D. Wythe
  2024-05-10  9:50   ` Dust Li
  2024-05-11 12:21   ` Zhu Yanjun
  2024-05-10  4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10  4:12 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch aims to isolate the shared components of SMC socket
allocation by introducing smc_sock_init() for sock initialization
and __smc_create_clcsk() for the initialization of clcsock.

This is in preparation for the subsequent implementation of the
AF_INET version of SMC.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 41 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9389f0c..1f03724 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
 		return;
 }
 
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
-				   int protocol)
+static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
 {
-	struct smc_sock *smc;
-	struct proto *prot;
-	struct sock *sk;
-
-	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
-	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
-	if (!sk)
-		return NULL;
+	struct smc_sock *smc = smc_sk(sk);
 
-	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
 	sk->sk_state = SMC_INIT;
-	sk->sk_destruct = smc_destruct;
 	sk->sk_protocol = protocol;
+	mutex_init(&smc->clcsock_release_lock);
 	WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
 	WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
-	smc = smc_sk(sk);
 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 	INIT_WORK(&smc->connect_work, smc_connect_work);
 	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 	INIT_LIST_HEAD(&smc->accept_q);
 	spin_lock_init(&smc->accept_q_lock);
 	spin_lock_init(&smc->conn.send_lock);
-	sk->sk_prot->hash(sk);
-	mutex_init(&smc->clcsock_release_lock);
 	smc_init_saved_callbacks(smc);
+	smc->limit_smc_hs = net->smc.limit_smc_hs;
+	smc->use_fallback = false; /* assume rdma capability first */
+	smc->fallback_rsn = 0;
+
+	sk->sk_destruct = smc_destruct;
+	sk->sk_prot->hash(sk);
+}
+
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+				   int protocol)
+{
+	struct proto *prot;
+	struct sock *sk;
+
+	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
+	if (!sk)
+		return NULL;
+
+	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
+	smc_sock_init(net, sk, protocol);
 
 	return sk;
 }
@@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
 	.splice_read	= smc_splice_read,
 };
 
+static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
+{
+	struct smc_sock *smc = smc_sk(sk);
+	int rc;
+
+	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+			      &smc->clcsock);
+	if (rc) {
+		sk_common_release(sk);
+		return rc;
+	}
+
+	/* smc_clcsock_release() does not wait smc->clcsock->sk's
+	 * destruction;  its sk_state might not be TCP_CLOSE after
+	 * smc->sk is close()d, and TCP timers can be fired later,
+	 * which need net ref.
+	 */
+	sk = smc->clcsock->sk;
+	__netns_tracker_free(net, &sk->ns_tracker, false);
+	sk->sk_net_refcnt = 1;
+	get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+	sock_inuse_add(net, 1);
+	return 0;
+}
+
 static int __smc_create(struct net *net, struct socket *sock, int protocol,
 			int kern, struct socket *clcsock)
 {
@@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
 
 	/* create internal TCP socket for CLC handshake and fallback */
 	smc = smc_sk(sk);
-	smc->use_fallback = false; /* assume rdma capability first */
-	smc->fallback_rsn = 0;
-
-	/* default behavior from limit_smc_hs in every net namespace */
-	smc->limit_smc_hs = net->smc.limit_smc_hs;
 
 	rc = 0;
-	if (!clcsock) {
-		rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
-				      &smc->clcsock);
-		if (rc) {
-			sk_common_release(sk);
-			goto out;
-		}
-
-		/* smc_clcsock_release() does not wait smc->clcsock->sk's
-		 * destruction;  its sk_state might not be TCP_CLOSE after
-		 * smc->sk is close()d, and TCP timers can be fired later,
-		 * which need net ref.
-		 */
-		sk = smc->clcsock->sk;
-		__netns_tracker_free(net, &sk->ns_tracker, false);
-		sk->sk_net_refcnt = 1;
-		get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
-		sock_inuse_add(net, 1);
-	} else {
+	if (!clcsock)
+		rc = __smc_create_clcsk(net, sk, family);
+	else
 		smc->clcsock = clcsock;
-	}
-
 out:
 	return rc;
 }
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-10  4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
  2024-05-10  4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
@ 2024-05-10  4:12 ` D. Wythe
  2024-05-10  9:57   ` Dust Li
                     ` (2 more replies)
  2024-05-10  9:14 ` [PATCH net-next 0/2] " D. Wythe
  2024-05-10 10:22 ` Wenjia Zhang
  3 siblings, 3 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10  4:12 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch allows to create smc socket via AF_INET,
similar to the following code,

/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);

There are several reasons why we believe it is appropriate here:

1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
address. There is no AF_SMC address at all.

2. Create smc socket in the AF_INET(6) path, which allows us to reuse
the infrastructure of AF_INET(6) path, such as common ebpf hooks.
Otherwise, smc have to implement it again in AF_SMC path.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 include/uapi/linux/in.h |   2 +
 net/smc/af_smc.c        | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/inet_smc.h      |  32 ++++++++++++
 3 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 net/smc/inet_smc.h

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e682ab6..74c12e33 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -83,6 +83,8 @@ enum {
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
 #define IPPROTO_MPTCP		IPPROTO_MPTCP
+  IPPROTO_SMC = 263,		/* Shared Memory Communications */
+#define IPPROTO_SMC		IPPROTO_SMC
   IPPROTO_MAX
 };
 #endif
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1f03724..b4557828 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -54,6 +54,7 @@
 #include "smc_tracepoint.h"
 #include "smc_sysctl.h"
 #include "smc_loopback.h"
+#include "inet_smc.h"
 
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 	.create	= smc_create,
 };
 
+int smc_inet_init_sock(struct sock *sk)
+{
+	struct net *net = sock_net(sk);
+
+	/* init common smc sock */
+	smc_sock_init(net, sk, IPPROTO_SMC);
+	/* create clcsock */
+	return __smc_create_clcsk(net, sk, sk->sk_family);
+}
+
 static int smc_ulp_init(struct sock *sk)
 {
 	struct socket *tcp = sk->sk_socket;
@@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
 	.clone		= smc_ulp_clone,
 };
 
+struct proto smc_inet_prot = {
+	.name			= "INET_SMC",
+	.owner			= THIS_MODULE,
+	.init			= smc_inet_init_sock,
+	.hash			= smc_hash_sk,
+	.unhash			= smc_unhash_sk,
+	.release_cb		= smc_release_cb,
+	.obj_size		= sizeof(struct smc_sock),
+	.h.smc_hash	= &smc_v4_hashinfo,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
+};
+
+const struct proto_ops smc_inet_stream_ops = {
+	.family		= PF_INET,
+	.owner		= THIS_MODULE,
+	.release	= smc_release,
+	.bind		= smc_bind,
+	.connect	= smc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= smc_accept,
+	.getname	= smc_getname,
+	.poll		= smc_poll,
+	.ioctl		= smc_ioctl,
+	.listen		= smc_listen,
+	.shutdown	= smc_shutdown,
+	.setsockopt	= smc_setsockopt,
+	.getsockopt	= smc_getsockopt,
+	.sendmsg	= smc_sendmsg,
+	.recvmsg	= smc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.splice_read	= smc_splice_read,
+};
+
+struct inet_protosw smc_inet_protosw = {
+	.type       = SOCK_STREAM,
+	.protocol   = IPPROTO_SMC,
+	.prot   = &smc_inet_prot,
+	.ops    = &smc_inet_stream_ops,
+	.flags  = INET_PROTOSW_ICSK,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct proto smc_inet6_prot = {
+	.name			= "INET6_SMC",
+	.owner			= THIS_MODULE,
+	.init			= smc_inet_init_sock,
+	.hash			= smc_hash_sk,
+	.unhash			= smc_unhash_sk,
+	.release_cb		= smc_release_cb,
+	.obj_size		= sizeof(struct smc_sock),
+	.h.smc_hash		= &smc_v6_hashinfo,
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
+};
+
+const struct proto_ops smc_inet6_stream_ops = {
+	.family		= PF_INET6,
+	.owner		= THIS_MODULE,
+	.release	= smc_release,
+	.bind		= smc_bind,
+	.connect	= smc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= smc_accept,
+	.getname	= smc_getname,
+	.poll		= smc_poll,
+	.ioctl		= smc_ioctl,
+	.listen		= smc_listen,
+	.shutdown	= smc_shutdown,
+	.setsockopt	= smc_setsockopt,
+	.getsockopt	= smc_getsockopt,
+	.sendmsg	= smc_sendmsg,
+	.recvmsg	= smc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.splice_read	= smc_splice_read,
+};
+
+struct inet_protosw smc_inet6_protosw = {
+	.type       = SOCK_STREAM,
+	.protocol   = IPPROTO_SMC,
+	.prot   = &smc_inet6_prot,
+	.ops    = &smc_inet6_stream_ops,
+	.flags  = INET_PROTOSW_ICSK,
+};
+#endif
+
 unsigned int smc_net_id;
 
 static __net_init int smc_net_init(struct net *net)
@@ -3595,9 +3690,28 @@ static int __init smc_init(void)
 		goto out_lo;
 	}
 
+	rc = proto_register(&smc_inet_prot, 1);
+	if (rc) {
+		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
+		goto out_ulp;
+	}
+	inet_register_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+	rc = proto_register(&smc_inet6_prot, 1);
+	if (rc) {
+		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
+		goto out_inet_prot;
+	}
+	inet6_register_protosw(&smc_inet6_protosw);
+#endif
+
 	static_branch_enable(&tcp_have_smc);
 	return 0;
-
+out_inet_prot:
+	inet_unregister_protosw(&smc_inet_protosw);
+	proto_unregister(&smc_inet_prot);
+out_ulp:
+	tcp_unregister_ulp(&smc_ulp_ops);
 out_lo:
 	smc_loopback_exit();
 out_ib:
@@ -3634,6 +3748,10 @@ static int __init smc_init(void)
 static void __exit smc_exit(void)
 {
 	static_branch_disable(&tcp_have_smc);
+	inet_unregister_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+	inet6_unregister_protosw(&smc_inet6_protosw);
+#endif
 	tcp_unregister_ulp(&smc_ulp_ops);
 	sock_unregister(PF_SMC);
 	smc_core_exit();
@@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
 	destroy_workqueue(smc_hs_wq);
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
+	proto_unregister(&smc_inet_prot);
+#if IS_ENABLED(CONFIG_IPV6)
+	proto_unregister(&smc_inet6_prot);
+#endif
 	smc_pnet_exit();
 	smc_nl_exit();
 	smc_clc_exit();
@@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_SMC);
 MODULE_ALIAS_TCP_ULP("smc");
+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
+#endif
 MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
new file mode 100644
index 00000000..fcdcb61
--- /dev/null
+++ b/net/smc/inet_smc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the SMC module (socket related)
+
+ *  Copyright IBM Corp. 2016
+ *
+ */
+#ifndef __INET_SMC
+#define __INET_SMC
+
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+extern struct proto smc_inet_prot;
+extern const struct proto_ops smc_inet_stream_ops;
+extern struct inet_protosw smc_inet_protosw;
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+/* MUST after net/tcp.h or warning */
+#include <net/transp_v6.h>
+extern struct proto smc_inet6_prot;
+extern const struct proto_ops smc_inet6_stream_ops;
+extern struct inet_protosw smc_inet6_protosw;
+#endif
+
+int smc_inet_init_sock(struct sock *sk);
+
+#endif // __INET_SMC
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 0/2] Introduce IPPROTO_SMC
  2024-05-10  4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
  2024-05-10  4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
  2024-05-10  4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
@ 2024-05-10  9:14 ` D. Wythe
  2024-05-10 10:22 ` Wenjia Zhang
  3 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10  9:14 UTC (permalink / raw)
  To: kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet



On 5/10/24 12:12 PM, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
>
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

A eBPF version of smc_run is also available here:

https://github.com/D-Wythe/smc-tools/tree/ipproto_smc

You can test IPPROTO_SMC by script:

   smc_run.pid COMMAND

While you can still test AF_SMC by script:

   smc_run COMMAND

D. Wythe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
  2024-05-10  4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
@ 2024-05-10  9:50   ` Dust Li
  2024-05-11  2:26     ` D. Wythe
  2024-05-11 12:21   ` Zhu Yanjun
  1 sibling, 1 reply; 15+ messages in thread
From: Dust Li @ 2024-05-10  9:50 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

On 2024-05-10 12:12:12, D. Wythe wrote:
>From: "D. Wythe" <alibuda@linux.alibaba.com>
>
>This patch aims to isolate the shared components of SMC socket
>allocation by introducing smc_sock_init() for sock initialization
>and __smc_create_clcsk() for the initialization of clcsock.
>
>This is in preparation for the subsequent implementation of the
>AF_INET version of SMC.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 52 insertions(+), 41 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 9389f0c..1f03724 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
> 		return;
> }
> 
>-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>-				   int protocol)
>+static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
> {
>-	struct smc_sock *smc;
>-	struct proto *prot;
>-	struct sock *sk;
>-
>-	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>-	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>-	if (!sk)
>-		return NULL;
>+	struct smc_sock *smc = smc_sk(sk);
> 
>-	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
> 	sk->sk_state = SMC_INIT;
>-	sk->sk_destruct = smc_destruct;
> 	sk->sk_protocol = protocol;
>+	mutex_init(&smc->clcsock_release_lock);
> 	WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
> 	WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
>-	smc = smc_sk(sk);
> 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
> 	INIT_WORK(&smc->connect_work, smc_connect_work);
> 	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
> 	INIT_LIST_HEAD(&smc->accept_q);
> 	spin_lock_init(&smc->accept_q_lock);
> 	spin_lock_init(&smc->conn.send_lock);
>-	sk->sk_prot->hash(sk);
>-	mutex_init(&smc->clcsock_release_lock);
> 	smc_init_saved_callbacks(smc);
>+	smc->limit_smc_hs = net->smc.limit_smc_hs;
>+	smc->use_fallback = false; /* assume rdma capability first */
>+	smc->fallback_rsn = 0;
>+
>+	sk->sk_destruct = smc_destruct;
>+	sk->sk_prot->hash(sk);

Why change the order here ? e.g.

Before:
    sk->sk_destruct = smc_destruct;
    mutex_init(&smc->clcsock_release_lock);
After
    mutex_init(&smc->clcsock_release_lock);
    sk->sk_destruct = smc_destruct;

Same for sk->sk_prot->hash(sk)


>+}
>+
>+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>+				   int protocol)
>+{
>+	struct proto *prot;
>+	struct sock *sk;
>+
>+	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>+	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>+	if (!sk)
>+		return NULL;
>+
>+	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>+	smc_sock_init(net, sk, protocol);
> 
> 	return sk;
> }
>@@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
> 	.splice_read	= smc_splice_read,
> };
> 
>+static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)

Why add '__' prefix here ?


>+{
>+	struct smc_sock *smc = smc_sk(sk);
>+	int rc;
>+
>+	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>+			      &smc->clcsock);
>+	if (rc) {
>+		sk_common_release(sk);
>+		return rc;
>+	}
>+
>+	/* smc_clcsock_release() does not wait smc->clcsock->sk's
>+	 * destruction;  its sk_state might not be TCP_CLOSE after
>+	 * smc->sk is close()d, and TCP timers can be fired later,
>+	 * which need net ref.
>+	 */
>+	sk = smc->clcsock->sk;
>+	__netns_tracker_free(net, &sk->ns_tracker, false);
>+	sk->sk_net_refcnt = 1;
>+	get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>+	sock_inuse_add(net, 1);
>+	return 0;
>+}
>+
> static int __smc_create(struct net *net, struct socket *sock, int protocol,
> 			int kern, struct socket *clcsock)
> {
>@@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
> 
> 	/* create internal TCP socket for CLC handshake and fallback */
> 	smc = smc_sk(sk);
>-	smc->use_fallback = false; /* assume rdma capability first */
>-	smc->fallback_rsn = 0;
>-
>-	/* default behavior from limit_smc_hs in every net namespace */
>-	smc->limit_smc_hs = net->smc.limit_smc_hs;
> 
> 	rc = 0;
>-	if (!clcsock) {
>-		rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>-				      &smc->clcsock);
>-		if (rc) {
>-			sk_common_release(sk);
>-			goto out;
>-		}
>-
>-		/* smc_clcsock_release() does not wait smc->clcsock->sk's
>-		 * destruction;  its sk_state might not be TCP_CLOSE after
>-		 * smc->sk is close()d, and TCP timers can be fired later,
>-		 * which need net ref.
>-		 */
>-		sk = smc->clcsock->sk;
>-		__netns_tracker_free(net, &sk->ns_tracker, false);
>-		sk->sk_net_refcnt = 1;
>-		get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>-		sock_inuse_add(net, 1);
>-	} else {
>+	if (!clcsock)
>+		rc = __smc_create_clcsk(net, sk, family);
>+	else
> 		smc->clcsock = clcsock;
>-	}
>-
> out:
> 	return rc;
> }
>-- 
>1.8.3.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-10  4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
@ 2024-05-10  9:57   ` Dust Li
  2024-05-11  2:23     ` D. Wythe
  2024-05-10 17:09   ` kernel test robot
  2024-05-10 18:32   ` kernel test robot
  2 siblings, 1 reply; 15+ messages in thread
From: Dust Li @ 2024-05-10  9:57 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

On 2024-05-10 12:12:13, D. Wythe wrote:
>From: "D. Wythe" <alibuda@linux.alibaba.com>
>
>This patch allows to create smc socket via AF_INET,
>similar to the following code,
>
>/* create v4 smc sock */
>v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
>/* create v6 smc sock */
>v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
>There are several reasons why we believe it is appropriate here:
>
>1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>address. There is no AF_SMC address at all.
>
>2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>Otherwise, smc have to implement it again in AF_SMC path.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> include/uapi/linux/in.h |   2 +
> net/smc/af_smc.c        | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
> net/smc/inet_smc.h      |  32 ++++++++++++
> 3 files changed, 162 insertions(+), 1 deletion(-)
> create mode 100644 net/smc/inet_smc.h
>
>diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>index e682ab6..74c12e33 100644
>--- a/include/uapi/linux/in.h
>+++ b/include/uapi/linux/in.h
>@@ -83,6 +83,8 @@ enum {
> #define IPPROTO_RAW		IPPROTO_RAW
>   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
> #define IPPROTO_MPTCP		IPPROTO_MPTCP
>+  IPPROTO_SMC = 263,		/* Shared Memory Communications */
                                                           ^ use tab to align here
>+#define IPPROTO_SMC		IPPROTO_SMC
>   IPPROTO_MAX
> };
> #endif
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 1f03724..b4557828 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -54,6 +54,7 @@
> #include "smc_tracepoint.h"
> #include "smc_sysctl.h"
> #include "smc_loopback.h"
>+#include "inet_smc.h"
> 
> static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
> 						 * creation on server
>@@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
> 	.create	= smc_create,
> };
> 

Why not put those whole bunch of inet staff into smc_inet.c ?
Looks like your smc_inet.h is meanless without smc_inet.c

>+int smc_inet_init_sock(struct sock *sk)
>+{
>+	struct net *net = sock_net(sk);
>+
>+	/* init common smc sock */
>+	smc_sock_init(net, sk, IPPROTO_SMC);
>+	/* create clcsock */
>+	return __smc_create_clcsk(net, sk, sk->sk_family);
>+}
>+
> static int smc_ulp_init(struct sock *sk)
> {
> 	struct socket *tcp = sk->sk_socket;
>@@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
> 	.clone		= smc_ulp_clone,
> };
> 
>+struct proto smc_inet_prot = {
>+	.name			= "INET_SMC",
>+	.owner			= THIS_MODULE,
>+	.init			= smc_inet_init_sock,
>+	.hash			= smc_hash_sk,
>+	.unhash			= smc_unhash_sk,
>+	.release_cb		= smc_release_cb,
>+	.obj_size		= sizeof(struct smc_sock),
>+	.h.smc_hash	= &smc_v4_hashinfo,
>+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
                ^
Align please.


>+};
>+
>+const struct proto_ops smc_inet_stream_ops = {
>+	.family		= PF_INET,
>+	.owner		= THIS_MODULE,
>+	.release	= smc_release,
>+	.bind		= smc_bind,
>+	.connect	= smc_connect,
>+	.socketpair	= sock_no_socketpair,
>+	.accept		= smc_accept,
>+	.getname	= smc_getname,
>+	.poll		= smc_poll,
>+	.ioctl		= smc_ioctl,
>+	.listen		= smc_listen,
>+	.shutdown	= smc_shutdown,
>+	.setsockopt	= smc_setsockopt,
>+	.getsockopt	= smc_getsockopt,
>+	.sendmsg	= smc_sendmsg,
>+	.recvmsg	= smc_recvmsg,
>+	.mmap		= sock_no_mmap,
>+	.splice_read	= smc_splice_read,

Ditto

>+};
>+
>+struct inet_protosw smc_inet_protosw = {
>+	.type       = SOCK_STREAM,
>+	.protocol   = IPPROTO_SMC,
>+	.prot   = &smc_inet_prot,
Ditto

>+	.ops    = &smc_inet_stream_ops,
>+	.flags  = INET_PROTOSW_ICSK,
>+};
>+
>+#if IS_ENABLED(CONFIG_IPV6)
>+struct proto smc_inet6_prot = {
>+	.name			= "INET6_SMC",
>+	.owner			= THIS_MODULE,
>+	.init			= smc_inet_init_sock,
>+	.hash			= smc_hash_sk,
>+	.unhash			= smc_unhash_sk,
>+	.release_cb		= smc_release_cb,
>+	.obj_size		= sizeof(struct smc_sock),
>+	.h.smc_hash		= &smc_v6_hashinfo,
>+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
>+};
>+
>+const struct proto_ops smc_inet6_stream_ops = {
>+	.family		= PF_INET6,
>+	.owner		= THIS_MODULE,
>+	.release	= smc_release,
>+	.bind		= smc_bind,
>+	.connect	= smc_connect,
>+	.socketpair	= sock_no_socketpair,
>+	.accept		= smc_accept,
>+	.getname	= smc_getname,
>+	.poll		= smc_poll,
>+	.ioctl		= smc_ioctl,
>+	.listen		= smc_listen,
>+	.shutdown	= smc_shutdown,
>+	.setsockopt	= smc_setsockopt,
>+	.getsockopt	= smc_getsockopt,
>+	.sendmsg	= smc_sendmsg,
>+	.recvmsg	= smc_recvmsg,
>+	.mmap		= sock_no_mmap,
>+	.splice_read	= smc_splice_read,
Ditto

>+};
>+
>+struct inet_protosw smc_inet6_protosw = {
>+	.type       = SOCK_STREAM,
>+	.protocol   = IPPROTO_SMC,
>+	.prot   = &smc_inet6_prot,
>+	.ops    = &smc_inet6_stream_ops,
>+	.flags  = INET_PROTOSW_ICSK,
Ditto

>+};
>+#endif
>+
> unsigned int smc_net_id;
> 
> static __net_init int smc_net_init(struct net *net)
>@@ -3595,9 +3690,28 @@ static int __init smc_init(void)
> 		goto out_lo;
> 	}
> 
>+	rc = proto_register(&smc_inet_prot, 1);
>+	if (rc) {
>+		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>+		goto out_ulp;
>+	}
>+	inet_register_protosw(&smc_inet_protosw);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	rc = proto_register(&smc_inet6_prot, 1);
>+	if (rc) {
>+		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>+		goto out_inet_prot;
>+	}
>+	inet6_register_protosw(&smc_inet6_protosw);
>+#endif
>+
> 	static_branch_enable(&tcp_have_smc);
> 	return 0;
>-
>+out_inet_prot:
>+	inet_unregister_protosw(&smc_inet_protosw);
>+	proto_unregister(&smc_inet_prot);
>+out_ulp:
>+	tcp_unregister_ulp(&smc_ulp_ops);
> out_lo:
> 	smc_loopback_exit();
> out_ib:
>@@ -3634,6 +3748,10 @@ static int __init smc_init(void)
> static void __exit smc_exit(void)
> {
> 	static_branch_disable(&tcp_have_smc);
>+	inet_unregister_protosw(&smc_inet_protosw);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	inet6_unregister_protosw(&smc_inet6_protosw);
>+#endif
> 	tcp_unregister_ulp(&smc_ulp_ops);
> 	sock_unregister(PF_SMC);
> 	smc_core_exit();
>@@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
> 	destroy_workqueue(smc_hs_wq);
> 	proto_unregister(&smc_proto6);
> 	proto_unregister(&smc_proto);
>+	proto_unregister(&smc_inet_prot);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	proto_unregister(&smc_inet6_prot);
>+#endif
> 	smc_pnet_exit();
> 	smc_nl_exit();
> 	smc_clc_exit();
>@@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
> MODULE_LICENSE("GPL");
> MODULE_ALIAS_NETPROTO(PF_SMC);
> MODULE_ALIAS_TCP_ULP("smc");
>+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>+#if IS_ENABLED(CONFIG_IPV6)
>+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>+#endif
> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>new file mode 100644
>index 00000000..fcdcb61
>--- /dev/null
>+++ b/net/smc/inet_smc.h
>@@ -0,0 +1,32 @@
>+/* SPDX-License-Identifier: GPL-2.0 */
>+/*
>+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>+ *
>+ *  Definitions for the SMC module (socket related)
>+
>+ *  Copyright IBM Corp. 2016

You should update this.

>+ *
>+ */
>+#ifndef __INET_SMC
>+#define __INET_SMC
>+
>+#include <net/protocol.h>
>+#include <net/sock.h>
>+#include <net/tcp.h>
>+
>+extern struct proto smc_inet_prot;
>+extern const struct proto_ops smc_inet_stream_ops;
>+extern struct inet_protosw smc_inet_protosw;
>+
>+#if IS_ENABLED(CONFIG_IPV6)
>+#include <net/ipv6.h>
>+/* MUST after net/tcp.h or warning */
>+#include <net/transp_v6.h>
>+extern struct proto smc_inet6_prot;
>+extern const struct proto_ops smc_inet6_stream_ops;
>+extern struct inet_protosw smc_inet6_protosw;
>+#endif
>+
>+int smc_inet_init_sock(struct sock *sk);
>+
>+#endif // __INET_SMC
         ^
         use /* __INET_SMC */ instead

>-- 
>1.8.3.1
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 0/2] Introduce IPPROTO_SMC
  2024-05-10  4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
                   ` (2 preceding siblings ...)
  2024-05-10  9:14 ` [PATCH net-next 0/2] " D. Wythe
@ 2024-05-10 10:22 ` Wenjia Zhang
  3 siblings, 0 replies; 15+ messages in thread
From: Wenjia Zhang @ 2024-05-10 10:22 UTC (permalink / raw)
  To: D. Wythe, kgraul, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet



On 10.05.24 06:12, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
> 
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
> 
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
> 
> There are several reasons why we believe it is appropriate here:
> 
> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> address. There is no AF_SMC address at all.
> 
> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> Otherwise, smc have to implement it again in AF_SMC path. Such as:
>    1. Replace IPPROTO_TCP with IPPROTO_SMC in the socket() syscall
>       initiated by the user, without the use of LD-PRELOAD.
>    2. Select whether immediate fallback is required based on peer's port/ip
>       before connect().
> 
> A very significant result is that we can now use eBPF to implement smc_run
> instead of LD_PRELOAD, who is completely ineffective in scenarios of static
> linking.
> 

> Another potential value is that we are attempting to optimize the performance of
> fallback socks, where merging socks is an important part, and it relies on the
> creation of SMC sockets under the AF_INET path. (More information :
> https://lore.kernel.org/netdev/1699442703-25015-1-git-send-email-alibuda@linux.alibaba.com/T/)
> 
> D. Wythe (2):
>    net/smc: refatoring initialization of smc sock
>    net/smc: Introduce IPPROTO_SMC
> 
>   include/uapi/linux/in.h |   2 +
>   net/smc/af_smc.c        | 222 +++++++++++++++++++++++++++++++++++++++---------
>   net/smc/inet_smc.h      |  32 +++++++
>   3 files changed, 214 insertions(+), 42 deletions(-)
>   create mode 100644 net/smc/inet_smc.h
> 
Replacing the preload library is indeed a good reason for this method. 
And that could be a new era for smc. However, there are still some 
details we need to take care of. Thus, I'd like to ask for more time to 
review and test these patches.

Thank you,
Wenjia

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-10  4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
  2024-05-10  9:57   ` Dust Li
@ 2024-05-10 17:09   ` kernel test robot
  2024-05-10 18:32   ` kernel test robot
  2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2024-05-10 17:09 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: oe-kbuild-all, kuba, davem, netdev, linux-s390, linux-rdma,
	tonylu, pabeni, edumazet

Hi Wythe,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/D-Wythe/net-smc-refatoring-initialization-of-smc-sock/20240510-121442
base:   net-next/main
patch link:    https://lore.kernel.org/r/1715314333-107290-3-git-send-email-alibuda%40linux.alibaba.com
patch subject: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
config: i386-buildonly-randconfig-002-20240510 (https://download.01.org/0day-ci/archive/20240511/202405110124.GxQs28cK-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240511/202405110124.GxQs28cK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405110124.GxQs28cK-lkp@intel.com/

All warnings (new ones prefixed by >>):

   net/smc/af_smc.c: In function 'smc_init':
>> net/smc/af_smc.c:3710:1: warning: label 'out_inet_prot' defined but not used [-Wunused-label]
    out_inet_prot:
    ^~~~~~~~~~~~~


vim +/out_inet_prot +3710 net/smc/af_smc.c

  3707	
  3708		static_branch_enable(&tcp_have_smc);
  3709		return 0;
> 3710	out_inet_prot:
  3711		inet_unregister_protosw(&smc_inet_protosw);
  3712		proto_unregister(&smc_inet_prot);
  3713	out_ulp:
  3714		tcp_unregister_ulp(&smc_ulp_ops);
  3715	out_lo:
  3716		smc_loopback_exit();
  3717	out_ib:
  3718		smc_ib_unregister_client();
  3719	out_sock:
  3720		sock_unregister(PF_SMC);
  3721	out_proto6:
  3722		proto_unregister(&smc_proto6);
  3723	out_proto:
  3724		proto_unregister(&smc_proto);
  3725	out_core:
  3726		smc_core_exit();
  3727	out_alloc_wqs:
  3728		destroy_workqueue(smc_close_wq);
  3729	out_alloc_hs_wq:
  3730		destroy_workqueue(smc_hs_wq);
  3731	out_alloc_tcp_ls_wq:
  3732		destroy_workqueue(smc_tcp_ls_wq);
  3733	out_pnet:
  3734		smc_pnet_exit();
  3735	out_nl:
  3736		smc_nl_exit();
  3737	out_ism:
  3738		smc_clc_exit();
  3739		smc_ism_exit();
  3740	out_pernet_subsys_stat:
  3741		unregister_pernet_subsys(&smc_net_stat_ops);
  3742	out_pernet_subsys:
  3743		unregister_pernet_subsys(&smc_net_ops);
  3744	
  3745		return rc;
  3746	}
  3747	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-10  4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
  2024-05-10  9:57   ` Dust Li
  2024-05-10 17:09   ` kernel test robot
@ 2024-05-10 18:32   ` kernel test robot
  2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2024-05-10 18:32 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: llvm, oe-kbuild-all, kuba, davem, netdev, linux-s390, linux-rdma,
	tonylu, pabeni, edumazet

Hi Wythe,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/D-Wythe/net-smc-refatoring-initialization-of-smc-sock/20240510-121442
base:   net-next/main
patch link:    https://lore.kernel.org/r/1715314333-107290-3-git-send-email-alibuda%40linux.alibaba.com
patch subject: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
config: x86_64-randconfig-002-20240510 (https://download.01.org/0day-ci/archive/20240511/202405110225.7378HJe1-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240511/202405110225.7378HJe1-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405110225.7378HJe1-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> net/smc/af_smc.c:3710:1: warning: unused label 'out_inet_prot' [-Wunused-label]
    3710 | out_inet_prot:
         | ^~~~~~~~~~~~~~
   1 warning generated.


vim +/out_inet_prot +3710 net/smc/af_smc.c

  3707	
  3708		static_branch_enable(&tcp_have_smc);
  3709		return 0;
> 3710	out_inet_prot:
  3711		inet_unregister_protosw(&smc_inet_protosw);
  3712		proto_unregister(&smc_inet_prot);
  3713	out_ulp:
  3714		tcp_unregister_ulp(&smc_ulp_ops);
  3715	out_lo:
  3716		smc_loopback_exit();
  3717	out_ib:
  3718		smc_ib_unregister_client();
  3719	out_sock:
  3720		sock_unregister(PF_SMC);
  3721	out_proto6:
  3722		proto_unregister(&smc_proto6);
  3723	out_proto:
  3724		proto_unregister(&smc_proto);
  3725	out_core:
  3726		smc_core_exit();
  3727	out_alloc_wqs:
  3728		destroy_workqueue(smc_close_wq);
  3729	out_alloc_hs_wq:
  3730		destroy_workqueue(smc_hs_wq);
  3731	out_alloc_tcp_ls_wq:
  3732		destroy_workqueue(smc_tcp_ls_wq);
  3733	out_pnet:
  3734		smc_pnet_exit();
  3735	out_nl:
  3736		smc_nl_exit();
  3737	out_ism:
  3738		smc_clc_exit();
  3739		smc_ism_exit();
  3740	out_pernet_subsys_stat:
  3741		unregister_pernet_subsys(&smc_net_stat_ops);
  3742	out_pernet_subsys:
  3743		unregister_pernet_subsys(&smc_net_ops);
  3744	
  3745		return rc;
  3746	}
  3747	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-10  9:57   ` Dust Li
@ 2024-05-11  2:23     ` D. Wythe
  2024-05-11  2:46       ` Dust Li
  0 siblings, 1 reply; 15+ messages in thread
From: D. Wythe @ 2024-05-11  2:23 UTC (permalink / raw)
  To: dust.li, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet



On 5/10/24 5:57 PM, Dust Li wrote:
> On 2024-05-10 12:12:13, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>> include/uapi/linux/in.h |   2 +
>> net/smc/af_smc.c        | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
>> net/smc/inet_smc.h      |  32 ++++++++++++
>> 3 files changed, 162 insertions(+), 1 deletion(-)
>> create mode 100644 net/smc/inet_smc.h
>>
>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> index e682ab6..74c12e33 100644
>> --- a/include/uapi/linux/in.h
>> +++ b/include/uapi/linux/in.h
>> @@ -83,6 +83,8 @@ enum {
>> #define IPPROTO_RAW		IPPROTO_RAW
>>    IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
>> #define IPPROTO_MPTCP		IPPROTO_MPTCP
>> +  IPPROTO_SMC = 263,		/* Shared Memory Communications */
>                                                             ^ use tab to align here

There is a problem here, all previous definitions were aligned with 2 
spaces.

>> +#define IPPROTO_SMC		IPPROTO_SMC
>>    IPPROTO_MAX
>> };
>> #endif
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 1f03724..b4557828 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -54,6 +54,7 @@
>> #include "smc_tracepoint.h"
>> #include "smc_sysctl.h"
>> #include "smc_loopback.h"
>> +#include "inet_smc.h"
>>
>> static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>> 						 * creation on server
>> @@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
>> 	.create	= smc_create,
>> };
>>
> Why not put those whole bunch of inet staff into smc_inet.c ?
> Looks like your smc_inet.h is meanless without smc_inet.c
>

This header file was originally reserved for future merging of socks. If 
nobody likes it, I can move it to the
af_smc.c

>> +int smc_inet_init_sock(struct sock *sk)
>> +{
>> +	struct net *net = sock_net(sk);
>> +
>> +	/* init common smc sock */
>> +	smc_sock_init(net, sk, IPPROTO_SMC);
>> +	/* create clcsock */
>> +	return __smc_create_clcsk(net, sk, sk->sk_family);
>> +}
>> +
>> static int smc_ulp_init(struct sock *sk)
>> {
>> 	struct socket *tcp = sk->sk_socket;
>> @@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
>> 	.clone		= smc_ulp_clone,
>> };
>>
>> +struct proto smc_inet_prot = {
>> +	.name			= "INET_SMC",
>> +	.owner			= THIS_MODULE,
>> +	.init			= smc_inet_init_sock,
>> +	.hash			= smc_hash_sk,
>> +	.unhash			= smc_unhash_sk,
>> +	.release_cb		= smc_release_cb,
>> +	.obj_size		= sizeof(struct smc_sock),
>> +	.h.smc_hash	= &smc_v4_hashinfo,
>> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>                  ^
> Align please.
>
Got it.
>> +};
>> +
>> +const struct proto_ops smc_inet_stream_ops = {
>> +	.family		= PF_INET,
>> +	.owner		= THIS_MODULE,
>> +	.release	= smc_release,
>> +	.bind		= smc_bind,
>> +	.connect	= smc_connect,
>> +	.socketpair	= sock_no_socketpair,
>> +	.accept		= smc_accept,
>> +	.getname	= smc_getname,
>> +	.poll		= smc_poll,
>> +	.ioctl		= smc_ioctl,
>> +	.listen		= smc_listen,
>> +	.shutdown	= smc_shutdown,
>> +	.setsockopt	= smc_setsockopt,
>> +	.getsockopt	= smc_getsockopt,
>> +	.sendmsg	= smc_sendmsg,
>> +	.recvmsg	= smc_recvmsg,
>> +	.mmap		= sock_no_mmap,
>> +	.splice_read	= smc_splice_read,
> Ditto
>
>> +};
>> +
>> +struct inet_protosw smc_inet_protosw = {
>> +	.type       = SOCK_STREAM,
>> +	.protocol   = IPPROTO_SMC,
>> +	.prot   = &smc_inet_prot,
> Ditto
>
>> +	.ops    = &smc_inet_stream_ops,
>> +	.flags  = INET_PROTOSW_ICSK,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +struct proto smc_inet6_prot = {
>> +	.name			= "INET6_SMC",
>> +	.owner			= THIS_MODULE,
>> +	.init			= smc_inet_init_sock,
>> +	.hash			= smc_hash_sk,
>> +	.unhash			= smc_unhash_sk,
>> +	.release_cb		= smc_release_cb,
>> +	.obj_size		= sizeof(struct smc_sock),
>> +	.h.smc_hash		= &smc_v6_hashinfo,
>> +	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet6_stream_ops = {
>> +	.family		= PF_INET6,
>> +	.owner		= THIS_MODULE,
>> +	.release	= smc_release,
>> +	.bind		= smc_bind,
>> +	.connect	= smc_connect,
>> +	.socketpair	= sock_no_socketpair,
>> +	.accept		= smc_accept,
>> +	.getname	= smc_getname,
>> +	.poll		= smc_poll,
>> +	.ioctl		= smc_ioctl,
>> +	.listen		= smc_listen,
>> +	.shutdown	= smc_shutdown,
>> +	.setsockopt	= smc_setsockopt,
>> +	.getsockopt	= smc_getsockopt,
>> +	.sendmsg	= smc_sendmsg,
>> +	.recvmsg	= smc_recvmsg,
>> +	.mmap		= sock_no_mmap,
>> +	.splice_read	= smc_splice_read,
> Ditto
>
>> +};
>> +
>> +struct inet_protosw smc_inet6_protosw = {
>> +	.type       = SOCK_STREAM,
>> +	.protocol   = IPPROTO_SMC,
>> +	.prot   = &smc_inet6_prot,
>> +	.ops    = &smc_inet6_stream_ops,
>> +	.flags  = INET_PROTOSW_ICSK,
> Ditto
>
>> +};
>> +#endif
>> +
>> unsigned int smc_net_id;
>>
>> static __net_init int smc_net_init(struct net *net)
>> @@ -3595,9 +3690,28 @@ static int __init smc_init(void)
>> 		goto out_lo;
>> 	}
>>
>> +	rc = proto_register(&smc_inet_prot, 1);
>> +	if (rc) {
>> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>> +		goto out_ulp;
>> +	}
>> +	inet_register_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	rc = proto_register(&smc_inet6_prot, 1);
>> +	if (rc) {
>> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>> +		goto out_inet_prot;
>> +	}
>> +	inet6_register_protosw(&smc_inet6_protosw);
>> +#endif
>> +
>> 	static_branch_enable(&tcp_have_smc);
>> 	return 0;
>> -
>> +out_inet_prot:
>> +	inet_unregister_protosw(&smc_inet_protosw);
>> +	proto_unregister(&smc_inet_prot);
>> +out_ulp:
>> +	tcp_unregister_ulp(&smc_ulp_ops);
>> out_lo:
>> 	smc_loopback_exit();
>> out_ib:
>> @@ -3634,6 +3748,10 @@ static int __init smc_init(void)
>> static void __exit smc_exit(void)
>> {
>> 	static_branch_disable(&tcp_have_smc);
>> +	inet_unregister_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	inet6_unregister_protosw(&smc_inet6_protosw);
>> +#endif
>> 	tcp_unregister_ulp(&smc_ulp_ops);
>> 	sock_unregister(PF_SMC);
>> 	smc_core_exit();
>> @@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
>> 	destroy_workqueue(smc_hs_wq);
>> 	proto_unregister(&smc_proto6);
>> 	proto_unregister(&smc_proto);
>> +	proto_unregister(&smc_inet_prot);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	proto_unregister(&smc_inet6_prot);
>> +#endif
>> 	smc_pnet_exit();
>> 	smc_nl_exit();
>> 	smc_clc_exit();
>> @@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
>> MODULE_LICENSE("GPL");
>> MODULE_ALIAS_NETPROTO(PF_SMC);
>> MODULE_ALIAS_TCP_ULP("smc");
>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> +#endif
>> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> new file mode 100644
>> index 00000000..fcdcb61
>> --- /dev/null
>> +++ b/net/smc/inet_smc.h
>> @@ -0,0 +1,32 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the SMC module (socket related)
>> +
>> + *  Copyright IBM Corp. 2016
> You should update this.
Got it.
>> + *
>> + */
>> +#ifndef __INET_SMC
>> +#define __INET_SMC
>> +
>> +#include <net/protocol.h>
>> +#include <net/sock.h>
>> +#include <net/tcp.h>
>> +
>> +extern struct proto smc_inet_prot;
>> +extern const struct proto_ops smc_inet_stream_ops;
>> +extern struct inet_protosw smc_inet_protosw;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +/* MUST after net/tcp.h or warning */
>> +#include <net/transp_v6.h>
>> +extern struct proto smc_inet6_prot;
>> +extern const struct proto_ops smc_inet6_stream_ops;
>> +extern struct inet_protosw smc_inet6_protosw;
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk);
>> +
>> +#endif // __INET_SMC
>           ^
>           use /* __INET_SMC */ instead
>
>> -- 
>> 1.8.3.1
>>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
  2024-05-10  9:50   ` Dust Li
@ 2024-05-11  2:26     ` D. Wythe
  0 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-11  2:26 UTC (permalink / raw)
  To: dust.li, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet



On 5/10/24 5:50 PM, Dust Li wrote:
> On 2024-05-10 12:12:12, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch aims to isolate the shared components of SMC socket
>> allocation by introducing smc_sock_init() for sock initialization
>> and __smc_create_clcsk() for the initialization of clcsock.
>>
>> This is in preparation for the subsequent implementation of the
>> AF_INET version of SMC.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>> net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
>> 1 file changed, 52 insertions(+), 41 deletions(-)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 9389f0c..1f03724 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
>> 		return;
>> }
>>
>> -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>> -				   int protocol)
>> +static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
>> {
>> -	struct smc_sock *smc;
>> -	struct proto *prot;
>> -	struct sock *sk;
>> -
>> -	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> -	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> -	if (!sk)
>> -		return NULL;
>> +	struct smc_sock *smc = smc_sk(sk);
>>
>> -	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> 	sk->sk_state = SMC_INIT;
>> -	sk->sk_destruct = smc_destruct;
>> 	sk->sk_protocol = protocol;
>> +	mutex_init(&smc->clcsock_release_lock);
>> 	WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
>> 	WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
>> -	smc = smc_sk(sk);
>> 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
>> 	INIT_WORK(&smc->connect_work, smc_connect_work);
>> 	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
>> 	INIT_LIST_HEAD(&smc->accept_q);
>> 	spin_lock_init(&smc->accept_q_lock);
>> 	spin_lock_init(&smc->conn.send_lock);
>> -	sk->sk_prot->hash(sk);
>> -	mutex_init(&smc->clcsock_release_lock);
>> 	smc_init_saved_callbacks(smc);
>> +	smc->limit_smc_hs = net->smc.limit_smc_hs;
>> +	smc->use_fallback = false; /* assume rdma capability first */
>> +	smc->fallback_rsn = 0;
>> +
>> +	sk->sk_destruct = smc_destruct;
>> +	sk->sk_prot->hash(sk);
> Why change the order here ? e.g.
>
> Before:
>      sk->sk_destruct = smc_destruct;
>      mutex_init(&smc->clcsock_release_lock);
> After
>      mutex_init(&smc->clcsock_release_lock);
>      sk->sk_destruct = smc_destruct;
>
> Same for sk->sk_prot->hash(sk)

Yes, you are right, I will fix it in the next version.
>
>
>> +}
>> +
>> +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>> +				   int protocol)
>> +{
>> +	struct proto *prot;
>> +	struct sock *sk;
>> +
>> +	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> +	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> +	if (!sk)
>> +		return NULL;
>> +
>> +	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> +	smc_sock_init(net, sk, protocol);
>>
>> 	return sk;
>> }
>> @@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
>> 	.splice_read	= smc_splice_read,
>> };
>>
>> +static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
> Why add '__' prefix here ?

Good question, I also realize that this is not suitable, I will delete 
it in the next version.

Thanks.

>
>> +{
>> +	struct smc_sock *smc = smc_sk(sk);
>> +	int rc;
>> +
>> +	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> +			      &smc->clcsock);
>> +	if (rc) {
>> +		sk_common_release(sk);
>> +		return rc;
>> +	}
>> +
>> +	/* smc_clcsock_release() does not wait smc->clcsock->sk's
>> +	 * destruction;  its sk_state might not be TCP_CLOSE after
>> +	 * smc->sk is close()d, and TCP timers can be fired later,
>> +	 * which need net ref.
>> +	 */
>> +	sk = smc->clcsock->sk;
>> +	__netns_tracker_free(net, &sk->ns_tracker, false);
>> +	sk->sk_net_refcnt = 1;
>> +	get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> +	sock_inuse_add(net, 1);
>> +	return 0;
>> +}
>> +
>> static int __smc_create(struct net *net, struct socket *sock, int protocol,
>> 			int kern, struct socket *clcsock)
>> {
>> @@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>>
>> 	/* create internal TCP socket for CLC handshake and fallback */
>> 	smc = smc_sk(sk);
>> -	smc->use_fallback = false; /* assume rdma capability first */
>> -	smc->fallback_rsn = 0;
>> -
>> -	/* default behavior from limit_smc_hs in every net namespace */
>> -	smc->limit_smc_hs = net->smc.limit_smc_hs;
>>
>> 	rc = 0;
>> -	if (!clcsock) {
>> -		rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> -				      &smc->clcsock);
>> -		if (rc) {
>> -			sk_common_release(sk);
>> -			goto out;
>> -		}
>> -
>> -		/* smc_clcsock_release() does not wait smc->clcsock->sk's
>> -		 * destruction;  its sk_state might not be TCP_CLOSE after
>> -		 * smc->sk is close()d, and TCP timers can be fired later,
>> -		 * which need net ref.
>> -		 */
>> -		sk = smc->clcsock->sk;
>> -		__netns_tracker_free(net, &sk->ns_tracker, false);
>> -		sk->sk_net_refcnt = 1;
>> -		get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> -		sock_inuse_add(net, 1);
>> -	} else {
>> +	if (!clcsock)
>> +		rc = __smc_create_clcsk(net, sk, family);
>> +	else
>> 		smc->clcsock = clcsock;
>> -	}
>> -
>> out:
>> 	return rc;
>> }
>> -- 
>> 1.8.3.1
>>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-11  2:23     ` D. Wythe
@ 2024-05-11  2:46       ` Dust Li
  2024-05-11  3:02         ` D. Wythe
  0 siblings, 1 reply; 15+ messages in thread
From: Dust Li @ 2024-05-11  2:46 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

On 2024-05-11 10:23:31, D. Wythe wrote:
>
>
>On 5/10/24 5:57 PM, Dust Li wrote:
>> On 2024-05-10 12:12:13, D. Wythe wrote:
>> > From: "D. Wythe" <alibuda@linux.alibaba.com>
>> > 
>> > This patch allows to create smc socket via AF_INET,
>> > similar to the following code,
>> > 
>> > /* create v4 smc sock */
>> > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>> > 
>> > /* create v6 smc sock */
>> > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>> > 
>> > There are several reasons why we believe it is appropriate here:
>> > 
>> > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> > address. There is no AF_SMC address at all.
>> > 
>> > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> > the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> > Otherwise, smc have to implement it again in AF_SMC path.
>> > 
>> > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> > ---
>> > include/uapi/linux/in.h |   2 +
>> > net/smc/af_smc.c        | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
>> > net/smc/inet_smc.h      |  32 ++++++++++++
>> > 3 files changed, 162 insertions(+), 1 deletion(-)
>> > create mode 100644 net/smc/inet_smc.h
>> > 
>> > diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> > index e682ab6..74c12e33 100644
>> > --- a/include/uapi/linux/in.h
>> > +++ b/include/uapi/linux/in.h
>> > @@ -83,6 +83,8 @@ enum {
>> > #define IPPROTO_RAW		IPPROTO_RAW
>> >    IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
>> > #define IPPROTO_MPTCP		IPPROTO_MPTCP
>> > +  IPPROTO_SMC = 263,		/* Shared Memory Communications */
>>                                                             ^ use tab to align here
>
>There is a problem here, all previous definitions were aligned with 2 spaces.

I mean the tab in the annotation in the end, not the space at the beginning.

>
>> > +#define IPPROTO_SMC		IPPROTO_SMC
>> >    IPPROTO_MAX
>> > };
>> > #endif
>> > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> > index 1f03724..b4557828 100644
>> > --- a/net/smc/af_smc.c
>> > +++ b/net/smc/af_smc.c
>> > @@ -54,6 +54,7 @@
>> > #include "smc_tracepoint.h"
>> > #include "smc_sysctl.h"
>> > #include "smc_loopback.h"
>> > +#include "inet_smc.h"
>> > 
>> > static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>> > 						 * creation on server
>> > @@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
>> > 	.create	= smc_create,
>> > };
>> > 
>> Why not put those whole bunch of inet staff into smc_inet.c ?
>> Looks like your smc_inet.h is meanless without smc_inet.c
>> 
>
>This header file was originally reserved for future merging of socks. If
>nobody likes it, I can move it to the
>af_smc.c

I prefer adding a new smc_inet.c, af_smc.c is already very large.

Best regards,
Dust

>
>> > +int smc_inet_init_sock(struct sock *sk)
>> > +{
>> > +	struct net *net = sock_net(sk);
>> > +
>> > +	/* init common smc sock */
>> > +	smc_sock_init(net, sk, IPPROTO_SMC);
>> > +	/* create clcsock */
>> > +	return __smc_create_clcsk(net, sk, sk->sk_family);
>> > +}
>> > +
>> > static int smc_ulp_init(struct sock *sk)
>> > {
>> > 	struct socket *tcp = sk->sk_socket;
>> > @@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
>> > 	.clone		= smc_ulp_clone,
>> > };
>> > 
>> > +struct proto smc_inet_prot = {
>> > +	.name			= "INET_SMC",
>> > +	.owner			= THIS_MODULE,
>> > +	.init			= smc_inet_init_sock,
>> > +	.hash			= smc_hash_sk,
>> > +	.unhash			= smc_unhash_sk,
>> > +	.release_cb		= smc_release_cb,
>> > +	.obj_size		= sizeof(struct smc_sock),
>> > +	.h.smc_hash	= &smc_v4_hashinfo,
>> > +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>>                  ^
>> Align please.
>> 
>Got it.
>> > +};
>> > +
>> > +const struct proto_ops smc_inet_stream_ops = {
>> > +	.family		= PF_INET,
>> > +	.owner		= THIS_MODULE,
>> > +	.release	= smc_release,
>> > +	.bind		= smc_bind,
>> > +	.connect	= smc_connect,
>> > +	.socketpair	= sock_no_socketpair,
>> > +	.accept		= smc_accept,
>> > +	.getname	= smc_getname,
>> > +	.poll		= smc_poll,
>> > +	.ioctl		= smc_ioctl,
>> > +	.listen		= smc_listen,
>> > +	.shutdown	= smc_shutdown,
>> > +	.setsockopt	= smc_setsockopt,
>> > +	.getsockopt	= smc_getsockopt,
>> > +	.sendmsg	= smc_sendmsg,
>> > +	.recvmsg	= smc_recvmsg,
>> > +	.mmap		= sock_no_mmap,
>> > +	.splice_read	= smc_splice_read,
>> Ditto
>> 
>> > +};
>> > +
>> > +struct inet_protosw smc_inet_protosw = {
>> > +	.type       = SOCK_STREAM,
>> > +	.protocol   = IPPROTO_SMC,
>> > +	.prot   = &smc_inet_prot,
>> Ditto
>> 
>> > +	.ops    = &smc_inet_stream_ops,
>> > +	.flags  = INET_PROTOSW_ICSK,
>> > +};
>> > +
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +struct proto smc_inet6_prot = {
>> > +	.name			= "INET6_SMC",
>> > +	.owner			= THIS_MODULE,
>> > +	.init			= smc_inet_init_sock,
>> > +	.hash			= smc_hash_sk,
>> > +	.unhash			= smc_unhash_sk,
>> > +	.release_cb		= smc_release_cb,
>> > +	.obj_size		= sizeof(struct smc_sock),
>> > +	.h.smc_hash		= &smc_v6_hashinfo,
>> > +	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
>> > +};
>> > +
>> > +const struct proto_ops smc_inet6_stream_ops = {
>> > +	.family		= PF_INET6,
>> > +	.owner		= THIS_MODULE,
>> > +	.release	= smc_release,
>> > +	.bind		= smc_bind,
>> > +	.connect	= smc_connect,
>> > +	.socketpair	= sock_no_socketpair,
>> > +	.accept		= smc_accept,
>> > +	.getname	= smc_getname,
>> > +	.poll		= smc_poll,
>> > +	.ioctl		= smc_ioctl,
>> > +	.listen		= smc_listen,
>> > +	.shutdown	= smc_shutdown,
>> > +	.setsockopt	= smc_setsockopt,
>> > +	.getsockopt	= smc_getsockopt,
>> > +	.sendmsg	= smc_sendmsg,
>> > +	.recvmsg	= smc_recvmsg,
>> > +	.mmap		= sock_no_mmap,
>> > +	.splice_read	= smc_splice_read,
>> Ditto
>> 
>> > +};
>> > +
>> > +struct inet_protosw smc_inet6_protosw = {
>> > +	.type       = SOCK_STREAM,
>> > +	.protocol   = IPPROTO_SMC,
>> > +	.prot   = &smc_inet6_prot,
>> > +	.ops    = &smc_inet6_stream_ops,
>> > +	.flags  = INET_PROTOSW_ICSK,
>> Ditto
>> 
>> > +};
>> > +#endif
>> > +
>> > unsigned int smc_net_id;
>> > 
>> > static __net_init int smc_net_init(struct net *net)
>> > @@ -3595,9 +3690,28 @@ static int __init smc_init(void)
>> > 		goto out_lo;
>> > 	}
>> > 
>> > +	rc = proto_register(&smc_inet_prot, 1);
>> > +	if (rc) {
>> > +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>> > +		goto out_ulp;
>> > +	}
>> > +	inet_register_protosw(&smc_inet_protosw);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +	rc = proto_register(&smc_inet6_prot, 1);
>> > +	if (rc) {
>> > +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>> > +		goto out_inet_prot;
>> > +	}
>> > +	inet6_register_protosw(&smc_inet6_protosw);
>> > +#endif
>> > +
>> > 	static_branch_enable(&tcp_have_smc);
>> > 	return 0;
>> > -
>> > +out_inet_prot:
>> > +	inet_unregister_protosw(&smc_inet_protosw);
>> > +	proto_unregister(&smc_inet_prot);
>> > +out_ulp:
>> > +	tcp_unregister_ulp(&smc_ulp_ops);
>> > out_lo:
>> > 	smc_loopback_exit();
>> > out_ib:
>> > @@ -3634,6 +3748,10 @@ static int __init smc_init(void)
>> > static void __exit smc_exit(void)
>> > {
>> > 	static_branch_disable(&tcp_have_smc);
>> > +	inet_unregister_protosw(&smc_inet_protosw);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +	inet6_unregister_protosw(&smc_inet6_protosw);
>> > +#endif
>> > 	tcp_unregister_ulp(&smc_ulp_ops);
>> > 	sock_unregister(PF_SMC);
>> > 	smc_core_exit();
>> > @@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
>> > 	destroy_workqueue(smc_hs_wq);
>> > 	proto_unregister(&smc_proto6);
>> > 	proto_unregister(&smc_proto);
>> > +	proto_unregister(&smc_inet_prot);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +	proto_unregister(&smc_inet6_prot);
>> > +#endif
>> > 	smc_pnet_exit();
>> > 	smc_nl_exit();
>> > 	smc_clc_exit();
>> > @@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
>> > MODULE_LICENSE("GPL");
>> > MODULE_ALIAS_NETPROTO(PF_SMC);
>> > MODULE_ALIAS_TCP_ULP("smc");
>> > +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> > +#endif
>> > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> > diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> > new file mode 100644
>> > index 00000000..fcdcb61
>> > --- /dev/null
>> > +++ b/net/smc/inet_smc.h
>> > @@ -0,0 +1,32 @@
>> > +/* SPDX-License-Identifier: GPL-2.0 */
>> > +/*
>> > + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> > + *
>> > + *  Definitions for the SMC module (socket related)
>> > +
>> > + *  Copyright IBM Corp. 2016
>> You should update this.
>Got it.
>> > + *
>> > + */
>> > +#ifndef __INET_SMC
>> > +#define __INET_SMC
>> > +
>> > +#include <net/protocol.h>
>> > +#include <net/sock.h>
>> > +#include <net/tcp.h>
>> > +
>> > +extern struct proto smc_inet_prot;
>> > +extern const struct proto_ops smc_inet_stream_ops;
>> > +extern struct inet_protosw smc_inet_protosw;
>> > +
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +#include <net/ipv6.h>
>> > +/* MUST after net/tcp.h or warning */
>> > +#include <net/transp_v6.h>
>> > +extern struct proto smc_inet6_prot;
>> > +extern const struct proto_ops smc_inet6_stream_ops;
>> > +extern struct inet_protosw smc_inet6_protosw;
>> > +#endif
>> > +
>> > +int smc_inet_init_sock(struct sock *sk);
>> > +
>> > +#endif // __INET_SMC
>>           ^
>>           use /* __INET_SMC */ instead
>> 
>> > -- 
>> > 1.8.3.1
>> > 
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
  2024-05-11  2:46       ` Dust Li
@ 2024-05-11  3:02         ` D. Wythe
  0 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-11  3:02 UTC (permalink / raw)
  To: dust.li, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet



On 5/11/24 10:46 AM, Dust Li wrote:
> On 2024-05-11 10:23:31, D. Wythe wrote:
>>
>> On 5/10/24 5:57 PM, Dust Li wrote:
>>> On 2024-05-10 12:12:13, D. Wythe wrote:
>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>
>>>> This patch allows to create smc socket via AF_INET,
>>>> similar to the following code,
>>>>
>>>> /* create v4 smc sock */
>>>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>>>
>>>> /* create v6 smc sock */
>>>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>>>
>>>> There are several reasons why we believe it is appropriate here:
>>>>
>>>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>>>> address. There is no AF_SMC address at all.
>>>>
>>>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>>>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>>>> Otherwise, smc have to implement it again in AF_SMC path.
>>>>
>>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>>> ---
>>>> include/uapi/linux/in.h |   2 +
>>>> net/smc/af_smc.c        | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
>>>> net/smc/inet_smc.h      |  32 ++++++++++++
>>>> 3 files changed, 162 insertions(+), 1 deletion(-)
>>>> create mode 100644 net/smc/inet_smc.h
>>>>
>>>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>>>> index e682ab6..74c12e33 100644
>>>> --- a/include/uapi/linux/in.h
>>>> +++ b/include/uapi/linux/in.h
>>>> @@ -83,6 +83,8 @@ enum {
>>>> #define IPPROTO_RAW		IPPROTO_RAW
>>>>     IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
>>>> #define IPPROTO_MPTCP		IPPROTO_MPTCP
>>>> +  IPPROTO_SMC = 263,		/* Shared Memory Communications */
>>>                                                              ^ use tab to align here
>> There is a problem here, all previous definitions were aligned with 2 spaces.
> I mean the tab in the annotation in the end, not the space at the beginning.

Oh... that's true.  Thanks for that.

>>>> +#define IPPROTO_SMC		IPPROTO_SMC
>>>>     IPPROTO_MAX
>>>> };
>>>> #endif
>>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>>> index 1f03724..b4557828 100644
>>>> --- a/net/smc/af_smc.c
>>>> +++ b/net/smc/af_smc.c
>>>> @@ -54,6 +54,7 @@
>>>> #include "smc_tracepoint.h"
>>>> #include "smc_sysctl.h"
>>>> #include "smc_loopback.h"
>>>> +#include "inet_smc.h"
>>>>
>>>> static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>>>> 						 * creation on server
>>>> @@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
>>>> 	.create	= smc_create,
>>>> };
>>>>
>>> Why not put those whole bunch of inet staff into smc_inet.c ?
>>> Looks like your smc_inet.h is meanless without smc_inet.c
>>>
>> This header file was originally reserved for future merging of socks. If
>> nobody likes it, I can move it to the
>> af_smc.c
> I prefer adding a new smc_inet.c, af_smc.c is already very large.
>
> Best regards,
> Dust

Sounds Reasonable. I'll try it in next version.

Thanks,
D. Wythe
>
>>>> +int smc_inet_init_sock(struct sock *sk)
>>>> +{
>>>> +	struct net *net = sock_net(sk);
>>>> +
>>>> +	/* init common smc sock */
>>>> +	smc_sock_init(net, sk, IPPROTO_SMC);
>>>> +	/* create clcsock */
>>>> +	return __smc_create_clcsk(net, sk, sk->sk_family);
>>>> +}
>>>> +
>>>> static int smc_ulp_init(struct sock *sk)
>>>> {
>>>> 	struct socket *tcp = sk->sk_socket;
>>>> @@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
>>>> 	.clone		= smc_ulp_clone,
>>>> };
>>>>
>>>> +struct proto smc_inet_prot = {
>>>> +	.name			= "INET_SMC",
>>>> +	.owner			= THIS_MODULE,
>>>> +	.init			= smc_inet_init_sock,
>>>> +	.hash			= smc_hash_sk,
>>>> +	.unhash			= smc_unhash_sk,
>>>> +	.release_cb		= smc_release_cb,
>>>> +	.obj_size		= sizeof(struct smc_sock),
>>>> +	.h.smc_hash	= &smc_v4_hashinfo,
>>>> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>>>                   ^
>>> Align please.
>>>
>> Got it.
>>>> +};
>>>> +
>>>> +const struct proto_ops smc_inet_stream_ops = {
>>>> +	.family		= PF_INET,
>>>> +	.owner		= THIS_MODULE,
>>>> +	.release	= smc_release,
>>>> +	.bind		= smc_bind,
>>>> +	.connect	= smc_connect,
>>>> +	.socketpair	= sock_no_socketpair,
>>>> +	.accept		= smc_accept,
>>>> +	.getname	= smc_getname,
>>>> +	.poll		= smc_poll,
>>>> +	.ioctl		= smc_ioctl,
>>>> +	.listen		= smc_listen,
>>>> +	.shutdown	= smc_shutdown,
>>>> +	.setsockopt	= smc_setsockopt,
>>>> +	.getsockopt	= smc_getsockopt,
>>>> +	.sendmsg	= smc_sendmsg,
>>>> +	.recvmsg	= smc_recvmsg,
>>>> +	.mmap		= sock_no_mmap,
>>>> +	.splice_read	= smc_splice_read,
>>> Ditto
>>>
>>>> +};
>>>> +
>>>> +struct inet_protosw smc_inet_protosw = {
>>>> +	.type       = SOCK_STREAM,
>>>> +	.protocol   = IPPROTO_SMC,
>>>> +	.prot   = &smc_inet_prot,
>>> Ditto
>>>
>>>> +	.ops    = &smc_inet_stream_ops,
>>>> +	.flags  = INET_PROTOSW_ICSK,
>>>> +};
>>>> +
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +struct proto smc_inet6_prot = {
>>>> +	.name			= "INET6_SMC",
>>>> +	.owner			= THIS_MODULE,
>>>> +	.init			= smc_inet_init_sock,
>>>> +	.hash			= smc_hash_sk,
>>>> +	.unhash			= smc_unhash_sk,
>>>> +	.release_cb		= smc_release_cb,
>>>> +	.obj_size		= sizeof(struct smc_sock),
>>>> +	.h.smc_hash		= &smc_v6_hashinfo,
>>>> +	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
>>>> +};
>>>> +
>>>> +const struct proto_ops smc_inet6_stream_ops = {
>>>> +	.family		= PF_INET6,
>>>> +	.owner		= THIS_MODULE,
>>>> +	.release	= smc_release,
>>>> +	.bind		= smc_bind,
>>>> +	.connect	= smc_connect,
>>>> +	.socketpair	= sock_no_socketpair,
>>>> +	.accept		= smc_accept,
>>>> +	.getname	= smc_getname,
>>>> +	.poll		= smc_poll,
>>>> +	.ioctl		= smc_ioctl,
>>>> +	.listen		= smc_listen,
>>>> +	.shutdown	= smc_shutdown,
>>>> +	.setsockopt	= smc_setsockopt,
>>>> +	.getsockopt	= smc_getsockopt,
>>>> +	.sendmsg	= smc_sendmsg,
>>>> +	.recvmsg	= smc_recvmsg,
>>>> +	.mmap		= sock_no_mmap,
>>>> +	.splice_read	= smc_splice_read,
>>> Ditto
>>>
>>>> +};
>>>> +
>>>> +struct inet_protosw smc_inet6_protosw = {
>>>> +	.type       = SOCK_STREAM,
>>>> +	.protocol   = IPPROTO_SMC,
>>>> +	.prot   = &smc_inet6_prot,
>>>> +	.ops    = &smc_inet6_stream_ops,
>>>> +	.flags  = INET_PROTOSW_ICSK,
>>> Ditto
>>>
>>>> +};
>>>> +#endif
>>>> +
>>>> unsigned int smc_net_id;
>>>>
>>>> static __net_init int smc_net_init(struct net *net)
>>>> @@ -3595,9 +3690,28 @@ static int __init smc_init(void)
>>>> 		goto out_lo;
>>>> 	}
>>>>
>>>> +	rc = proto_register(&smc_inet_prot, 1);
>>>> +	if (rc) {
>>>> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>>>> +		goto out_ulp;
>>>> +	}
>>>> +	inet_register_protosw(&smc_inet_protosw);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +	rc = proto_register(&smc_inet6_prot, 1);
>>>> +	if (rc) {
>>>> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>>>> +		goto out_inet_prot;
>>>> +	}
>>>> +	inet6_register_protosw(&smc_inet6_protosw);
>>>> +#endif
>>>> +
>>>> 	static_branch_enable(&tcp_have_smc);
>>>> 	return 0;
>>>> -
>>>> +out_inet_prot:
>>>> +	inet_unregister_protosw(&smc_inet_protosw);
>>>> +	proto_unregister(&smc_inet_prot);
>>>> +out_ulp:
>>>> +	tcp_unregister_ulp(&smc_ulp_ops);
>>>> out_lo:
>>>> 	smc_loopback_exit();
>>>> out_ib:
>>>> @@ -3634,6 +3748,10 @@ static int __init smc_init(void)
>>>> static void __exit smc_exit(void)
>>>> {
>>>> 	static_branch_disable(&tcp_have_smc);
>>>> +	inet_unregister_protosw(&smc_inet_protosw);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +	inet6_unregister_protosw(&smc_inet6_protosw);
>>>> +#endif
>>>> 	tcp_unregister_ulp(&smc_ulp_ops);
>>>> 	sock_unregister(PF_SMC);
>>>> 	smc_core_exit();
>>>> @@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
>>>> 	destroy_workqueue(smc_hs_wq);
>>>> 	proto_unregister(&smc_proto6);
>>>> 	proto_unregister(&smc_proto);
>>>> +	proto_unregister(&smc_inet_prot);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +	proto_unregister(&smc_inet6_prot);
>>>> +#endif
>>>> 	smc_pnet_exit();
>>>> 	smc_nl_exit();
>>>> 	smc_clc_exit();
>>>> @@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
>>>> MODULE_LICENSE("GPL");
>>>> MODULE_ALIAS_NETPROTO(PF_SMC);
>>>> MODULE_ALIAS_TCP_ULP("smc");
>>>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>>>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>>>> +#endif
>>>> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>>>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>>>> new file mode 100644
>>>> index 00000000..fcdcb61
>>>> --- /dev/null
>>>> +++ b/net/smc/inet_smc.h
>>>> @@ -0,0 +1,32 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>>>> + *
>>>> + *  Definitions for the SMC module (socket related)
>>>> +
>>>> + *  Copyright IBM Corp. 2016
>>> You should update this.
>> Got it.
>>>> + *
>>>> + */
>>>> +#ifndef __INET_SMC
>>>> +#define __INET_SMC
>>>> +
>>>> +#include <net/protocol.h>
>>>> +#include <net/sock.h>
>>>> +#include <net/tcp.h>
>>>> +
>>>> +extern struct proto smc_inet_prot;
>>>> +extern const struct proto_ops smc_inet_stream_ops;
>>>> +extern struct inet_protosw smc_inet_protosw;
>>>> +
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +#include <net/ipv6.h>
>>>> +/* MUST after net/tcp.h or warning */
>>>> +#include <net/transp_v6.h>
>>>> +extern struct proto smc_inet6_prot;
>>>> +extern const struct proto_ops smc_inet6_stream_ops;
>>>> +extern struct inet_protosw smc_inet6_protosw;
>>>> +#endif
>>>> +
>>>> +int smc_inet_init_sock(struct sock *sk);
>>>> +
>>>> +#endif // __INET_SMC
>>>            ^
>>>            use /* __INET_SMC */ instead
>>>
>>>> -- 
>>>> 1.8.3.1
>>>>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
  2024-05-10  4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
  2024-05-10  9:50   ` Dust Li
@ 2024-05-11 12:21   ` Zhu Yanjun
  2024-05-13  3:22     ` D. Wythe
  1 sibling, 1 reply; 15+ messages in thread
From: Zhu Yanjun @ 2024-05-11 12:21 UTC (permalink / raw)
  To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet

在 2024/5/10 6:12, D. Wythe 写道:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> This patch aims to isolate the shared components of SMC socket
> allocation by introducing smc_sock_init() for sock initialization
> and __smc_create_clcsk() for the initialization of clcsock.
> 
> This is in preparation for the subsequent implementation of the
> AF_INET version of SMC.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>   net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
>   1 file changed, 52 insertions(+), 41 deletions(-)
> 
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 9389f0c..1f03724 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
>   		return;
>   }
>   
> -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
> -				   int protocol)
> +static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
>   {
> -	struct smc_sock *smc;
> -	struct proto *prot;
> -	struct sock *sk;
> -
> -	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
> -	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
> -	if (!sk)
> -		return NULL;
> +	struct smc_sock *smc = smc_sk(sk);
>   
> -	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>   	sk->sk_state = SMC_INIT;
> -	sk->sk_destruct = smc_destruct;
>   	sk->sk_protocol = protocol;
> +	mutex_init(&smc->clcsock_release_lock);

Please add mutex_destroy(&smc->clcsock_release_lock); when 
smc->clcsock_release_lock is no longer used.

Or else some tools will notify errors.

Zhu Yanjun

>   	WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
>   	WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
> -	smc = smc_sk(sk);
>   	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
>   	INIT_WORK(&smc->connect_work, smc_connect_work);
>   	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
>   	INIT_LIST_HEAD(&smc->accept_q);
>   	spin_lock_init(&smc->accept_q_lock);
>   	spin_lock_init(&smc->conn.send_lock);
> -	sk->sk_prot->hash(sk);
> -	mutex_init(&smc->clcsock_release_lock);
>   	smc_init_saved_callbacks(smc);
> +	smc->limit_smc_hs = net->smc.limit_smc_hs;
> +	smc->use_fallback = false; /* assume rdma capability first */
> +	smc->fallback_rsn = 0;
> +
> +	sk->sk_destruct = smc_destruct;
> +	sk->sk_prot->hash(sk);
> +}
> +
> +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
> +				   int protocol)
> +{
> +	struct proto *prot;
> +	struct sock *sk;
> +
> +	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
> +	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
> +	if (!sk)
> +		return NULL;
> +
> +	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
> +	smc_sock_init(net, sk, protocol);
>   
>   	return sk;
>   }
> @@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
>   	.splice_read	= smc_splice_read,
>   };
>   
> +static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
> +{
> +	struct smc_sock *smc = smc_sk(sk);
> +	int rc;
> +
> +	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
> +			      &smc->clcsock);
> +	if (rc) {
> +		sk_common_release(sk);
> +		return rc;
> +	}
> +
> +	/* smc_clcsock_release() does not wait smc->clcsock->sk's
> +	 * destruction;  its sk_state might not be TCP_CLOSE after
> +	 * smc->sk is close()d, and TCP timers can be fired later,
> +	 * which need net ref.
> +	 */
> +	sk = smc->clcsock->sk;
> +	__netns_tracker_free(net, &sk->ns_tracker, false);
> +	sk->sk_net_refcnt = 1;
> +	get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
> +	sock_inuse_add(net, 1);
> +	return 0;
> +}
> +
>   static int __smc_create(struct net *net, struct socket *sock, int protocol,
>   			int kern, struct socket *clcsock)
>   {
> @@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>   
>   	/* create internal TCP socket for CLC handshake and fallback */
>   	smc = smc_sk(sk);
> -	smc->use_fallback = false; /* assume rdma capability first */
> -	smc->fallback_rsn = 0;
> -
> -	/* default behavior from limit_smc_hs in every net namespace */
> -	smc->limit_smc_hs = net->smc.limit_smc_hs;
>   
>   	rc = 0;
> -	if (!clcsock) {
> -		rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
> -				      &smc->clcsock);
> -		if (rc) {
> -			sk_common_release(sk);
> -			goto out;
> -		}
> -
> -		/* smc_clcsock_release() does not wait smc->clcsock->sk's
> -		 * destruction;  its sk_state might not be TCP_CLOSE after
> -		 * smc->sk is close()d, and TCP timers can be fired later,
> -		 * which need net ref.
> -		 */
> -		sk = smc->clcsock->sk;
> -		__netns_tracker_free(net, &sk->ns_tracker, false);
> -		sk->sk_net_refcnt = 1;
> -		get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
> -		sock_inuse_add(net, 1);
> -	} else {
> +	if (!clcsock)
> +		rc = __smc_create_clcsk(net, sk, family);
> +	else
>   		smc->clcsock = clcsock;
> -	}
> -
>   out:
>   	return rc;
>   }


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
  2024-05-11 12:21   ` Zhu Yanjun
@ 2024-05-13  3:22     ` D. Wythe
  0 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-13  3:22 UTC (permalink / raw)
  To: Zhu Yanjun, kgraul, wenjia, jaka, wintera, guwen
  Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet



On 5/11/24 8:21 PM, Zhu Yanjun wrote:
> 在 2024/5/10 6:12, D. Wythe 写道:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch aims to isolate the shared components of SMC socket
>> allocation by introducing smc_sock_init() for sock initialization
>> and __smc_create_clcsk() for the initialization of clcsock.
>>
>> This is in preparation for the subsequent implementation of the
>> AF_INET version of SMC.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   net/smc/af_smc.c | 93 
>> +++++++++++++++++++++++++++++++-------------------------
>>   1 file changed, 52 insertions(+), 41 deletions(-)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 9389f0c..1f03724 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
>>           return;
>>   }
>>   -static struct sock *smc_sock_alloc(struct net *net, struct socket 
>> *sock,
>> -                   int protocol)
>> +static void smc_sock_init(struct net *net, struct sock *sk, int 
>> protocol)
>>   {
>> -    struct smc_sock *smc;
>> -    struct proto *prot;
>> -    struct sock *sk;
>> -
>> -    prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> -    sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> -    if (!sk)
>> -        return NULL;
>> +    struct smc_sock *smc = smc_sk(sk);
>>   -    sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>>       sk->sk_state = SMC_INIT;
>> -    sk->sk_destruct = smc_destruct;
>>       sk->sk_protocol = protocol;
>> +    mutex_init(&smc->clcsock_release_lock);
>
> Please add mutex_destroy(&smc->clcsock_release_lock); when 
> smc->clcsock_release_lock is no longer used.
>
> Or else some tools will notify errors.
>
> Zhu Yanjun


It seems that the problem you mentioned is not caused by this patch, 
after all, this patch is solely for refactoring.
Adding the fix you mentioned in this refactoring patch would not be 
appropriate. Perhaps, you could submit a separate
patch to address the issue. What do you think?

D. Wythe

>
>>       WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
>>       WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
>> -    smc = smc_sk(sk);
>>       INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
>>       INIT_WORK(&smc->connect_work, smc_connect_work);
>>       INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
>>       INIT_LIST_HEAD(&smc->accept_q);
>>       spin_lock_init(&smc->accept_q_lock);
>>       spin_lock_init(&smc->conn.send_lock);
>> -    sk->sk_prot->hash(sk);
>> -    mutex_init(&smc->clcsock_release_lock);
>>       smc_init_saved_callbacks(smc);
>> +    smc->limit_smc_hs = net->smc.limit_smc_hs;
>> +    smc->use_fallback = false; /* assume rdma capability first */
>> +    smc->fallback_rsn = 0;
>> +
>> +    sk->sk_destruct = smc_destruct;
>> +    sk->sk_prot->hash(sk);
>> +}
>> +
>> +static struct sock *smc_sock_alloc(struct net *net, struct socket 
>> *sock,
>> +                   int protocol)
>> +{
>> +    struct proto *prot;
>> +    struct sock *sk;
>> +
>> +    prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> +    sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> +    if (!sk)
>> +        return NULL;
>> +
>> +    sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> +    smc_sock_init(net, sk, protocol);
>>         return sk;
>>   }
>> @@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket 
>> *sock, loff_t *ppos,
>>       .splice_read    = smc_splice_read,
>>   };
>>   +static int __smc_create_clcsk(struct net *net, struct sock *sk, 
>> int family)
>> +{
>> +    struct smc_sock *smc = smc_sk(sk);
>> +    int rc;
>> +
>> +    rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> +                  &smc->clcsock);
>> +    if (rc) {
>> +        sk_common_release(sk);
>> +        return rc;
>> +    }
>> +
>> +    /* smc_clcsock_release() does not wait smc->clcsock->sk's
>> +     * destruction;  its sk_state might not be TCP_CLOSE after
>> +     * smc->sk is close()d, and TCP timers can be fired later,
>> +     * which need net ref.
>> +     */
>> +    sk = smc->clcsock->sk;
>> +    __netns_tracker_free(net, &sk->ns_tracker, false);
>> +    sk->sk_net_refcnt = 1;
>> +    get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> +    sock_inuse_add(net, 1);
>> +    return 0;
>> +}
>> +
>>   static int __smc_create(struct net *net, struct socket *sock, int 
>> protocol,
>>               int kern, struct socket *clcsock)
>>   {
>> @@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, 
>> struct socket *sock, int protocol,
>>         /* create internal TCP socket for CLC handshake and fallback */
>>       smc = smc_sk(sk);
>> -    smc->use_fallback = false; /* assume rdma capability first */
>> -    smc->fallback_rsn = 0;
>> -
>> -    /* default behavior from limit_smc_hs in every net namespace */
>> -    smc->limit_smc_hs = net->smc.limit_smc_hs;
>>         rc = 0;
>> -    if (!clcsock) {
>> -        rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> -                      &smc->clcsock);
>> -        if (rc) {
>> -            sk_common_release(sk);
>> -            goto out;
>> -        }
>> -
>> -        /* smc_clcsock_release() does not wait smc->clcsock->sk's
>> -         * destruction;  its sk_state might not be TCP_CLOSE after
>> -         * smc->sk is close()d, and TCP timers can be fired later,
>> -         * which need net ref.
>> -         */
>> -        sk = smc->clcsock->sk;
>> -        __netns_tracker_free(net, &sk->ns_tracker, false);
>> -        sk->sk_net_refcnt = 1;
>> -        get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> -        sock_inuse_add(net, 1);
>> -    } else {
>> +    if (!clcsock)
>> +        rc = __smc_create_clcsk(net, sk, family);
>> +    else
>>           smc->clcsock = clcsock;
>> -    }
>> -
>>   out:
>>       return rc;
>>   }


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2024-05-13  3:22 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-10  4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
2024-05-10  4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
2024-05-10  9:50   ` Dust Li
2024-05-11  2:26     ` D. Wythe
2024-05-11 12:21   ` Zhu Yanjun
2024-05-13  3:22     ` D. Wythe
2024-05-10  4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
2024-05-10  9:57   ` Dust Li
2024-05-11  2:23     ` D. Wythe
2024-05-11  2:46       ` Dust Li
2024-05-11  3:02         ` D. Wythe
2024-05-10 17:09   ` kernel test robot
2024-05-10 18:32   ` kernel test robot
2024-05-10  9:14 ` [PATCH net-next 0/2] " D. Wythe
2024-05-10 10:22 ` Wenjia Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).