* [PATCH net-next 0/2] Introduce IPPROTO_SMC
@ 2024-05-10 4:12 D. Wythe
2024-05-10 4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10 4:12 UTC (permalink / raw)
To: kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
From: "D. Wythe" <alibuda@linux.alibaba.com>
This patch allows to create smc socket via AF_INET,
similar to the following code,
/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
There are several reasons why we believe it is appropriate here:
1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
address. There is no AF_SMC address at all.
2. Create smc socket in the AF_INET(6) path, which allows us to reuse
the infrastructure of AF_INET(6) path, such as common ebpf hooks.
Otherwise, smc have to implement it again in AF_SMC path. Such as:
1. Replace IPPROTO_TCP with IPPROTO_SMC in the socket() syscall
initiated by the user, without the use of LD-PRELOAD.
2. Select whether immediate fallback is required based on peer's port/ip
before connect().
A very significant result is that we can now use eBPF to implement smc_run
instead of LD_PRELOAD, who is completely ineffective in scenarios of static
linking.
Another potential value is that we are attempting to optimize the performance of
fallback socks, where merging socks is an important part, and it relies on the
creation of SMC sockets under the AF_INET path. (More information :
https://lore.kernel.org/netdev/1699442703-25015-1-git-send-email-alibuda@linux.alibaba.com/T/)
D. Wythe (2):
net/smc: refatoring initialization of smc sock
net/smc: Introduce IPPROTO_SMC
include/uapi/linux/in.h | 2 +
net/smc/af_smc.c | 222 +++++++++++++++++++++++++++++++++++++++---------
net/smc/inet_smc.h | 32 +++++++
3 files changed, 214 insertions(+), 42 deletions(-)
create mode 100644 net/smc/inet_smc.h
--
1.8.3.1
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
2024-05-10 4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
@ 2024-05-10 4:12 ` D. Wythe
2024-05-10 9:50 ` Dust Li
2024-05-11 12:21 ` Zhu Yanjun
2024-05-10 4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
` (2 subsequent siblings)
3 siblings, 2 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10 4:12 UTC (permalink / raw)
To: kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
From: "D. Wythe" <alibuda@linux.alibaba.com>
This patch aims to isolate the shared components of SMC socket
allocation by introducing smc_sock_init() for sock initialization
and __smc_create_clcsk() for the initialization of clcsock.
This is in preparation for the subsequent implementation of the
AF_INET version of SMC.
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 52 insertions(+), 41 deletions(-)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 9389f0c..1f03724 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
return;
}
-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
- int protocol)
+static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
{
- struct smc_sock *smc;
- struct proto *prot;
- struct sock *sk;
-
- prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
- if (!sk)
- return NULL;
+ struct smc_sock *smc = smc_sk(sk);
- sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
sk->sk_state = SMC_INIT;
- sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
+ mutex_init(&smc->clcsock_release_lock);
WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
- smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
spin_lock_init(&smc->conn.send_lock);
- sk->sk_prot->hash(sk);
- mutex_init(&smc->clcsock_release_lock);
smc_init_saved_callbacks(smc);
+ smc->limit_smc_hs = net->smc.limit_smc_hs;
+ smc->use_fallback = false; /* assume rdma capability first */
+ smc->fallback_rsn = 0;
+
+ sk->sk_destruct = smc_destruct;
+ sk->sk_prot->hash(sk);
+}
+
+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
+ int protocol)
+{
+ struct proto *prot;
+ struct sock *sk;
+
+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
+ if (!sk)
+ return NULL;
+
+ sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
+ smc_sock_init(net, sk, protocol);
return sk;
}
@@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
.splice_read = smc_splice_read,
};
+static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
+{
+ struct smc_sock *smc = smc_sk(sk);
+ int rc;
+
+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
+ &smc->clcsock);
+ if (rc) {
+ sk_common_release(sk);
+ return rc;
+ }
+
+ /* smc_clcsock_release() does not wait smc->clcsock->sk's
+ * destruction; its sk_state might not be TCP_CLOSE after
+ * smc->sk is close()d, and TCP timers can be fired later,
+ * which need net ref.
+ */
+ sk = smc->clcsock->sk;
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+ return 0;
+}
+
static int __smc_create(struct net *net, struct socket *sock, int protocol,
int kern, struct socket *clcsock)
{
@@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
/* create internal TCP socket for CLC handshake and fallback */
smc = smc_sk(sk);
- smc->use_fallback = false; /* assume rdma capability first */
- smc->fallback_rsn = 0;
-
- /* default behavior from limit_smc_hs in every net namespace */
- smc->limit_smc_hs = net->smc.limit_smc_hs;
rc = 0;
- if (!clcsock) {
- rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
- &smc->clcsock);
- if (rc) {
- sk_common_release(sk);
- goto out;
- }
-
- /* smc_clcsock_release() does not wait smc->clcsock->sk's
- * destruction; its sk_state might not be TCP_CLOSE after
- * smc->sk is close()d, and TCP timers can be fired later,
- * which need net ref.
- */
- sk = smc->clcsock->sk;
- __netns_tracker_free(net, &sk->ns_tracker, false);
- sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
- sock_inuse_add(net, 1);
- } else {
+ if (!clcsock)
+ rc = __smc_create_clcsk(net, sk, family);
+ else
smc->clcsock = clcsock;
- }
-
out:
return rc;
}
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-10 4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
2024-05-10 4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
@ 2024-05-10 4:12 ` D. Wythe
2024-05-10 9:57 ` Dust Li
` (2 more replies)
2024-05-10 9:14 ` [PATCH net-next 0/2] " D. Wythe
2024-05-10 10:22 ` Wenjia Zhang
3 siblings, 3 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10 4:12 UTC (permalink / raw)
To: kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
From: "D. Wythe" <alibuda@linux.alibaba.com>
This patch allows to create smc socket via AF_INET,
similar to the following code,
/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
There are several reasons why we believe it is appropriate here:
1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
address. There is no AF_SMC address at all.
2. Create smc socket in the AF_INET(6) path, which allows us to reuse
the infrastructure of AF_INET(6) path, such as common ebpf hooks.
Otherwise, smc have to implement it again in AF_SMC path.
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
include/uapi/linux/in.h | 2 +
net/smc/af_smc.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
net/smc/inet_smc.h | 32 ++++++++++++
3 files changed, 162 insertions(+), 1 deletion(-)
create mode 100644 net/smc/inet_smc.h
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e682ab6..74c12e33 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -83,6 +83,8 @@ enum {
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
+ IPPROTO_SMC = 263, /* Shared Memory Communications */
+#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MAX
};
#endif
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1f03724..b4557828 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -54,6 +54,7 @@
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
#include "smc_loopback.h"
+#include "inet_smc.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
.create = smc_create,
};
+int smc_inet_init_sock(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+
+ /* init common smc sock */
+ smc_sock_init(net, sk, IPPROTO_SMC);
+ /* create clcsock */
+ return __smc_create_clcsk(net, sk, sk->sk_family);
+}
+
static int smc_ulp_init(struct sock *sk)
{
struct socket *tcp = sk->sk_socket;
@@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
.clone = smc_ulp_clone,
};
+struct proto smc_inet_prot = {
+ .name = "INET_SMC",
+ .owner = THIS_MODULE,
+ .init = smc_inet_init_sock,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v4_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+
+const struct proto_ops smc_inet_stream_ops = {
+ .family = PF_INET,
+ .owner = THIS_MODULE,
+ .release = smc_release,
+ .bind = smc_bind,
+ .connect = smc_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = smc_accept,
+ .getname = smc_getname,
+ .poll = smc_poll,
+ .ioctl = smc_ioctl,
+ .listen = smc_listen,
+ .shutdown = smc_shutdown,
+ .setsockopt = smc_setsockopt,
+ .getsockopt = smc_getsockopt,
+ .sendmsg = smc_sendmsg,
+ .recvmsg = smc_recvmsg,
+ .mmap = sock_no_mmap,
+ .splice_read = smc_splice_read,
+};
+
+struct inet_protosw smc_inet_protosw = {
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_SMC,
+ .prot = &smc_inet_prot,
+ .ops = &smc_inet_stream_ops,
+ .flags = INET_PROTOSW_ICSK,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct proto smc_inet6_prot = {
+ .name = "INET6_SMC",
+ .owner = THIS_MODULE,
+ .init = smc_inet_init_sock,
+ .hash = smc_hash_sk,
+ .unhash = smc_unhash_sk,
+ .release_cb = smc_release_cb,
+ .obj_size = sizeof(struct smc_sock),
+ .h.smc_hash = &smc_v6_hashinfo,
+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
+};
+
+const struct proto_ops smc_inet6_stream_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = smc_release,
+ .bind = smc_bind,
+ .connect = smc_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = smc_accept,
+ .getname = smc_getname,
+ .poll = smc_poll,
+ .ioctl = smc_ioctl,
+ .listen = smc_listen,
+ .shutdown = smc_shutdown,
+ .setsockopt = smc_setsockopt,
+ .getsockopt = smc_getsockopt,
+ .sendmsg = smc_sendmsg,
+ .recvmsg = smc_recvmsg,
+ .mmap = sock_no_mmap,
+ .splice_read = smc_splice_read,
+};
+
+struct inet_protosw smc_inet6_protosw = {
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_SMC,
+ .prot = &smc_inet6_prot,
+ .ops = &smc_inet6_stream_ops,
+ .flags = INET_PROTOSW_ICSK,
+};
+#endif
+
unsigned int smc_net_id;
static __net_init int smc_net_init(struct net *net)
@@ -3595,9 +3690,28 @@ static int __init smc_init(void)
goto out_lo;
}
+ rc = proto_register(&smc_inet_prot, 1);
+ if (rc) {
+ pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
+ goto out_ulp;
+ }
+ inet_register_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+ rc = proto_register(&smc_inet6_prot, 1);
+ if (rc) {
+ pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
+ goto out_inet_prot;
+ }
+ inet6_register_protosw(&smc_inet6_protosw);
+#endif
+
static_branch_enable(&tcp_have_smc);
return 0;
-
+out_inet_prot:
+ inet_unregister_protosw(&smc_inet_protosw);
+ proto_unregister(&smc_inet_prot);
+out_ulp:
+ tcp_unregister_ulp(&smc_ulp_ops);
out_lo:
smc_loopback_exit();
out_ib:
@@ -3634,6 +3748,10 @@ static int __init smc_init(void)
static void __exit smc_exit(void)
{
static_branch_disable(&tcp_have_smc);
+ inet_unregister_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+ inet6_unregister_protosw(&smc_inet6_protosw);
+#endif
tcp_unregister_ulp(&smc_ulp_ops);
sock_unregister(PF_SMC);
smc_core_exit();
@@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
destroy_workqueue(smc_hs_wq);
proto_unregister(&smc_proto6);
proto_unregister(&smc_proto);
+ proto_unregister(&smc_inet_prot);
+#if IS_ENABLED(CONFIG_IPV6)
+ proto_unregister(&smc_inet6_prot);
+#endif
smc_pnet_exit();
smc_nl_exit();
smc_clc_exit();
@@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");
+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
+#endif
MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
new file mode 100644
index 00000000..fcdcb61
--- /dev/null
+++ b/net/smc/inet_smc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Definitions for the SMC module (socket related)
+
+ * Copyright IBM Corp. 2016
+ *
+ */
+#ifndef __INET_SMC
+#define __INET_SMC
+
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+extern struct proto smc_inet_prot;
+extern const struct proto_ops smc_inet_stream_ops;
+extern struct inet_protosw smc_inet_protosw;
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+/* MUST after net/tcp.h or warning */
+#include <net/transp_v6.h>
+extern struct proto smc_inet6_prot;
+extern const struct proto_ops smc_inet6_stream_ops;
+extern struct inet_protosw smc_inet6_protosw;
+#endif
+
+int smc_inet_init_sock(struct sock *sk);
+
+#endif // __INET_SMC
--
1.8.3.1
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 0/2] Introduce IPPROTO_SMC
2024-05-10 4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
2024-05-10 4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
2024-05-10 4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
@ 2024-05-10 9:14 ` D. Wythe
2024-05-10 10:22 ` Wenjia Zhang
3 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-10 9:14 UTC (permalink / raw)
To: kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 5/10/24 12:12 PM, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
>
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
A eBPF version of smc_run is also available here:
https://github.com/D-Wythe/smc-tools/tree/ipproto_smc
You can test IPPROTO_SMC by script:
smc_run.pid COMMAND
While you can still test AF_SMC by script:
smc_run COMMAND
D. Wythe
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
2024-05-10 4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
@ 2024-05-10 9:50 ` Dust Li
2024-05-11 2:26 ` D. Wythe
2024-05-11 12:21 ` Zhu Yanjun
1 sibling, 1 reply; 15+ messages in thread
From: Dust Li @ 2024-05-10 9:50 UTC (permalink / raw)
To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 2024-05-10 12:12:12, D. Wythe wrote:
>From: "D. Wythe" <alibuda@linux.alibaba.com>
>
>This patch aims to isolate the shared components of SMC socket
>allocation by introducing smc_sock_init() for sock initialization
>and __smc_create_clcsk() for the initialization of clcsock.
>
>This is in preparation for the subsequent implementation of the
>AF_INET version of SMC.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 52 insertions(+), 41 deletions(-)
>
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 9389f0c..1f03724 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
> return;
> }
>
>-static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>- int protocol)
>+static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
> {
>- struct smc_sock *smc;
>- struct proto *prot;
>- struct sock *sk;
>-
>- prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>- sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>- if (!sk)
>- return NULL;
>+ struct smc_sock *smc = smc_sk(sk);
>
>- sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
> sk->sk_state = SMC_INIT;
>- sk->sk_destruct = smc_destruct;
> sk->sk_protocol = protocol;
>+ mutex_init(&smc->clcsock_release_lock);
> WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
> WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
>- smc = smc_sk(sk);
> INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
> INIT_WORK(&smc->connect_work, smc_connect_work);
> INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
> INIT_LIST_HEAD(&smc->accept_q);
> spin_lock_init(&smc->accept_q_lock);
> spin_lock_init(&smc->conn.send_lock);
>- sk->sk_prot->hash(sk);
>- mutex_init(&smc->clcsock_release_lock);
> smc_init_saved_callbacks(smc);
>+ smc->limit_smc_hs = net->smc.limit_smc_hs;
>+ smc->use_fallback = false; /* assume rdma capability first */
>+ smc->fallback_rsn = 0;
>+
>+ sk->sk_destruct = smc_destruct;
>+ sk->sk_prot->hash(sk);
Why change the order here ? e.g.
Before:
sk->sk_destruct = smc_destruct;
mutex_init(&smc->clcsock_release_lock);
After
mutex_init(&smc->clcsock_release_lock);
sk->sk_destruct = smc_destruct;
Same for sk->sk_prot->hash(sk)
>+}
>+
>+static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>+ int protocol)
>+{
>+ struct proto *prot;
>+ struct sock *sk;
>+
>+ prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>+ sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>+ if (!sk)
>+ return NULL;
>+
>+ sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>+ smc_sock_init(net, sk, protocol);
>
> return sk;
> }
>@@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
> .splice_read = smc_splice_read,
> };
>
>+static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
Why add '__' prefix here ?
>+{
>+ struct smc_sock *smc = smc_sk(sk);
>+ int rc;
>+
>+ rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>+ &smc->clcsock);
>+ if (rc) {
>+ sk_common_release(sk);
>+ return rc;
>+ }
>+
>+ /* smc_clcsock_release() does not wait smc->clcsock->sk's
>+ * destruction; its sk_state might not be TCP_CLOSE after
>+ * smc->sk is close()d, and TCP timers can be fired later,
>+ * which need net ref.
>+ */
>+ sk = smc->clcsock->sk;
>+ __netns_tracker_free(net, &sk->ns_tracker, false);
>+ sk->sk_net_refcnt = 1;
>+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>+ sock_inuse_add(net, 1);
>+ return 0;
>+}
>+
> static int __smc_create(struct net *net, struct socket *sock, int protocol,
> int kern, struct socket *clcsock)
> {
>@@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>
> /* create internal TCP socket for CLC handshake and fallback */
> smc = smc_sk(sk);
>- smc->use_fallback = false; /* assume rdma capability first */
>- smc->fallback_rsn = 0;
>-
>- /* default behavior from limit_smc_hs in every net namespace */
>- smc->limit_smc_hs = net->smc.limit_smc_hs;
>
> rc = 0;
>- if (!clcsock) {
>- rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>- &smc->clcsock);
>- if (rc) {
>- sk_common_release(sk);
>- goto out;
>- }
>-
>- /* smc_clcsock_release() does not wait smc->clcsock->sk's
>- * destruction; its sk_state might not be TCP_CLOSE after
>- * smc->sk is close()d, and TCP timers can be fired later,
>- * which need net ref.
>- */
>- sk = smc->clcsock->sk;
>- __netns_tracker_free(net, &sk->ns_tracker, false);
>- sk->sk_net_refcnt = 1;
>- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>- sock_inuse_add(net, 1);
>- } else {
>+ if (!clcsock)
>+ rc = __smc_create_clcsk(net, sk, family);
>+ else
> smc->clcsock = clcsock;
>- }
>-
> out:
> return rc;
> }
>--
>1.8.3.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-10 4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
@ 2024-05-10 9:57 ` Dust Li
2024-05-11 2:23 ` D. Wythe
2024-05-10 17:09 ` kernel test robot
2024-05-10 18:32 ` kernel test robot
2 siblings, 1 reply; 15+ messages in thread
From: Dust Li @ 2024-05-10 9:57 UTC (permalink / raw)
To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 2024-05-10 12:12:13, D. Wythe wrote:
>From: "D. Wythe" <alibuda@linux.alibaba.com>
>
>This patch allows to create smc socket via AF_INET,
>similar to the following code,
>
>/* create v4 smc sock */
>v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
>/* create v6 smc sock */
>v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
>There are several reasons why we believe it is appropriate here:
>
>1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>address. There is no AF_SMC address at all.
>
>2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>Otherwise, smc have to implement it again in AF_SMC path.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> include/uapi/linux/in.h | 2 +
> net/smc/af_smc.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
> net/smc/inet_smc.h | 32 ++++++++++++
> 3 files changed, 162 insertions(+), 1 deletion(-)
> create mode 100644 net/smc/inet_smc.h
>
>diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>index e682ab6..74c12e33 100644
>--- a/include/uapi/linux/in.h
>+++ b/include/uapi/linux/in.h
>@@ -83,6 +83,8 @@ enum {
> #define IPPROTO_RAW IPPROTO_RAW
> IPPROTO_MPTCP = 262, /* Multipath TCP connection */
> #define IPPROTO_MPTCP IPPROTO_MPTCP
>+ IPPROTO_SMC = 263, /* Shared Memory Communications */
^ use tab to align here
>+#define IPPROTO_SMC IPPROTO_SMC
> IPPROTO_MAX
> };
> #endif
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 1f03724..b4557828 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -54,6 +54,7 @@
> #include "smc_tracepoint.h"
> #include "smc_sysctl.h"
> #include "smc_loopback.h"
>+#include "inet_smc.h"
>
> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
> * creation on server
>@@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
> .create = smc_create,
> };
>
Why not put those whole bunch of inet staff into smc_inet.c ?
Looks like your smc_inet.h is meanless without smc_inet.c
>+int smc_inet_init_sock(struct sock *sk)
>+{
>+ struct net *net = sock_net(sk);
>+
>+ /* init common smc sock */
>+ smc_sock_init(net, sk, IPPROTO_SMC);
>+ /* create clcsock */
>+ return __smc_create_clcsk(net, sk, sk->sk_family);
>+}
>+
> static int smc_ulp_init(struct sock *sk)
> {
> struct socket *tcp = sk->sk_socket;
>@@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
> .clone = smc_ulp_clone,
> };
>
>+struct proto smc_inet_prot = {
>+ .name = "INET_SMC",
>+ .owner = THIS_MODULE,
>+ .init = smc_inet_init_sock,
>+ .hash = smc_hash_sk,
>+ .unhash = smc_unhash_sk,
>+ .release_cb = smc_release_cb,
>+ .obj_size = sizeof(struct smc_sock),
>+ .h.smc_hash = &smc_v4_hashinfo,
>+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
^
Align please.
>+};
>+
>+const struct proto_ops smc_inet_stream_ops = {
>+ .family = PF_INET,
>+ .owner = THIS_MODULE,
>+ .release = smc_release,
>+ .bind = smc_bind,
>+ .connect = smc_connect,
>+ .socketpair = sock_no_socketpair,
>+ .accept = smc_accept,
>+ .getname = smc_getname,
>+ .poll = smc_poll,
>+ .ioctl = smc_ioctl,
>+ .listen = smc_listen,
>+ .shutdown = smc_shutdown,
>+ .setsockopt = smc_setsockopt,
>+ .getsockopt = smc_getsockopt,
>+ .sendmsg = smc_sendmsg,
>+ .recvmsg = smc_recvmsg,
>+ .mmap = sock_no_mmap,
>+ .splice_read = smc_splice_read,
Ditto
>+};
>+
>+struct inet_protosw smc_inet_protosw = {
>+ .type = SOCK_STREAM,
>+ .protocol = IPPROTO_SMC,
>+ .prot = &smc_inet_prot,
Ditto
>+ .ops = &smc_inet_stream_ops,
>+ .flags = INET_PROTOSW_ICSK,
>+};
>+
>+#if IS_ENABLED(CONFIG_IPV6)
>+struct proto smc_inet6_prot = {
>+ .name = "INET6_SMC",
>+ .owner = THIS_MODULE,
>+ .init = smc_inet_init_sock,
>+ .hash = smc_hash_sk,
>+ .unhash = smc_unhash_sk,
>+ .release_cb = smc_release_cb,
>+ .obj_size = sizeof(struct smc_sock),
>+ .h.smc_hash = &smc_v6_hashinfo,
>+ .slab_flags = SLAB_TYPESAFE_BY_RCU,
>+};
>+
>+const struct proto_ops smc_inet6_stream_ops = {
>+ .family = PF_INET6,
>+ .owner = THIS_MODULE,
>+ .release = smc_release,
>+ .bind = smc_bind,
>+ .connect = smc_connect,
>+ .socketpair = sock_no_socketpair,
>+ .accept = smc_accept,
>+ .getname = smc_getname,
>+ .poll = smc_poll,
>+ .ioctl = smc_ioctl,
>+ .listen = smc_listen,
>+ .shutdown = smc_shutdown,
>+ .setsockopt = smc_setsockopt,
>+ .getsockopt = smc_getsockopt,
>+ .sendmsg = smc_sendmsg,
>+ .recvmsg = smc_recvmsg,
>+ .mmap = sock_no_mmap,
>+ .splice_read = smc_splice_read,
Ditto
>+};
>+
>+struct inet_protosw smc_inet6_protosw = {
>+ .type = SOCK_STREAM,
>+ .protocol = IPPROTO_SMC,
>+ .prot = &smc_inet6_prot,
>+ .ops = &smc_inet6_stream_ops,
>+ .flags = INET_PROTOSW_ICSK,
Ditto
>+};
>+#endif
>+
> unsigned int smc_net_id;
>
> static __net_init int smc_net_init(struct net *net)
>@@ -3595,9 +3690,28 @@ static int __init smc_init(void)
> goto out_lo;
> }
>
>+ rc = proto_register(&smc_inet_prot, 1);
>+ if (rc) {
>+ pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>+ goto out_ulp;
>+ }
>+ inet_register_protosw(&smc_inet_protosw);
>+#if IS_ENABLED(CONFIG_IPV6)
>+ rc = proto_register(&smc_inet6_prot, 1);
>+ if (rc) {
>+ pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>+ goto out_inet_prot;
>+ }
>+ inet6_register_protosw(&smc_inet6_protosw);
>+#endif
>+
> static_branch_enable(&tcp_have_smc);
> return 0;
>-
>+out_inet_prot:
>+ inet_unregister_protosw(&smc_inet_protosw);
>+ proto_unregister(&smc_inet_prot);
>+out_ulp:
>+ tcp_unregister_ulp(&smc_ulp_ops);
> out_lo:
> smc_loopback_exit();
> out_ib:
>@@ -3634,6 +3748,10 @@ static int __init smc_init(void)
> static void __exit smc_exit(void)
> {
> static_branch_disable(&tcp_have_smc);
>+ inet_unregister_protosw(&smc_inet_protosw);
>+#if IS_ENABLED(CONFIG_IPV6)
>+ inet6_unregister_protosw(&smc_inet6_protosw);
>+#endif
> tcp_unregister_ulp(&smc_ulp_ops);
> sock_unregister(PF_SMC);
> smc_core_exit();
>@@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
> destroy_workqueue(smc_hs_wq);
> proto_unregister(&smc_proto6);
> proto_unregister(&smc_proto);
>+ proto_unregister(&smc_inet_prot);
>+#if IS_ENABLED(CONFIG_IPV6)
>+ proto_unregister(&smc_inet6_prot);
>+#endif
> smc_pnet_exit();
> smc_nl_exit();
> smc_clc_exit();
>@@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
> MODULE_LICENSE("GPL");
> MODULE_ALIAS_NETPROTO(PF_SMC);
> MODULE_ALIAS_TCP_ULP("smc");
>+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>+#if IS_ENABLED(CONFIG_IPV6)
>+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>+#endif
> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>new file mode 100644
>index 00000000..fcdcb61
>--- /dev/null
>+++ b/net/smc/inet_smc.h
>@@ -0,0 +1,32 @@
>+/* SPDX-License-Identifier: GPL-2.0 */
>+/*
>+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
>+ *
>+ * Definitions for the SMC module (socket related)
>+
>+ * Copyright IBM Corp. 2016
You should update this.
>+ *
>+ */
>+#ifndef __INET_SMC
>+#define __INET_SMC
>+
>+#include <net/protocol.h>
>+#include <net/sock.h>
>+#include <net/tcp.h>
>+
>+extern struct proto smc_inet_prot;
>+extern const struct proto_ops smc_inet_stream_ops;
>+extern struct inet_protosw smc_inet_protosw;
>+
>+#if IS_ENABLED(CONFIG_IPV6)
>+#include <net/ipv6.h>
>+/* MUST after net/tcp.h or warning */
>+#include <net/transp_v6.h>
>+extern struct proto smc_inet6_prot;
>+extern const struct proto_ops smc_inet6_stream_ops;
>+extern struct inet_protosw smc_inet6_protosw;
>+#endif
>+
>+int smc_inet_init_sock(struct sock *sk);
>+
>+#endif // __INET_SMC
^
use /* __INET_SMC */ instead
>--
>1.8.3.1
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 0/2] Introduce IPPROTO_SMC
2024-05-10 4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
` (2 preceding siblings ...)
2024-05-10 9:14 ` [PATCH net-next 0/2] " D. Wythe
@ 2024-05-10 10:22 ` Wenjia Zhang
3 siblings, 0 replies; 15+ messages in thread
From: Wenjia Zhang @ 2024-05-10 10:22 UTC (permalink / raw)
To: D. Wythe, kgraul, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 10.05.24 06:12, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
>
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
> There are several reasons why we believe it is appropriate here:
>
> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> address. There is no AF_SMC address at all.
>
> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> Otherwise, smc have to implement it again in AF_SMC path. Such as:
> 1. Replace IPPROTO_TCP with IPPROTO_SMC in the socket() syscall
> initiated by the user, without the use of LD-PRELOAD.
> 2. Select whether immediate fallback is required based on peer's port/ip
> before connect().
>
> A very significant result is that we can now use eBPF to implement smc_run
> instead of LD_PRELOAD, who is completely ineffective in scenarios of static
> linking.
>
> Another potential value is that we are attempting to optimize the performance of
> fallback socks, where merging socks is an important part, and it relies on the
> creation of SMC sockets under the AF_INET path. (More information :
> https://lore.kernel.org/netdev/1699442703-25015-1-git-send-email-alibuda@linux.alibaba.com/T/)
>
> D. Wythe (2):
> net/smc: refatoring initialization of smc sock
> net/smc: Introduce IPPROTO_SMC
>
> include/uapi/linux/in.h | 2 +
> net/smc/af_smc.c | 222 +++++++++++++++++++++++++++++++++++++++---------
> net/smc/inet_smc.h | 32 +++++++
> 3 files changed, 214 insertions(+), 42 deletions(-)
> create mode 100644 net/smc/inet_smc.h
>
Replacing the preload library is indeed a good reason for this method.
And that could be a new era for smc. However, there are still some
details we need to take care of. Thus, I'd like to ask for more time to
review and test these patches.
Thank you,
Wenjia
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-10 4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
2024-05-10 9:57 ` Dust Li
@ 2024-05-10 17:09 ` kernel test robot
2024-05-10 18:32 ` kernel test robot
2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2024-05-10 17:09 UTC (permalink / raw)
To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
Cc: oe-kbuild-all, kuba, davem, netdev, linux-s390, linux-rdma,
tonylu, pabeni, edumazet
Hi Wythe,
kernel test robot noticed the following build warnings:
[auto build test WARNING on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/D-Wythe/net-smc-refatoring-initialization-of-smc-sock/20240510-121442
base: net-next/main
patch link: https://lore.kernel.org/r/1715314333-107290-3-git-send-email-alibuda%40linux.alibaba.com
patch subject: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
config: i386-buildonly-randconfig-002-20240510 (https://download.01.org/0day-ci/archive/20240511/202405110124.GxQs28cK-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240511/202405110124.GxQs28cK-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405110124.GxQs28cK-lkp@intel.com/
All warnings (new ones prefixed by >>):
net/smc/af_smc.c: In function 'smc_init':
>> net/smc/af_smc.c:3710:1: warning: label 'out_inet_prot' defined but not used [-Wunused-label]
out_inet_prot:
^~~~~~~~~~~~~
vim +/out_inet_prot +3710 net/smc/af_smc.c
3707
3708 static_branch_enable(&tcp_have_smc);
3709 return 0;
> 3710 out_inet_prot:
3711 inet_unregister_protosw(&smc_inet_protosw);
3712 proto_unregister(&smc_inet_prot);
3713 out_ulp:
3714 tcp_unregister_ulp(&smc_ulp_ops);
3715 out_lo:
3716 smc_loopback_exit();
3717 out_ib:
3718 smc_ib_unregister_client();
3719 out_sock:
3720 sock_unregister(PF_SMC);
3721 out_proto6:
3722 proto_unregister(&smc_proto6);
3723 out_proto:
3724 proto_unregister(&smc_proto);
3725 out_core:
3726 smc_core_exit();
3727 out_alloc_wqs:
3728 destroy_workqueue(smc_close_wq);
3729 out_alloc_hs_wq:
3730 destroy_workqueue(smc_hs_wq);
3731 out_alloc_tcp_ls_wq:
3732 destroy_workqueue(smc_tcp_ls_wq);
3733 out_pnet:
3734 smc_pnet_exit();
3735 out_nl:
3736 smc_nl_exit();
3737 out_ism:
3738 smc_clc_exit();
3739 smc_ism_exit();
3740 out_pernet_subsys_stat:
3741 unregister_pernet_subsys(&smc_net_stat_ops);
3742 out_pernet_subsys:
3743 unregister_pernet_subsys(&smc_net_ops);
3744
3745 return rc;
3746 }
3747
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-10 4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
2024-05-10 9:57 ` Dust Li
2024-05-10 17:09 ` kernel test robot
@ 2024-05-10 18:32 ` kernel test robot
2 siblings, 0 replies; 15+ messages in thread
From: kernel test robot @ 2024-05-10 18:32 UTC (permalink / raw)
To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
Cc: llvm, oe-kbuild-all, kuba, davem, netdev, linux-s390, linux-rdma,
tonylu, pabeni, edumazet
Hi Wythe,
kernel test robot noticed the following build warnings:
[auto build test WARNING on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/D-Wythe/net-smc-refatoring-initialization-of-smc-sock/20240510-121442
base: net-next/main
patch link: https://lore.kernel.org/r/1715314333-107290-3-git-send-email-alibuda%40linux.alibaba.com
patch subject: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
config: x86_64-randconfig-002-20240510 (https://download.01.org/0day-ci/archive/20240511/202405110225.7378HJe1-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240511/202405110225.7378HJe1-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405110225.7378HJe1-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> net/smc/af_smc.c:3710:1: warning: unused label 'out_inet_prot' [-Wunused-label]
3710 | out_inet_prot:
| ^~~~~~~~~~~~~~
1 warning generated.
vim +/out_inet_prot +3710 net/smc/af_smc.c
3707
3708 static_branch_enable(&tcp_have_smc);
3709 return 0;
> 3710 out_inet_prot:
3711 inet_unregister_protosw(&smc_inet_protosw);
3712 proto_unregister(&smc_inet_prot);
3713 out_ulp:
3714 tcp_unregister_ulp(&smc_ulp_ops);
3715 out_lo:
3716 smc_loopback_exit();
3717 out_ib:
3718 smc_ib_unregister_client();
3719 out_sock:
3720 sock_unregister(PF_SMC);
3721 out_proto6:
3722 proto_unregister(&smc_proto6);
3723 out_proto:
3724 proto_unregister(&smc_proto);
3725 out_core:
3726 smc_core_exit();
3727 out_alloc_wqs:
3728 destroy_workqueue(smc_close_wq);
3729 out_alloc_hs_wq:
3730 destroy_workqueue(smc_hs_wq);
3731 out_alloc_tcp_ls_wq:
3732 destroy_workqueue(smc_tcp_ls_wq);
3733 out_pnet:
3734 smc_pnet_exit();
3735 out_nl:
3736 smc_nl_exit();
3737 out_ism:
3738 smc_clc_exit();
3739 smc_ism_exit();
3740 out_pernet_subsys_stat:
3741 unregister_pernet_subsys(&smc_net_stat_ops);
3742 out_pernet_subsys:
3743 unregister_pernet_subsys(&smc_net_ops);
3744
3745 return rc;
3746 }
3747
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-10 9:57 ` Dust Li
@ 2024-05-11 2:23 ` D. Wythe
2024-05-11 2:46 ` Dust Li
0 siblings, 1 reply; 15+ messages in thread
From: D. Wythe @ 2024-05-11 2:23 UTC (permalink / raw)
To: dust.li, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 5/10/24 5:57 PM, Dust Li wrote:
> On 2024-05-10 12:12:13, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>> include/uapi/linux/in.h | 2 +
>> net/smc/af_smc.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
>> net/smc/inet_smc.h | 32 ++++++++++++
>> 3 files changed, 162 insertions(+), 1 deletion(-)
>> create mode 100644 net/smc/inet_smc.h
>>
>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> index e682ab6..74c12e33 100644
>> --- a/include/uapi/linux/in.h
>> +++ b/include/uapi/linux/in.h
>> @@ -83,6 +83,8 @@ enum {
>> #define IPPROTO_RAW IPPROTO_RAW
>> IPPROTO_MPTCP = 262, /* Multipath TCP connection */
>> #define IPPROTO_MPTCP IPPROTO_MPTCP
>> + IPPROTO_SMC = 263, /* Shared Memory Communications */
> ^ use tab to align here
There is a problem here, all previous definitions were aligned with 2
spaces.
>> +#define IPPROTO_SMC IPPROTO_SMC
>> IPPROTO_MAX
>> };
>> #endif
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 1f03724..b4557828 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -54,6 +54,7 @@
>> #include "smc_tracepoint.h"
>> #include "smc_sysctl.h"
>> #include "smc_loopback.h"
>> +#include "inet_smc.h"
>>
>> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
>> * creation on server
>> @@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
>> .create = smc_create,
>> };
>>
> Why not put those whole bunch of inet staff into smc_inet.c ?
> Looks like your smc_inet.h is meanless without smc_inet.c
>
This header file was originally reserved for future merging of socks. If
nobody likes it, I can move it to the
af_smc.c
>> +int smc_inet_init_sock(struct sock *sk)
>> +{
>> + struct net *net = sock_net(sk);
>> +
>> + /* init common smc sock */
>> + smc_sock_init(net, sk, IPPROTO_SMC);
>> + /* create clcsock */
>> + return __smc_create_clcsk(net, sk, sk->sk_family);
>> +}
>> +
>> static int smc_ulp_init(struct sock *sk)
>> {
>> struct socket *tcp = sk->sk_socket;
>> @@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
>> .clone = smc_ulp_clone,
>> };
>>
>> +struct proto smc_inet_prot = {
>> + .name = "INET_SMC",
>> + .owner = THIS_MODULE,
>> + .init = smc_inet_init_sock,
>> + .hash = smc_hash_sk,
>> + .unhash = smc_unhash_sk,
>> + .release_cb = smc_release_cb,
>> + .obj_size = sizeof(struct smc_sock),
>> + .h.smc_hash = &smc_v4_hashinfo,
>> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
> ^
> Align please.
>
Got it.
>> +};
>> +
>> +const struct proto_ops smc_inet_stream_ops = {
>> + .family = PF_INET,
>> + .owner = THIS_MODULE,
>> + .release = smc_release,
>> + .bind = smc_bind,
>> + .connect = smc_connect,
>> + .socketpair = sock_no_socketpair,
>> + .accept = smc_accept,
>> + .getname = smc_getname,
>> + .poll = smc_poll,
>> + .ioctl = smc_ioctl,
>> + .listen = smc_listen,
>> + .shutdown = smc_shutdown,
>> + .setsockopt = smc_setsockopt,
>> + .getsockopt = smc_getsockopt,
>> + .sendmsg = smc_sendmsg,
>> + .recvmsg = smc_recvmsg,
>> + .mmap = sock_no_mmap,
>> + .splice_read = smc_splice_read,
> Ditto
>
>> +};
>> +
>> +struct inet_protosw smc_inet_protosw = {
>> + .type = SOCK_STREAM,
>> + .protocol = IPPROTO_SMC,
>> + .prot = &smc_inet_prot,
> Ditto
>
>> + .ops = &smc_inet_stream_ops,
>> + .flags = INET_PROTOSW_ICSK,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +struct proto smc_inet6_prot = {
>> + .name = "INET6_SMC",
>> + .owner = THIS_MODULE,
>> + .init = smc_inet_init_sock,
>> + .hash = smc_hash_sk,
>> + .unhash = smc_unhash_sk,
>> + .release_cb = smc_release_cb,
>> + .obj_size = sizeof(struct smc_sock),
>> + .h.smc_hash = &smc_v6_hashinfo,
>> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet6_stream_ops = {
>> + .family = PF_INET6,
>> + .owner = THIS_MODULE,
>> + .release = smc_release,
>> + .bind = smc_bind,
>> + .connect = smc_connect,
>> + .socketpair = sock_no_socketpair,
>> + .accept = smc_accept,
>> + .getname = smc_getname,
>> + .poll = smc_poll,
>> + .ioctl = smc_ioctl,
>> + .listen = smc_listen,
>> + .shutdown = smc_shutdown,
>> + .setsockopt = smc_setsockopt,
>> + .getsockopt = smc_getsockopt,
>> + .sendmsg = smc_sendmsg,
>> + .recvmsg = smc_recvmsg,
>> + .mmap = sock_no_mmap,
>> + .splice_read = smc_splice_read,
> Ditto
>
>> +};
>> +
>> +struct inet_protosw smc_inet6_protosw = {
>> + .type = SOCK_STREAM,
>> + .protocol = IPPROTO_SMC,
>> + .prot = &smc_inet6_prot,
>> + .ops = &smc_inet6_stream_ops,
>> + .flags = INET_PROTOSW_ICSK,
> Ditto
>
>> +};
>> +#endif
>> +
>> unsigned int smc_net_id;
>>
>> static __net_init int smc_net_init(struct net *net)
>> @@ -3595,9 +3690,28 @@ static int __init smc_init(void)
>> goto out_lo;
>> }
>>
>> + rc = proto_register(&smc_inet_prot, 1);
>> + if (rc) {
>> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>> + goto out_ulp;
>> + }
>> + inet_register_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + rc = proto_register(&smc_inet6_prot, 1);
>> + if (rc) {
>> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>> + goto out_inet_prot;
>> + }
>> + inet6_register_protosw(&smc_inet6_protosw);
>> +#endif
>> +
>> static_branch_enable(&tcp_have_smc);
>> return 0;
>> -
>> +out_inet_prot:
>> + inet_unregister_protosw(&smc_inet_protosw);
>> + proto_unregister(&smc_inet_prot);
>> +out_ulp:
>> + tcp_unregister_ulp(&smc_ulp_ops);
>> out_lo:
>> smc_loopback_exit();
>> out_ib:
>> @@ -3634,6 +3748,10 @@ static int __init smc_init(void)
>> static void __exit smc_exit(void)
>> {
>> static_branch_disable(&tcp_have_smc);
>> + inet_unregister_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + inet6_unregister_protosw(&smc_inet6_protosw);
>> +#endif
>> tcp_unregister_ulp(&smc_ulp_ops);
>> sock_unregister(PF_SMC);
>> smc_core_exit();
>> @@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
>> destroy_workqueue(smc_hs_wq);
>> proto_unregister(&smc_proto6);
>> proto_unregister(&smc_proto);
>> + proto_unregister(&smc_inet_prot);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + proto_unregister(&smc_inet6_prot);
>> +#endif
>> smc_pnet_exit();
>> smc_nl_exit();
>> smc_clc_exit();
>> @@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
>> MODULE_LICENSE("GPL");
>> MODULE_ALIAS_NETPROTO(PF_SMC);
>> MODULE_ALIAS_TCP_ULP("smc");
>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> +#endif
>> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> new file mode 100644
>> index 00000000..fcdcb61
>> --- /dev/null
>> +++ b/net/smc/inet_smc.h
>> @@ -0,0 +1,32 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + * Definitions for the SMC module (socket related)
>> +
>> + * Copyright IBM Corp. 2016
> You should update this.
Got it.
>> + *
>> + */
>> +#ifndef __INET_SMC
>> +#define __INET_SMC
>> +
>> +#include <net/protocol.h>
>> +#include <net/sock.h>
>> +#include <net/tcp.h>
>> +
>> +extern struct proto smc_inet_prot;
>> +extern const struct proto_ops smc_inet_stream_ops;
>> +extern struct inet_protosw smc_inet_protosw;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +/* MUST after net/tcp.h or warning */
>> +#include <net/transp_v6.h>
>> +extern struct proto smc_inet6_prot;
>> +extern const struct proto_ops smc_inet6_stream_ops;
>> +extern struct inet_protosw smc_inet6_protosw;
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk);
>> +
>> +#endif // __INET_SMC
> ^
> use /* __INET_SMC */ instead
>
>> --
>> 1.8.3.1
>>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
2024-05-10 9:50 ` Dust Li
@ 2024-05-11 2:26 ` D. Wythe
0 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-11 2:26 UTC (permalink / raw)
To: dust.li, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 5/10/24 5:50 PM, Dust Li wrote:
> On 2024-05-10 12:12:12, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch aims to isolate the shared components of SMC socket
>> allocation by introducing smc_sock_init() for sock initialization
>> and __smc_create_clcsk() for the initialization of clcsock.
>>
>> This is in preparation for the subsequent implementation of the
>> AF_INET version of SMC.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>> net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
>> 1 file changed, 52 insertions(+), 41 deletions(-)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 9389f0c..1f03724 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
>> return;
>> }
>>
>> -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>> - int protocol)
>> +static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
>> {
>> - struct smc_sock *smc;
>> - struct proto *prot;
>> - struct sock *sk;
>> -
>> - prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> - if (!sk)
>> - return NULL;
>> + struct smc_sock *smc = smc_sk(sk);
>>
>> - sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> sk->sk_state = SMC_INIT;
>> - sk->sk_destruct = smc_destruct;
>> sk->sk_protocol = protocol;
>> + mutex_init(&smc->clcsock_release_lock);
>> WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
>> WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
>> - smc = smc_sk(sk);
>> INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
>> INIT_WORK(&smc->connect_work, smc_connect_work);
>> INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
>> INIT_LIST_HEAD(&smc->accept_q);
>> spin_lock_init(&smc->accept_q_lock);
>> spin_lock_init(&smc->conn.send_lock);
>> - sk->sk_prot->hash(sk);
>> - mutex_init(&smc->clcsock_release_lock);
>> smc_init_saved_callbacks(smc);
>> + smc->limit_smc_hs = net->smc.limit_smc_hs;
>> + smc->use_fallback = false; /* assume rdma capability first */
>> + smc->fallback_rsn = 0;
>> +
>> + sk->sk_destruct = smc_destruct;
>> + sk->sk_prot->hash(sk);
> Why change the order here ? e.g.
>
> Before:
> sk->sk_destruct = smc_destruct;
> mutex_init(&smc->clcsock_release_lock);
> After
> mutex_init(&smc->clcsock_release_lock);
> sk->sk_destruct = smc_destruct;
>
> Same for sk->sk_prot->hash(sk)
Yes, you are right, I will fix it in the next version.
>
>
>> +}
>> +
>> +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
>> + int protocol)
>> +{
>> + struct proto *prot;
>> + struct sock *sk;
>> +
>> + prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> + if (!sk)
>> + return NULL;
>> +
>> + sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> + smc_sock_init(net, sk, protocol);
>>
>> return sk;
>> }
>> @@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
>> .splice_read = smc_splice_read,
>> };
>>
>> +static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
> Why add '__' prefix here ?
Good question, I also realize that this is not suitable, I will delete
it in the next version.
Thanks.
>
>> +{
>> + struct smc_sock *smc = smc_sk(sk);
>> + int rc;
>> +
>> + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> + &smc->clcsock);
>> + if (rc) {
>> + sk_common_release(sk);
>> + return rc;
>> + }
>> +
>> + /* smc_clcsock_release() does not wait smc->clcsock->sk's
>> + * destruction; its sk_state might not be TCP_CLOSE after
>> + * smc->sk is close()d, and TCP timers can be fired later,
>> + * which need net ref.
>> + */
>> + sk = smc->clcsock->sk;
>> + __netns_tracker_free(net, &sk->ns_tracker, false);
>> + sk->sk_net_refcnt = 1;
>> + get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> + sock_inuse_add(net, 1);
>> + return 0;
>> +}
>> +
>> static int __smc_create(struct net *net, struct socket *sock, int protocol,
>> int kern, struct socket *clcsock)
>> {
>> @@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>>
>> /* create internal TCP socket for CLC handshake and fallback */
>> smc = smc_sk(sk);
>> - smc->use_fallback = false; /* assume rdma capability first */
>> - smc->fallback_rsn = 0;
>> -
>> - /* default behavior from limit_smc_hs in every net namespace */
>> - smc->limit_smc_hs = net->smc.limit_smc_hs;
>>
>> rc = 0;
>> - if (!clcsock) {
>> - rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> - &smc->clcsock);
>> - if (rc) {
>> - sk_common_release(sk);
>> - goto out;
>> - }
>> -
>> - /* smc_clcsock_release() does not wait smc->clcsock->sk's
>> - * destruction; its sk_state might not be TCP_CLOSE after
>> - * smc->sk is close()d, and TCP timers can be fired later,
>> - * which need net ref.
>> - */
>> - sk = smc->clcsock->sk;
>> - __netns_tracker_free(net, &sk->ns_tracker, false);
>> - sk->sk_net_refcnt = 1;
>> - get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> - sock_inuse_add(net, 1);
>> - } else {
>> + if (!clcsock)
>> + rc = __smc_create_clcsk(net, sk, family);
>> + else
>> smc->clcsock = clcsock;
>> - }
>> -
>> out:
>> return rc;
>> }
>> --
>> 1.8.3.1
>>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-11 2:23 ` D. Wythe
@ 2024-05-11 2:46 ` Dust Li
2024-05-11 3:02 ` D. Wythe
0 siblings, 1 reply; 15+ messages in thread
From: Dust Li @ 2024-05-11 2:46 UTC (permalink / raw)
To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 2024-05-11 10:23:31, D. Wythe wrote:
>
>
>On 5/10/24 5:57 PM, Dust Li wrote:
>> On 2024-05-10 12:12:13, D. Wythe wrote:
>> > From: "D. Wythe" <alibuda@linux.alibaba.com>
>> >
>> > This patch allows to create smc socket via AF_INET,
>> > similar to the following code,
>> >
>> > /* create v4 smc sock */
>> > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>> >
>> > /* create v6 smc sock */
>> > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>> >
>> > There are several reasons why we believe it is appropriate here:
>> >
>> > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> > address. There is no AF_SMC address at all.
>> >
>> > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> > the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> > Otherwise, smc have to implement it again in AF_SMC path.
>> >
>> > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> > ---
>> > include/uapi/linux/in.h | 2 +
>> > net/smc/af_smc.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
>> > net/smc/inet_smc.h | 32 ++++++++++++
>> > 3 files changed, 162 insertions(+), 1 deletion(-)
>> > create mode 100644 net/smc/inet_smc.h
>> >
>> > diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> > index e682ab6..74c12e33 100644
>> > --- a/include/uapi/linux/in.h
>> > +++ b/include/uapi/linux/in.h
>> > @@ -83,6 +83,8 @@ enum {
>> > #define IPPROTO_RAW IPPROTO_RAW
>> > IPPROTO_MPTCP = 262, /* Multipath TCP connection */
>> > #define IPPROTO_MPTCP IPPROTO_MPTCP
>> > + IPPROTO_SMC = 263, /* Shared Memory Communications */
>> ^ use tab to align here
>
>There is a problem here, all previous definitions were aligned with 2 spaces.
I mean the tab in the annotation in the end, not the space at the beginning.
>
>> > +#define IPPROTO_SMC IPPROTO_SMC
>> > IPPROTO_MAX
>> > };
>> > #endif
>> > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> > index 1f03724..b4557828 100644
>> > --- a/net/smc/af_smc.c
>> > +++ b/net/smc/af_smc.c
>> > @@ -54,6 +54,7 @@
>> > #include "smc_tracepoint.h"
>> > #include "smc_sysctl.h"
>> > #include "smc_loopback.h"
>> > +#include "inet_smc.h"
>> >
>> > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
>> > * creation on server
>> > @@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
>> > .create = smc_create,
>> > };
>> >
>> Why not put those whole bunch of inet staff into smc_inet.c ?
>> Looks like your smc_inet.h is meanless without smc_inet.c
>>
>
>This header file was originally reserved for future merging of socks. If
>nobody likes it, I can move it to the
>af_smc.c
I prefer adding a new smc_inet.c, af_smc.c is already very large.
Best regards,
Dust
>
>> > +int smc_inet_init_sock(struct sock *sk)
>> > +{
>> > + struct net *net = sock_net(sk);
>> > +
>> > + /* init common smc sock */
>> > + smc_sock_init(net, sk, IPPROTO_SMC);
>> > + /* create clcsock */
>> > + return __smc_create_clcsk(net, sk, sk->sk_family);
>> > +}
>> > +
>> > static int smc_ulp_init(struct sock *sk)
>> > {
>> > struct socket *tcp = sk->sk_socket;
>> > @@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
>> > .clone = smc_ulp_clone,
>> > };
>> >
>> > +struct proto smc_inet_prot = {
>> > + .name = "INET_SMC",
>> > + .owner = THIS_MODULE,
>> > + .init = smc_inet_init_sock,
>> > + .hash = smc_hash_sk,
>> > + .unhash = smc_unhash_sk,
>> > + .release_cb = smc_release_cb,
>> > + .obj_size = sizeof(struct smc_sock),
>> > + .h.smc_hash = &smc_v4_hashinfo,
>> > + .slab_flags = SLAB_TYPESAFE_BY_RCU,
>> ^
>> Align please.
>>
>Got it.
>> > +};
>> > +
>> > +const struct proto_ops smc_inet_stream_ops = {
>> > + .family = PF_INET,
>> > + .owner = THIS_MODULE,
>> > + .release = smc_release,
>> > + .bind = smc_bind,
>> > + .connect = smc_connect,
>> > + .socketpair = sock_no_socketpair,
>> > + .accept = smc_accept,
>> > + .getname = smc_getname,
>> > + .poll = smc_poll,
>> > + .ioctl = smc_ioctl,
>> > + .listen = smc_listen,
>> > + .shutdown = smc_shutdown,
>> > + .setsockopt = smc_setsockopt,
>> > + .getsockopt = smc_getsockopt,
>> > + .sendmsg = smc_sendmsg,
>> > + .recvmsg = smc_recvmsg,
>> > + .mmap = sock_no_mmap,
>> > + .splice_read = smc_splice_read,
>> Ditto
>>
>> > +};
>> > +
>> > +struct inet_protosw smc_inet_protosw = {
>> > + .type = SOCK_STREAM,
>> > + .protocol = IPPROTO_SMC,
>> > + .prot = &smc_inet_prot,
>> Ditto
>>
>> > + .ops = &smc_inet_stream_ops,
>> > + .flags = INET_PROTOSW_ICSK,
>> > +};
>> > +
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +struct proto smc_inet6_prot = {
>> > + .name = "INET6_SMC",
>> > + .owner = THIS_MODULE,
>> > + .init = smc_inet_init_sock,
>> > + .hash = smc_hash_sk,
>> > + .unhash = smc_unhash_sk,
>> > + .release_cb = smc_release_cb,
>> > + .obj_size = sizeof(struct smc_sock),
>> > + .h.smc_hash = &smc_v6_hashinfo,
>> > + .slab_flags = SLAB_TYPESAFE_BY_RCU,
>> > +};
>> > +
>> > +const struct proto_ops smc_inet6_stream_ops = {
>> > + .family = PF_INET6,
>> > + .owner = THIS_MODULE,
>> > + .release = smc_release,
>> > + .bind = smc_bind,
>> > + .connect = smc_connect,
>> > + .socketpair = sock_no_socketpair,
>> > + .accept = smc_accept,
>> > + .getname = smc_getname,
>> > + .poll = smc_poll,
>> > + .ioctl = smc_ioctl,
>> > + .listen = smc_listen,
>> > + .shutdown = smc_shutdown,
>> > + .setsockopt = smc_setsockopt,
>> > + .getsockopt = smc_getsockopt,
>> > + .sendmsg = smc_sendmsg,
>> > + .recvmsg = smc_recvmsg,
>> > + .mmap = sock_no_mmap,
>> > + .splice_read = smc_splice_read,
>> Ditto
>>
>> > +};
>> > +
>> > +struct inet_protosw smc_inet6_protosw = {
>> > + .type = SOCK_STREAM,
>> > + .protocol = IPPROTO_SMC,
>> > + .prot = &smc_inet6_prot,
>> > + .ops = &smc_inet6_stream_ops,
>> > + .flags = INET_PROTOSW_ICSK,
>> Ditto
>>
>> > +};
>> > +#endif
>> > +
>> > unsigned int smc_net_id;
>> >
>> > static __net_init int smc_net_init(struct net *net)
>> > @@ -3595,9 +3690,28 @@ static int __init smc_init(void)
>> > goto out_lo;
>> > }
>> >
>> > + rc = proto_register(&smc_inet_prot, 1);
>> > + if (rc) {
>> > + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>> > + goto out_ulp;
>> > + }
>> > + inet_register_protosw(&smc_inet_protosw);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > + rc = proto_register(&smc_inet6_prot, 1);
>> > + if (rc) {
>> > + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>> > + goto out_inet_prot;
>> > + }
>> > + inet6_register_protosw(&smc_inet6_protosw);
>> > +#endif
>> > +
>> > static_branch_enable(&tcp_have_smc);
>> > return 0;
>> > -
>> > +out_inet_prot:
>> > + inet_unregister_protosw(&smc_inet_protosw);
>> > + proto_unregister(&smc_inet_prot);
>> > +out_ulp:
>> > + tcp_unregister_ulp(&smc_ulp_ops);
>> > out_lo:
>> > smc_loopback_exit();
>> > out_ib:
>> > @@ -3634,6 +3748,10 @@ static int __init smc_init(void)
>> > static void __exit smc_exit(void)
>> > {
>> > static_branch_disable(&tcp_have_smc);
>> > + inet_unregister_protosw(&smc_inet_protosw);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > + inet6_unregister_protosw(&smc_inet6_protosw);
>> > +#endif
>> > tcp_unregister_ulp(&smc_ulp_ops);
>> > sock_unregister(PF_SMC);
>> > smc_core_exit();
>> > @@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
>> > destroy_workqueue(smc_hs_wq);
>> > proto_unregister(&smc_proto6);
>> > proto_unregister(&smc_proto);
>> > + proto_unregister(&smc_inet_prot);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > + proto_unregister(&smc_inet6_prot);
>> > +#endif
>> > smc_pnet_exit();
>> > smc_nl_exit();
>> > smc_clc_exit();
>> > @@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
>> > MODULE_LICENSE("GPL");
>> > MODULE_ALIAS_NETPROTO(PF_SMC);
>> > MODULE_ALIAS_TCP_ULP("smc");
>> > +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> > +#endif
>> > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> > diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> > new file mode 100644
>> > index 00000000..fcdcb61
>> > --- /dev/null
>> > +++ b/net/smc/inet_smc.h
>> > @@ -0,0 +1,32 @@
>> > +/* SPDX-License-Identifier: GPL-2.0 */
>> > +/*
>> > + * Shared Memory Communications over RDMA (SMC-R) and RoCE
>> > + *
>> > + * Definitions for the SMC module (socket related)
>> > +
>> > + * Copyright IBM Corp. 2016
>> You should update this.
>Got it.
>> > + *
>> > + */
>> > +#ifndef __INET_SMC
>> > +#define __INET_SMC
>> > +
>> > +#include <net/protocol.h>
>> > +#include <net/sock.h>
>> > +#include <net/tcp.h>
>> > +
>> > +extern struct proto smc_inet_prot;
>> > +extern const struct proto_ops smc_inet_stream_ops;
>> > +extern struct inet_protosw smc_inet_protosw;
>> > +
>> > +#if IS_ENABLED(CONFIG_IPV6)
>> > +#include <net/ipv6.h>
>> > +/* MUST after net/tcp.h or warning */
>> > +#include <net/transp_v6.h>
>> > +extern struct proto smc_inet6_prot;
>> > +extern const struct proto_ops smc_inet6_stream_ops;
>> > +extern struct inet_protosw smc_inet6_protosw;
>> > +#endif
>> > +
>> > +int smc_inet_init_sock(struct sock *sk);
>> > +
>> > +#endif // __INET_SMC
>> ^
>> use /* __INET_SMC */ instead
>>
>> > --
>> > 1.8.3.1
>> >
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC
2024-05-11 2:46 ` Dust Li
@ 2024-05-11 3:02 ` D. Wythe
0 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-11 3:02 UTC (permalink / raw)
To: dust.li, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 5/11/24 10:46 AM, Dust Li wrote:
> On 2024-05-11 10:23:31, D. Wythe wrote:
>>
>> On 5/10/24 5:57 PM, Dust Li wrote:
>>> On 2024-05-10 12:12:13, D. Wythe wrote:
>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>
>>>> This patch allows to create smc socket via AF_INET,
>>>> similar to the following code,
>>>>
>>>> /* create v4 smc sock */
>>>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>>>
>>>> /* create v6 smc sock */
>>>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>>>
>>>> There are several reasons why we believe it is appropriate here:
>>>>
>>>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>>>> address. There is no AF_SMC address at all.
>>>>
>>>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>>>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>>>> Otherwise, smc have to implement it again in AF_SMC path.
>>>>
>>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>>> ---
>>>> include/uapi/linux/in.h | 2 +
>>>> net/smc/af_smc.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++-
>>>> net/smc/inet_smc.h | 32 ++++++++++++
>>>> 3 files changed, 162 insertions(+), 1 deletion(-)
>>>> create mode 100644 net/smc/inet_smc.h
>>>>
>>>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>>>> index e682ab6..74c12e33 100644
>>>> --- a/include/uapi/linux/in.h
>>>> +++ b/include/uapi/linux/in.h
>>>> @@ -83,6 +83,8 @@ enum {
>>>> #define IPPROTO_RAW IPPROTO_RAW
>>>> IPPROTO_MPTCP = 262, /* Multipath TCP connection */
>>>> #define IPPROTO_MPTCP IPPROTO_MPTCP
>>>> + IPPROTO_SMC = 263, /* Shared Memory Communications */
>>> ^ use tab to align here
>> There is a problem here, all previous definitions were aligned with 2 spaces.
> I mean the tab in the annotation in the end, not the space at the beginning.
Oh... that's true. Thanks for that.
>>>> +#define IPPROTO_SMC IPPROTO_SMC
>>>> IPPROTO_MAX
>>>> };
>>>> #endif
>>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>>> index 1f03724..b4557828 100644
>>>> --- a/net/smc/af_smc.c
>>>> +++ b/net/smc/af_smc.c
>>>> @@ -54,6 +54,7 @@
>>>> #include "smc_tracepoint.h"
>>>> #include "smc_sysctl.h"
>>>> #include "smc_loopback.h"
>>>> +#include "inet_smc.h"
>>>>
>>>> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
>>>> * creation on server
>>>> @@ -3402,6 +3403,16 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
>>>> .create = smc_create,
>>>> };
>>>>
>>> Why not put those whole bunch of inet staff into smc_inet.c ?
>>> Looks like your smc_inet.h is meanless without smc_inet.c
>>>
>> This header file was originally reserved for future merging of socks. If
>> nobody likes it, I can move it to the
>> af_smc.c
> I prefer adding a new smc_inet.c, af_smc.c is already very large.
>
> Best regards,
> Dust
Sounds Reasonable. I'll try it in next version.
Thanks,
D. Wythe
>
>>>> +int smc_inet_init_sock(struct sock *sk)
>>>> +{
>>>> + struct net *net = sock_net(sk);
>>>> +
>>>> + /* init common smc sock */
>>>> + smc_sock_init(net, sk, IPPROTO_SMC);
>>>> + /* create clcsock */
>>>> + return __smc_create_clcsk(net, sk, sk->sk_family);
>>>> +}
>>>> +
>>>> static int smc_ulp_init(struct sock *sk)
>>>> {
>>>> struct socket *tcp = sk->sk_socket;
>>>> @@ -3460,6 +3471,90 @@ static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
>>>> .clone = smc_ulp_clone,
>>>> };
>>>>
>>>> +struct proto smc_inet_prot = {
>>>> + .name = "INET_SMC",
>>>> + .owner = THIS_MODULE,
>>>> + .init = smc_inet_init_sock,
>>>> + .hash = smc_hash_sk,
>>>> + .unhash = smc_unhash_sk,
>>>> + .release_cb = smc_release_cb,
>>>> + .obj_size = sizeof(struct smc_sock),
>>>> + .h.smc_hash = &smc_v4_hashinfo,
>>>> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
>>> ^
>>> Align please.
>>>
>> Got it.
>>>> +};
>>>> +
>>>> +const struct proto_ops smc_inet_stream_ops = {
>>>> + .family = PF_INET,
>>>> + .owner = THIS_MODULE,
>>>> + .release = smc_release,
>>>> + .bind = smc_bind,
>>>> + .connect = smc_connect,
>>>> + .socketpair = sock_no_socketpair,
>>>> + .accept = smc_accept,
>>>> + .getname = smc_getname,
>>>> + .poll = smc_poll,
>>>> + .ioctl = smc_ioctl,
>>>> + .listen = smc_listen,
>>>> + .shutdown = smc_shutdown,
>>>> + .setsockopt = smc_setsockopt,
>>>> + .getsockopt = smc_getsockopt,
>>>> + .sendmsg = smc_sendmsg,
>>>> + .recvmsg = smc_recvmsg,
>>>> + .mmap = sock_no_mmap,
>>>> + .splice_read = smc_splice_read,
>>> Ditto
>>>
>>>> +};
>>>> +
>>>> +struct inet_protosw smc_inet_protosw = {
>>>> + .type = SOCK_STREAM,
>>>> + .protocol = IPPROTO_SMC,
>>>> + .prot = &smc_inet_prot,
>>> Ditto
>>>
>>>> + .ops = &smc_inet_stream_ops,
>>>> + .flags = INET_PROTOSW_ICSK,
>>>> +};
>>>> +
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +struct proto smc_inet6_prot = {
>>>> + .name = "INET6_SMC",
>>>> + .owner = THIS_MODULE,
>>>> + .init = smc_inet_init_sock,
>>>> + .hash = smc_hash_sk,
>>>> + .unhash = smc_unhash_sk,
>>>> + .release_cb = smc_release_cb,
>>>> + .obj_size = sizeof(struct smc_sock),
>>>> + .h.smc_hash = &smc_v6_hashinfo,
>>>> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
>>>> +};
>>>> +
>>>> +const struct proto_ops smc_inet6_stream_ops = {
>>>> + .family = PF_INET6,
>>>> + .owner = THIS_MODULE,
>>>> + .release = smc_release,
>>>> + .bind = smc_bind,
>>>> + .connect = smc_connect,
>>>> + .socketpair = sock_no_socketpair,
>>>> + .accept = smc_accept,
>>>> + .getname = smc_getname,
>>>> + .poll = smc_poll,
>>>> + .ioctl = smc_ioctl,
>>>> + .listen = smc_listen,
>>>> + .shutdown = smc_shutdown,
>>>> + .setsockopt = smc_setsockopt,
>>>> + .getsockopt = smc_getsockopt,
>>>> + .sendmsg = smc_sendmsg,
>>>> + .recvmsg = smc_recvmsg,
>>>> + .mmap = sock_no_mmap,
>>>> + .splice_read = smc_splice_read,
>>> Ditto
>>>
>>>> +};
>>>> +
>>>> +struct inet_protosw smc_inet6_protosw = {
>>>> + .type = SOCK_STREAM,
>>>> + .protocol = IPPROTO_SMC,
>>>> + .prot = &smc_inet6_prot,
>>>> + .ops = &smc_inet6_stream_ops,
>>>> + .flags = INET_PROTOSW_ICSK,
>>> Ditto
>>>
>>>> +};
>>>> +#endif
>>>> +
>>>> unsigned int smc_net_id;
>>>>
>>>> static __net_init int smc_net_init(struct net *net)
>>>> @@ -3595,9 +3690,28 @@ static int __init smc_init(void)
>>>> goto out_lo;
>>>> }
>>>>
>>>> + rc = proto_register(&smc_inet_prot, 1);
>>>> + if (rc) {
>>>> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>>>> + goto out_ulp;
>>>> + }
>>>> + inet_register_protosw(&smc_inet_protosw);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> + rc = proto_register(&smc_inet6_prot, 1);
>>>> + if (rc) {
>>>> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>>>> + goto out_inet_prot;
>>>> + }
>>>> + inet6_register_protosw(&smc_inet6_protosw);
>>>> +#endif
>>>> +
>>>> static_branch_enable(&tcp_have_smc);
>>>> return 0;
>>>> -
>>>> +out_inet_prot:
>>>> + inet_unregister_protosw(&smc_inet_protosw);
>>>> + proto_unregister(&smc_inet_prot);
>>>> +out_ulp:
>>>> + tcp_unregister_ulp(&smc_ulp_ops);
>>>> out_lo:
>>>> smc_loopback_exit();
>>>> out_ib:
>>>> @@ -3634,6 +3748,10 @@ static int __init smc_init(void)
>>>> static void __exit smc_exit(void)
>>>> {
>>>> static_branch_disable(&tcp_have_smc);
>>>> + inet_unregister_protosw(&smc_inet_protosw);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> + inet6_unregister_protosw(&smc_inet6_protosw);
>>>> +#endif
>>>> tcp_unregister_ulp(&smc_ulp_ops);
>>>> sock_unregister(PF_SMC);
>>>> smc_core_exit();
>>>> @@ -3645,6 +3763,10 @@ static void __exit smc_exit(void)
>>>> destroy_workqueue(smc_hs_wq);
>>>> proto_unregister(&smc_proto6);
>>>> proto_unregister(&smc_proto);
>>>> + proto_unregister(&smc_inet_prot);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> + proto_unregister(&smc_inet6_prot);
>>>> +#endif
>>>> smc_pnet_exit();
>>>> smc_nl_exit();
>>>> smc_clc_exit();
>>>> @@ -3661,4 +3783,9 @@ static void __exit smc_exit(void)
>>>> MODULE_LICENSE("GPL");
>>>> MODULE_ALIAS_NETPROTO(PF_SMC);
>>>> MODULE_ALIAS_TCP_ULP("smc");
>>>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>>>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>>>> +#endif
>>>> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>>>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>>>> new file mode 100644
>>>> index 00000000..fcdcb61
>>>> --- /dev/null
>>>> +++ b/net/smc/inet_smc.h
>>>> @@ -0,0 +1,32 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
>>>> + *
>>>> + * Definitions for the SMC module (socket related)
>>>> +
>>>> + * Copyright IBM Corp. 2016
>>> You should update this.
>> Got it.
>>>> + *
>>>> + */
>>>> +#ifndef __INET_SMC
>>>> +#define __INET_SMC
>>>> +
>>>> +#include <net/protocol.h>
>>>> +#include <net/sock.h>
>>>> +#include <net/tcp.h>
>>>> +
>>>> +extern struct proto smc_inet_prot;
>>>> +extern const struct proto_ops smc_inet_stream_ops;
>>>> +extern struct inet_protosw smc_inet_protosw;
>>>> +
>>>> +#if IS_ENABLED(CONFIG_IPV6)
>>>> +#include <net/ipv6.h>
>>>> +/* MUST after net/tcp.h or warning */
>>>> +#include <net/transp_v6.h>
>>>> +extern struct proto smc_inet6_prot;
>>>> +extern const struct proto_ops smc_inet6_stream_ops;
>>>> +extern struct inet_protosw smc_inet6_protosw;
>>>> +#endif
>>>> +
>>>> +int smc_inet_init_sock(struct sock *sk);
>>>> +
>>>> +#endif // __INET_SMC
>>> ^
>>> use /* __INET_SMC */ instead
>>>
>>>> --
>>>> 1.8.3.1
>>>>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
2024-05-10 4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
2024-05-10 9:50 ` Dust Li
@ 2024-05-11 12:21 ` Zhu Yanjun
2024-05-13 3:22 ` D. Wythe
1 sibling, 1 reply; 15+ messages in thread
From: Zhu Yanjun @ 2024-05-11 12:21 UTC (permalink / raw)
To: D. Wythe, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
在 2024/5/10 6:12, D. Wythe 写道:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch aims to isolate the shared components of SMC socket
> allocation by introducing smc_sock_init() for sock initialization
> and __smc_create_clcsk() for the initialization of clcsock.
>
> This is in preparation for the subsequent implementation of the
> AF_INET version of SMC.
>
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
> net/smc/af_smc.c | 93 +++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 52 insertions(+), 41 deletions(-)
>
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 9389f0c..1f03724 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
> return;
> }
>
> -static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
> - int protocol)
> +static void smc_sock_init(struct net *net, struct sock *sk, int protocol)
> {
> - struct smc_sock *smc;
> - struct proto *prot;
> - struct sock *sk;
> -
> - prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
> - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
> - if (!sk)
> - return NULL;
> + struct smc_sock *smc = smc_sk(sk);
>
> - sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
> sk->sk_state = SMC_INIT;
> - sk->sk_destruct = smc_destruct;
> sk->sk_protocol = protocol;
> + mutex_init(&smc->clcsock_release_lock);
Please add mutex_destroy(&smc->clcsock_release_lock); when
smc->clcsock_release_lock is no longer used.
Or else some tools will notify errors.
Zhu Yanjun
> WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
> WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
> - smc = smc_sk(sk);
> INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
> INIT_WORK(&smc->connect_work, smc_connect_work);
> INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
> INIT_LIST_HEAD(&smc->accept_q);
> spin_lock_init(&smc->accept_q_lock);
> spin_lock_init(&smc->conn.send_lock);
> - sk->sk_prot->hash(sk);
> - mutex_init(&smc->clcsock_release_lock);
> smc_init_saved_callbacks(smc);
> + smc->limit_smc_hs = net->smc.limit_smc_hs;
> + smc->use_fallback = false; /* assume rdma capability first */
> + smc->fallback_rsn = 0;
> +
> + sk->sk_destruct = smc_destruct;
> + sk->sk_prot->hash(sk);
> +}
> +
> +static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
> + int protocol)
> +{
> + struct proto *prot;
> + struct sock *sk;
> +
> + prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
> + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
> + if (!sk)
> + return NULL;
> +
> + sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
> + smc_sock_init(net, sk, protocol);
>
> return sk;
> }
> @@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
> .splice_read = smc_splice_read,
> };
>
> +static int __smc_create_clcsk(struct net *net, struct sock *sk, int family)
> +{
> + struct smc_sock *smc = smc_sk(sk);
> + int rc;
> +
> + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
> + &smc->clcsock);
> + if (rc) {
> + sk_common_release(sk);
> + return rc;
> + }
> +
> + /* smc_clcsock_release() does not wait smc->clcsock->sk's
> + * destruction; its sk_state might not be TCP_CLOSE after
> + * smc->sk is close()d, and TCP timers can be fired later,
> + * which need net ref.
> + */
> + sk = smc->clcsock->sk;
> + __netns_tracker_free(net, &sk->ns_tracker, false);
> + sk->sk_net_refcnt = 1;
> + get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
> + sock_inuse_add(net, 1);
> + return 0;
> +}
> +
> static int __smc_create(struct net *net, struct socket *sock, int protocol,
> int kern, struct socket *clcsock)
> {
> @@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>
> /* create internal TCP socket for CLC handshake and fallback */
> smc = smc_sk(sk);
> - smc->use_fallback = false; /* assume rdma capability first */
> - smc->fallback_rsn = 0;
> -
> - /* default behavior from limit_smc_hs in every net namespace */
> - smc->limit_smc_hs = net->smc.limit_smc_hs;
>
> rc = 0;
> - if (!clcsock) {
> - rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
> - &smc->clcsock);
> - if (rc) {
> - sk_common_release(sk);
> - goto out;
> - }
> -
> - /* smc_clcsock_release() does not wait smc->clcsock->sk's
> - * destruction; its sk_state might not be TCP_CLOSE after
> - * smc->sk is close()d, and TCP timers can be fired later,
> - * which need net ref.
> - */
> - sk = smc->clcsock->sk;
> - __netns_tracker_free(net, &sk->ns_tracker, false);
> - sk->sk_net_refcnt = 1;
> - get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
> - sock_inuse_add(net, 1);
> - } else {
> + if (!clcsock)
> + rc = __smc_create_clcsk(net, sk, family);
> + else
> smc->clcsock = clcsock;
> - }
> -
> out:
> return rc;
> }
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock
2024-05-11 12:21 ` Zhu Yanjun
@ 2024-05-13 3:22 ` D. Wythe
0 siblings, 0 replies; 15+ messages in thread
From: D. Wythe @ 2024-05-13 3:22 UTC (permalink / raw)
To: Zhu Yanjun, kgraul, wenjia, jaka, wintera, guwen
Cc: kuba, davem, netdev, linux-s390, linux-rdma, tonylu, pabeni, edumazet
On 5/11/24 8:21 PM, Zhu Yanjun wrote:
> 在 2024/5/10 6:12, D. Wythe 写道:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch aims to isolate the shared components of SMC socket
>> allocation by introducing smc_sock_init() for sock initialization
>> and __smc_create_clcsk() for the initialization of clcsock.
>>
>> This is in preparation for the subsequent implementation of the
>> AF_INET version of SMC.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>> net/smc/af_smc.c | 93
>> +++++++++++++++++++++++++++++++-------------------------
>> 1 file changed, 52 insertions(+), 41 deletions(-)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 9389f0c..1f03724 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -361,34 +361,43 @@ static void smc_destruct(struct sock *sk)
>> return;
>> }
>> -static struct sock *smc_sock_alloc(struct net *net, struct socket
>> *sock,
>> - int protocol)
>> +static void smc_sock_init(struct net *net, struct sock *sk, int
>> protocol)
>> {
>> - struct smc_sock *smc;
>> - struct proto *prot;
>> - struct sock *sk;
>> -
>> - prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> - sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> - if (!sk)
>> - return NULL;
>> + struct smc_sock *smc = smc_sk(sk);
>> - sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> sk->sk_state = SMC_INIT;
>> - sk->sk_destruct = smc_destruct;
>> sk->sk_protocol = protocol;
>> + mutex_init(&smc->clcsock_release_lock);
>
> Please add mutex_destroy(&smc->clcsock_release_lock); when
> smc->clcsock_release_lock is no longer used.
>
> Or else some tools will notify errors.
>
> Zhu Yanjun
It seems that the problem you mentioned is not caused by this patch,
after all, this patch is solely for refactoring.
Adding the fix you mentioned in this refactoring patch would not be
appropriate. Perhaps, you could submit a separate
patch to address the issue. What do you think?
D. Wythe
>
>> WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
>> WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
>> - smc = smc_sk(sk);
>> INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
>> INIT_WORK(&smc->connect_work, smc_connect_work);
>> INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
>> INIT_LIST_HEAD(&smc->accept_q);
>> spin_lock_init(&smc->accept_q_lock);
>> spin_lock_init(&smc->conn.send_lock);
>> - sk->sk_prot->hash(sk);
>> - mutex_init(&smc->clcsock_release_lock);
>> smc_init_saved_callbacks(smc);
>> + smc->limit_smc_hs = net->smc.limit_smc_hs;
>> + smc->use_fallback = false; /* assume rdma capability first */
>> + smc->fallback_rsn = 0;
>> +
>> + sk->sk_destruct = smc_destruct;
>> + sk->sk_prot->hash(sk);
>> +}
>> +
>> +static struct sock *smc_sock_alloc(struct net *net, struct socket
>> *sock,
>> + int protocol)
>> +{
>> + struct proto *prot;
>> + struct sock *sk;
>> +
>> + prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
>> + sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
>> + if (!sk)
>> + return NULL;
>> +
>> + sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
>> + smc_sock_init(net, sk, protocol);
>> return sk;
>> }
>> @@ -3321,6 +3330,31 @@ static ssize_t smc_splice_read(struct socket
>> *sock, loff_t *ppos,
>> .splice_read = smc_splice_read,
>> };
>> +static int __smc_create_clcsk(struct net *net, struct sock *sk,
>> int family)
>> +{
>> + struct smc_sock *smc = smc_sk(sk);
>> + int rc;
>> +
>> + rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> + &smc->clcsock);
>> + if (rc) {
>> + sk_common_release(sk);
>> + return rc;
>> + }
>> +
>> + /* smc_clcsock_release() does not wait smc->clcsock->sk's
>> + * destruction; its sk_state might not be TCP_CLOSE after
>> + * smc->sk is close()d, and TCP timers can be fired later,
>> + * which need net ref.
>> + */
>> + sk = smc->clcsock->sk;
>> + __netns_tracker_free(net, &sk->ns_tracker, false);
>> + sk->sk_net_refcnt = 1;
>> + get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> + sock_inuse_add(net, 1);
>> + return 0;
>> +}
>> +
>> static int __smc_create(struct net *net, struct socket *sock, int
>> protocol,
>> int kern, struct socket *clcsock)
>> {
>> @@ -3346,35 +3380,12 @@ static int __smc_create(struct net *net,
>> struct socket *sock, int protocol,
>> /* create internal TCP socket for CLC handshake and fallback */
>> smc = smc_sk(sk);
>> - smc->use_fallback = false; /* assume rdma capability first */
>> - smc->fallback_rsn = 0;
>> -
>> - /* default behavior from limit_smc_hs in every net namespace */
>> - smc->limit_smc_hs = net->smc.limit_smc_hs;
>> rc = 0;
>> - if (!clcsock) {
>> - rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
>> - &smc->clcsock);
>> - if (rc) {
>> - sk_common_release(sk);
>> - goto out;
>> - }
>> -
>> - /* smc_clcsock_release() does not wait smc->clcsock->sk's
>> - * destruction; its sk_state might not be TCP_CLOSE after
>> - * smc->sk is close()d, and TCP timers can be fired later,
>> - * which need net ref.
>> - */
>> - sk = smc->clcsock->sk;
>> - __netns_tracker_free(net, &sk->ns_tracker, false);
>> - sk->sk_net_refcnt = 1;
>> - get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
>> - sock_inuse_add(net, 1);
>> - } else {
>> + if (!clcsock)
>> + rc = __smc_create_clcsk(net, sk, family);
>> + else
>> smc->clcsock = clcsock;
>> - }
>> -
>> out:
>> return rc;
>> }
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2024-05-13 3:22 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-10 4:12 [PATCH net-next 0/2] Introduce IPPROTO_SMC D. Wythe
2024-05-10 4:12 ` [PATCH net-next 1/2] net/smc: refatoring initialization of smc sock D. Wythe
2024-05-10 9:50 ` Dust Li
2024-05-11 2:26 ` D. Wythe
2024-05-11 12:21 ` Zhu Yanjun
2024-05-13 3:22 ` D. Wythe
2024-05-10 4:12 ` [PATCH net-next 2/2] net/smc: Introduce IPPROTO_SMC D. Wythe
2024-05-10 9:57 ` Dust Li
2024-05-11 2:23 ` D. Wythe
2024-05-11 2:46 ` Dust Li
2024-05-11 3:02 ` D. Wythe
2024-05-10 17:09 ` kernel test robot
2024-05-10 18:32 ` kernel test robot
2024-05-10 9:14 ` [PATCH net-next 0/2] " D. Wythe
2024-05-10 10:22 ` Wenjia Zhang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).