All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] net/smc: Fix expected buffersizes and sync logic
@ 2022-11-23 10:49 Jan Karcher
  2022-11-23 11:53 ` Tony Lu
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Karcher @ 2022-11-23 10:49 UTC (permalink / raw)
  To: David Miller, Jakub Kicinski
  Cc: netdev, linux-s390, Heiko Carstens, Alexandra Winter,
	Wenjia Zhang, Thorsten Winkler, Stefan Raspl, Karsten Graul,
	Jan Karcher, Tony Lu

The fixed commit changed the expected behavior of buffersizes
set by the user using the setsockopt mechanism.
Before the fixed patch the logic for determining the buffersizes used
was the following:

default  = net.ipv4.tcp_{w|r}mem[1]
sockopt  = the setsockopt mechanism
val      = the value assigned in default or via setsockopt
sk_buf   = short for sk_{snd|rcv}buf
real_buf = the real size of the buffer (sk_buf_size in __smc_buf_create)

  exposed   | net/core/sock.c  |    af_smc.c    |  smc_core.c
            |                  |                |
+---------+ |                  | +------------+ | +-------------------+
| default |----------------------| sk_buf=val |---| real_buf=sk_buf/2 |
+---------+ |                  | +------------+ | +-------------------+
            |                  |                |    ^
            |                  |                |    |
+---------+ | +--------------+ |                |    |
| sockopt |---| sk_buf=val*2 |-----------------------|
+---------+ | +--------------+ |                |
            |                  |                |

The fixed patch introduced a dedicated sysctl for smc
and removed the /2 in smc_core.c resulting in the following flow:

default  = net.smc.{w|r}mem (which defaults to net.ipv4.tcp_{w|r}mem[1])
sockopt  = the setsockopt mechanism
val      = the value assigned in default or via setsockopt
sk_buf   = short for sk_{snd|rcv}buf
real_buf = the real size of the buffer (sk_buf_size in __smc_buf_create)

  exposed   | net/core/sock.c  |    af_smc.c    |  smc_core.c
            |                  |                |
+---------+ |                  | +------------+ | +-----------------+
| default |----------------------| sk_buf=val |---| real_buf=sk_buf |
+---------+ |                  | +------------+ | +-----------------+
            |                  |                |    ^
            |                  |                |    |
+---------+ | +--------------+ |                |    |
| sockopt |---| sk_buf=val*2 |-----------------------|
+---------+ | +--------------+ |                |
            |                  |                |

This would result in double of memory used for existing configurations
that are using setsockopt.

SMC historically decided to use the explicit value given by the user
to allocate the memory. This is why we used the /2 in smc_core.c.
That logic was not applied to the default value.
Since we now have our own sysctl, which is also exposed to the user,
we should sync the logic in a way that both values are the real value
used by our code and shown by smc_stats. To achieve this this patch
changes the behavior to:

default  = net.smc.{w|r}mem (which defaults to net.ipv4.tcp_{w|r}mem[1])
sockopt  = the setsockopt mechanism
val      = the value assigned in default or via setsockopt
sk_buf   = short for sk_{snd|rcv}buf
real_buf = the real size of the buffer (sk_buf_size in __smc_buf_create)

  exposed   | net/core/sock.c  |    af_smc.c     |  smc_core.c
            |                  |                 |
+---------+ |                  | +-------------+ | +-----------------+
| default |----------------------| sk_buf=val*2|---|real_buf=sk_buf/2|
+---------+ |                  | +-------------+ | +-----------------+
            |                  |                 |    ^
            |                  |                 |    |
+---------+ | +--------------+ |                 |    |
| sockopt |---| sk_buf=val*2 |------------------------|
+---------+ | +--------------+ |                 |
            |                  |                 |

This way both paths follow the same pattern and the expected behavior
is re-established.

Fixes: 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
---
 net/smc/af_smc.c   | 9 +++++++--
 net/smc/smc_core.c | 8 ++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 036532cf39aa..a8c84e7bac99 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -366,6 +366,7 @@ static void smc_destruct(struct sock *sk)
 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 				   int protocol)
 {
+	int buffersize_without_overhead;
 	struct smc_sock *smc;
 	struct proto *prot;
 	struct sock *sk;
@@ -379,8 +380,12 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 	sk->sk_state = SMC_INIT;
 	sk->sk_destruct = smc_destruct;
 	sk->sk_protocol = protocol;
-	WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
-	WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+	buffersize_without_overhead =
+		min_t(int, READ_ONCE(net->smc.sysctl_wmem), INT_MAX / 2);
+	WRITE_ONCE(sk->sk_sndbuf, buffersize_without_overhead * 2);
+	buffersize_without_overhead =
+		min_t(int, READ_ONCE(net->smc.sysctl_rmem), INT_MAX / 2);
+	WRITE_ONCE(sk->sk_rcvbuf, buffersize_without_overhead * 2);
 	smc = smc_sk(sk);
 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 	INIT_WORK(&smc->connect_work, smc_connect_work);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 00fb352c2765..36850a2ae167 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2314,10 +2314,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 	if (is_rmb)
 		/* use socket recv buffer size (w/o overhead) as start value */
-		sk_buf_size = smc->sk.sk_rcvbuf;
+		sk_buf_size = smc->sk.sk_rcvbuf / 2;
 	else
 		/* use socket send buffer size (w/o overhead) as start value */
-		sk_buf_size = smc->sk.sk_sndbuf;
+		sk_buf_size = smc->sk.sk_sndbuf / 2;
 
 	for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
 	     bufsize_short >= 0; bufsize_short--) {
@@ -2376,7 +2376,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 	if (is_rmb) {
 		conn->rmb_desc = buf_desc;
 		conn->rmbe_size_short = bufsize_short;
-		smc->sk.sk_rcvbuf = bufsize;
+		smc->sk.sk_rcvbuf = bufsize * 2;
 		atomic_set(&conn->bytes_to_rcv, 0);
 		conn->rmbe_update_limit =
 			smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2384,7 +2384,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
 	} else {
 		conn->sndbuf_desc = buf_desc;
-		smc->sk.sk_sndbuf = bufsize;
+		smc->sk.sk_sndbuf = bufsize * 2;
 		atomic_set(&conn->sndbuf_space, bufsize);
 	}
 	return 0;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-12-02 13:10 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-23 10:49 [PATCH net] net/smc: Fix expected buffersizes and sync logic Jan Karcher
2022-11-23 11:53 ` Tony Lu
2022-11-23 13:13   ` Jan Karcher
2022-11-23 13:41     ` Tony Lu
2022-11-24 13:00       ` Alexandra Winter
2022-11-24 14:07         ` Alexandra Winter
2022-11-25  6:15           ` Jan Karcher
2022-11-25  7:05             ` Tony Lu
2022-11-25 10:59               ` Alexandra Winter
2022-11-28  4:33                 ` Tony Lu
2022-11-28  8:32                   ` Wenjia Zhang
2022-11-28 12:24                     ` Tony Lu
2022-12-02 13:10                 ` Jan Karcher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.