linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2] net/smc: align the connect behaviour with TCP
@ 2022-05-13  2:24 Guangguan Wang
  2022-05-16  9:50 ` patchwork-bot+netdevbpf
  2022-05-23 12:24 ` Karsten Graul
  0 siblings, 2 replies; 12+ messages in thread
From: Guangguan Wang @ 2022-05-13  2:24 UTC (permalink / raw)
  To: kgraul, davem, kuba, pabeni; +Cc: linux-s390, netdev, linux-kernel

Connect with O_NONBLOCK will not be completed immediately
and returns -EINPROGRESS. It is possible to use selector/poll
for completion by selecting the socket for writing. After select
indicates writability, a second connect function call will return
0 to indicate connected successfully as TCP does, but smc returns
-EISCONN. Use socket state for smc to indicate connect state, which
can help smc aligning the connect behaviour with TCP.

Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
Acked-by: Karsten Graul <kgraul@linux.ibm.com>
---
 net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fce16b9d6e1a..5f70642a8044 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 		goto out_err;
 
 	lock_sock(sk);
+	switch (sock->state) {
+	default:
+		rc = -EINVAL;
+		goto out;
+	case SS_CONNECTED:
+		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
+		goto out;
+	case SS_CONNECTING:
+		if (sk->sk_state == SMC_ACTIVE)
+			goto connected;
+		break;
+	case SS_UNCONNECTED:
+		sock->state = SS_CONNECTING;
+		break;
+	}
+
 	switch (sk->sk_state) {
 	default:
 		goto out;
+	case SMC_CLOSED:
+		rc = sock_error(sk) ? : -ECONNABORTED;
+		sock->state = SS_UNCONNECTED;
+		goto out;
 	case SMC_ACTIVE:
 		rc = -EISCONN;
 		goto out;
@@ -1565,20 +1585,24 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 		goto out;
 
 	sock_hold(&smc->sk); /* sock put in passive closing */
-	if (smc->use_fallback)
+	if (smc->use_fallback) {
+		sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
 		goto out;
+	}
 	if (flags & O_NONBLOCK) {
 		if (queue_work(smc_hs_wq, &smc->connect_work))
 			smc->connect_nonblock = 1;
 		rc = -EINPROGRESS;
+		goto out;
 	} else {
 		rc = __smc_connect(smc);
 		if (rc < 0)
 			goto out;
-		else
-			rc = 0; /* success cases including fallback */
 	}
 
+connected:
+	rc = 0;
+	sock->state = SS_CONNECTED;
 out:
 	release_sock(sk);
 out_err:
@@ -1693,6 +1717,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
 		}
 		if (new_sock) {
 			sock_graft(new_sk, new_sock);
+			new_sock->state = SS_CONNECTED;
 			if (isk->use_fallback) {
 				smc_sk(new_sk)->clcsock->file = new_sock->file;
 				isk->clcsock->file->private_data = isk->clcsock;
@@ -2424,7 +2449,7 @@ static int smc_listen(struct socket *sock, int backlog)
 
 	rc = -EINVAL;
 	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
-	    smc->connect_nonblock)
+	    smc->connect_nonblock || sock->state != SS_UNCONNECTED)
 		goto out;
 
 	rc = 0;
@@ -2716,6 +2741,17 @@ static int smc_shutdown(struct socket *sock, int how)
 
 	lock_sock(sk);
 
+	if (sock->state == SS_CONNECTING) {
+		if (sk->sk_state == SMC_ACTIVE)
+			sock->state = SS_CONNECTED;
+		else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
+			 sk->sk_state == SMC_PEERCLOSEWAIT2 ||
+			 sk->sk_state == SMC_APPCLOSEWAIT1 ||
+			 sk->sk_state == SMC_APPCLOSEWAIT2 ||
+			 sk->sk_state == SMC_APPFINCLOSEWAIT)
+			sock->state = SS_DISCONNECTING;
+	}
+
 	rc = -ENOTCONN;
 	if ((sk->sk_state != SMC_ACTIVE) &&
 	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
@@ -2729,6 +2765,7 @@ static int smc_shutdown(struct socket *sock, int how)
 		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
 		if (sk->sk_shutdown == SHUTDOWN_MASK) {
 			sk->sk_state = SMC_CLOSED;
+			sk->sk_socket->state = SS_UNCONNECTED;
 			sock_put(sk);
 		}
 		goto out;
@@ -2754,6 +2791,10 @@ static int smc_shutdown(struct socket *sock, int how)
 	/* map sock_shutdown_cmd constants to sk_shutdown value range */
 	sk->sk_shutdown |= how + 1;
 
+	if (sk->sk_state == SMC_CLOSED)
+		sock->state = SS_UNCONNECTED;
+	else
+		sock->state = SS_DISCONNECTING;
 out:
 	release_sock(sk);
 	return rc ? rc : rc1;
@@ -3139,6 +3180,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
 
 	rc = -ENOBUFS;
 	sock->ops = &smc_sock_ops;
+	sock->state = SS_UNCONNECTED;
 	sk = smc_sock_alloc(net, sock, protocol);
 	if (!sk)
 		goto out;
-- 
2.24.3 (Apple Git-128)


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-13  2:24 [PATCH net-next v2] net/smc: align the connect behaviour with TCP Guangguan Wang
@ 2022-05-16  9:50 ` patchwork-bot+netdevbpf
  2022-05-23 12:24 ` Karsten Graul
  1 sibling, 0 replies; 12+ messages in thread
From: patchwork-bot+netdevbpf @ 2022-05-16  9:50 UTC (permalink / raw)
  To: Guangguan Wang
  Cc: kgraul, davem, kuba, pabeni, linux-s390, netdev, linux-kernel

Hello:

This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:

On Fri, 13 May 2022 10:24:53 +0800 you wrote:
> Connect with O_NONBLOCK will not be completed immediately
> and returns -EINPROGRESS. It is possible to use selector/poll
> for completion by selecting the socket for writing. After select
> indicates writability, a second connect function call will return
> 0 to indicate connected successfully as TCP does, but smc returns
> -EISCONN. Use socket state for smc to indicate connect state, which
> can help smc aligning the connect behaviour with TCP.
> 
> [...]

Here is the summary with links:
  - [net-next,v2] net/smc: align the connect behaviour with TCP
    https://git.kernel.org/netdev/net-next/c/3aba103006bc

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-13  2:24 [PATCH net-next v2] net/smc: align the connect behaviour with TCP Guangguan Wang
  2022-05-16  9:50 ` patchwork-bot+netdevbpf
@ 2022-05-23 12:24 ` Karsten Graul
  2022-05-24  2:59   ` Guangguan Wang
  1 sibling, 1 reply; 12+ messages in thread
From: Karsten Graul @ 2022-05-23 12:24 UTC (permalink / raw)
  To: Guangguan Wang, davem, kuba, pabeni; +Cc: linux-s390, netdev, linux-kernel

On 13/05/2022 04:24, Guangguan Wang wrote:
> Connect with O_NONBLOCK will not be completed immediately
> and returns -EINPROGRESS. It is possible to use selector/poll
> for completion by selecting the socket for writing. After select
> indicates writability, a second connect function call will return
> 0 to indicate connected successfully as TCP does, but smc returns
> -EISCONN. Use socket state for smc to indicate connect state, which
> can help smc aligning the connect behaviour with TCP.
> 
> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
> Acked-by: Karsten Graul <kgraul@linux.ibm.com>
> ---
>  net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 46 insertions(+), 4 deletions(-)
> 
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index fce16b9d6e1a..5f70642a8044 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>  		goto out_err;
>  
>  	lock_sock(sk);
> +	switch (sock->state) {
> +	default:
> +		rc = -EINVAL;
> +		goto out;
> +	case SS_CONNECTED:
> +		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
> +		goto out;
> +	case SS_CONNECTING:
> +		if (sk->sk_state == SMC_ACTIVE)
> +			goto connected;

I stumbled over this when thinking about the fallback processing. If for whatever reason
fallback==true during smc_connect(), the "if (smc->use_fallback)" below would set sock->state
to e.g. SS_CONNECTED. But in the fallback case sk_state keeps SMC_INIT. So during the next call
the SS_CONNECTING case above would break because sk_state in NOT SMC_ACTIVE, and we would end
up calling kernel_connect() again. Which seems to be no problem when kernel_connect() returns 
-EISCONN and we return this to the caller. But is this how it should work, or does it work by chance?

> +		break;
> +	case SS_UNCONNECTED:
> +		sock->state = SS_CONNECTING;
> +		break;
> +	}
> +
>  	switch (sk->sk_state) {
>  	default:
>  		goto out;
> +	case SMC_CLOSED:
> +		rc = sock_error(sk) ? : -ECONNABORTED;
> +		sock->state = SS_UNCONNECTED;
> +		goto out;
>  	case SMC_ACTIVE:
>  		rc = -EISCONN;
>  		goto out;
> @@ -1565,20 +1585,24 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>  		goto out;
>  
>  	sock_hold(&smc->sk); /* sock put in passive closing */
> -	if (smc->use_fallback)
> +	if (smc->use_fallback) {
> +		sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
>  		goto out;
> +	}
>  	if (flags & O_NONBLOCK) {
>  		if (queue_work(smc_hs_wq, &smc->connect_work))
>  			smc->connect_nonblock = 1;
>  		rc = -EINPROGRESS;
> +		goto out;
>  	} else {
>  		rc = __smc_connect(smc);
>  		if (rc < 0)
>  			goto out;
> -		else
> -			rc = 0; /* success cases including fallback */
>  	}
>  
> +connected:
> +	rc = 0;
> +	sock->state = SS_CONNECTED;
>  out:
>  	release_sock(sk);
>  out_err:
> @@ -1693,6 +1717,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
>  		}
>  		if (new_sock) {
>  			sock_graft(new_sk, new_sock);
> +			new_sock->state = SS_CONNECTED;
>  			if (isk->use_fallback) {
>  				smc_sk(new_sk)->clcsock->file = new_sock->file;
>  				isk->clcsock->file->private_data = isk->clcsock;
> @@ -2424,7 +2449,7 @@ static int smc_listen(struct socket *sock, int backlog)
>  
>  	rc = -EINVAL;
>  	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
> -	    smc->connect_nonblock)
> +	    smc->connect_nonblock || sock->state != SS_UNCONNECTED)
>  		goto out;
>  
>  	rc = 0;
> @@ -2716,6 +2741,17 @@ static int smc_shutdown(struct socket *sock, int how)
>  
>  	lock_sock(sk);
>  
> +	if (sock->state == SS_CONNECTING) {
> +		if (sk->sk_state == SMC_ACTIVE)
> +			sock->state = SS_CONNECTED;
> +		else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
> +			 sk->sk_state == SMC_PEERCLOSEWAIT2 ||
> +			 sk->sk_state == SMC_APPCLOSEWAIT1 ||
> +			 sk->sk_state == SMC_APPCLOSEWAIT2 ||
> +			 sk->sk_state == SMC_APPFINCLOSEWAIT)
> +			sock->state = SS_DISCONNECTING;
> +	}
> +
>  	rc = -ENOTCONN;
>  	if ((sk->sk_state != SMC_ACTIVE) &&
>  	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
> @@ -2729,6 +2765,7 @@ static int smc_shutdown(struct socket *sock, int how)
>  		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
>  		if (sk->sk_shutdown == SHUTDOWN_MASK) {
>  			sk->sk_state = SMC_CLOSED;
> +			sk->sk_socket->state = SS_UNCONNECTED;
>  			sock_put(sk);
>  		}
>  		goto out;
> @@ -2754,6 +2791,10 @@ static int smc_shutdown(struct socket *sock, int how)
>  	/* map sock_shutdown_cmd constants to sk_shutdown value range */
>  	sk->sk_shutdown |= how + 1;
>  
> +	if (sk->sk_state == SMC_CLOSED)
> +		sock->state = SS_UNCONNECTED;
> +	else
> +		sock->state = SS_DISCONNECTING;
>  out:
>  	release_sock(sk);
>  	return rc ? rc : rc1;
> @@ -3139,6 +3180,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>  
>  	rc = -ENOBUFS;
>  	sock->ops = &smc_sock_ops;
> +	sock->state = SS_UNCONNECTED;
>  	sk = smc_sock_alloc(net, sock, protocol);
>  	if (!sk)
>  		goto out;

-- 
Karsten

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-23 12:24 ` Karsten Graul
@ 2022-05-24  2:59   ` Guangguan Wang
  2022-05-24 12:04     ` Karsten Graul
  0 siblings, 1 reply; 12+ messages in thread
From: Guangguan Wang @ 2022-05-24  2:59 UTC (permalink / raw)
  To: Karsten Graul, davem, kuba, pabeni; +Cc: linux-s390, netdev, linux-kernel



On 2022/5/23 20:24, Karsten Graul wrote:
> On 13/05/2022 04:24, Guangguan Wang wrote:
>> Connect with O_NONBLOCK will not be completed immediately
>> and returns -EINPROGRESS. It is possible to use selector/poll
>> for completion by selecting the socket for writing. After select
>> indicates writability, a second connect function call will return
>> 0 to indicate connected successfully as TCP does, but smc returns
>> -EISCONN. Use socket state for smc to indicate connect state, which
>> can help smc aligning the connect behaviour with TCP.
>>
>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>> Acked-by: Karsten Graul <kgraul@linux.ibm.com>
>> ---
>>  net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 46 insertions(+), 4 deletions(-)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index fce16b9d6e1a..5f70642a8044 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>>  		goto out_err;
>>  
>>  	lock_sock(sk);
>> +	switch (sock->state) {
>> +	default:
>> +		rc = -EINVAL;
>> +		goto out;
>> +	case SS_CONNECTED:
>> +		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
>> +		goto out;
>> +	case SS_CONNECTING:
>> +		if (sk->sk_state == SMC_ACTIVE)
>> +			goto connected;
> 
> I stumbled over this when thinking about the fallback processing. If for whatever reason
> fallback==true during smc_connect(), the "if (smc->use_fallback)" below would set sock->state
> to e.g. SS_CONNECTED. But in the fallback case sk_state keeps SMC_INIT. So during the next call
> the SS_CONNECTING case above would break because sk_state in NOT SMC_ACTIVE, and we would end
> up calling kernel_connect() again. Which seems to be no problem when kernel_connect() returns 
> -EISCONN and we return this to the caller. But is this how it should work, or does it work by chance?
> 

Since the sk_state keeps SMC_INIT and does not correctly indicate the state of clcsock, it should end
up calling kernel_connect() again to get the actual connection state of clcsock.

And I'm sorry there is a problem that if sock->state==SS_CONNECTED and sk_state==SMC_INIT, further call
of smc_connect will return -EINVAL where -EISCONN is preferred. 
The steps to reproduce:
1)switch fallback before connect, such as setsockopt TCP_FASTOPEN
2)connect with noblocking and returns -EINPROGRESS. (sock->state changes to SS_CONNECTING)
3) end up calling connect with noblocking again and returns 0. (kernel_connect() returns 0 and sock->state changes to
   SS_CONNECTED but sk->sk_state stays SMC_INIT)
4) call connect again, maybe by mistake, will return -EINVAL, but -EISCONN is preferred.

What do you think about if we synchronize the sk_state to SMC_ACTIVE instead of keeping SMC_INIT when clcsock
connected successfully in fallback case described above.

...
if (smc->use_fallback) {
	sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
	if (!rc)
		sk->sk_state = SMC_ACTIVE;    /* synchronize sk_state from SMC_INIT to SMC_ACTIVE */
	goto out;
}
...

>> +		break;
>> +	case SS_UNCONNECTED:
>> +		sock->state = SS_CONNECTING;
>> +		break;
>> +	}
>> +
>>  	switch (sk->sk_state) {
>>  	default:
>>  		goto out;
>> +	case SMC_CLOSED:
>> +		rc = sock_error(sk) ? : -ECONNABORTED;
>> +		sock->state = SS_UNCONNECTED;
>> +		goto out;
>>  	case SMC_ACTIVE:
>>  		rc = -EISCONN;
>>  		goto out;
>> @@ -1565,20 +1585,24 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>>  		goto out;
>>  
>>  	sock_hold(&smc->sk); /* sock put in passive closing */
>> -	if (smc->use_fallback)
>> +	if (smc->use_fallback) {
>> +		sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
>>  		goto out;
>> +	}
>>  	if (flags & O_NONBLOCK) {
>>  		if (queue_work(smc_hs_wq, &smc->connect_work))
>>  			smc->connect_nonblock = 1;
>>  		rc = -EINPROGRESS;
>> +		goto out;
>>  	} else {
>>  		rc = __smc_connect(smc);
>>  		if (rc < 0)
>>  			goto out;
>> -		else
>> -			rc = 0; /* success cases including fallback */
>>  	}
>>  
>> +connected:
>> +	rc = 0;
>> +	sock->state = SS_CONNECTED;
>>  out:
>>  	release_sock(sk);
>>  out_err:

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-24  2:59   ` Guangguan Wang
@ 2022-05-24 12:04     ` Karsten Graul
  2022-05-24 12:57       ` liuyacan
  0 siblings, 1 reply; 12+ messages in thread
From: Karsten Graul @ 2022-05-24 12:04 UTC (permalink / raw)
  To: Guangguan Wang, davem, kuba, pabeni; +Cc: linux-s390, netdev, linux-kernel

On 24/05/2022 04:59, Guangguan Wang wrote:
> 
> 
> On 2022/5/23 20:24, Karsten Graul wrote:
>> On 13/05/2022 04:24, Guangguan Wang wrote:
>>> Connect with O_NONBLOCK will not be completed immediately
>>> and returns -EINPROGRESS. It is possible to use selector/poll
>>> for completion by selecting the socket for writing. After select
>>> indicates writability, a second connect function call will return
>>> 0 to indicate connected successfully as TCP does, but smc returns
>>> -EISCONN. Use socket state for smc to indicate connect state, which
>>> can help smc aligning the connect behaviour with TCP.
>>>
>>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>>> Acked-by: Karsten Graul <kgraul@linux.ibm.com>
>>> ---
>>>  net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
>>>  1 file changed, 46 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>> index fce16b9d6e1a..5f70642a8044 100644
>>> --- a/net/smc/af_smc.c
>>> +++ b/net/smc/af_smc.c
>>> @@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>>>  		goto out_err;
>>>  
>>>  	lock_sock(sk);
>>> +	switch (sock->state) {
>>> +	default:
>>> +		rc = -EINVAL;
>>> +		goto out;
>>> +	case SS_CONNECTED:
>>> +		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
>>> +		goto out;
>>> +	case SS_CONNECTING:
>>> +		if (sk->sk_state == SMC_ACTIVE)
>>> +			goto connected;
>>
>> I stumbled over this when thinking about the fallback processing. If for whatever reason
>> fallback==true during smc_connect(), the "if (smc->use_fallback)" below would set sock->state
>> to e.g. SS_CONNECTED. But in the fallback case sk_state keeps SMC_INIT. So during the next call
>> the SS_CONNECTING case above would break because sk_state in NOT SMC_ACTIVE, and we would end
>> up calling kernel_connect() again. Which seems to be no problem when kernel_connect() returns 
>> -EISCONN and we return this to the caller. But is this how it should work, or does it work by chance?
>>
> 
> Since the sk_state keeps SMC_INIT and does not correctly indicate the state of clcsock, it should end
> up calling kernel_connect() again to get the actual connection state of clcsock.
> 
> And I'm sorry there is a problem that if sock->state==SS_CONNECTED and sk_state==SMC_INIT, further call
> of smc_connect will return -EINVAL where -EISCONN is preferred. 
> The steps to reproduce:
> 1)switch fallback before connect, such as setsockopt TCP_FASTOPEN
> 2)connect with noblocking and returns -EINPROGRESS. (sock->state changes to SS_CONNECTING)
> 3) end up calling connect with noblocking again and returns 0. (kernel_connect() returns 0 and sock->state changes to
>    SS_CONNECTED but sk->sk_state stays SMC_INIT)
> 4) call connect again, maybe by mistake, will return -EINVAL, but -EISCONN is preferred.
> 
> What do you think about if we synchronize the sk_state to SMC_ACTIVE instead of keeping SMC_INIT when clcsock
> connected successfully in fallback case described above.
> 
> ...

I start thinking that the fix in 86434744 introduced a problem. Before that fix a connect with
fallback always reached __smc_connect() and on top of that function in case of fallback
smc_connect_fallback() is called, which itself sets sk_state to SMC_ACTIVE.

86434744 removed that code path and I wonder what it actually fixed, because at this time the 
fallback check in __smc_connect() was already present.

Without that "goto out;" the state would be set correctly in smc_connect_fallback(), and the 
socket close processing would work as expected.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-24 12:04     ` Karsten Graul
@ 2022-05-24 12:57       ` liuyacan
  2022-05-24 13:05         ` Karsten Graul
  0 siblings, 1 reply; 12+ messages in thread
From: liuyacan @ 2022-05-24 12:57 UTC (permalink / raw)
  To: kgraul
  Cc: davem, guangguan.wang, kuba, linux-kernel, linux-s390, netdev, pabeni

> > 
> > 
> > On 2022/5/23 20:24, Karsten Graul wrote:
> >> On 13/05/2022 04:24, Guangguan Wang wrote:
> >>> Connect with O_NONBLOCK will not be completed immediately
> >>> and returns -EINPROGRESS. It is possible to use selector/poll
> >>> for completion by selecting the socket for writing. After select
> >>> indicates writability, a second connect function call will return
> >>> 0 to indicate connected successfully as TCP does, but smc returns
> >>> -EISCONN. Use socket state for smc to indicate connect state, which
> >>> can help smc aligning the connect behaviour with TCP.
> >>>
> >>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
> >>> Acked-by: Karsten Graul <kgraul@linux.ibm.com>
> >>> ---
> >>>  net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
> >>>  1 file changed, 46 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> >>> index fce16b9d6e1a..5f70642a8044 100644
> >>> --- a/net/smc/af_smc.c
> >>> +++ b/net/smc/af_smc.c
> >>> @@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
> >>>  		goto out_err;
> >>>  
> >>>  	lock_sock(sk);
> >>> +	switch (sock->state) {
> >>> +	default:
> >>> +		rc = -EINVAL;
> >>> +		goto out;
> >>> +	case SS_CONNECTED:
> >>> +		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
> >>> +		goto out;
> >>> +	case SS_CONNECTING:
> >>> +		if (sk->sk_state == SMC_ACTIVE)
> >>> +			goto connected;
> >>
> >> I stumbled over this when thinking about the fallback processing. If for whatever reason
> >> fallback==true during smc_connect(), the "if (smc->use_fallback)" below would set sock->state
> >> to e.g. SS_CONNECTED. But in the fallback case sk_state keeps SMC_INIT. So during the next call
> >> the SS_CONNECTING case above would break because sk_state in NOT SMC_ACTIVE, and we would end
> >> up calling kernel_connect() again. Which seems to be no problem when kernel_connect() returns 
> >> -EISCONN and we return this to the caller. But is this how it should work, or does it work by chance?
> >>
> > 
> > Since the sk_state keeps SMC_INIT and does not correctly indicate the state of clcsock, it should end
> > up calling kernel_connect() again to get the actual connection state of clcsock.
> > 
> > And I'm sorry there is a problem that if sock->state==SS_CONNECTED and sk_state==SMC_INIT, further call
> > of smc_connect will return -EINVAL where -EISCONN is preferred. 
> > The steps to reproduce:
> > 1)switch fallback before connect, such as setsockopt TCP_FASTOPEN
> > 2)connect with noblocking and returns -EINPROGRESS. (sock->state changes to SS_CONNECTING)
> > 3) end up calling connect with noblocking again and returns 0. (kernel_connect() returns 0 and sock->state changes to
> >    SS_CONNECTED but sk->sk_state stays SMC_INIT)
> > 4) call connect again, maybe by mistake, will return -EINVAL, but -EISCONN is preferred.
> > 
> > What do you think about if we synchronize the sk_state to SMC_ACTIVE instead of keeping SMC_INIT when clcsock
> > connected successfully in fallback case described above.
> > 
> > ...
> 
> I start thinking that the fix in 86434744 introduced a problem. Before that fix a connect with
> fallback always reached __smc_connect() and on top of that function in case of fallback
> smc_connect_fallback() is called, which itself sets sk_state to SMC_ACTIVE.
> 
> 86434744 removed that code path and I wonder what it actually fixed, because at this time the 
> fallback check in __smc_connect() was already present.
> 
> Without that "goto out;" the state would be set correctly in smc_connect_fallback(), and the 
> socket close processing would work as expected.

I think it is OK without that "goto out;". And I guess the purpose of "goto out;" is to avoid calling __smc_connect(), 
because it is impossible to establish an rdma channel at this time.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-24 12:57       ` liuyacan
@ 2022-05-24 13:05         ` Karsten Graul
  2022-06-29 20:29           ` Wenjia Zhang
       [not found]           ` <8a15e288-4534-501c-8b3d-c235ae93238f@linux.ibm.com>
  0 siblings, 2 replies; 12+ messages in thread
From: Karsten Graul @ 2022-05-24 13:05 UTC (permalink / raw)
  To: liuyacan
  Cc: davem, guangguan.wang, kuba, linux-kernel, linux-s390, netdev, pabeni

On 24/05/2022 14:57, liuyacan@corp.netease.com wrote:
>>>
>>>
>>> On 2022/5/23 20:24, Karsten Graul wrote:
>>>> On 13/05/2022 04:24, Guangguan Wang wrote:
>>>>> Connect with O_NONBLOCK will not be completed immediately
>>>>> and returns -EINPROGRESS. It is possible to use selector/poll
>>>>> for completion by selecting the socket for writing. After select
>>>>> indicates writability, a second connect function call will return
>>>>> 0 to indicate connected successfully as TCP does, but smc returns
>>>>> -EISCONN. Use socket state for smc to indicate connect state, which
>>>>> can help smc aligning the connect behaviour with TCP.
>>>>>
>>>>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>>>>> Acked-by: Karsten Graul <kgraul@linux.ibm.com>
>>>>> ---
>>>>>  net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
>>>>>  1 file changed, 46 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>>>> index fce16b9d6e1a..5f70642a8044 100644
>>>>> --- a/net/smc/af_smc.c
>>>>> +++ b/net/smc/af_smc.c
>>>>> @@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>>>>>  		goto out_err;
>>>>>  
>>>>>  	lock_sock(sk);
>>>>> +	switch (sock->state) {
>>>>> +	default:
>>>>> +		rc = -EINVAL;
>>>>> +		goto out;
>>>>> +	case SS_CONNECTED:
>>>>> +		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
>>>>> +		goto out;
>>>>> +	case SS_CONNECTING:
>>>>> +		if (sk->sk_state == SMC_ACTIVE)
>>>>> +			goto connected;
>>>>
>>>> I stumbled over this when thinking about the fallback processing. If for whatever reason
>>>> fallback==true during smc_connect(), the "if (smc->use_fallback)" below would set sock->state
>>>> to e.g. SS_CONNECTED. But in the fallback case sk_state keeps SMC_INIT. So during the next call
>>>> the SS_CONNECTING case above would break because sk_state in NOT SMC_ACTIVE, and we would end
>>>> up calling kernel_connect() again. Which seems to be no problem when kernel_connect() returns 
>>>> -EISCONN and we return this to the caller. But is this how it should work, or does it work by chance?
>>>>
>>>
>>> Since the sk_state keeps SMC_INIT and does not correctly indicate the state of clcsock, it should end
>>> up calling kernel_connect() again to get the actual connection state of clcsock.
>>>
>>> And I'm sorry there is a problem that if sock->state==SS_CONNECTED and sk_state==SMC_INIT, further call
>>> of smc_connect will return -EINVAL where -EISCONN is preferred. 
>>> The steps to reproduce:
>>> 1)switch fallback before connect, such as setsockopt TCP_FASTOPEN
>>> 2)connect with noblocking and returns -EINPROGRESS. (sock->state changes to SS_CONNECTING)
>>> 3) end up calling connect with noblocking again and returns 0. (kernel_connect() returns 0 and sock->state changes to
>>>    SS_CONNECTED but sk->sk_state stays SMC_INIT)
>>> 4) call connect again, maybe by mistake, will return -EINVAL, but -EISCONN is preferred.
>>>
>>> What do you think about if we synchronize the sk_state to SMC_ACTIVE instead of keeping SMC_INIT when clcsock
>>> connected successfully in fallback case described above.
>>>
>>> ...
>>
>> I start thinking that the fix in 86434744 introduced a problem. Before that fix a connect with
>> fallback always reached __smc_connect() and on top of that function in case of fallback
>> smc_connect_fallback() is called, which itself sets sk_state to SMC_ACTIVE.
>>
>> 86434744 removed that code path and I wonder what it actually fixed, because at this time the 
>> fallback check in __smc_connect() was already present.
>>
>> Without that "goto out;" the state would be set correctly in smc_connect_fallback(), and the 
>> socket close processing would work as expected.
> 
> I think it is OK without that "goto out;". And I guess the purpose of "goto out;" is to avoid calling __smc_connect(), 
> because it is impossible to establish an rdma channel at this time.

Yes that was the purpose, but this disabled all the extra processing that should be done
for fallback sockets during connect().


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-05-24 13:05         ` Karsten Graul
@ 2022-06-29 20:29           ` Wenjia Zhang
       [not found]           ` <8a15e288-4534-501c-8b3d-c235ae93238f@linux.ibm.com>
  1 sibling, 0 replies; 12+ messages in thread
From: Wenjia Zhang @ 2022-06-29 20:29 UTC (permalink / raw)
  To: guangguan.wang
  Cc: Karsten Graul, liuyacan, davem, kuba, linux-kernel, linux-s390,
	netdev, pabeni



On 24.05.22 15:05, Karsten Graul wrote:
> On 24/05/2022 14:57, liuyacan@corp.netease.com wrote:
>>>>
>>>>
>>>> On 2022/5/23 20:24, Karsten Graul wrote:
>>>>> On 13/05/2022 04:24, Guangguan Wang wrote:
>>>>>> Connect with O_NONBLOCK will not be completed immediately
>>>>>> and returns -EINPROGRESS. It is possible to use selector/poll
>>>>>> for completion by selecting the socket for writing. After select
>>>>>> indicates writability, a second connect function call will return
>>>>>> 0 to indicate connected successfully as TCP does, but smc returns
>>>>>> -EISCONN. Use socket state for smc to indicate connect state, which
>>>>>> can help smc aligning the connect behaviour with TCP.
>>>>>>
>>>>>> Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
>>>>>> Acked-by: Karsten Graul <kgraul@linux.ibm.com>
>>>>>> ---
>>>>>>   net/smc/af_smc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++----
>>>>>>   1 file changed, 46 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>>>>>> index fce16b9d6e1a..5f70642a8044 100644
>>>>>> --- a/net/smc/af_smc.c
>>>>>> +++ b/net/smc/af_smc.c
>>>>>> @@ -1544,9 +1544,29 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
>>>>>>   		goto out_err;
>>>>>>   
>>>>>>   	lock_sock(sk);
>>>>>> +	switch (sock->state) {
>>>>>> +	default:
>>>>>> +		rc = -EINVAL;
>>>>>> +		goto out;
>>>>>> +	case SS_CONNECTED:
>>>>>> +		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
>>>>>> +		goto out;
>>>>>> +	case SS_CONNECTING:
>>>>>> +		if (sk->sk_state == SMC_ACTIVE)
>>>>>> +			goto connected;
>>>>>
>>>>> I stumbled over this when thinking about the fallback processing. If for whatever reason
>>>>> fallback==true during smc_connect(), the "if (smc->use_fallback)" below would set sock->state
>>>>> to e.g. SS_CONNECTED. But in the fallback case sk_state keeps SMC_INIT. So during the next call
>>>>> the SS_CONNECTING case above would break because sk_state in NOT SMC_ACTIVE, and we would end
>>>>> up calling kernel_connect() again. Which seems to be no problem when kernel_connect() returns
>>>>> -EISCONN and we return this to the caller. But is this how it should work, or does it work by chance?
>>>>>
>>>>
>>>> Since the sk_state keeps SMC_INIT and does not correctly indicate the state of clcsock, it should end
>>>> up calling kernel_connect() again to get the actual connection state of clcsock.
>>>>
>>>> And I'm sorry there is a problem that if sock->state==SS_CONNECTED and sk_state==SMC_INIT, further call
>>>> of smc_connect will return -EINVAL where -EISCONN is preferred.
>>>> The steps to reproduce:
>>>> 1)switch fallback before connect, such as setsockopt TCP_FASTOPEN
>>>> 2)connect with noblocking and returns -EINPROGRESS. (sock->state changes to SS_CONNECTING)
>>>> 3) end up calling connect with noblocking again and returns 0. (kernel_connect() returns 0 and sock->state changes to
>>>>     SS_CONNECTED but sk->sk_state stays SMC_INIT)
>>>> 4) call connect again, maybe by mistake, will return -EINVAL, but -EISCONN is preferred.
>>>>
>>>> What do you think about if we synchronize the sk_state to SMC_ACTIVE instead of keeping SMC_INIT when clcsock
>>>> connected successfully in fallback case described above.
>>>>
>>>> ...
>>>
>>> I start thinking that the fix in 86434744 introduced a problem. Before that fix a connect with
>>> fallback always reached __smc_connect() and on top of that function in case of fallback
>>> smc_connect_fallback() is called, which itself sets sk_state to SMC_ACTIVE.
>>>
>>> 86434744 removed that code path and I wonder what it actually fixed, because at this time the
>>> fallback check in __smc_connect() was already present.
>>>
>>> Without that "goto out;" the state would be set correctly in smc_connect_fallback(), and the
>>> socket close processing would work as expected.
>>
>> I think it is OK without that "goto out;". And I guess the purpose of "goto out;" is to avoid calling __smc_connect(),
>> because it is impossible to establish an rdma channel at this time.
> 
> Yes that was the purpose, but this disabled all the extra processing that should be done
> for fallback sockets during connect().
> 
Since Karsten's suggestion, we didn't hear from you any more. We just 
want to know:

- What do you think about the commit (86434744)? Could it be the trigger 
of the problem you met?

- Have you ever tried to just remove the following lines from 
smc_connection(), and check if your scenario could run correctly?

       if (smc->use_fallback)
               goto out;

In our opinion, we don't see the necessity of the patch, if partly 
reverting the commit (86434744) could solve the problem.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
       [not found]           ` <8a15e288-4534-501c-8b3d-c235ae93238f@linux.ibm.com>
@ 2022-06-30 14:29             ` Guangguan Wang
  2022-06-30 20:16               ` Wenjia Zhang
  0 siblings, 1 reply; 12+ messages in thread
From: Guangguan Wang @ 2022-06-30 14:29 UTC (permalink / raw)
  To: Wenjia Zhang
  Cc: davem, Karsten Graul, liuyacan, kuba, linux-kernel, linux-s390,
	netdev, pabeni



On 2022/6/30 04:09, Wenjia Zhang wrote:
> 
> Since Karsten's suggestion, we didn't hear from you any more. We just want to know:
> 
> - What do you think about the commit (86434744)? Could it be the trigger of the problem you met?
> 
> - Have you ever tried to just remove the following lines from smc_connection(), and check if your scenario could run correctly?
> 
> if (smc->use_fallback)
>               goto out;
> 
> In our opinion, we don't see the necessity of the patch, if partly reverting the commit (86434744) could solve the problem.

I'm so sorry I missed the last emails for this discussion.

Yes, commit (86434744) is the trigger of the problem described in 
https://lore.kernel.org/linux-s390/45a19f8b-1b64-3459-c28c-aebab4fd8f1e@linux.alibaba.com/#t .

And I have tested just remove the following lines from smc_connection() can solve the above problem. 
if (smc->use_fallback)
     goto out;

I aggree that partly reverting the commit (86434744) is a better solution.

Thanks,
Guangguan Wang

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-06-30 14:29             ` Guangguan Wang
@ 2022-06-30 20:16               ` Wenjia Zhang
  2022-07-01  2:03                 ` Guangguan Wang
  0 siblings, 1 reply; 12+ messages in thread
From: Wenjia Zhang @ 2022-06-30 20:16 UTC (permalink / raw)
  To: Guangguan Wang
  Cc: davem, Karsten Graul, liuyacan, kuba, linux-kernel, linux-s390,
	netdev, pabeni



On 30.06.22 16:29, Guangguan Wang wrote:
> I'm so sorry I missed the last emails for this discussion.
> 
> Yes, commit (86434744) is the trigger of the problem described in
> https://lore.kernel.org/linux-s390/45a19f8b-1b64-3459-c28c-aebab4fd8f1e@linux.alibaba.com/#t  .
> 
> And I have tested just remove the following lines from smc_connection() can solve the above problem.
> if (smc->use_fallback)
>       goto out;
> 
> I aggree that partly reverting the commit (86434744) is a better solution.
> 
> Thanks,
> Guangguan Wang
Thank you for your effort!
Would you like to revert this patch? We'll revert the commit (86434744) 
partly.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-06-30 20:16               ` Wenjia Zhang
@ 2022-07-01  2:03                 ` Guangguan Wang
  2022-07-01 12:45                   ` Wenjia Zhang
  0 siblings, 1 reply; 12+ messages in thread
From: Guangguan Wang @ 2022-07-01  2:03 UTC (permalink / raw)
  To: Wenjia Zhang
  Cc: davem, Karsten Graul, liuyacan, kuba, linux-kernel, linux-s390,
	netdev, pabeni



On 2022/7/1 04:16, Wenjia Zhang wrote:
> 
> 
> On 30.06.22 16:29, Guangguan Wang wrote:
>> I'm so sorry I missed the last emails for this discussion.
>>
>> Yes, commit (86434744) is the trigger of the problem described in
>> https://lore.kernel.org/linux-s390/45a19f8b-1b64-3459-c28c-aebab4fd8f1e@linux.alibaba.com/#t  .
>>
>> And I have tested just remove the following lines from smc_connection() can solve the above problem.
>> if (smc->use_fallback)
>>       goto out;
>>
>> I aggree that partly reverting the commit (86434744) is a better solution.
>>
>> Thanks,
>> Guangguan Wang
> Thank you for your effort!
> Would you like to revert this patch? We'll revert the commit (86434744) partly.

Did you mean revert commit (3aba1030)?
Sorry, I think I led to a misunderstanding. I mean commit (86434744) is the trigger of the problem I replied
in email https://lore.kernel.org/linux-s390/45a19f8b-1b64-3459-c28c-aebab4fd8f1e@linux.alibaba.com/#t, not 
the problem that commit (3aba1030) resolved for.

So I think the final solution is to remove the following lines from smc_connection() based on the current code.
if (smc->use_fallback) {
	sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
	goto out;
}

Thanks,
Guangguan Wang

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next v2] net/smc: align the connect behaviour with TCP
  2022-07-01  2:03                 ` Guangguan Wang
@ 2022-07-01 12:45                   ` Wenjia Zhang
  0 siblings, 0 replies; 12+ messages in thread
From: Wenjia Zhang @ 2022-07-01 12:45 UTC (permalink / raw)
  To: Guangguan Wang
  Cc: davem, Karsten Graul, liuyacan, kuba, linux-kernel, linux-s390,
	netdev, pabeni



On 01.07.22 04:03, Guangguan Wang wrote:
> 
> 
> On 2022/7/1 04:16, Wenjia Zhang wrote:
>>
>>
>> On 30.06.22 16:29, Guangguan Wang wrote:
>>> I'm so sorry I missed the last emails for this discussion.
>>>
>>> Yes, commit (86434744) is the trigger of the problem described in
>>> https://lore.kernel.org/linux-s390/45a19f8b-1b64-3459-c28c-aebab4fd8f1e@linux.alibaba.com/#t  .
>>>
>>> And I have tested just remove the following lines from smc_connection() can solve the above problem.
>>> if (smc->use_fallback)
>>>        goto out;
>>>
>>> I aggree that partly reverting the commit (86434744) is a better solution.
>>>
>>> Thanks,
>>> Guangguan Wang
>> Thank you for your effort!
>> Would you like to revert this patch? We'll revert the commit (86434744) partly.
> 
> Did you mean revert commit (3aba1030)?
> Sorry, I think I led to a misunderstanding. I mean commit (86434744) is the trigger of the problem I replied
> in email https://lore.kernel.org/linux-s390/45a19f8b-1b64-3459-c28c-aebab4fd8f1e@linux.alibaba.com/#t, not
> the problem that commit (3aba1030) resolved for.
> 
> So I think the final solution is to remove the following lines from smc_connection() based on the current code.
> if (smc->use_fallback) {
> 	sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
> 	goto out;
> }
> 
> Thanks,
> Guangguan Wang
That would be also ok for us, thanks!

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2022-07-01 12:45 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-13  2:24 [PATCH net-next v2] net/smc: align the connect behaviour with TCP Guangguan Wang
2022-05-16  9:50 ` patchwork-bot+netdevbpf
2022-05-23 12:24 ` Karsten Graul
2022-05-24  2:59   ` Guangguan Wang
2022-05-24 12:04     ` Karsten Graul
2022-05-24 12:57       ` liuyacan
2022-05-24 13:05         ` Karsten Graul
2022-06-29 20:29           ` Wenjia Zhang
     [not found]           ` <8a15e288-4534-501c-8b3d-c235ae93238f@linux.ibm.com>
2022-06-30 14:29             ` Guangguan Wang
2022-06-30 20:16               ` Wenjia Zhang
2022-07-01  2:03                 ` Guangguan Wang
2022-07-01 12:45                   ` Wenjia Zhang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).