* [PATCH] nvme-tcp: proper handling of tcp socket closing flows
@ 2021-01-28 15:27 elad.grupi
2021-01-28 22:03 ` Sagi Grimberg
2021-01-29 22:10 ` Sagi Grimberg
0 siblings, 2 replies; 14+ messages in thread
From: elad.grupi @ 2021-01-28 15:27 UTC (permalink / raw)
To: sagi, linux-nvme; +Cc: Elad Grupi
From: Elad Grupi <elad.grupi@dell.com>
avoid calling nvmet_tcp_release_queue_work if tcp socket was closed
before setting the sk callbacks.
Signed-off-by: Elad Grupi <elad.grupi@dell.com>
---
drivers/nvme/target/tcp.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d535080b781f..dac737bac874 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -647,7 +647,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
int ret = 0;
- if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
+ if (!cmd) {
cmd = nvmet_tcp_fetch_cmd(queue);
if (unlikely(!cmd))
return 0;
@@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
sock->sk->sk_state_change = nvmet_tcp_state_change;
queue->write_space = sock->sk->sk_write_space;
sock->sk->sk_write_space = nvmet_tcp_write_space;
+
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+ /* FALLTHRU */
+ sock->sk->sk_data_ready = queue->data_ready;
+ sock->sk->sk_state_change = queue->state_change;
+ sock->sk->sk_write_space = queue->write_space;
+ sk->sk_user_data = NULL;
+ queue->state = NVMET_TCP_Q_DISCONNECTING;
+ ret = -ENOTCONN;
+ break;
+ default:
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ ret = 0;
+ }
+
write_unlock_bh(&sock->sk->sk_callback_lock);
- return 0;
+ return ret;
}
static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
@@ -1506,8 +1524,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_destroy_sq;
- queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
-
return 0;
out_destroy_sq:
mutex_lock(&nvmet_tcp_queue_mutex);
--
2.16.5
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 15:27 [PATCH] nvme-tcp: proper handling of tcp socket closing flows elad.grupi
@ 2021-01-28 22:03 ` Sagi Grimberg
2021-01-28 23:07 ` Grupi, Elad
2021-01-29 22:10 ` Sagi Grimberg
1 sibling, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2021-01-28 22:03 UTC (permalink / raw)
To: elad.grupi, linux-nvme
> ---
> drivers/nvme/target/tcp.c | 24 ++++++++++++++++++++----
> 1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index d535080b781f..dac737bac874 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -647,7 +647,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
> struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
> int ret = 0;
>
> - if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
> + if (!cmd) {
> cmd = nvmet_tcp_fetch_cmd(queue);
> if (unlikely(!cmd))
> return 0;
> @@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> sock->sk->sk_state_change = nvmet_tcp_state_change;
> queue->write_space = sock->sk->sk_write_space;
> sock->sk->sk_write_space = nvmet_tcp_write_space;
> +
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /* FALLTHRU */
> + sock->sk->sk_data_ready = queue->data_ready;
> + sock->sk->sk_state_change = queue->state_change;
> + sock->sk->sk_write_space = queue->write_space;
> + sk->sk_user_data = NULL;
> + queue->state = NVMET_TCP_Q_DISCONNECTING;
> + ret = -ENOTCONN;
> + break;
> + default:
> + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> + ret = 0;
> + }
> +
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
>
> static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
> @@ -1506,8 +1524,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
> if (ret)
> goto out_destroy_sq;
>
> - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> -
> return 0;
> out_destroy_sq:
> mutex_lock(&nvmet_tcp_queue_mutex);
>
What about this instead?
--
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index c41902f7ce39..6388d18ca7c2 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1494,16 +1494,28 @@ static int nvmet_tcp_set_queue_sock(struct
nvmet_tcp_queue *queue)
ip_sock_set_tos(sock->sk, inet->rcv_tos);
write_lock_bh(&sock->sk->sk_callback_lock);
- sock->sk->sk_user_data = queue;
- queue->data_ready = sock->sk->sk_data_ready;
- sock->sk->sk_data_ready = nvmet_tcp_data_ready;
- queue->state_change = sock->sk->sk_state_change;
- sock->sk->sk_state_change = nvmet_tcp_state_change;
- queue->write_space = sock->sk->sk_write_space;
- sock->sk->sk_write_space = nvmet_tcp_write_space;
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+ /*
+ * If the socket is already closing, don't even start
+ * consuming it
+ */
+ ret = -ENOTCONN;
+ break;
+ default:
+ sock->sk->sk_user_data = queue;
+ queue->data_ready = sock->sk->sk_data_ready;
+ sock->sk->sk_data_ready = nvmet_tcp_data_ready;
+ queue->state_change = sock->sk->sk_state_change;
+ sock->sk->sk_state_change = nvmet_tcp_state_change;
+ queue->write_space = sock->sk->sk_write_space;
+ sock->sk->sk_write_space = nvmet_tcp_write_space;
+ }
write_unlock_bh(&sock->sk->sk_callback_lock);
- return 0;
+ return ret;
}
--
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply related [flat|nested] 14+ messages in thread
* RE: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 22:03 ` Sagi Grimberg
@ 2021-01-28 23:07 ` Grupi, Elad
2021-01-28 23:33 ` Sagi Grimberg
0 siblings, 1 reply; 14+ messages in thread
From: Grupi, Elad @ 2021-01-28 23:07 UTC (permalink / raw)
To: Sagi Grimberg, linux-nvme
That will work if sk_state_change is called under sk_callback_lock.
In addition, need to flush_work(&queue->port->accept_work) in nvmet_tcp_release_queue_work because accept_work is still using queue struct after unlocking sk_callback_lock.
-----Original Message-----
From: Sagi Grimberg <sagi@grimberg.me>
Sent: Friday, 29 January 2021 0:03
To: Grupi, Elad; linux-nvme@lists.infradead.org
Subject: Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
[EXTERNAL EMAIL]
> ---
> drivers/nvme/target/tcp.c | 24 ++++++++++++++++++++----
> 1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index d535080b781f..dac737bac874 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -647,7 +647,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
> struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
> int ret = 0;
>
> - if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
> + if (!cmd) {
> cmd = nvmet_tcp_fetch_cmd(queue);
> if (unlikely(!cmd))
> return 0;
> @@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> sock->sk->sk_state_change = nvmet_tcp_state_change;
> queue->write_space = sock->sk->sk_write_space;
> sock->sk->sk_write_space = nvmet_tcp_write_space;
> +
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /* FALLTHRU */
> + sock->sk->sk_data_ready = queue->data_ready;
> + sock->sk->sk_state_change = queue->state_change;
> + sock->sk->sk_write_space = queue->write_space;
> + sk->sk_user_data = NULL;
> + queue->state = NVMET_TCP_Q_DISCONNECTING;
> + ret = -ENOTCONN;
> + break;
> + default:
> + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> + ret = 0;
> + }
> +
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
>
> static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, @@
> -1506,8 +1524,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
> if (ret)
> goto out_destroy_sq;
>
> - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> -
> return 0;
> out_destroy_sq:
> mutex_lock(&nvmet_tcp_queue_mutex);
>
What about this instead?
--
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c41902f7ce39..6388d18ca7c2 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1494,16 +1494,28 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
ip_sock_set_tos(sock->sk, inet->rcv_tos);
write_lock_bh(&sock->sk->sk_callback_lock);
- sock->sk->sk_user_data = queue;
- queue->data_ready = sock->sk->sk_data_ready;
- sock->sk->sk_data_ready = nvmet_tcp_data_ready;
- queue->state_change = sock->sk->sk_state_change;
- sock->sk->sk_state_change = nvmet_tcp_state_change;
- queue->write_space = sock->sk->sk_write_space;
- sock->sk->sk_write_space = nvmet_tcp_write_space;
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+ /*
+ * If the socket is already closing, don't even start
+ * consuming it
+ */
+ ret = -ENOTCONN;
+ break;
+ default:
+ sock->sk->sk_user_data = queue;
+ queue->data_ready = sock->sk->sk_data_ready;
+ sock->sk->sk_data_ready = nvmet_tcp_data_ready;
+ queue->state_change = sock->sk->sk_state_change;
+ sock->sk->sk_state_change = nvmet_tcp_state_change;
+ queue->write_space = sock->sk->sk_write_space;
+ sock->sk->sk_write_space = nvmet_tcp_write_space;
+ }
write_unlock_bh(&sock->sk->sk_callback_lock);
- return 0;
+ return ret;
}
--
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 23:07 ` Grupi, Elad
@ 2021-01-28 23:33 ` Sagi Grimberg
2021-01-28 23:43 ` Grupi, Elad
2021-01-31 15:47 ` Grupi, Elad
0 siblings, 2 replies; 14+ messages in thread
From: Sagi Grimberg @ 2021-01-28 23:33 UTC (permalink / raw)
To: Grupi, Elad, linux-nvme
> That will work if sk_state_change is called under sk_callback_lock.
It is.
> In addition, need to flush_work(&queue->port->accept_work) in nvmet_tcp_release_queue_work because accept_work is still using queue struct after unlocking sk_callback_lock.
But sk->sk_user_data was not set, so I don't see how the release work
can invoke.
> What about this instead?
> --
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c41902f7ce39..6388d18ca7c2 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -1494,16 +1494,28 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> ip_sock_set_tos(sock->sk, inet->rcv_tos);
>
> write_lock_bh(&sock->sk->sk_callback_lock);
> - sock->sk->sk_user_data = queue;
> - queue->data_ready = sock->sk->sk_data_ready;
> - sock->sk->sk_data_ready = nvmet_tcp_data_ready;
> - queue->state_change = sock->sk->sk_state_change;
> - sock->sk->sk_state_change = nvmet_tcp_state_change;
> - queue->write_space = sock->sk->sk_write_space;
> - sock->sk->sk_write_space = nvmet_tcp_write_space;
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /*
> + * If the socket is already closing, don't even start
> + * consuming it
> + */
> + ret = -ENOTCONN;
> + break;
> + default:
> + sock->sk->sk_user_data = queue;
> + queue->data_ready = sock->sk->sk_data_ready;
> + sock->sk->sk_data_ready = nvmet_tcp_data_ready;
> + queue->state_change = sock->sk->sk_state_change;
> + sock->sk->sk_state_change = nvmet_tcp_state_change;
> + queue->write_space = sock->sk->sk_write_space;
> + sock->sk->sk_write_space = nvmet_tcp_write_space;
> + }
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
> --
>
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 23:33 ` Sagi Grimberg
@ 2021-01-28 23:43 ` Grupi, Elad
2021-01-28 23:54 ` Sagi Grimberg
2021-01-31 15:47 ` Grupi, Elad
1 sibling, 1 reply; 14+ messages in thread
From: Grupi, Elad @ 2021-01-28 23:43 UTC (permalink / raw)
To: Sagi Grimberg, linux-nvme
Release work might get invoked if nvmet_tcp_set_queue_sock completes successfully and sets sk_user_data, but sk_state_change is triggered by the network stack before queue_work_on is invoked. In that case there is a race between release_work and accept_work.
-----Original Message-----
From: Sagi Grimberg <sagi@grimberg.me>
Sent: Friday, 29 January 2021 1:34
To: Grupi, Elad; linux-nvme@lists.infradead.org
Subject: Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
[EXTERNAL EMAIL]
> That will work if sk_state_change is called under sk_callback_lock.
It is.
> In addition, need to flush_work(&queue->port->accept_work) in nvmet_tcp_release_queue_work because accept_work is still using queue struct after unlocking sk_callback_lock.
But sk->sk_user_data was not set, so I don't see how the release work can invoke.
> What about this instead?
> --
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index c41902f7ce39..6388d18ca7c2 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -1494,16 +1494,28 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> ip_sock_set_tos(sock->sk, inet->rcv_tos);
>
> write_lock_bh(&sock->sk->sk_callback_lock);
> - sock->sk->sk_user_data = queue;
> - queue->data_ready = sock->sk->sk_data_ready;
> - sock->sk->sk_data_ready = nvmet_tcp_data_ready;
> - queue->state_change = sock->sk->sk_state_change;
> - sock->sk->sk_state_change = nvmet_tcp_state_change;
> - queue->write_space = sock->sk->sk_write_space;
> - sock->sk->sk_write_space = nvmet_tcp_write_space;
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /*
> + * If the socket is already closing, don't even start
> + * consuming it
> + */
> + ret = -ENOTCONN;
> + break;
> + default:
> + sock->sk->sk_user_data = queue;
> + queue->data_ready = sock->sk->sk_data_ready;
> + sock->sk->sk_data_ready = nvmet_tcp_data_ready;
> + queue->state_change = sock->sk->sk_state_change;
> + sock->sk->sk_state_change = nvmet_tcp_state_change;
> + queue->write_space = sock->sk->sk_write_space;
> + sock->sk->sk_write_space = nvmet_tcp_write_space;
> + }
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
> --
>
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 23:43 ` Grupi, Elad
@ 2021-01-28 23:54 ` Sagi Grimberg
2021-01-29 0:01 ` Grupi, Elad
0 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2021-01-28 23:54 UTC (permalink / raw)
To: Grupi, Elad, linux-nvme
> Release work might get invoked if nvmet_tcp_set_queue_sock is completed successfully and set sk_user_data, but sk_state_change is triggered by network stack before queue_work_on is invoked. That case - there is a race between release_work and accept_work.
This is just like any other case where release_work races with io_work.
That is not an exception from other cases which release_work will need
to fence from.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 23:54 ` Sagi Grimberg
@ 2021-01-29 0:01 ` Grupi, Elad
2021-01-29 0:07 ` Sagi Grimberg
0 siblings, 1 reply; 14+ messages in thread
From: Grupi, Elad @ 2021-01-29 0:01 UTC (permalink / raw)
To: Sagi Grimberg, linux-nvme
That's not what I meant.
My concern is release_work races with accept_work.
nvmet_tcp_alloc_queue is called from accept_work context and still accessing the queue struct after setting sk callbacks.
-----Original Message-----
From: Sagi Grimberg <sagi@grimberg.me>
Sent: Friday, 29 January 2021 1:54
To: Grupi, Elad; linux-nvme@lists.infradead.org
Subject: Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
[EXTERNAL EMAIL]
> Release work might get invoked if nvmet_tcp_set_queue_sock is completed successfully and set sk_user_data, but sk_state_change is triggered by network stack before queue_work_on is invoked. That case - there is a race between release_work and accept_work.
This is just like any other case where release_work races with io_work.
That is not an exception from other cases which release_work will need to fence from.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-29 0:01 ` Grupi, Elad
@ 2021-01-29 0:07 ` Sagi Grimberg
0 siblings, 0 replies; 14+ messages in thread
From: Sagi Grimberg @ 2021-01-29 0:07 UTC (permalink / raw)
To: Grupi, Elad, linux-nvme
> That's not what I meant.
>
> My concern is release_work races with accept_work.
>
> nvmet_tcp_alloc_queue is called from accept_work context and still accessing the queue struct after setting sk callbacks.
I see, we can move the queue_work to set_queue_sock then.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 15:27 [PATCH] nvme-tcp: proper handling of tcp socket closing flows elad.grupi
2021-01-28 22:03 ` Sagi Grimberg
@ 2021-01-29 22:10 ` Sagi Grimberg
2021-01-31 15:47 ` Grupi, Elad
1 sibling, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2021-01-29 22:10 UTC (permalink / raw)
To: elad.grupi, linux-nvme
> @@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> sock->sk->sk_state_change = nvmet_tcp_state_change;
> queue->write_space = sock->sk->sk_write_space;
> sock->sk->sk_write_space = nvmet_tcp_write_space;
> +
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
BTW, shouldn't this be if (sk->sk_state != TCP_ESTABLISHED) ?
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 23:33 ` Sagi Grimberg
2021-01-28 23:43 ` Grupi, Elad
@ 2021-01-31 15:47 ` Grupi, Elad
1 sibling, 0 replies; 14+ messages in thread
From: Grupi, Elad @ 2021-01-31 15:47 UTC (permalink / raw)
To: Sagi Grimberg, linux-nvme
Is it possible that we will have the following flow:
Thread 1 takes sk_callback_lock and goes into the switch case of sk_state, reaching the line:
sock->sk->sk_user_data = queue;
At that point, thread 2 invokes sk_state_change, but this is still the original state_change callback because we haven't yet set it to nvmet_tcp_state_change.
Thread 1 continues and sets the callback to nvmet_tcp_state_change, but the state has already changed, so nvmet_tcp_state_change will never be called.
I think this race still exists even when protected by sk_callback_lock.
-----Original Message-----
From: Sagi Grimberg <sagi@grimberg.me>
Sent: Friday, 29 January 2021 1:34
To: Grupi, Elad; linux-nvme@lists.infradead.org
Subject: Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
[EXTERNAL EMAIL]
> That will work if sk_state_change is called under sk_callback_lock.
It is.
> In addition, need to flush_work(&queue->port->accept_work) in nvmet_tcp_release_queue_work because accept_work is still using queue struct after unlocking sk_callback_lock.
But sk->sk_user_data was not set, so I don't see how the release work can invoke.
> What about this instead?
> --
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index c41902f7ce39..6388d18ca7c2 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -1494,16 +1494,28 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> ip_sock_set_tos(sock->sk, inet->rcv_tos);
>
> write_lock_bh(&sock->sk->sk_callback_lock);
> - sock->sk->sk_user_data = queue;
> - queue->data_ready = sock->sk->sk_data_ready;
> - sock->sk->sk_data_ready = nvmet_tcp_data_ready;
> - queue->state_change = sock->sk->sk_state_change;
> - sock->sk->sk_state_change = nvmet_tcp_state_change;
> - queue->write_space = sock->sk->sk_write_space;
> - sock->sk->sk_write_space = nvmet_tcp_write_space;
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /*
> + * If the socket is already closing, don't even start
> + * consuming it
> + */
> + ret = -ENOTCONN;
> + break;
> + default:
> + sock->sk->sk_user_data = queue;
> + queue->data_ready = sock->sk->sk_data_ready;
> + sock->sk->sk_data_ready = nvmet_tcp_data_ready;
> + queue->state_change = sock->sk->sk_state_change;
> + sock->sk->sk_state_change = nvmet_tcp_state_change;
> + queue->write_space = sock->sk->sk_write_space;
> + sock->sk->sk_write_space = nvmet_tcp_write_space;
> + }
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
> --
>
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-29 22:10 ` Sagi Grimberg
@ 2021-01-31 15:47 ` Grupi, Elad
0 siblings, 0 replies; 14+ messages in thread
From: Grupi, Elad @ 2021-01-31 15:47 UTC (permalink / raw)
To: Sagi Grimberg, linux-nvme
Yes. That looks good.
-----Original Message-----
From: Sagi Grimberg <sagi@grimberg.me>
Sent: Saturday, 30 January 2021 0:10
To: Grupi, Elad; linux-nvme@lists.infradead.org
Subject: Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
[EXTERNAL EMAIL]
> @@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> sock->sk->sk_state_change = nvmet_tcp_state_change;
> queue->write_space = sock->sk->sk_write_space;
> sock->sk->sk_write_space = nvmet_tcp_write_space;
> +
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
BTW, shouldn't this be if (sk->sk_state != TCP_ESTABLISHED) ?
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-28 7:54 ` Sagi Grimberg
@ 2021-01-28 15:27 ` Grupi, Elad
0 siblings, 0 replies; 14+ messages in thread
From: Grupi, Elad @ 2021-01-28 15:27 UTC (permalink / raw)
To: Sagi Grimberg, linux-nvme
> But we need to call the release_queue work, not sure I understand how this works.
If the tcp socket was closed before setting the socket callback,
nvmet_tcp_set_queue_sock will return -ENOTCONN and everything will be cleared by goto out_destroy_sq.
No need a worker to clean resources in that case and everything is protected by sk_callback_lock.
> io_work is going to run with release_queue_work, because it is also coming from the backend completion. The right way to solve this is to correctly fence them.
> Can you describe the exact race you are referring to?
Correct, the fence already exists in the code: cancel_work_sync is called after nvmet_sq_destroy in release_queue_work. I will remove that part from the patch.
-----Original Message-----
From: Sagi Grimberg <sagi@grimberg.me>
Sent: Thursday, 28 January 2021 9:55
To: Grupi, Elad; linux-nvme@lists.infradead.org
Subject: Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
[EXTERNAL EMAIL]
> From: Elad Grupi <elad.grupi@dell.com>
>
> avoid calling nvmet_tcp_release_queue_work if tcp socket was closed
> before setting the sk callbacks.
But we need to call the release_queue work, not sure I understand how this works.
>
> prevent io_work from enqueuing while closing the tcp queue to avoid
> race with nvmet_tcp_release_queue_work.
io_work is going to run with release_queue_work, because it is also coming from the backend completion. The right way to solve this is to correctly fence them.
Can you describe the exact race you are referring to?
>
> Signed-off-by: Elad Grupi <elad.grupi@dell.com>
> ---
> drivers/nvme/target/tcp.c | 26 +++++++++++++++++++++-----
> 1 file changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index d535080b781f..937f2a746d8b 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -647,7 +647,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
> struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
> int ret = 0;
>
> - if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
> + if (!cmd) {
> cmd = nvmet_tcp_fetch_cmd(queue);
> if (unlikely(!cmd))
> return 0;
> @@ -1196,7 +1196,7 @@ static void nvmet_tcp_io_work(struct work_struct *w)
> /*
> * We exahusted our budget, requeue our selves
> */
> - if (pending)
> + if (pending && queue->state != NVMET_TCP_Q_DISCONNECTING)
> queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> }
>
> @@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> sock->sk->sk_state_change = nvmet_tcp_state_change;
> queue->write_space = sock->sk->sk_write_space;
> sock->sk->sk_write_space = nvmet_tcp_write_space;
> +
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /* FALLTHRU */
> + sock->sk->sk_data_ready = queue->data_ready;
> + sock->sk->sk_state_change = queue->state_change;
> + sock->sk->sk_write_space = queue->write_space;
> + sk->sk_user_data = NULL;
> + queue->state = NVMET_TCP_Q_DISCONNECTING;
> + ret = -ENOTCONN;
> + break;
> + default:
> + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> + ret = 0;
> + }
> +
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
>
> static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, @@
> -1506,8 +1524,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
> if (ret)
> goto out_destroy_sq;
>
> - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> -
> return 0;
> out_destroy_sq:
> mutex_lock(&nvmet_tcp_queue_mutex);
>
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] nvme-tcp: proper handling of tcp socket closing flows
2021-01-26 15:37 elad.grupi
@ 2021-01-28 7:54 ` Sagi Grimberg
2021-01-28 15:27 ` Grupi, Elad
0 siblings, 1 reply; 14+ messages in thread
From: Sagi Grimberg @ 2021-01-28 7:54 UTC (permalink / raw)
To: elad.grupi, linux-nvme
> From: Elad Grupi <elad.grupi@dell.com>
>
> avoid calling nvmet_tcp_release_queue_work if tcp socket was closed
> before setting the sk callbacks.
But we need to call the release_queue work, not sure I understand
how this works.
>
> prevent io_work from enqueuing while closing the tcp queue to
> avoid race with nvmet_tcp_release_queue_work.
io_work is going to run with release_queue_work, because
it is also coming from the backend completion. The right
way to solve this is to correctly fence them.
Can you describe the exact race you are referring to?
>
> Signed-off-by: Elad Grupi <elad.grupi@dell.com>
> ---
> drivers/nvme/target/tcp.c | 26 +++++++++++++++++++++-----
> 1 file changed, 21 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index d535080b781f..937f2a746d8b 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -647,7 +647,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
> struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
> int ret = 0;
>
> - if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
> + if (!cmd) {
> cmd = nvmet_tcp_fetch_cmd(queue);
> if (unlikely(!cmd))
> return 0;
> @@ -1196,7 +1196,7 @@ static void nvmet_tcp_io_work(struct work_struct *w)
> /*
> * We exahusted our budget, requeue our selves
> */
> - if (pending)
> + if (pending && queue->state != NVMET_TCP_Q_DISCONNECTING)
> queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> }
>
> @@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> sock->sk->sk_state_change = nvmet_tcp_state_change;
> queue->write_space = sock->sk->sk_write_space;
> sock->sk->sk_write_space = nvmet_tcp_write_space;
> +
> + switch (sk->sk_state) {
> + case TCP_FIN_WAIT1:
> + case TCP_CLOSE_WAIT:
> + case TCP_CLOSE:
> + /* FALLTHRU */
> + sock->sk->sk_data_ready = queue->data_ready;
> + sock->sk->sk_state_change = queue->state_change;
> + sock->sk->sk_write_space = queue->write_space;
> + sk->sk_user_data = NULL;
> + queue->state = NVMET_TCP_Q_DISCONNECTING;
> + ret = -ENOTCONN;
> + break;
> + default:
> + queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> + ret = 0;
> + }
> +
> write_unlock_bh(&sock->sk->sk_callback_lock);
>
> - return 0;
> + return ret;
> }
>
> static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
> @@ -1506,8 +1524,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
> if (ret)
> goto out_destroy_sq;
>
> - queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
> -
> return 0;
> out_destroy_sq:
> mutex_lock(&nvmet_tcp_queue_mutex);
>
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH] nvme-tcp: proper handling of tcp socket closing flows
@ 2021-01-26 15:37 elad.grupi
2021-01-28 7:54 ` Sagi Grimberg
0 siblings, 1 reply; 14+ messages in thread
From: elad.grupi @ 2021-01-26 15:37 UTC (permalink / raw)
To: sagi, linux-nvme; +Cc: Elad Grupi
From: Elad Grupi <elad.grupi@dell.com>
avoid calling nvmet_tcp_release_queue_work if tcp socket was closed
before setting the sk callbacks.
prevent io_work from enqueuing while closing the tcp queue to
avoid race with nvmet_tcp_release_queue_work.
Signed-off-by: Elad Grupi <elad.grupi@dell.com>
---
drivers/nvme/target/tcp.c | 26 +++++++++++++++++++++-----
1 file changed, 21 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d535080b781f..937f2a746d8b 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -647,7 +647,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
int ret = 0;
- if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
+ if (!cmd) {
cmd = nvmet_tcp_fetch_cmd(queue);
if (unlikely(!cmd))
return 0;
@@ -1196,7 +1196,7 @@ static void nvmet_tcp_io_work(struct work_struct *w)
/*
* We exahusted our budget, requeue our selves
*/
- if (pending)
+ if (pending && queue->state != NVMET_TCP_Q_DISCONNECTING)
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
}
@@ -1453,9 +1453,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
sock->sk->sk_state_change = nvmet_tcp_state_change;
queue->write_space = sock->sk->sk_write_space;
sock->sk->sk_write_space = nvmet_tcp_write_space;
+
+ switch (sk->sk_state) {
+ case TCP_FIN_WAIT1:
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSE:
+ /* FALLTHRU */
+ sock->sk->sk_data_ready = queue->data_ready;
+ sock->sk->sk_state_change = queue->state_change;
+ sock->sk->sk_write_space = queue->write_space;
+ sk->sk_user_data = NULL;
+ queue->state = NVMET_TCP_Q_DISCONNECTING;
+ ret = -ENOTCONN;
+ break;
+ default:
+ queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
+ ret = 0;
+ }
+
write_unlock_bh(&sock->sk->sk_callback_lock);
- return 0;
+ return ret;
}
static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
@@ -1506,8 +1524,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_destroy_sq;
- queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
-
return 0;
out_destroy_sq:
mutex_lock(&nvmet_tcp_queue_mutex);
--
2.16.5
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
^ permalink raw reply related [flat|nested] 14+ messages in thread
end of thread, other threads:[~2021-01-31 15:47 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-28 15:27 [PATCH] nvme-tcp: proper handling of tcp socket closing flows elad.grupi
2021-01-28 22:03 ` Sagi Grimberg
2021-01-28 23:07 ` Grupi, Elad
2021-01-28 23:33 ` Sagi Grimberg
2021-01-28 23:43 ` Grupi, Elad
2021-01-28 23:54 ` Sagi Grimberg
2021-01-29 0:01 ` Grupi, Elad
2021-01-29 0:07 ` Sagi Grimberg
2021-01-31 15:47 ` Grupi, Elad
2021-01-29 22:10 ` Sagi Grimberg
2021-01-31 15:47 ` Grupi, Elad
-- strict thread matches above, loose matches on Subject: below --
2021-01-26 15:37 elad.grupi
2021-01-28 7:54 ` Sagi Grimberg
2021-01-28 15:27 ` Grupi, Elad
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.