linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] nvmet: fix uninitialized work for zero kato
@ 2020-10-13  3:52 zhenwei pi
  2020-10-13 22:18 ` Sagi Grimberg
  0 siblings, 1 reply; 2+ messages in thread
From: zhenwei pi @ 2020-10-13  3:52 UTC (permalink / raw)
  To: hch, sagi, chaitanya.kulkarni; +Cc: linux-nvme, linux-kernel, pizhenwei

Hit a warning:
WARNING: CPU: 1 PID: 241 at kernel/workqueue.c:1627 __queue_delayed_work+0x6d/0x90
with trace:
  mod_delayed_work_on+0x59/0x90
  nvmet_update_cc+0xee/0x100 [nvmet]
  nvmet_execute_prop_set+0x72/0x80 [nvmet]
  nvmet_tcp_try_recv_pdu+0x2f7/0x770 [nvmet_tcp]
  nvmet_tcp_io_work+0x63f/0xb2d [nvmet_tcp]
  ...

This can be reproduced easily with a keep-alive timeout of 0:
nvme connect -t tcp -n NQN -a ADDR -s PORT --keep-alive-tmo=0

The reason is:
An uninitialized work is started when the initiator connects with zero
kato. Although the keep-alive timer is disabled while allocating a ctrl
(fix in 0d3b6a8d213a), ka_work still has a chance to run
(called by nvmet_start_ctrl to detect a dead host).

Initialize ka_work while allocating the ctrl, and set a reasonable kato
before scheduling ka_work.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 drivers/nvme/target/core.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b7b63330b5ef..3c5b2b065476 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -19,6 +19,8 @@ struct workqueue_struct *buffered_io_wq;
 static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
 static DEFINE_IDA(cntlid_ida);
 
+#define NVMET_DEFAULT_KATO	5
+
 /*
  * This read/write semaphore is used to synchronize access to configuration
  * information on a target system that will result in discovery log page
@@ -385,6 +387,11 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
 	if (cmd_seen) {
 		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
 			ctrl->cntlid);
+
+		/* run once, trigger from nvmet_start_ctrl to detect dead link */
+		if (!ctrl->kato)
+			return;
+
 		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 		return;
 	}
@@ -403,15 +410,11 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
 		ctrl->cntlid, ctrl->kato);
 
-	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 {
-	if (unlikely(ctrl->kato == 0))
-		return;
-
 	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
 
 	cancel_delayed_work_sync(&ctrl->ka_work);
@@ -1107,6 +1110,8 @@ static inline u8 nvmet_cc_iocqes(u32 cc)
 
 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 {
+	u32 kato = ctrl->kato ? ctrl->kato : NVMET_DEFAULT_KATO;
+
 	lockdep_assert_held(&ctrl->lock);
 
 	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
@@ -1126,7 +1131,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 	 * in case a host died before it enabled the controller.  Hence, simply
 	 * reset the keep alive timer when the controller is enabled.
 	 */
-	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
+	mod_delayed_work(system_wq, &ctrl->ka_work, kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
@@ -1378,6 +1383,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 
 	/* keep-alive timeout in seconds */
 	ctrl->kato = DIV_ROUND_UP(kato, 1000);
+	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 
 	ctrl->err_counter = 0;
 	spin_lock_init(&ctrl->error_lock);
-- 
2.11.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] nvmet: fix uninitialized work for zero kato
  2020-10-13  3:52 [PATCH] nvmet: fix uninitialized work for zero kato zhenwei pi
@ 2020-10-13 22:18 ` Sagi Grimberg
  0 siblings, 0 replies; 2+ messages in thread
From: Sagi Grimberg @ 2020-10-13 22:18 UTC (permalink / raw)
  To: zhenwei pi, hch, chaitanya.kulkarni; +Cc: linux-nvme, linux-kernel

> Hit a warning:
> WARNING: CPU: 1 PID: 241 at kernel/workqueue.c:1627 __queue_delayed_work+0x6d/0x90
> with trace:
>    mod_delayed_work_on+0x59/0x90
>    nvmet_update_cc+0xee/0x100 [nvmet]
>    nvmet_execute_prop_set+0x72/0x80 [nvmet]
>    nvmet_tcp_try_recv_pdu+0x2f7/0x770 [nvmet_tcp]
>    nvmet_tcp_io_work+0x63f/0xb2d [nvmet_tcp]
>    ...
> 
> This could be reproduced easily with a keep alive time 0:
> nvme connect -t tcp -n NQN -a ADDR -s PORT --keep-alive-tmo=0
> 
> The reason is:
> Starting an uninitialized work when initiator connects with zero
> kato. Although keep-alive timer is disabled during allocating a ctrl
> (fix in 0d3b6a8d213a), ka_work still has a chance to run
> (called by nvmet_start_ctrl to detect dead host).

This should have a "Fixes:" tag.

> 
> Initialize ka_work during allocating ctrl, and set a reasonable kato
> before scheduling ka_work.
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>   drivers/nvme/target/core.c | 16 +++++++++++-----
>   1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index b7b63330b5ef..3c5b2b065476 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -19,6 +19,8 @@ struct workqueue_struct *buffered_io_wq;
>   static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
>   static DEFINE_IDA(cntlid_ida);
>   
> +#define NVMET_DEFAULT_KATO	5
> +
>   /*
>    * This read/write semaphore is used to synchronize access to configuration
>    * information on a target system that will result in discovery log page
> @@ -385,6 +387,11 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
>   	if (cmd_seen) {
>   		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
>   			ctrl->cntlid);
> +
> +		/* run once, trigger from nvmet_start_ctrl to detect dead link */
> +		if (!ctrl->kato)
> +			return;
> +
>   		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);

It would be better to just schedule/mod the ka_work only if kato != 0;
the other changes in the patch aren't needed IMO.

>   		return;
>   	}
> @@ -403,15 +410,11 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
>   	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
>   		ctrl->cntlid, ctrl->kato);
>   
> -	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
>   	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
>   }
>   
>   static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
>   {
> -	if (unlikely(ctrl->kato == 0))
> -		return;
> -
>   	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
>   
>   	cancel_delayed_work_sync(&ctrl->ka_work);
> @@ -1107,6 +1110,8 @@ static inline u8 nvmet_cc_iocqes(u32 cc)
>   
>   static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
>   {
> +	u32 kato = ctrl->kato ? ctrl->kato : NVMET_DEFAULT_KATO;
> +

The controller shouldn't have a default value; it should receive
the desired value from the host.

>   	lockdep_assert_held(&ctrl->lock);
>   
>   	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
> @@ -1126,7 +1131,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
>   	 * in case a host died before it enabled the controller.  Hence, simply
>   	 * reset the keep alive timer when the controller is enabled.
>   	 */
> -	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
> +	mod_delayed_work(system_wq, &ctrl->ka_work, kato * HZ);
>   }
>   
>   static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
> @@ -1378,6 +1383,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
>   
>   	/* keep-alive timeout in seconds */
>   	ctrl->kato = DIV_ROUND_UP(kato, 1000);
> +	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
>   
>   	ctrl->err_counter = 0;
>   	spin_lock_init(&ctrl->error_lock);
> 

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-10-13 22:18 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-13  3:52 [PATCH] nvmet: fix uninitialized work for zero kato zhenwei pi
2020-10-13 22:18 ` Sagi Grimberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).