All of lore.kernel.org
 help / color / mirror / Atom feed
From: Adrian Hunter <adrian.hunter@intel.com>
To: Bart Van Assche <bvanassche@acm.org>,
	"Martin K . Petersen" <martin.petersen@oracle.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>,
	linux-scsi@vger.kernel.org,
	"James E.J. Bottomley" <jejb@linux.ibm.com>,
	Bean Huo <beanhuo@micron.com>, Avri Altman <avri.altman@wdc.com>,
	Can Guo <cang@codeaurora.org>,
	Stanley Chu <stanley.chu@mediatek.com>,
	Asutosh Das <asutoshd@codeaurora.org>,
	Keoseong Park <keosung.park@samsung.com>
Subject: Re: [PATCH v3 10/17] scsi: ufs: Fix a deadlock in the error handler
Date: Wed, 1 Dec 2021 15:48:07 +0200	[thread overview]
Message-ID: <25844cd2-872a-514f-4384-6ee877418dc7@intel.com> (raw)
In-Reply-To: <20211130233324.1402448-11-bvanassche@acm.org>

On 01/12/2021 01:33, Bart Van Assche wrote:
> The following deadlock has been observed on a test setup:
> * All tags allocated.
> * The SCSI error handler calls ufshcd_eh_host_reset_handler()
> * ufshcd_eh_host_reset_handler() queues work that calls ufshcd_err_handler()
> * ufshcd_err_handler() locks up as follows:
> 
> Workqueue: ufs_eh_wq_0 ufshcd_err_handler.cfi_jt
> Call trace:
>  __switch_to+0x298/0x5d8
>  __schedule+0x6cc/0xa94
>  schedule+0x12c/0x298
>  blk_mq_get_tag+0x210/0x480
>  __blk_mq_alloc_request+0x1c8/0x284
>  blk_get_request+0x74/0x134
>  ufshcd_exec_dev_cmd+0x68/0x640
>  ufshcd_verify_dev_init+0x68/0x35c
>  ufshcd_probe_hba+0x12c/0x1cb8
>  ufshcd_host_reset_and_restore+0x88/0x254
>  ufshcd_reset_and_restore+0xd0/0x354
>  ufshcd_err_handler+0x408/0xc58
>  process_one_work+0x24c/0x66c
>  worker_thread+0x3e8/0xa4c
>  kthread+0x150/0x1b4
>  ret_from_fork+0x10/0x30
> 
> Fix this lockup by making ufshcd_exec_dev_cmd() allocate a reserved
> request.
> 
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>

One minor comment below, nevertheless:

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>

> ---
>  drivers/scsi/ufs/ufshcd.c | 53 +++++++++++----------------------------
>  drivers/scsi/ufs/ufshcd.h |  2 ++
>  2 files changed, 16 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 2d0f59424b00..da4714aaa850 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -128,8 +128,9 @@ EXPORT_SYMBOL_GPL(ufshcd_dump_regs);
>  enum {
>  	UFSHCD_MAX_CHANNEL	= 0,
>  	UFSHCD_MAX_ID		= 1,
> -	UFSHCD_CMD_PER_LUN	= 32,
> -	UFSHCD_CAN_QUEUE	= 32,
> +	UFSHCD_NUM_RESERVED	= 1,
> +	UFSHCD_CMD_PER_LUN	= 32 - UFSHCD_NUM_RESERVED,
> +	UFSHCD_CAN_QUEUE	= 32 - UFSHCD_NUM_RESERVED,
>  };
>  
>  static const char *const ufshcd_state_name[] = {
> @@ -2170,6 +2171,7 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba)
>  	hba->nutrs = (hba->capabilities & MASK_TRANSFER_REQUESTS_SLOTS) + 1;
>  	hba->nutmrs =
>  	((hba->capabilities & MASK_TASK_MANAGEMENT_REQUEST_SLOTS) >> 16) + 1;
> +	hba->reserved_slot = hba->nutrs - 1;
>  
>  	/* Read crypto capabilities */
>  	err = ufshcd_hba_init_crypto_capabilities(hba);
> @@ -2912,30 +2914,15 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
>  static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  		enum dev_cmd_type cmd_type, int timeout)
>  {
> -	struct request_queue *q = hba->cmd_queue;

I think hba->cmd_queue is not used anymore after this patch.

>  	DECLARE_COMPLETION_ONSTACK(wait);
> -	struct request *req;
> +	const u32 tag = hba->reserved_slot;
>  	struct ufshcd_lrb *lrbp;
>  	int err;
> -	int tag;
>  
> -	down_read(&hba->clk_scaling_lock);
> +	/* Protects use of hba->reserved_slot. */
> +	lockdep_assert_held(&hba->dev_cmd.lock);
>  
> -	/*
> -	 * Get free slot, sleep if slots are unavailable.
> -	 * Even though we use wait_event() which sleeps indefinitely,
> -	 * the maximum wait time is bounded by SCSI request timeout.
> -	 */
> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
> -	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> -		goto out_unlock;
> -	}
> -	tag = req->tag;
> -	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> -	/* Set the timeout such that the SCSI error handler is not activated. */
> -	req->timeout = msecs_to_jiffies(2 * timeout);
> -	blk_mq_start_request(req);
> +	down_read(&hba->clk_scaling_lock);
>  
>  	lrbp = &hba->lrb[tag];
>  	WARN_ON(lrbp->cmd);
> @@ -2953,8 +2940,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
>  
>  out:
> -	blk_mq_free_request(req);
> -out_unlock:
>  	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> @@ -6689,23 +6674,16 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
>  					enum dev_cmd_type cmd_type,
>  					enum query_opcode desc_op)
>  {
> -	struct request_queue *q = hba->cmd_queue;
>  	DECLARE_COMPLETION_ONSTACK(wait);
> -	struct request *req;
> +	const u32 tag = hba->reserved_slot;
>  	struct ufshcd_lrb *lrbp;
>  	int err = 0;
> -	int tag;
>  	u8 upiu_flags;
>  
> -	down_read(&hba->clk_scaling_lock);
> +	/* Protects use of hba->reserved_slot. */
> +	lockdep_assert_held(&hba->dev_cmd.lock);
>  
> -	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
> -	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> -		goto out_unlock;
> -	}
> -	tag = req->tag;
> -	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> +	down_read(&hba->clk_scaling_lock);
>  
>  	lrbp = &hba->lrb[tag];
>  	WARN_ON(lrbp->cmd);
> @@ -6774,9 +6752,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
>  	ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP,
>  				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
>  
> -	blk_mq_free_request(req);
> -
> -out_unlock:
>  	up_read(&hba->clk_scaling_lock);
>  	return err;
>  }
> @@ -9507,8 +9482,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
>  	/* Configure LRB */
>  	ufshcd_host_memory_configure(hba);
>  
> -	host->can_queue = hba->nutrs;
> -	host->cmd_per_lun = hba->nutrs;
> +	host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED;
> +	host->cmd_per_lun = hba->nutrs - UFSHCD_NUM_RESERVED;
>  	host->max_id = UFSHCD_MAX_ID;
>  	host->max_lun = UFS_MAX_LUNS;
>  	host->max_channel = UFSHCD_MAX_CHANNEL;
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index ecc6c545a19d..c3c2792f309f 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -745,6 +745,7 @@ struct ufs_hba_monitor {
>   * @capabilities: UFS Controller Capabilities
>   * @nutrs: Transfer Request Queue depth supported by controller
>   * @nutmrs: Task Management Queue depth supported by controller
> + * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock.
>   * @ufs_version: UFS Version to which controller complies
>   * @vops: pointer to variant specific operations
>   * @priv: pointer to variant specific private data
> @@ -836,6 +837,7 @@ struct ufs_hba {
>  	u32 capabilities;
>  	int nutrs;
>  	int nutmrs;
> +	u32 reserved_slot;
>  	u32 ufs_version;
>  	const struct ufs_hba_variant_ops *vops;
>  	struct ufs_hba_variant_params *vps;
> 


  reply	other threads:[~2021-12-01 13:50 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-30 23:33 [PATCH v3 00/17] UFS patches for kernel v5.17 Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 01/17] scsi: core: Fix scsi_device_max_queue_depth() Bart Van Assche
2021-12-01  1:32   ` Ming Lei
2021-11-30 23:33 ` [PATCH v3 02/17] scsi: core: Fix a race between scsi_done() and scsi_times_out() Bart Van Assche
2021-12-01 21:43   ` Keith Busch
2021-12-02  1:10     ` Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 03/17] scsi: ufs: Rename a function argument Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 04/17] scsi: ufs: Remove is_rpmb_wlun() Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 05/17] scsi: ufs: Remove the sdev_rpmb member Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 06/17] scsi: ufs: Remove dead code Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 07/17] scsi: ufs: Fix race conditions related to driver data Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 08/17] scsi: ufs: Remove ufshcd_any_tag_in_use() Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 09/17] scsi: ufs: Rework ufshcd_change_queue_depth() Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 10/17] scsi: ufs: Fix a deadlock in the error handler Bart Van Assche
2021-12-01 13:48   ` Adrian Hunter [this message]
2021-12-01 21:26     ` Bart Van Assche
2021-12-02  8:25       ` Adrian Hunter
2021-11-30 23:33 ` [PATCH v3 11/17] scsi: ufs: Remove the 'update_scaling' local variable Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 12/17] scsi: ufs: Introduce ufshcd_release_scsi_cmd() Bart Van Assche
2021-12-01 14:51   ` Adrian Hunter
2021-11-30 23:33 ` [PATCH v3 13/17] scsi: ufs: Improve SCSI abort handling further Bart Van Assche
2021-12-01 15:33   ` Adrian Hunter
2021-11-30 23:33 ` [PATCH v3 14/17] scsi: ufs: Fix a kernel crash during shutdown Bart Van Assche
2021-11-30 23:33 ` [PATCH v3 15/17] scsi: ufs: Stop using the clock scaling lock in the error handler Bart Van Assche
2021-12-01 14:08   ` Adrian Hunter
2021-11-30 23:33 ` [PATCH v3 16/17] scsi: ufs: Optimize the command queueing code Bart Van Assche
2021-12-01 23:33   ` Asutosh Das (asd)
2021-12-02 18:13     ` Bart Van Assche
2021-12-02 23:56       ` Bart Van Assche
2021-12-03 15:38       ` Asutosh Das (asd)
2021-11-30 23:33 ` [PATCH v3 17/17] scsi: ufs: Implement polling support Bart Van Assche
2021-12-02 15:44 ` [PATCH v3 00/17] UFS patches for kernel v5.17 Bean Huo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=25844cd2-872a-514f-4384-6ee877418dc7@intel.com \
    --to=adrian.hunter@intel.com \
    --cc=asutoshd@codeaurora.org \
    --cc=avri.altman@wdc.com \
    --cc=beanhuo@micron.com \
    --cc=bvanassche@acm.org \
    --cc=cang@codeaurora.org \
    --cc=jaegeuk@kernel.org \
    --cc=jejb@linux.ibm.com \
    --cc=keosung.park@samsung.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=stanley.chu@mediatek.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.