All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand
@ 2021-10-04 12:06 Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 1/6] scsi: ufs: Encapsulate clk_scaling_lock by inline functions Adrian Hunter
                   ` (6 more replies)
  0 siblings, 7 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

Hi

Driver synchronization would be easier to understand if we used the
clk_scaling_lock as the only lock to provide either shared (down/up_read)
or exclusive (down/up_write) access to the host.

These patches make changes with that in mind, finally resulting in being
able to hold the down_write() lock for the entire error handler duration.

If this approach is acceptable, it could be extended to simplify the
synchronization of PM vs error handler and Shutdown vs sysfs.


Adrian Hunter (6):
      scsi: ufs: Encapsulate clk_scaling_lock by inline functions
      scsi: ufs: Rename clk_scaling_lock to host_rw_sem
      scsi: ufs: Let ufshcd_[down/up]_read be nested within ufshcd_[down/up]_write
      scsi: ufs: Fix a possible dead lock in clock scaling
      scsi: ufs: Reorder dev_cmd locking
      scsi: ufs: Hold ufshcd_down_write() lock for entire error handler duration

 drivers/scsi/ufs/ufshcd.c | 104 +++++++++++++++++++++-------------------------
 drivers/scsi/ufs/ufshcd.h |  41 +++++++++++++++++-
 2 files changed, 87 insertions(+), 58 deletions(-)


Regards
Adrian

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH RFC 1/6] scsi: ufs: Encapsulate clk_scaling_lock by inline functions
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
@ 2021-10-04 12:06 ` Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem Adrian Hunter
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

In preparation for making clk_scaling_lock a more general purpose sleeping
lock for the host.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.c | 34 +++++++++++++++++-----------------
 drivers/scsi/ufs/ufshcd.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 9faf02cbb9ad..350ecfd90306 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1175,12 +1175,12 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 	 * clock scaling is in progress
 	 */
 	ufshcd_scsi_block_requests(hba);
-	down_write(&hba->clk_scaling_lock);
+	ufshcd_down_write(hba);
 
 	if (!hba->clk_scaling.is_allowed ||
 	    ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) {
 		ret = -EBUSY;
-		up_write(&hba->clk_scaling_lock);
+		ufshcd_up_write(hba);
 		ufshcd_scsi_unblock_requests(hba);
 		goto out;
 	}
@@ -1195,9 +1195,9 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
 {
 	if (writelock)
-		up_write(&hba->clk_scaling_lock);
+		ufshcd_up_write(hba);
 	else
-		up_read(&hba->clk_scaling_lock);
+		ufshcd_up_read(hba);
 	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 }
@@ -1244,7 +1244,7 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
 	}
 
 	/* Enable Write Booster if we have scaled up else disable it */
-	downgrade_write(&hba->clk_scaling_lock);
+	ufshcd_downgrade_write(hba);
 	is_writelock = false;
 	ufshcd_wb_toggle(hba, scale_up);
 
@@ -2681,7 +2681,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
-	if (!down_read_trylock(&hba->clk_scaling_lock))
+	if (!ufshcd_down_read_trylock(hba))
 		return SCSI_MLQUEUE_HOST_BUSY;
 
 	switch (hba->ufshcd_state) {
@@ -2756,7 +2756,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	ufshcd_send_command(hba, tag);
 out:
-	up_read(&hba->clk_scaling_lock);
+	ufshcd_up_read(hba);
 
 	if (ufs_trigger_eh())
 		scsi_schedule_eh(hba->host);
@@ -2914,7 +2914,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	int err;
 	int tag;
 
-	down_read(&hba->clk_scaling_lock);
+	ufshcd_down_read(hba);
 
 	/*
 	 * Get free slot, sleep if slots are unavailable.
@@ -2950,7 +2950,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 out:
 	blk_put_request(req);
 out_unlock:
-	up_read(&hba->clk_scaling_lock);
+	ufshcd_up_read(hba);
 	return err;
 }
 
@@ -5934,9 +5934,9 @@ static bool ufshcd_quirk_dl_nac_errors(struct ufs_hba *hba)
 
 static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
 {
-	down_write(&hba->clk_scaling_lock);
+	ufshcd_down_write(hba);
 	hba->clk_scaling.is_allowed = allow;
-	up_write(&hba->clk_scaling_lock);
+	ufshcd_up_write(hba);
 }
 
 static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
@@ -5984,8 +5984,8 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 	}
 	ufshcd_scsi_block_requests(hba);
 	/* Drain ufshcd_queuecommand() */
-	down_write(&hba->clk_scaling_lock);
-	up_write(&hba->clk_scaling_lock);
+	ufshcd_down_write(hba);
+	ufshcd_up_write(hba);
 	cancel_work_sync(&hba->eeh_work);
 }
 
@@ -6196,7 +6196,7 @@ static void ufshcd_err_handler(struct Scsi_Host *host)
 		 * Hold the scaling lock just in case dev cmds
 		 * are sent via bsg and/or sysfs.
 		 */
-		down_write(&hba->clk_scaling_lock);
+		ufshcd_down_write(hba);
 		hba->force_pmc = true;
 		pmc_err = ufshcd_config_pwr_mode(hba, &(hba->pwr_info));
 		if (pmc_err) {
@@ -6206,7 +6206,7 @@ static void ufshcd_err_handler(struct Scsi_Host *host)
 		}
 		hba->force_pmc = false;
 		ufshcd_print_pwr_info(hba);
-		up_write(&hba->clk_scaling_lock);
+		ufshcd_up_write(hba);
 		spin_lock_irqsave(hba->host->host_lock, flags);
 	}
 
@@ -6705,7 +6705,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	int tag;
 	u8 upiu_flags;
 
-	down_read(&hba->clk_scaling_lock);
+	ufshcd_down_read(hba);
 
 	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req)) {
@@ -6790,7 +6790,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 out:
 	blk_put_request(req);
 out_unlock:
-	up_read(&hba->clk_scaling_lock);
+	ufshcd_up_read(hba);
 	return err;
 }
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index ed960d206260..9b1ef272fb3c 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -1418,4 +1418,34 @@ static inline int ufshcd_rpmb_rpm_put(struct ufs_hba *hba)
 	return pm_runtime_put(&hba->sdev_rpmb->sdev_gendev);
 }
 
+static inline void ufshcd_down_read(struct ufs_hba *hba)
+{
+	down_read(&hba->clk_scaling_lock);
+}
+
+static inline void ufshcd_up_read(struct ufs_hba *hba)
+{
+	up_read(&hba->clk_scaling_lock);
+}
+
+static inline int ufshcd_down_read_trylock(struct ufs_hba *hba)
+{
+	return down_read_trylock(&hba->clk_scaling_lock);
+}
+
+static inline void ufshcd_down_write(struct ufs_hba *hba)
+{
+	down_write(&hba->clk_scaling_lock);
+}
+
+static inline void ufshcd_up_write(struct ufs_hba *hba)
+{
+	up_write(&hba->clk_scaling_lock);
+}
+
+static inline void ufshcd_downgrade_write(struct ufs_hba *hba)
+{
+	downgrade_write(&hba->clk_scaling_lock);
+}
+
 #endif /* End of Header */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 1/6] scsi: ufs: Encapsulate clk_scaling_lock by inline functions Adrian Hunter
@ 2021-10-04 12:06 ` Adrian Hunter
  2021-10-04 16:52   ` Bart Van Assche
  2021-10-04 12:06 ` [PATCH RFC 3/6] scsi: ufs: Let ufshcd_[down/up]_read be nested within ufshcd_[down/up]_write Adrian Hunter
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

To fit its new purpose as a more general purpose sleeping lock for the
host.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.c |  2 +-
 drivers/scsi/ufs/ufshcd.h | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 350ecfd90306..3912b74d50ae 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -9630,7 +9630,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	/* Initialize mutex for exception event control */
 	mutex_init(&hba->ee_ctrl_mutex);
 
-	init_rwsem(&hba->clk_scaling_lock);
+	init_rwsem(&hba->host_rw_sem);
 
 	ufshcd_init_clk_gating(hba);
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 9b1ef272fb3c..495e1c0afae3 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -897,7 +897,7 @@ struct ufs_hba {
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
 
-	struct rw_semaphore clk_scaling_lock;
+	struct rw_semaphore host_rw_sem;
 	unsigned char desc_size[QUERY_DESC_IDN_MAX];
 	atomic_t scsi_block_reqs_cnt;
 
@@ -1420,32 +1420,32 @@ static inline int ufshcd_rpmb_rpm_put(struct ufs_hba *hba)
 
 static inline void ufshcd_down_read(struct ufs_hba *hba)
 {
-	down_read(&hba->clk_scaling_lock);
+	down_read(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_up_read(struct ufs_hba *hba)
 {
-	up_read(&hba->clk_scaling_lock);
+	up_read(&hba->host_rw_sem);
 }
 
 static inline int ufshcd_down_read_trylock(struct ufs_hba *hba)
 {
-	return down_read_trylock(&hba->clk_scaling_lock);
+	return down_read_trylock(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_down_write(struct ufs_hba *hba)
 {
-	down_write(&hba->clk_scaling_lock);
+	down_write(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_up_write(struct ufs_hba *hba)
 {
-	up_write(&hba->clk_scaling_lock);
+	up_write(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_downgrade_write(struct ufs_hba *hba)
 {
-	downgrade_write(&hba->clk_scaling_lock);
+	downgrade_write(&hba->host_rw_sem);
 }
 
 #endif /* End of Header */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH RFC 3/6] scsi: ufs: Let ufshcd_[down/up]_read be nested within ufshcd_[down/up]_write
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 1/6] scsi: ufs: Encapsulate clk_scaling_lock by inline functions Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem Adrian Hunter
@ 2021-10-04 12:06 ` Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 4/6] scsi: ufs: Fix a possible dead lock in clock scaling Adrian Hunter
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

In preparation to hold ufshcd_down_write() lock for the entire error
handler duration.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 495e1c0afae3..74891947bb34 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -898,6 +898,7 @@ struct ufs_hba {
 	bool is_urgent_bkops_lvl_checked;
 
 	struct rw_semaphore host_rw_sem;
+	struct task_struct *excl_task;
 	unsigned char desc_size[QUERY_DESC_IDN_MAX];
 	atomic_t scsi_block_reqs_cnt;
 
@@ -1420,31 +1421,45 @@ static inline int ufshcd_rpmb_rpm_put(struct ufs_hba *hba)
 
 static inline void ufshcd_down_read(struct ufs_hba *hba)
 {
-	down_read(&hba->host_rw_sem);
+	if (hba->excl_task != current)
+		down_read(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_up_read(struct ufs_hba *hba)
 {
-	up_read(&hba->host_rw_sem);
+	if (hba->excl_task != current)
+		up_read(&hba->host_rw_sem);
 }
 
 static inline int ufshcd_down_read_trylock(struct ufs_hba *hba)
 {
+	if (hba->excl_task == current)
+		return 1;
+
 	return down_read_trylock(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_down_write(struct ufs_hba *hba)
 {
 	down_write(&hba->host_rw_sem);
+	/*
+	 * Assign exclusive access to this task, which enables bypassing
+	 * down_read/up_read, refer ufshcd_down_read() and ufshcd_up_read().
+	 * Note, if the same task will not be doing up_write(), it must set
+	 * hba->excl_task to NULL itself.
+	 */
+	hba->excl_task = current;
 }
 
 static inline void ufshcd_up_write(struct ufs_hba *hba)
 {
+	hba->excl_task = NULL;
 	up_write(&hba->host_rw_sem);
 }
 
 static inline void ufshcd_downgrade_write(struct ufs_hba *hba)
 {
+	hba->excl_task = NULL;
 	downgrade_write(&hba->host_rw_sem);
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH RFC 4/6] scsi: ufs: Fix a possible dead lock in clock scaling
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
                   ` (2 preceding siblings ...)
  2021-10-04 12:06 ` [PATCH RFC 3/6] scsi: ufs: Let ufshcd_[down/up]_read be nested within ufshcd_[down/up]_write Adrian Hunter
@ 2021-10-04 12:06 ` Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 5/6] scsi: ufs: Reorder dev_cmd locking Adrian Hunter
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

Instead of downgrade_write(), extend the ufshcd_down_write() to cover
ufshcd_wb_toggle() because ufshcd_down_write() now supports nesting
ufshcd_down_read() but down_read() nested within down_read() can deadlock.

Reported-by: Can Guo <cang@codeaurora.org>
Link: https://lore.kernel.org/all/fbc4d03a07f03fe4fbe697813111471f@codeaurora.org
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.c | 12 +++---------
 drivers/scsi/ufs/ufshcd.h |  6 ------
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 3912b74d50ae..64ac9e48c4d7 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1192,12 +1192,9 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba)
 	return ret;
 }
 
-static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
+static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba)
 {
-	if (writelock)
-		ufshcd_up_write(hba);
-	else
-		ufshcd_up_read(hba);
+	ufshcd_up_write(hba);
 	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 }
@@ -1214,7 +1211,6 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock)
 static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
 {
 	int ret = 0;
-	bool is_writelock = true;
 
 	ret = ufshcd_clock_scaling_prepare(hba);
 	if (ret)
@@ -1244,12 +1240,10 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up)
 	}
 
 	/* Enable Write Booster if we have scaled up else disable it */
-	ufshcd_downgrade_write(hba);
-	is_writelock = false;
 	ufshcd_wb_toggle(hba, scale_up);
 
 out_unprepare:
-	ufshcd_clock_scaling_unprepare(hba, is_writelock);
+	ufshcd_clock_scaling_unprepare(hba);
 	return ret;
 }
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 74891947bb34..47f81eae178c 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -1457,10 +1457,4 @@ static inline void ufshcd_up_write(struct ufs_hba *hba)
 	up_write(&hba->host_rw_sem);
 }
 
-static inline void ufshcd_downgrade_write(struct ufs_hba *hba)
-{
-	hba->excl_task = NULL;
-	downgrade_write(&hba->host_rw_sem);
-}
-
 #endif /* End of Header */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH RFC 5/6] scsi: ufs: Reorder dev_cmd locking
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
                   ` (3 preceding siblings ...)
  2021-10-04 12:06 ` [PATCH RFC 4/6] scsi: ufs: Fix a possible dead lock in clock scaling Adrian Hunter
@ 2021-10-04 12:06 ` Adrian Hunter
  2021-10-04 12:06 ` [PATCH RFC 6/6] scsi: ufs: Hold ufshcd_down_write() lock for entire error handler duration Adrian Hunter
  2021-10-04 12:42 ` [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Avri Altman
  6 siblings, 0 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

Consolidate the locking for dev_cmd's in one new function
ufshcd_dev_cmd_lock() and reorder dev_cmd.lock mutex after
ufshcd_down_read() to pave the way to hold ufshcd_down_write() lock for the
entire error handler duration.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.c | 69 +++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 64ac9e48c4d7..3c11a35ecba6 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2908,18 +2908,14 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	int err;
 	int tag;
 
-	ufshcd_down_read(hba);
-
 	/*
 	 * Get free slot, sleep if slots are unavailable.
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by SCSI request timeout.
 	 */
 	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out_unlock;
-	}
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 	tag = req->tag;
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 	/* Set the timeout such that the SCSI error handler is not activated. */
@@ -2943,8 +2939,6 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
 out:
 	blk_put_request(req);
-out_unlock:
-	ufshcd_up_read(hba);
 	return err;
 }
 
@@ -2995,6 +2989,25 @@ static int ufshcd_query_flag_retry(struct ufs_hba *hba,
 	return ret;
 }
 
+static void ufshcd_dev_cmd_lock(struct ufs_hba *hba)
+{
+	/* Ungate while not holding other locks */
+	ufshcd_hold(hba, false);
+	ufshcd_down_read(hba);
+	/*
+	 * Take the mutex after ufshcd_down_read() to avoid deadlocking against
+	 * the owner of ufshcd_down_write().
+	 */
+	mutex_lock(&hba->dev_cmd.lock);
+}
+
+static void ufshcd_dev_cmd_unlock(struct ufs_hba *hba)
+{
+	mutex_unlock(&hba->dev_cmd.lock);
+	ufshcd_up_read(hba);
+	ufshcd_release(hba);
+}
+
 /**
  * ufshcd_query_flag() - API function for sending flag query requests
  * @hba: per-adapter instance
@@ -3015,8 +3028,7 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 
 	BUG_ON(!hba);
 
-	ufshcd_hold(hba, false);
-	mutex_lock(&hba->dev_cmd.lock);
+	ufshcd_dev_cmd_lock(hba);
 	ufshcd_init_query(hba, &request, &response, opcode, idn, index,
 			selector);
 
@@ -3058,8 +3070,7 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 				MASK_QUERY_UPIU_FLAG_LOC) & 0x1;
 
 out_unlock:
-	mutex_unlock(&hba->dev_cmd.lock);
-	ufshcd_release(hba);
+	ufshcd_dev_cmd_unlock(hba);
 	return err;
 }
 
@@ -3089,9 +3100,8 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
 		return -EINVAL;
 	}
 
-	ufshcd_hold(hba, false);
+	ufshcd_dev_cmd_lock(hba);
 
-	mutex_lock(&hba->dev_cmd.lock);
 	ufshcd_init_query(hba, &request, &response, opcode, idn, index,
 			selector);
 
@@ -3121,8 +3131,7 @@ int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode,
 	*attr_val = be32_to_cpu(response->upiu_res.value);
 
 out_unlock:
-	mutex_unlock(&hba->dev_cmd.lock);
-	ufshcd_release(hba);
+	ufshcd_dev_cmd_unlock(hba);
 	return err;
 }
 
@@ -3185,9 +3194,8 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
 		return -EINVAL;
 	}
 
-	ufshcd_hold(hba, false);
+	ufshcd_dev_cmd_lock(hba);
 
-	mutex_lock(&hba->dev_cmd.lock);
 	ufshcd_init_query(hba, &request, &response, opcode, idn, index,
 			selector);
 	hba->dev_cmd.query.descriptor = desc_buf;
@@ -3220,8 +3228,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba,
 
 out_unlock:
 	hba->dev_cmd.query.descriptor = NULL;
-	mutex_unlock(&hba->dev_cmd.lock);
-	ufshcd_release(hba);
+	ufshcd_dev_cmd_unlock(hba);
 	return err;
 }
 
@@ -4766,8 +4773,7 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
 	int err = 0;
 	int retries;
 
-	ufshcd_hold(hba, false);
-	mutex_lock(&hba->dev_cmd.lock);
+	ufshcd_dev_cmd_lock(hba);
 	for (retries = NOP_OUT_RETRIES; retries > 0; retries--) {
 		err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_NOP,
 					       NOP_OUT_TIMEOUT);
@@ -4777,8 +4783,7 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
 
 		dev_dbg(hba->dev, "%s: error %d retrying\n", __func__, err);
 	}
-	mutex_unlock(&hba->dev_cmd.lock);
-	ufshcd_release(hba);
+	ufshcd_dev_cmd_unlock(hba);
 
 	if (err)
 		dev_err(hba->dev, "%s: NOP OUT failed %d\n", __func__, err);
@@ -6699,13 +6704,9 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 	int tag;
 	u8 upiu_flags;
 
-	ufshcd_down_read(hba);
-
 	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto out_unlock;
-	}
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 	tag = req->tag;
 	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
 
@@ -6783,8 +6784,6 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 
 out:
 	blk_put_request(req);
-out_unlock:
-	ufshcd_up_read(hba);
 	return err;
 }
 
@@ -6821,13 +6820,11 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
 		cmd_type = DEV_CMD_TYPE_NOP;
 		fallthrough;
 	case UPIU_TRANSACTION_QUERY_REQ:
-		ufshcd_hold(hba, false);
-		mutex_lock(&hba->dev_cmd.lock);
+		ufshcd_dev_cmd_lock(hba);
 		err = ufshcd_issue_devman_upiu_cmd(hba, req_upiu, rsp_upiu,
 						   desc_buff, buff_len,
 						   cmd_type, desc_op);
-		mutex_unlock(&hba->dev_cmd.lock);
-		ufshcd_release(hba);
+		ufshcd_dev_cmd_unlock(hba);
 
 		break;
 	case UPIU_TRANSACTION_TASK_REQ:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH RFC 6/6] scsi: ufs: Hold ufshcd_down_write() lock for entire error handler duration
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
                   ` (4 preceding siblings ...)
  2021-10-04 12:06 ` [PATCH RFC 5/6] scsi: ufs: Reorder dev_cmd locking Adrian Hunter
@ 2021-10-04 12:06 ` Adrian Hunter
  2021-10-04 12:42 ` [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Avri Altman
  6 siblings, 0 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 12:06 UTC (permalink / raw)
  To: Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, Bart Van Assche, linux-scsi

This prevents the error handler racing with scsi_unjam_host() which can
submit requests directly to ->queuecommand() i.e. ufshcd_queuecommand().

It also could pave the way in the future for removing the need for
host_sem.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 drivers/scsi/ufs/ufshcd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 3c11a35ecba6..439937ce693c 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5984,12 +5984,11 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 	ufshcd_scsi_block_requests(hba);
 	/* Drain ufshcd_queuecommand() */
 	ufshcd_down_write(hba);
-	ufshcd_up_write(hba);
-	cancel_work_sync(&hba->eeh_work);
 }
 
 static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
 {
+	ufshcd_up_write(hba);
 	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 	if (ufshcd_is_clkscaling_supported(hba))
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* RE: [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand
  2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
                   ` (5 preceding siblings ...)
  2021-10-04 12:06 ` [PATCH RFC 6/6] scsi: ufs: Hold ufshcd_down_write() lock for entire error handler duration Adrian Hunter
@ 2021-10-04 12:42 ` Avri Altman
  2021-10-04 13:27   ` Adrian Hunter
  6 siblings, 1 reply; 12+ messages in thread
From: Avri Altman @ 2021-10-04 12:42 UTC (permalink / raw)
  To: Adrian Hunter, Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Alim Akhtar, Can Guo,
	Asutosh Das, Bart Van Assche, linux-scsi

 
> Driver synchronization would be easier to understand if we used the
> clk_scaling_lock as the only lock to provide either shared (down/up_read)
> or exclusive (down/up_write) access to the host.
> 
> These patches make changes with that in mind, finally resulting in being
> able to hold the down_write() lock for the entire error handler duration.
> 
> If this approach is acceptable, it could be extended to simplify the
> the synchronization of PM vs error handler and Shutdown vs sysfs.
Given that UFSHCD_CAP_CLK_SCALING is only set for ufs-qcom:
If extending its use, wouldn't that become a source of contention for them?

Thanks,
Avri

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand
  2021-10-04 12:42 ` [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Avri Altman
@ 2021-10-04 13:27   ` Adrian Hunter
  0 siblings, 0 replies; 12+ messages in thread
From: Adrian Hunter @ 2021-10-04 13:27 UTC (permalink / raw)
  To: Avri Altman, Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Alim Akhtar, Can Guo,
	Asutosh Das, Bart Van Assche, linux-scsi

On 04/10/2021 15:42, Avri Altman wrote:
>  
>> Driver synchronization would be easier to understand if we used the
>> clk_scaling_lock as the only lock to provide either shared (down/up_read)
>> or exclusive (down/up_write) access to the host.
>>
>> These patches make changes with that in mind, finally resulting in being
>> able to hold the down_write() lock for the entire error handler duration.
>>
>> If this approach is acceptable, it could be extended to simplify the
>> the synchronization of PM vs error handler and Shutdown vs sysfs.
> Given that UFSHCD_CAP_CLK_SCALING is only set for ufs-qcom:
> If extending its use, wouldn't that become a source of contention for them?

It is a good question. The error handler already seems to stop clock scaling.
PM / Shutdown seem to suspend / resume clock scaling. So it doesn't *look*
like it should make much difference.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem
  2021-10-04 12:06 ` [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem Adrian Hunter
@ 2021-10-04 16:52   ` Bart Van Assche
  2021-10-05  5:06     ` Adrian Hunter
  0 siblings, 1 reply; 12+ messages in thread
From: Bart Van Assche @ 2021-10-04 16:52 UTC (permalink / raw)
  To: Adrian Hunter, Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, linux-scsi

On 10/4/21 5:06 AM, Adrian Hunter wrote:
> To fit its new purpose as a more general purpose sleeping lock for the
> host.
> 
> [ ... ]
 >
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 9b1ef272fb3c..495e1c0afae3 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -897,7 +897,7 @@ struct ufs_hba {
>   	enum bkops_status urgent_bkops_lvl;
>   	bool is_urgent_bkops_lvl_checked;
>   
> -	struct rw_semaphore clk_scaling_lock;
> +	struct rw_semaphore host_rw_sem;
>   	unsigned char desc_size[QUERY_DESC_IDN_MAX];
>   	atomic_t scsi_block_reqs_cnt;

Hi Adrian,

It seems to me that this patch series goes in another direction than the
direction the JEDEC UFS committee is going into. The UFSHCI 4.0 specification
will support MCQ (Multi-Circular queue). We will benefit most from the v4.0
MCQ support if there is no contention between the CPUs that submit UFS commands
to different queues. I think the intention of this patch series is to make a
single synchronization object protect all submission queues. I'm concerned that
this will prevent to fully benefit from multiqueue support. Has it been
considered to eliminate the clk_scaling_lock and instead to use RCU to
serialize clock scaling against command processing? One possible approach is to
use blk_mq_freeze_queue() and blk_mq_unfreeze_queue() around the clock scaling
code. A disadvantage of using RCU is that waiting for an RCU grace period takes
some time - about 10 ms on my test setup. I have not yet verified what the
performance and time impact would be of using an expedited RCU grace period
instead of a regular RCU grace period.

Bart.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem
  2021-10-04 16:52   ` Bart Van Assche
@ 2021-10-05  5:06     ` Adrian Hunter
  2021-10-05 19:06       ` Bart Van Assche
  0 siblings, 1 reply; 12+ messages in thread
From: Adrian Hunter @ 2021-10-05  5:06 UTC (permalink / raw)
  To: Bart Van Assche, Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, linux-scsi

On 04/10/2021 19:52, Bart Van Assche wrote:
> On 10/4/21 5:06 AM, Adrian Hunter wrote:
>> To fit its new purpose as a more general purpose sleeping lock for the
>> host.
>>
>> [ ... ]
>>
>> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
>> index 9b1ef272fb3c..495e1c0afae3 100644
>> --- a/drivers/scsi/ufs/ufshcd.h
>> +++ b/drivers/scsi/ufs/ufshcd.h
>> @@ -897,7 +897,7 @@ struct ufs_hba {
>>       enum bkops_status urgent_bkops_lvl;
>>       bool is_urgent_bkops_lvl_checked;
>>   -    struct rw_semaphore clk_scaling_lock;
>> +    struct rw_semaphore host_rw_sem;
>>       unsigned char desc_size[QUERY_DESC_IDN_MAX];
>>       atomic_t scsi_block_reqs_cnt;
> 
> Hi Adrian,

Thanks for looking at this.

> 
> It seems to me that this patch series goes in another direction than the
> direction the JEDEC UFS committee is going into. The UFSHCI 4.0 specification
> will support MCQ (Multi-Circular queue). We will benefit most from the v4.0
> MCQ support if there is no contention between the CPUs that submit UFS commands
> to different queues. I think the intention of this patch series is to make a
> single synchronization object protect all submission queues.

The intention is to make the locking easier to understand.  We need either to
share access to the host (e.g. ufshcd_queuecommand) or provide for exclusive
ownership (e.g. ufshcd_err_handler, PM, Shutdown).  We should be able to do
that with 1 rw_semaphore.

> I'm concerned that
> this will prevent to fully benefit from multiqueue support. Has it been

You are talking about contention between ufshcd_queuecommand() running
simultaneously on 2 CPUs right?  In that case, down_read() should be practically
atomic, so no contention unless a third process is waiting on down_write()
which never happens under normal circumstances.

> Has it been
> considered to eliminate the clk_scaling_lock and instead to use RCU to
> serialize clock scaling against command processing? One possible approach is to
> use blk_mq_freeze_queue() and blk_mq_unfreeze_queue() around the clock scaling
> code. A disadvantage of using RCU is that waiting for an RCU grace period takes
> some time - about 10 ms on my test setup. I have not yet verified what the
> performance and time impact would be of using an expedited RCU grace period
> instead of a regular RCU grace period.

It is probably worth measuring the performance of clk_scaling_lock first.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem
  2021-10-05  5:06     ` Adrian Hunter
@ 2021-10-05 19:06       ` Bart Van Assche
  0 siblings, 0 replies; 12+ messages in thread
From: Bart Van Assche @ 2021-10-05 19:06 UTC (permalink / raw)
  To: Adrian Hunter, Martin K . Petersen
  Cc: James E . J . Bottomley, Bean Huo, Avri Altman, Alim Akhtar,
	Can Guo, Asutosh Das, linux-scsi

On 10/4/21 10:06 PM, Adrian Hunter wrote:
> On 04/10/2021 19:52, Bart Van Assche wrote:
>> I'm concerned that
>> this will prevent us from fully benefiting from multiqueue support. Has it been
> 
> You are talking about contention between ufshcd_queuecommand() running
> simultaneously on 2 CPUs right?  In that case, down_read() should be practically
> atomic, so no contention unless a third process is waiting on down_write()
> which never happens under normal circumstances.
> 
>> Has it been
>> considered to eliminate the clk_scaling_lock and instead to use RCU to
>> serialize clock scaling against command processing? One possible approach is to
>> use blk_mq_freeze_queue() and blk_mq_unfreeze_queue() around the clock scaling
>> code. A disadvantage of using RCU is that waiting for an RCU grace period takes
>> some time - about 10 ms on my test setup. I have not yet verified what the
>> performance and time impact would be of using an expedited RCU grace period
>> instead of a regular RCU grace period.
> 
> It is probably worth measuring the performance of clk_scaling_lock first.

Upcoming UFS devices support several million IOPS. My experience, and that of
everyone else working with such storage devices, is that every single atomic
operation in the hot path causes a measurable performance overhead.
down_read() is a synchronization operation and implementing synchronization
operations without using atomic loads or stores is not possible. This is why
I see clk_scaling_lock as a performance bottleneck.

Thanks,

Bart.



^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-10-05 19:06 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-04 12:06 [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Adrian Hunter
2021-10-04 12:06 ` [PATCH RFC 1/6] scsi: ufs: Encapsulate clk_scaling_lock by inline functions Adrian Hunter
2021-10-04 12:06 ` [PATCH RFC 2/6] scsi: ufs: Rename clk_scaling_lock to host_rw_sem Adrian Hunter
2021-10-04 16:52   ` Bart Van Assche
2021-10-05  5:06     ` Adrian Hunter
2021-10-05 19:06       ` Bart Van Assche
2021-10-04 12:06 ` [PATCH RFC 3/6] scsi: ufs: Let ufshcd_[down/up]_read be nested within ufshcd_[down/up]_write Adrian Hunter
2021-10-04 12:06 ` [PATCH RFC 4/6] scsi: ufs: Fix a possible dead lock in clock scaling Adrian Hunter
2021-10-04 12:06 ` [PATCH RFC 5/6] scsi: ufs: Reorder dev_cmd locking Adrian Hunter
2021-10-04 12:06 ` [PATCH RFC 6/6] scsi: ufs: Hold ufshcd_down_write() lock for entire error handler duration Adrian Hunter
2021-10-04 12:42 ` [PATCH RFC 0/6] scsi: ufs: Start to make driver synchronization easier to understand Avri Altman
2021-10-04 13:27   ` Adrian Hunter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.