linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 20:05   ` Bart Van Assche
                     ` (3 more replies)
  2021-06-23  7:35 ` [PATCH v4 02/10] scsi: ufs: Add " Can Guo
                   ` (9 subsequent siblings)
  10 siblings, 4 replies; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	Bart Van Assche, open list:ARM/QUALCOMM SUPPORT, open list

Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
is_wlu_sys_suspended accordingly.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufs-qcom.c |  2 +-
 drivers/scsi/ufs/ufshcd.c   | 30 +++++++++++++++---------------
 drivers/scsi/ufs/ufshcd.h   |  6 ++++--
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 9b1d18d..fbe21e0 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -641,7 +641,7 @@ static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 	if (err)
 		return err;
 
-	hba->is_sys_suspended = false;
+	hba->is_wlu_sys_suspended = false;
 	return 0;
 }
 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 25fe18a..c40ba1d 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -549,8 +549,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
 		hba->saved_err, hba->saved_uic_err);
 	dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n",
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
-	dev_err(hba->dev, "PM in progress=%d, sys. suspended=%d\n",
-		hba->pm_op_in_progress, hba->is_sys_suspended);
+	dev_err(hba->dev, "wlu_pm_op_in_progress=%d, is_wlu_sys_suspended=%d\n",
+		hba->wlu_pm_op_in_progress, hba->is_wlu_sys_suspended);
 	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
 		hba->auto_bkops_enabled, hba->host->host_self_blocked);
 	dev_err(hba->dev, "Clk gate=%d\n", hba->clk_gating.state);
@@ -1999,7 +1999,7 @@ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba)
 	if (!hba->clk_scaling.active_reqs++)
 		queue_resume_work = true;
 
-	if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) {
+	if (!hba->clk_scaling.is_enabled || hba->wlu_pm_op_in_progress) {
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		return;
 	}
@@ -2734,7 +2734,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		 * err handler blocked for too long. So, just fail the scsi cmd
 		 * sent from PM ops, err handler can recover PM error anyways.
 		 */
-		if (hba->pm_op_in_progress) {
+		if (hba->wlu_pm_op_in_progress) {
 			hba->force_reset = true;
 			set_host_byte(cmd, DID_BAD_TARGET);
 			cmd->scsi_done(cmd);
@@ -2767,7 +2767,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		(hba->clk_gating.state != CLKS_ON));
 
 	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		if (hba->pm_op_in_progress)
+		if (hba->wlu_pm_op_in_progress)
 			set_host_byte(cmd, DID_BAD_TARGET);
 		else
 			err = SCSI_MLQUEUE_HOST_BUSY;
@@ -5116,7 +5116,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 			 * solution could be to abort the system suspend if
 			 * UFS device needs urgent BKOPs.
 			 */
-			if (!hba->pm_op_in_progress &&
+			if (!hba->wlu_pm_op_in_progress &&
 			    !ufshcd_eh_in_progress(hba) &&
 			    ufshcd_is_exception_event(lrbp->ucd_rsp_ptr))
 				/* Flushed in suspend */
@@ -5916,7 +5916,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 {
 	ufshcd_rpm_get_sync(hba);
 	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
-	    hba->is_sys_suspended) {
+	    hba->is_wlu_sys_suspended) {
 		enum ufs_pm_op pm_op;
 
 		/*
@@ -5933,7 +5933,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 		if (!ufshcd_is_clkgating_allowed(hba))
 			ufshcd_setup_clocks(hba, true);
 		ufshcd_release(hba);
-		pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
+		pm_op = hba->is_wlu_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
 		ufshcd_vops_resume(hba, pm_op);
 	} else {
 		ufshcd_hold(hba, false);
@@ -5976,7 +5976,7 @@ static void ufshcd_recover_pm_error(struct ufs_hba *hba)
 	struct request_queue *q;
 	int ret;
 
-	hba->is_sys_suspended = false;
+	hba->is_wlu_sys_suspended = false;
 	/*
 	 * Set RPM status of wlun device to RPM_ACTIVE,
 	 * this also clears its runtime error.
@@ -8784,7 +8784,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 	enum ufs_dev_pwr_mode req_dev_pwr_mode;
 	enum uic_link_state req_link_state;
 
-	hba->pm_op_in_progress = true;
+	hba->wlu_pm_op_in_progress = true;
 	if (pm_op != UFS_SHUTDOWN_PM) {
 		pm_lvl = pm_op == UFS_RUNTIME_PM ?
 			 hba->rpm_lvl : hba->spm_lvl;
@@ -8919,7 +8919,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 		hba->clk_gating.is_suspended = false;
 		ufshcd_release(hba);
 	}
-	hba->pm_op_in_progress = false;
+	hba->wlu_pm_op_in_progress = false;
 	return ret;
 }
 
@@ -8928,7 +8928,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 	int ret;
 	enum uic_link_state old_link_state = hba->uic_link_state;
 
-	hba->pm_op_in_progress = true;
+	hba->wlu_pm_op_in_progress = true;
 
 	/*
 	 * Call vendor specific resume callback. As these callbacks may access
@@ -9006,7 +9006,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 		ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret);
 	hba->clk_gating.is_suspended = false;
 	ufshcd_release(hba);
-	hba->pm_op_in_progress = false;
+	hba->wlu_pm_op_in_progress = false;
 	return ret;
 }
 
@@ -9072,7 +9072,7 @@ static int ufshcd_wl_suspend(struct device *dev)
 
 out:
 	if (!ret)
-		hba->is_sys_suspended = true;
+		hba->is_wlu_sys_suspended = true;
 	trace_ufshcd_wl_suspend(dev_name(dev), ret,
 		ktime_to_us(ktime_sub(ktime_get(), start)),
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
@@ -9100,7 +9100,7 @@ static int ufshcd_wl_resume(struct device *dev)
 		ktime_to_us(ktime_sub(ktime_get(), start)),
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
 	if (!ret)
-		hba->is_sys_suspended = false;
+		hba->is_wlu_sys_suspended = false;
 	up(&hba->host_sem);
 	return ret;
 }
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index c98d540..93aeeb3 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -752,7 +752,8 @@ struct ufs_hba {
 	enum ufs_pm_level spm_lvl;
 	struct device_attribute rpm_lvl_attr;
 	struct device_attribute spm_lvl_attr;
-	int pm_op_in_progress;
+	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in progress */
+	bool wlu_pm_op_in_progress;
 
 	/* Auto-Hibernate Idle Timer register value */
 	u32 ahit;
@@ -838,7 +839,8 @@ struct ufs_hba {
 
 	struct devfreq *devfreq;
 	struct ufs_clk_scaling clk_scaling;
-	bool is_sys_suspended;
+	/* A flag to tell whether the UFS device W-LU is system suspended */
+	bool is_wlu_sys_suspended;
 
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
  2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 12:33   ` Adrian Hunter
                     ` (2 more replies)
  2021-06-23  7:35 ` [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops Can Guo
                   ` (8 subsequent siblings)
  10 siblings, 3 replies; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Satya Tangirala, Kiwoong Kim, Bart Van Assche,
	open list

Add flags pm_op_in_progress and is_sys_suspended to track the status of hba
runtime and system suspend/resume operations.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 12 +++++++++++-
 drivers/scsi/ufs/ufshcd.h |  4 ++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c40ba1d..abe5f2d 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -551,6 +551,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
 	dev_err(hba->dev, "wlu_pm_op_in_progress=%d, is_wlu_sys_suspended=%d\n",
 		hba->wlu_pm_op_in_progress, hba->is_wlu_sys_suspended);
+	dev_err(hba->dev, "pm_op_in_progress=%d, is_sys_suspended=%d\n",
+		hba->pm_op_in_progress, hba->is_sys_suspended);
 	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
 		hba->auto_bkops_enabled, hba->host->host_self_blocked);
 	dev_err(hba->dev, "Clk gate=%d\n", hba->clk_gating.state);
@@ -9141,6 +9143,8 @@ static int ufshcd_suspend(struct ufs_hba *hba)
 
 	if (!hba->is_powered)
 		return 0;
+
+	hba->pm_op_in_progress = true;
 	/*
 	 * Disable the host irq as host controller as there won't be any
 	 * host controller transaction expected till resume.
@@ -9160,6 +9164,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
 	ufshcd_vreg_set_lpm(hba);
 	/* Put the host controller in low power mode if possible */
 	ufshcd_hba_vreg_set_lpm(hba);
+	hba->pm_op_in_progress = false;
 	return ret;
 }
 
@@ -9179,6 +9184,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
 	if (!hba->is_powered)
 		return 0;
 
+	hba->pm_op_in_progress = true;
 	ufshcd_hba_vreg_set_hpm(hba);
 	ret = ufshcd_vreg_set_hpm(hba);
 	if (ret)
@@ -9198,6 +9204,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
 out:
 	if (ret)
 		ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
+	hba->pm_op_in_progress = false;
 	return ret;
 }
 
@@ -9222,6 +9229,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
 	trace_ufshcd_system_suspend(dev_name(hba->dev), ret,
 		ktime_to_us(ktime_sub(ktime_get(), start)),
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
+	if (!ret)
+		hba->is_sys_suspended = true;
 	return ret;
 }
 EXPORT_SYMBOL(ufshcd_system_suspend);
@@ -9247,7 +9256,8 @@ int ufshcd_system_resume(struct ufs_hba *hba)
 	trace_ufshcd_system_resume(dev_name(hba->dev), ret,
 		ktime_to_us(ktime_sub(ktime_get(), start)),
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
-
+	if (!ret)
+		hba->is_sys_suspended = false;
 	return ret;
 }
 EXPORT_SYMBOL(ufshcd_system_resume);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 93aeeb3..1e7fe73 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -754,6 +754,8 @@ struct ufs_hba {
 	struct device_attribute spm_lvl_attr;
 	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in progress */
 	bool wlu_pm_op_in_progress;
+	/* A flag to tell whether ufshcd_suspend/resume() is in progress */
+	bool pm_op_in_progress;
 
 	/* Auto-Hibernate Idle Timer register value */
 	u32 ahit;
@@ -841,6 +843,8 @@ struct ufs_hba {
 	struct ufs_clk_scaling clk_scaling;
 	/* A flag to tell whether the UFS device W-LU is system suspended */
 	bool is_wlu_sys_suspended;
+	/* A flag to tell whether hba is system suspended */
+	bool is_sys_suspended;
 
 	enum bkops_status urgent_bkops_lvl;
 	bool is_urgent_bkops_lvl_checked;
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
  2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
  2021-06-23  7:35 ` [PATCH v4 02/10] scsi: ufs: Add " Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 21:08   ` Bart Van Assche
  2021-06-23  7:35 ` [PATCH v4 04/10] scsi: ufs: Enable IRQ after enabling clocks in error handling preparation Can Guo
                   ` (7 subsequent siblings)
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

rpm_get_suppliers() is returning an error only if the error is negative.
However, ufshcd_wl_resume() may return a positive error code, e.g., when
hibern8 or SSU cmd fails. Make the positive return value a negative error
code so that consumers are aware of any resume failure from their supplier.
Make the same change to ufshcd_wl_suspend() just to keep symmetry.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index abe5f2d..ee70522 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -8922,7 +8922,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 		ufshcd_release(hba);
 	}
 	hba->wlu_pm_op_in_progress = false;
-	return ret;
+	return ret <= 0 ? ret : -EINVAL;
 }
 
 static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
@@ -9009,7 +9009,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 	hba->clk_gating.is_suspended = false;
 	ufshcd_release(hba);
 	hba->wlu_pm_op_in_progress = false;
-	return ret;
+	return ret <= 0 ? ret : -EINVAL;
 }
 
 static int ufshcd_wl_runtime_suspend(struct device *dev)
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 04/10] scsi: ufs: Enable IRQ after enabling clocks in error handling preparation
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (2 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 21:20   ` Bart Van Assche
  2021-06-23  7:35 ` [PATCH 05/10] scsi: ufs: Complete the cmd before returning in queuecommand Can Guo
                   ` (6 subsequent siblings)
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

In error handling preparation, enable IRQ after enabling clocks in case
unclocked register access happens.

Fixes: c72e79c0ad2bd ("scsi: ufs: Recover HBA runtime PM error in error handler")
Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index ee70522..5f837c4 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5927,13 +5927,14 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 		 * can be OFF or in LPM.
 		 */
 		ufshcd_setup_hba_vreg(hba, true);
-		ufshcd_enable_irq(hba);
 		ufshcd_setup_vreg(hba, true);
 		ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq);
 		ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq2);
 		ufshcd_hold(hba, false);
-		if (!ufshcd_is_clkgating_allowed(hba))
+		if (!ufshcd_is_clkgating_allowed(hba)) {
 			ufshcd_setup_clocks(hba, true);
+			ufshcd_enable_irq(hba);
+		}
 		ufshcd_release(hba);
 		pm_op = hba->is_wlu_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
 		ufshcd_vops_resume(hba, pm_op);
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH 05/10] scsi: ufs: Complete the cmd before returning in queuecommand
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (3 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 04/10] scsi: ufs: Enable IRQ after enabling clocks in error handling preparation Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23  7:39   ` Can Guo
  2021-06-23  7:35 ` [PATCH v4 05/10] scsi: ufs: Remove a redundant tag check in ufshcd_queuecommand() Can Guo
                   ` (5 subsequent siblings)
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

Commit 7a7e66c65d4148fc3f23b058405bc9f102414fcb ("scsi: ufs: Fix a race
condition between ufshcd_abort() and eh_work()") forgot to complete the
cmd, which takes an occupied lrb, before returning in queuecommand. This
change adds the missing codes.

Fixes: 7a7e66c65d414 ("scsi: ufs: Fix a race condition between ufshcd_abort() and eh_work()")
Signed-off-by: Can Guo <cang@codeaurora.org>

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5f837c4..7fbc63e 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2758,6 +2758,16 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		goto out;
 	}
 
+	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
+		if (hba->wlu_pm_op_in_progress) {
+			set_host_byte(cmd, DID_BAD_TARGET);
+			cmd->scsi_done(cmd);
+		} else {
+			err = SCSI_MLQUEUE_HOST_BUSY;
+		}
+		goto out;
+	}
+
 	hba->req_abort_count = 0;
 
 	err = ufshcd_hold(hba, true);
@@ -2768,15 +2778,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
 		(hba->clk_gating.state != CLKS_ON));
 
-	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		if (hba->wlu_pm_op_in_progress)
-			set_host_byte(cmd, DID_BAD_TARGET);
-		else
-			err = SCSI_MLQUEUE_HOST_BUSY;
-		ufshcd_release(hba);
-		goto out;
-	}
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	lrbp->cmd = cmd;
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 05/10] scsi: ufs: Remove a redundant tag check in ufshcd_queuecommand()
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (4 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH 05/10] scsi: ufs: Complete the cmd before returning in queuecommand Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 21:24   ` Bart Van Assche
  2021-06-23  7:35 ` [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume Can Guo
                   ` (4 subsequent siblings)
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

Since commit a45f937110fa6b0c2c06a5d3ef026963a5759050 ("scsi: ufs: Optimize
host lock on transfer requests send/compl paths") has moved ufshcd state
check to front, we can remove the following tag check added for preventing
the scenario where a cmd is trying to take a lrbp, which is still in use.
This scenario can only happen if a cmd goes through the fast abort path
(whose tag is released but lrbp is not) in ufshcd_abort(), and since ufshcd
state is also changed by the fast abort path, checking the ufshcd state in
ufshcd_queuecommand() is equivalent, hence remove the tag check.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5f837c4..3695dd2 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2768,15 +2768,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
 		(hba->clk_gating.state != CLKS_ON));
 
-	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
-		if (hba->wlu_pm_op_in_progress)
-			set_host_byte(cmd, DID_BAD_TARGET);
-		else
-			err = SCSI_MLQUEUE_HOST_BUSY;
-		ufshcd_release(hba);
-		goto out;
-	}
-
 	lrbp = &hba->lrb[tag];
 	WARN_ON(lrbp->cmd);
 	lrbp->cmd = cmd;
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (5 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 05/10] scsi: ufs: Remove a redundant tag check in ufshcd_queuecommand() Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 14:30   ` Adrian Hunter
  2021-06-23  7:35 ` [PATCH v4 07/10] scsi: ufs: Simplify error handling preparation Can Guo
                   ` (3 subsequent siblings)
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

To protect system suspend/resume from being disturbed by error handling,
instead of using host_sem, let error handler call lock_system_sleep() and
unlock_system_sleep() which achieve the same purpose. Remove the host_sem
used in suspend/resume paths to make the code more readable.

Suggested-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 3695dd2..a09e4a2 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
 
 static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 {
+	/*
+	 * It is not safe to perform error handling while suspend or resume is
+	 * in progress. Hence the lock_system_sleep() call.
+	 */
+	lock_system_sleep();
 	ufshcd_rpm_get_sync(hba);
 	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
 	    hba->is_wlu_sys_suspended) {
@@ -5951,6 +5956,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
 		ufshcd_clk_scaling_suspend(hba, false);
 	ufshcd_clear_ua_wluns(hba);
 	ufshcd_rpm_put(hba);
+	unlock_system_sleep();
 }
 
 static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
@@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device *dev)
 	ktime_t start = ktime_get();
 
 	hba = shost_priv(sdev->host);
-	down(&hba->host_sem);
 
 	if (pm_runtime_suspended(dev))
 		goto out;
 
 	ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
-	if (ret) {
+	if (ret)
 		dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  ret);
-		up(&hba->host_sem);
-	}
 
 out:
 	if (!ret)
@@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device *dev)
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
 	if (!ret)
 		hba->is_wlu_sys_suspended = false;
-	up(&hba->host_sem);
 	return ret;
 }
 #endif
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 07/10] scsi: ufs: Simplify error handling preparation
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (6 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 21:30   ` Bart Van Assche
  2021-06-23  7:35 ` [PATCH v4 08/10] scsi: ufs: Update ufshcd_recover_pm_error() Can Guo
                   ` (2 subsequent siblings)
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

Commit cb7e6f05fce67c965194ac04467e1ba7bc70b069 ("scsi: ufs: core: Enable
power management for wlun") moves UFS operations out of ufshcd_resume(), so
in error handling preparation, if ufshcd hba has failed to resume, there is
no point to re-enable IRQ/clk/pwr.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 56 +++++++++++++++++++++++++----------------------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index a09e4a2..379c6a0 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2727,8 +2727,8 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		break;
 	case UFSHCD_STATE_EH_SCHEDULED_FATAL:
 		/*
-		 * pm_runtime_get_sync() is used at error handling preparation
-		 * stage. If a scsi cmd, e.g. the SSU cmd, is sent from hba's
+		 * ufshcd_rpm_get_sync() is used at error handling preparation
+		 * stage. If a scsi cmd, e.g., the SSU cmd, is sent from the
 		 * PM ops, it can never be finished if we let SCSI layer keep
 		 * retrying it, which gets err handler stuck forever. Neither
 		 * can we let the scsi cmd pass through, because UFS is in bad
@@ -5905,34 +5905,31 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
 	}
 }
 
-static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
+static int ufshcd_err_handling_prepare(struct ufs_hba *hba)
 {
 	/*
 	 * It is not safe to perform error handling while suspend or resume is
 	 * in progress. Hence the lock_system_sleep() call.
 	 */
 	lock_system_sleep();
+	/*
+	 * Exclusively call pm_runtime_get_sync(hba->dev) once, in case
+	 * following ufshcd_rpm_get_sync() fails.
+	 */
+	pm_runtime_get_sync(hba->dev);
+	if (pm_runtime_suspended(hba->dev) || hba->is_sys_suspended) {
+		pm_runtime_put(hba->dev);
+		unlock_system_sleep();
+		return -EINVAL;
+	}
+
+	ufshcd_set_eh_in_progress(hba);
 	ufshcd_rpm_get_sync(hba);
-	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
+	if (pm_runtime_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
 	    hba->is_wlu_sys_suspended) {
-		enum ufs_pm_op pm_op;
+		enum ufs_pm_op pm_op = hba->is_wlu_sys_suspended ?
+				       UFS_SYSTEM_PM : UFS_RUNTIME_PM;
 
-		/*
-		 * Don't assume anything of resume, if
-		 * resume fails, irq and clocks can be OFF, and powers
-		 * can be OFF or in LPM.
-		 */
-		ufshcd_setup_hba_vreg(hba, true);
-		ufshcd_setup_vreg(hba, true);
-		ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq);
-		ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq2);
-		ufshcd_hold(hba, false);
-		if (!ufshcd_is_clkgating_allowed(hba)) {
-			ufshcd_setup_clocks(hba, true);
-			ufshcd_enable_irq(hba);
-		}
-		ufshcd_release(hba);
-		pm_op = hba->is_wlu_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
 		ufshcd_vops_resume(hba, pm_op);
 	} else {
 		ufshcd_hold(hba, false);
@@ -5946,16 +5943,19 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 	down_write(&hba->clk_scaling_lock);
 	up_write(&hba->clk_scaling_lock);
 	cancel_work_sync(&hba->eeh_work);
+	return 0;
 }
 
 static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
 {
+	ufshcd_clear_eh_in_progress(hba);
 	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 	if (ufshcd_is_clkscaling_supported(hba))
 		ufshcd_clk_scaling_suspend(hba, false);
 	ufshcd_clear_ua_wluns(hba);
 	ufshcd_rpm_put(hba);
+	pm_runtime_put(hba->dev);
 	unlock_system_sleep();
 }
 
@@ -6048,9 +6048,13 @@ static void ufshcd_err_handler(struct work_struct *work)
 		up(&hba->host_sem);
 		return;
 	}
-	ufshcd_set_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_err_handling_prepare(hba);
+	if (ufshcd_err_handling_prepare(hba)) {
+		dev_err(hba->dev, "%s: error handling preparation failed\n",
+				__func__);
+		up(&hba->host_sem);
+		return;
+	}
 	/* Complete requests that have door-bell cleared by h/w */
 	ufshcd_complete_requests(hba);
 	spin_lock_irqsave(hba->host->host_lock, flags);
@@ -6194,7 +6198,6 @@ static void ufshcd_err_handler(struct work_struct *work)
 			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
 			    __func__, hba->saved_err, hba->saved_uic_err);
 	}
-	ufshcd_clear_eh_in_progress(hba);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	ufshcd_err_handling_unprepare(hba);
 	up(&hba->host_sem);
@@ -8995,6 +8998,9 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 
 	/* Enable Auto-Hibernate if configured */
 	ufshcd_auto_hibern8_enable(hba);
+
+	hba->clk_gating.is_suspended = false;
+	ufshcd_release(hba);
 	goto out;
 
 set_old_link_state:
@@ -9004,8 +9010,6 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 out:
 	if (ret)
 		ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret);
-	hba->clk_gating.is_suspended = false;
-	ufshcd_release(hba);
 	hba->wlu_pm_op_in_progress = false;
 	return ret <= 0 ? ret : -EINVAL;
 }
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 08/10] scsi: ufs: Update ufshcd_recover_pm_error()
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (7 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 07/10] scsi: ufs: Simplify error handling preparation Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23  7:35 ` [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests Can Guo
  2021-06-23  7:35 ` [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access Can Guo
  10 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

After error handler performs a successful reset and restore, all the LUs
become active, forcibly set the runtime PM status of the scsi devices (and
their request queues) underneath hba to ACTIVE to reflect the change. By
doing so, dev->power.runtime_error (if any) can also be cleared, such that
runtime PM can get back to work on them, otherwise the device(s) may be
left either runtime active or runtime suspended permanently.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 49 ++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 379c6a0..d739401 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -243,6 +243,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba);
 static int ufshcd_change_power_mode(struct ufs_hba *hba,
 			     struct ufs_pa_layer_attr *pwr_mode);
 static void ufshcd_schedule_eh_work(struct ufs_hba *hba);
+static void ufshcd_recover_pm_error(struct ufs_hba *hba);
 static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
 static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
@@ -5946,13 +5947,15 @@ static int ufshcd_err_handling_prepare(struct ufs_hba *hba)
 	return 0;
 }
 
-static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
+static void ufshcd_err_handling_unprepare(struct ufs_hba *hba, int reset_err)
 {
 	ufshcd_clear_eh_in_progress(hba);
 	ufshcd_scsi_unblock_requests(hba);
 	ufshcd_release(hba);
 	if (ufshcd_is_clkscaling_supported(hba))
 		ufshcd_clk_scaling_suspend(hba, false);
+	if (!reset_err)
+		ufshcd_recover_pm_error(hba);
 	ufshcd_clear_ua_wluns(hba);
 	ufshcd_rpm_put(hba);
 	pm_runtime_put(hba->dev);
@@ -5972,34 +5975,26 @@ static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
 static void ufshcd_recover_pm_error(struct ufs_hba *hba)
 {
 	struct Scsi_Host *shost = hba->host;
-	struct scsi_device *sdev;
-	struct request_queue *q;
+	struct scsi_device *sdev = hba->sdev_ufs_device;
+	struct scsi_target *starget = sdev->sdev_target;
 	int ret;
 
 	hba->is_wlu_sys_suspended = false;
-	/*
-	 * Set RPM status of wlun device to RPM_ACTIVE,
-	 * this also clears its runtime error.
-	 */
-	ret = pm_runtime_set_active(&hba->sdev_ufs_device->sdev_gendev);
 
-	/* hba device might have a runtime error otherwise */
-	if (ret)
-		ret = pm_runtime_set_active(hba->dev);
-	/*
-	 * If wlun device had runtime error, we also need to resume those
-	 * consumer scsi devices in case any of them has failed to be
-	 * resumed due to supplier runtime resume failure. This is to unblock
-	 * blk_queue_enter in case there are bios waiting inside it.
-	 */
-	if (!ret) {
-		shost_for_each_device(sdev, shost) {
-			q = sdev->request_queue;
-			if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
-				       q->rpm_status == RPM_SUSPENDING))
-				pm_request_resume(q->dev);
-		}
+	/* Resume parent/target to clear path for pm_runtime_set_active() */
+	pm_runtime_get_sync(&starget->dev);
+	shost_for_each_device(sdev, shost) {
+		struct device *dev = &sdev->sdev_gendev;
+
+		pm_runtime_get_sync(dev);
+		/* Clear dev->power.runtime_error */
+		ret = pm_runtime_set_active(dev);
+		if (!ret)
+			/* runtime_error cleared, kick blk_queue_enter() */
+			blk_set_runtime_active(sdev->request_queue);
+		pm_runtime_put(dev);
 	}
+	pm_runtime_put(&starget->dev);
 }
 #else
 static inline void ufshcd_recover_pm_error(struct ufs_hba *hba)
@@ -6033,7 +6028,7 @@ static void ufshcd_err_handler(struct work_struct *work)
 	unsigned long flags;
 	bool err_xfer = false;
 	bool err_tm = false;
-	int err = 0, pmc_err;
+	int err = -1, pmc_err;
 	int tag;
 	bool needs_reset = false, needs_restore = false;
 
@@ -6185,8 +6180,6 @@ static void ufshcd_err_handler(struct work_struct *work)
 		if (err)
 			dev_err(hba->dev, "%s: reset and restore failed with err %d\n",
 					__func__, err);
-		else
-			ufshcd_recover_pm_error(hba);
 		spin_lock_irqsave(hba->host->host_lock, flags);
 	}
 
@@ -6199,7 +6192,7 @@ static void ufshcd_err_handler(struct work_struct *work)
 			    __func__, hba->saved_err, hba->saved_uic_err);
 	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	ufshcd_err_handling_unprepare(hba);
+	ufshcd_err_handling_unprepare(hba, err);
 	up(&hba->host_sem);
 }
 
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (8 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 08/10] scsi: ufs: Update ufshcd_recover_pm_error() Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 21:33   ` Bart Van Assche
  2021-06-23  7:35 ` [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access Can Guo
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

If PM requests fail during runtime suspend/resume, RPM framework saves the
error to dev->power.runtime_error. Before the runtime_error gets cleared,
runtime PM on this specific device won't work again, leaving the device
either runtime active or runtime suspended permanently.

When task abort happens to a PM request sent during runtime suspend/resume,
even if it can be successfully aborted, RPM framework anyways saves the
(TIMEOUT) error. In this situation, we can leverage error handling to
recover and clear the runtime_error. So, let PM requests take the fast
abort path in ufshcd_abort().

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index d739401..59fc521 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2737,7 +2737,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 		 * err handler blocked for too long. So, just fail the scsi cmd
 		 * sent from PM ops, err handler can recover PM error anyways.
 		 */
-		if (hba->wlu_pm_op_in_progress) {
+		if (cmd->request->rq_flags & RQF_PM) {
 			hba->force_reset = true;
 			set_host_byte(cmd, DID_BAD_TARGET);
 			cmd->scsi_done(cmd);
@@ -6981,11 +6981,14 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	int err = 0;
 	struct ufshcd_lrb *lrbp;
 	u32 reg;
+	bool need_eh = false;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 	lrbp = &hba->lrb[tag];
+
+	dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag);
 	if (!ufshcd_valid_tag(hba, tag)) {
 		dev_err(hba->dev,
 			"%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p",
@@ -7003,9 +7006,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 		goto out;
 	}
 
-	/* Print Transfer Request of aborted task */
-	dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag);
-
 	/*
 	 * Print detailed info about aborted request.
 	 * As more than one request might get aborted at the same time,
@@ -7033,21 +7033,21 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	}
 
 	/*
-	 * Task abort to the device W-LUN is illegal. When this command
-	 * will fail, due to spec violation, scsi err handling next step
-	 * will be to send LU reset which, again, is a spec violation.
-	 * To avoid these unnecessary/illegal steps, first we clean up
-	 * the lrb taken by this cmd and re-set it in outstanding_reqs,
-	 * then queue the eh_work and bail.
+	 * This fast path guarantees the cmd always gets aborted successfully,
+	 * meanwhile it invokes the error handler. It allows contexts, which
+	 * are blocked by this cmd, to fail fast. It serves multiple purposes:
+	 * #1 To avoid unnecessary/illagal abort attempts to the W-LU.
+	 * #2 To avoid live lock between eh_work and specific contexts, i.e.,
+	 *    suspend/resume and eh_work itself.
+	 * #3 To let eh_work recover runtime PM error in case abort happens
+	 *    to cmds sent from runtime suspend/resume ops.
 	 */
-	if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
+	if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN ||
+	    (cmd->request->rq_flags & RQF_PM)) {
 		ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
 		__ufshcd_transfer_req_compl(hba, (1UL << tag));
 		set_bit(tag, &hba->outstanding_reqs);
-		spin_lock_irqsave(host->host_lock, flags);
-		hba->force_reset = true;
-		ufshcd_schedule_eh_work(hba);
-		spin_unlock_irqrestore(host->host_lock, flags);
+		need_eh = true;
 		goto out;
 	}
 
@@ -7061,6 +7061,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 cleanup:
 		__ufshcd_transfer_req_compl(hba, (1UL << tag));
 out:
+		if (cmd->request->rq_flags & RQF_PM || need_eh) {
+			spin_lock_irqsave(host->host_lock, flags);
+			hba->force_reset = true;
+			ufshcd_schedule_eh_work(hba);
+			spin_unlock_irqrestore(host->host_lock, flags);
+		}
 		err = SUCCESS;
 	} else {
 		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access
       [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
                   ` (9 preceding siblings ...)
  2021-06-23  7:35 ` [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests Can Guo
@ 2021-06-23  7:35 ` Can Guo
  2021-06-23 21:51   ` Bart Van Assche
  10 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:35 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Adrian Hunter, Bean Huo, Stanley Chu,
	Keoseong Park, Gustavo A. R. Silva, Jaegeuk Kim, Kiwoong Kim,
	Satya Tangirala, Bart Van Assche, open list

Do not let user access HW if hba resume fails or hba is not in good state,
otherwise it may lead to various stability issues.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/ufs-debugfs.c |  27 ++---------
 drivers/scsi/ufs/ufs-sysfs.c   | 105 ++++++++++++++---------------------------
 drivers/scsi/ufs/ufs_bsg.c     |  16 +++----
 drivers/scsi/ufs/ufshcd.c      |  63 +++++++++++++++----------
 drivers/scsi/ufs/ufshcd.h      |  17 ++++++-
 5 files changed, 101 insertions(+), 127 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-debugfs.c b/drivers/scsi/ufs/ufs-debugfs.c
index 4e1ff20..42c1c8b 100644
--- a/drivers/scsi/ufs/ufs-debugfs.c
+++ b/drivers/scsi/ufs/ufs-debugfs.c
@@ -52,25 +52,6 @@ static int ee_usr_mask_get(void *data, u64 *val)
 	return 0;
 }
 
-static int ufs_debugfs_get_user_access(struct ufs_hba *hba)
-__acquires(&hba->host_sem)
-{
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		up(&hba->host_sem);
-		return -EBUSY;
-	}
-	ufshcd_rpm_get_sync(hba);
-	return 0;
-}
-
-static void ufs_debugfs_put_user_access(struct ufs_hba *hba)
-__releases(&hba->host_sem)
-{
-	ufshcd_rpm_put_sync(hba);
-	up(&hba->host_sem);
-}
-
 static int ee_usr_mask_set(void *data, u64 val)
 {
 	struct ufs_hba *hba = data;
@@ -78,11 +59,11 @@ static int ee_usr_mask_set(void *data, u64 val)
 
 	if (val & ~(u64)MASK_EE_STATUS)
 		return -EINVAL;
-	err = ufs_debugfs_get_user_access(hba);
+	err = ufshcd_get_user_access(hba);
 	if (err)
 		return err;
 	err = ufshcd_update_ee_usr_mask(hba, val, MASK_EE_STATUS);
-	ufs_debugfs_put_user_access(hba);
+	ufshcd_put_user_access(hba);
 	return err;
 }
 
@@ -120,10 +101,10 @@ static void ufs_debugfs_restart_ee(struct work_struct *work)
 	struct ufs_hba *hba = container_of(work, struct ufs_hba, debugfs_ee_work.work);
 
 	if (!hba->ee_usr_mask || pm_runtime_suspended(hba->dev) ||
-	    ufs_debugfs_get_user_access(hba))
+	    ufshcd_get_user_access(hba))
 		return;
 	ufshcd_write_ee_control(hba);
-	ufs_debugfs_put_user_access(hba);
+	ufshcd_put_user_access(hba);
 }
 
 void ufs_debugfs_hba_init(struct ufs_hba *hba)
diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c
index 52bd807..b8732b9 100644
--- a/drivers/scsi/ufs/ufs-sysfs.c
+++ b/drivers/scsi/ufs/ufs-sysfs.c
@@ -160,22 +160,14 @@ static ssize_t auto_hibern8_show(struct device *dev,
 	if (!ufshcd_is_auto_hibern8_supported(hba))
 		return -EOPNOTSUPP;
 
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	pm_runtime_get_sync(hba->dev);
+	ret = ufshcd_get_user_access(hba);
+	if (ret)
+		return ret;
 	ufshcd_hold(hba, false);
 	ahit = ufshcd_readl(hba, REG_AUTO_HIBERNATE_IDLE_TIMER);
 	ufshcd_release(hba);
-	pm_runtime_put_sync(hba->dev);
-
 	ret = sysfs_emit(buf, "%d\n", ufshcd_ahit_to_us(ahit));
-
-out:
-	up(&hba->host_sem);
+	ufshcd_put_user_access(hba);
 	return ret;
 }
 
@@ -202,7 +194,7 @@ static ssize_t auto_hibern8_store(struct device *dev,
 		goto out;
 	}
 
-	ufshcd_auto_hibern8_update(hba, ufshcd_us_to_ahit(timer));
+	ret = ufshcd_auto_hibern8_update(hba, ufshcd_us_to_ahit(timer));
 
 out:
 	up(&hba->host_sem);
@@ -239,17 +231,11 @@ static ssize_t wb_on_store(struct device *dev, struct device_attribute *attr,
 	if (wb_enable != 0 && wb_enable != 1)
 		return -EINVAL;
 
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		res = -EBUSY;
-		goto out;
-	}
-
-	ufshcd_rpm_get_sync(hba);
+	res = ufshcd_get_user_access(hba);
+	if (res)
+		return res;
 	res = ufshcd_wb_toggle(hba, wb_enable);
-	ufshcd_rpm_put_sync(hba);
-out:
-	up(&hba->host_sem);
+	ufshcd_put_user_access(hba);
 	return res < 0 ? res : count;
 }
 
@@ -527,16 +513,11 @@ static ssize_t ufs_sysfs_read_desc_param(struct ufs_hba *hba,
 	if (param_size > 8)
 		return -EINVAL;
 
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ufshcd_rpm_get_sync(hba);
+	ret = ufshcd_get_user_access(hba);
+	if (ret)
+		return ret;
 	ret = ufshcd_read_desc_param(hba, desc_id, desc_index,
 				param_offset, desc_buf, param_size);
-	ufshcd_rpm_put_sync(hba);
 	if (ret) {
 		ret = -EINVAL;
 		goto out;
@@ -561,7 +542,7 @@ static ssize_t ufs_sysfs_read_desc_param(struct ufs_hba *hba,
 	}
 
 out:
-	up(&hba->host_sem);
+	ufshcd_put_user_access(hba);
 	return ret;
 }
 
@@ -904,23 +885,20 @@ static ssize_t _name##_show(struct device *dev,				\
 	int desc_len = QUERY_DESC_MAX_SIZE;				\
 	u8 *desc_buf;							\
 									\
-	down(&hba->host_sem);						\
-	if (!ufshcd_is_user_access_allowed(hba)) {			\
-		up(&hba->host_sem);					\
-		return -EBUSY;						\
-	}								\
+	ret = ufshcd_get_user_access(hba);				\
+	if (ret)							\
+		return ret;						\
 	desc_buf = kzalloc(QUERY_DESC_MAX_SIZE, GFP_ATOMIC);		\
 	if (!desc_buf) {						\
-		up(&hba->host_sem);					\
-		return -ENOMEM;						\
+		ret = -ENOMEM;						\
+		goto out;						\
 	}								\
-	ufshcd_rpm_get_sync(hba);					\
 	ret = ufshcd_query_descriptor_retry(hba,			\
 		UPIU_QUERY_OPCODE_READ_DESC, QUERY_DESC_IDN_DEVICE,	\
 		0, 0, desc_buf, &desc_len);				\
 	if (ret) {							\
 		ret = -EINVAL;						\
-		goto out;						\
+		goto out_free;						\
 	}								\
 	index = desc_buf[DEVICE_DESC_PARAM##_pname];			\
 	kfree(desc_buf);						\
@@ -928,12 +906,12 @@ static ssize_t _name##_show(struct device *dev,				\
 	ret = ufshcd_read_string_desc(hba, index, &desc_buf,		\
 				      SD_ASCII_STD);			\
 	if (ret < 0)							\
-		goto out;						\
+		goto out_free;						\
 	ret = sysfs_emit(buf, "%s\n", desc_buf);			\
-out:									\
-	ufshcd_rpm_put_sync(hba);					\
+out_free:								\
 	kfree(desc_buf);						\
-	up(&hba->host_sem);						\
+out:									\
+	ufshcd_put_user_access(hba);					\
 	return ret;							\
 }									\
 static DEVICE_ATTR_RO(_name)
@@ -973,24 +951,20 @@ static ssize_t _name##_show(struct device *dev,				\
 	int ret;							\
 	struct ufs_hba *hba = dev_get_drvdata(dev);			\
 									\
-	down(&hba->host_sem);						\
-	if (!ufshcd_is_user_access_allowed(hba)) {			\
-		up(&hba->host_sem);					\
-		return -EBUSY;						\
-	}								\
+	ret = ufshcd_get_user_access(hba);				\
+	if (ret)							\
+		return ret;						\
 	if (ufshcd_is_wb_flags(QUERY_FLAG_IDN##_uname))			\
 		index = ufshcd_wb_get_query_index(hba);			\
-	ufshcd_rpm_get_sync(hba);					\
 	ret = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG,	\
 		QUERY_FLAG_IDN##_uname, index, &flag);			\
-	ufshcd_rpm_put_sync(hba);					\
 	if (ret) {							\
 		ret = -EINVAL;						\
 		goto out;						\
 	}								\
 	ret = sysfs_emit(buf, "%s\n", flag ? "true" : "false");		\
 out:									\
-	up(&hba->host_sem);						\
+	ufshcd_put_user_access(hba);					\
 	return ret;							\
 }									\
 static DEVICE_ATTR_RO(_name)
@@ -1042,24 +1016,20 @@ static ssize_t _name##_show(struct device *dev,				\
 	int ret;							\
 	u8 index = 0;							\
 									\
-	down(&hba->host_sem);						\
-	if (!ufshcd_is_user_access_allowed(hba)) {			\
-		up(&hba->host_sem);					\
-		return -EBUSY;						\
-	}								\
+	ret = ufshcd_get_user_access(hba);				\
+	if (ret)							\
+		return ret;						\
 	if (ufshcd_is_wb_attrs(QUERY_ATTR_IDN##_uname))			\
 		index = ufshcd_wb_get_query_index(hba);			\
-	ufshcd_rpm_get_sync(hba);					\
 	ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR,	\
 		QUERY_ATTR_IDN##_uname, index, 0, &value);		\
-	ufshcd_rpm_put_sync(hba);					\
 	if (ret) {							\
 		ret = -EINVAL;						\
 		goto out;						\
 	}								\
 	ret = sysfs_emit(buf, "0x%08X\n", value);			\
 out:									\
-	up(&hba->host_sem);						\
+	ufshcd_put_user_access(hba);					\
 	return ret;							\
 }									\
 static DEVICE_ATTR_RO(_name)
@@ -1195,16 +1165,11 @@ static ssize_t dyn_cap_needed_attribute_show(struct device *dev,
 	u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun);
 	int ret;
 
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ufshcd_rpm_get_sync(hba);
+	ret = ufshcd_get_user_access(hba);
+	if (ret)
+		return ret;
 	ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR,
 		QUERY_ATTR_IDN_DYN_CAP_NEEDED, lun, 0, &value);
-	ufshcd_rpm_put_sync(hba);
 	if (ret) {
 		ret = -EINVAL;
 		goto out;
@@ -1213,7 +1178,7 @@ static ssize_t dyn_cap_needed_attribute_show(struct device *dev,
 	ret = sysfs_emit(buf, "0x%08X\n", value);
 
 out:
-	up(&hba->host_sem);
+	ufshcd_put_user_access(hba);
 	return ret;
 }
 static DEVICE_ATTR_RO(dyn_cap_needed_attribute);
diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c
index 39bf204..c5b3eb8 100644
--- a/drivers/scsi/ufs/ufs_bsg.c
+++ b/drivers/scsi/ufs/ufs_bsg.c
@@ -97,7 +97,9 @@ static int ufs_bsg_request(struct bsg_job *job)
 
 	bsg_reply->reply_payload_rcv_len = 0;
 
-	ufshcd_rpm_get_sync(hba);
+	ret = ufshcd_get_user_access(hba);
+	if (ret)
+		goto out;
 
 	msgcode = bsg_request->msgcode;
 	switch (msgcode) {
@@ -105,10 +107,8 @@ static int ufs_bsg_request(struct bsg_job *job)
 		desc_op = bsg_request->upiu_req.qr.opcode;
 		ret = ufs_bsg_alloc_desc_buffer(hba, job, &desc_buff,
 						&desc_len, desc_op);
-		if (ret) {
-			ufshcd_rpm_put_sync(hba);
-			goto out;
-		}
+		if (ret)
+			goto out_put_access;
 
 		fallthrough;
 	case UPIU_TRANSACTION_NOP_OUT:
@@ -138,10 +138,8 @@ static int ufs_bsg_request(struct bsg_job *job)
 		break;
 	}
 
-	ufshcd_rpm_put_sync(hba);
-
 	if (!desc_buff)
-		goto out;
+		goto out_put_access;
 
 	if (desc_op == UPIU_QUERY_OPCODE_READ_DESC && desc_len)
 		bsg_reply->reply_payload_rcv_len =
@@ -151,6 +149,8 @@ static int ufs_bsg_request(struct bsg_job *job)
 
 	kfree(desc_buff);
 
+out_put_access:
+	ufshcd_put_user_access(hba);
 out:
 	bsg_reply->result = ret;
 	job->reply_len = sizeof(struct ufs_bsg_reply);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 59fc521..8bab3ea 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -128,15 +128,6 @@ enum {
 	UFSHCD_CAN_QUEUE	= 32,
 };
 
-/* UFSHCD states */
-enum {
-	UFSHCD_STATE_RESET,
-	UFSHCD_STATE_ERROR,
-	UFSHCD_STATE_OPERATIONAL,
-	UFSHCD_STATE_EH_SCHEDULED_FATAL,
-	UFSHCD_STATE_EH_SCHEDULED_NON_FATAL,
-};
-
 /* UFSHCD error handling flags */
 enum {
 	UFSHCD_EH_IN_PROGRESS = (1 << 0),
@@ -254,6 +245,31 @@ static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable);
 static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba);
 static void ufshcd_hba_vreg_set_hpm(struct ufs_hba *hba);
 
+int ufshcd_get_user_access(struct ufs_hba *hba)
+__acquires(&hba->host_sem)
+{
+	down(&hba->host_sem);
+	if (!ufshcd_is_user_access_allowed(hba)) {
+		up(&hba->host_sem);
+		return -EBUSY;
+	}
+	if (ufshcd_rpm_get_sync(hba)) {
+		ufshcd_rpm_put_sync(hba);
+		up(&hba->host_sem);
+		return -EBUSY;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ufshcd_get_user_access);
+
+void ufshcd_put_user_access(struct ufs_hba *hba)
+__releases(&hba->host_sem)
+{
+	ufshcd_rpm_put_sync(hba);
+	up(&hba->host_sem);
+}
+EXPORT_SYMBOL_GPL(ufshcd_put_user_access);
+
 static inline bool ufshcd_valid_tag(struct ufs_hba *hba, int tag)
 {
 	return tag >= 0 && tag < hba->nutrs;
@@ -1553,19 +1569,14 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
 	if (kstrtou32(buf, 0, &value))
 		return -EINVAL;
 
-	down(&hba->host_sem);
-	if (!ufshcd_is_user_access_allowed(hba)) {
-		err = -EBUSY;
-		goto out;
-	}
+	err = ufshcd_get_user_access(hba);
+	if (err)
+		return err;
+	ufshcd_hold(hba, false);
 
 	value = !!value;
 	if (value == hba->clk_scaling.is_enabled)
 		goto out;
-
-	ufshcd_rpm_get_sync(hba);
-	ufshcd_hold(hba, false);
-
 	hba->clk_scaling.is_enabled = value;
 
 	if (value) {
@@ -1578,10 +1589,9 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
 					__func__, err);
 	}
 
-	ufshcd_release(hba);
-	ufshcd_rpm_put_sync(hba);
 out:
-	up(&hba->host_sem);
+	ufshcd_release(hba);
+	ufshcd_put_user_access(hba);
 	return err ? err : count;
 }
 
@@ -4170,13 +4180,13 @@ int ufshcd_uic_hibern8_exit(struct ufs_hba *hba)
 }
 EXPORT_SYMBOL_GPL(ufshcd_uic_hibern8_exit);
 
-void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
+int ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
 {
 	unsigned long flags;
 	bool update = false;
 
 	if (!ufshcd_is_auto_hibern8_supported(hba))
-		return;
+		return 0;
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (hba->ahit != ahit) {
@@ -4187,12 +4197,17 @@ void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit)
 
 	if (update &&
 	    !pm_runtime_suspended(&hba->sdev_ufs_device->sdev_gendev)) {
-		ufshcd_rpm_get_sync(hba);
+		if (ufshcd_rpm_get_sync(hba)) {
+			ufshcd_rpm_put_sync(hba);
+			return -EBUSY;
+		}
 		ufshcd_hold(hba, false);
 		ufshcd_auto_hibern8_enable(hba);
 		ufshcd_release(hba);
 		ufshcd_rpm_put_sync(hba);
 	}
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ufshcd_auto_hibern8_update);
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 1e7fe73..3fc080d 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -101,6 +101,15 @@ struct uic_command {
 	struct completion done;
 };
 
+/* UFSHCD states */
+enum {
+	UFSHCD_STATE_RESET,
+	UFSHCD_STATE_ERROR,
+	UFSHCD_STATE_OPERATIONAL,
+	UFSHCD_STATE_EH_SCHEDULED_FATAL,
+	UFSHCD_STATE_EH_SCHEDULED_NON_FATAL,
+};
+
 /* Used to differentiate the power management options */
 enum ufs_pm_op {
 	UFS_RUNTIME_PM,
@@ -935,7 +944,9 @@ static inline bool ufshcd_is_wb_allowed(struct ufs_hba *hba)
 
 static inline bool ufshcd_is_user_access_allowed(struct ufs_hba *hba)
 {
-	return !hba->shutting_down;
+	return !hba->shutting_down && !hba->is_sys_suspended &&
+		!hba->is_wlu_sys_suspended &&
+		hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL;
 }
 
 #define ufshcd_writel(hba, val, reg)	\
@@ -1108,7 +1119,7 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 	enum flag_idn idn, u8 index, bool *flag_res);
 
 void ufshcd_auto_hibern8_enable(struct ufs_hba *hba);
-void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit);
+int ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit);
 void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, struct ufs_dev_fix *fixups);
 #define SD_ASCII_STD true
 #define SD_RAW false
@@ -1135,6 +1146,8 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba,
 int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable);
 int ufshcd_suspend_prepare(struct device *dev);
 void ufshcd_resume_complete(struct device *dev);
+int ufshcd_get_user_access(struct ufs_hba *hba);
+void ufshcd_put_user_access(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */
 static inline const char *ufshcd_get_var_name(struct ufs_hba *hba)
-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH 05/10] scsi: ufs: Complete the cmd before returning in queuecommand
  2021-06-23  7:35 ` [PATCH 05/10] scsi: ufs: Complete the cmd before returning in queuecommand Can Guo
@ 2021-06-23  7:39   ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-23  7:39 UTC (permalink / raw)
  To: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team, cang
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	linux-kernel

Sorry, please ignore this change, it is wrongly sent out along with this 
series...

On 2021-06-23 15:35, Can Guo wrote:
> Commit 7a7e66c65d4148fc3f23b058405bc9f102414fcb ("scsi: ufs: Fix a race
> condition between ufshcd_abort() and eh_work()") forgot to complete the
> cmd, which takes an occupied lrb, before returning in queuecommand. 
> This
> change adds the missing codes.
> 
> Fixes: 7a7e66c65d414 ("scsi: ufs: Fix a race condition between
> ufshcd_abort() and eh_work()")
> Signed-off-by: Can Guo <cang@codeaurora.org>
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 5f837c4..7fbc63e 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2758,6 +2758,16 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  		goto out;
>  	}
> 
> +	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
> +		if (hba->wlu_pm_op_in_progress) {
> +			set_host_byte(cmd, DID_BAD_TARGET);
> +			cmd->scsi_done(cmd);
> +		} else {
> +			err = SCSI_MLQUEUE_HOST_BUSY;
> +		}
> +		goto out;
> +	}
> +
>  	hba->req_abort_count = 0;
> 
>  	err = ufshcd_hold(hba, true);
> @@ -2768,15 +2778,6 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
>  	WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
>  		(hba->clk_gating.state != CLKS_ON));
> 
> -	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
> -		if (hba->wlu_pm_op_in_progress)
> -			set_host_byte(cmd, DID_BAD_TARGET);
> -		else
> -			err = SCSI_MLQUEUE_HOST_BUSY;
> -		ufshcd_release(hba);
> -		goto out;
> -	}
> -
>  	lrbp = &hba->lrb[tag];
>  	WARN_ON(lrbp->cmd);
>  	lrbp->cmd = cmd;

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 02/10] scsi: ufs: Add " Can Guo
@ 2021-06-23 12:33   ` Adrian Hunter
  2021-06-24  2:05     ` Can Guo
  2021-06-23 20:59   ` Bart Van Assche
  2021-06-24 17:35   ` Bart Van Assche
  2 siblings, 1 reply; 58+ messages in thread
From: Adrian Hunter @ 2021-06-23 12:33 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Satya Tangirala, Kiwoong Kim, Bart Van Assche, open list

On 23/06/21 10:35 am, Can Guo wrote:
> Add flags pm_op_in_progress and is_sys_suspended to track the status of hba
> runtime and system suspend/resume operations.
> 
> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 12 +++++++++++-
>  drivers/scsi/ufs/ufshcd.h |  4 ++++
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index c40ba1d..abe5f2d 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -551,6 +551,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>  	dev_err(hba->dev, "wlu_pm_op_in_progress=%d, is_wlu_sys_suspended=%d\n",
>  		hba->wlu_pm_op_in_progress, hba->is_wlu_sys_suspended);
> +	dev_err(hba->dev, "pm_op_in_progress=%d, is_sys_suspended=%d\n",
> +		hba->pm_op_in_progress, hba->is_sys_suspended);
>  	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
>  		hba->auto_bkops_enabled, hba->host->host_self_blocked);
>  	dev_err(hba->dev, "Clk gate=%d\n", hba->clk_gating.state);
> @@ -9141,6 +9143,8 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>  
>  	if (!hba->is_powered)
>  		return 0;
> +
> +	hba->pm_op_in_progress = true;
>  	/*
>  	 * Disable the host irq as host controller as there won't be any
>  	 * host controller transaction expected till resume.
	 */
	ufshcd_disable_irq(hba);
	ret = ufshcd_setup_clocks(hba, false);
	if (ret) {
		ufshcd_enable_irq(hba);

Is "hba->pm_op_in_progress = false" needed here?

		return ret;
	}



> @@ -9160,6 +9164,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>  	ufshcd_vreg_set_lpm(hba);
>  	/* Put the host controller in low power mode if possible */
>  	ufshcd_hba_vreg_set_lpm(hba);
> +	hba->pm_op_in_progress = false;
>  	return ret;
>  }
>  
> @@ -9179,6 +9184,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>  	if (!hba->is_powered)
>  		return 0;
>  
> +	hba->pm_op_in_progress = true;
>  	ufshcd_hba_vreg_set_hpm(hba);
>  	ret = ufshcd_vreg_set_hpm(hba);
>  	if (ret)
> @@ -9198,6 +9204,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>  out:
>  	if (ret)
>  		ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
> +	hba->pm_op_in_progress = false;
>  	return ret;
>  }
>  
> @@ -9222,6 +9229,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
>  	trace_ufshcd_system_suspend(dev_name(hba->dev), ret,
>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
> +	if (!ret)
> +		hba->is_sys_suspended = true;
>  	return ret;
>  }
>  EXPORT_SYMBOL(ufshcd_system_suspend);
> @@ -9247,7 +9256,8 @@ int ufshcd_system_resume(struct ufs_hba *hba)
>  	trace_ufshcd_system_resume(dev_name(hba->dev), ret,
>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
> -
> +	if (!ret)
> +		hba->is_sys_suspended = false;
>  	return ret;
>  }
>  EXPORT_SYMBOL(ufshcd_system_resume);
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 93aeeb3..1e7fe73 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -754,6 +754,8 @@ struct ufs_hba {
>  	struct device_attribute spm_lvl_attr;
>  	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in progress */
>  	bool wlu_pm_op_in_progress;
> +	/* A flag to tell whether ufshcd_suspend/resume() is in progress */
> +	bool pm_op_in_progress;
>  
>  	/* Auto-Hibernate Idle Timer register value */
>  	u32 ahit;
> @@ -841,6 +843,8 @@ struct ufs_hba {
>  	struct ufs_clk_scaling clk_scaling;
>  	/* A flag to tell whether the UFS device W-LU is system suspended */
>  	bool is_wlu_sys_suspended;
> +	/* A flag to tell whether hba is system suspended */
> +	bool is_sys_suspended;
>  
>  	enum bkops_status urgent_bkops_lvl;
>  	bool is_urgent_bkops_lvl_checked;
> 


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-23  7:35 ` [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume Can Guo
@ 2021-06-23 14:30   ` Adrian Hunter
  2021-06-24  2:16     ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Adrian Hunter @ 2021-06-23 14:30 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 23/06/21 10:35 am, Can Guo wrote:
> To protect system suspend/resume from being disturbed by error handling,
> instead of using host_sem, let error handler call lock_system_sleep() and
> unlock_system_sleep() which achieve the same purpose. Remove the host_sem
> used in suspend/resume paths to make the code more readable.
> 
> Suggested-by: Bart Van Assche <bvanassche@acm.org>
> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 3695dd2..a09e4a2 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>  
>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  {
> +	/*
> +	 * It is not safe to perform error handling while suspend or resume is
> +	 * in progress. Hence the lock_system_sleep() call.
> +	 */
> +	lock_system_sleep();

It looks to me like the system takes this lock quite early, even before
freezing tasks, so if anything needs the error handler to run it will
deadlock.

>  	ufshcd_rpm_get_sync(hba);
>  	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
>  	    hba->is_wlu_sys_suspended) {
> @@ -5951,6 +5956,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>  		ufshcd_clk_scaling_suspend(hba, false);
>  	ufshcd_clear_ua_wluns(hba);
>  	ufshcd_rpm_put(hba);
> +	unlock_system_sleep();
>  }
>  
>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device *dev)
>  	ktime_t start = ktime_get();
>  
>  	hba = shost_priv(sdev->host);
> -	down(&hba->host_sem);
>  
>  	if (pm_runtime_suspended(dev))
>  		goto out;
>  
>  	ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
> -	if (ret) {
> +	if (ret)
>  		dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  ret);
> -		up(&hba->host_sem);
> -	}
>  
>  out:
>  	if (!ret)
> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device *dev)
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>  	if (!ret)
>  		hba->is_wlu_sys_suspended = false;
> -	up(&hba->host_sem);
>  	return ret;
>  }
>  #endif
> 


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
@ 2021-06-23 20:05   ` Bart Van Assche
  2021-06-23 20:57     ` Bart Van Assche
  2021-06-23 20:42   ` Bjorn Andersson
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 20:05 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 6/23/21 12:35 AM, Can Guo wrote:
> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
> is_wlu_sys_suspended accordingly.

Hi Can,

My understanding is that power management operations must be submitted
to one particular UFS WLUN (hba->sdev_ufs_device). That makes the "wlu_"
part of the new names redundant. In other words, I like the current
names better than the new names. Unless if I missed something, consider
dropping this patch.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
  2021-06-23 20:05   ` Bart Van Assche
@ 2021-06-23 20:42   ` Bjorn Andersson
  2021-06-23 22:41     ` Bart Van Assche
  2021-06-24  2:04     ` Can Guo
  2021-06-24 17:32   ` Bart Van Assche
  2021-06-24 23:42   ` Bart Van Assche
  3 siblings, 2 replies; 58+ messages in thread
From: Bjorn Andersson @ 2021-06-23 20:42 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Kiwoong Kim, Satya Tangirala, Bart Van Assche,
	open list:ARM/QUALCOMM SUPPORT, open list

On Wed 23 Jun 02:35 CDT 2021, Can Guo wrote:

> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
> is_wlu_sys_suspended accordingly.
> 

This reflects what the change does, but the commit message is supposed
to capture "why".

Regards,
Bjorn

> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufs-qcom.c |  2 +-
>  drivers/scsi/ufs/ufshcd.c   | 30 +++++++++++++++---------------
>  drivers/scsi/ufs/ufshcd.h   |  6 ++++--
>  3 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
> index 9b1d18d..fbe21e0 100644
> --- a/drivers/scsi/ufs/ufs-qcom.c
> +++ b/drivers/scsi/ufs/ufs-qcom.c
> @@ -641,7 +641,7 @@ static int ufs_qcom_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  	if (err)
>  		return err;
>  
> -	hba->is_sys_suspended = false;
> +	hba->is_wlu_sys_suspended = false;
>  	return 0;
>  }
>  
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 25fe18a..c40ba1d 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -549,8 +549,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
>  		hba->saved_err, hba->saved_uic_err);
>  	dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n",
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
> -	dev_err(hba->dev, "PM in progress=%d, sys. suspended=%d\n",
> -		hba->pm_op_in_progress, hba->is_sys_suspended);
> +	dev_err(hba->dev, "wlu_pm_op_in_progress=%d, is_wlu_sys_suspended=%d\n",
> +		hba->wlu_pm_op_in_progress, hba->is_wlu_sys_suspended);
>  	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
>  		hba->auto_bkops_enabled, hba->host->host_self_blocked);
>  	dev_err(hba->dev, "Clk gate=%d\n", hba->clk_gating.state);
> @@ -1999,7 +1999,7 @@ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba)
>  	if (!hba->clk_scaling.active_reqs++)
>  		queue_resume_work = true;
>  
> -	if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) {
> +	if (!hba->clk_scaling.is_enabled || hba->wlu_pm_op_in_progress) {
>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
>  		return;
>  	}
> @@ -2734,7 +2734,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  		 * err handler blocked for too long. So, just fail the scsi cmd
>  		 * sent from PM ops, err handler can recover PM error anyways.
>  		 */
> -		if (hba->pm_op_in_progress) {
> +		if (hba->wlu_pm_op_in_progress) {
>  			hba->force_reset = true;
>  			set_host_byte(cmd, DID_BAD_TARGET);
>  			cmd->scsi_done(cmd);
> @@ -2767,7 +2767,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  		(hba->clk_gating.state != CLKS_ON));
>  
>  	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
> -		if (hba->pm_op_in_progress)
> +		if (hba->wlu_pm_op_in_progress)
>  			set_host_byte(cmd, DID_BAD_TARGET);
>  		else
>  			err = SCSI_MLQUEUE_HOST_BUSY;
> @@ -5116,7 +5116,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
>  			 * solution could be to abort the system suspend if
>  			 * UFS device needs urgent BKOPs.
>  			 */
> -			if (!hba->pm_op_in_progress &&
> +			if (!hba->wlu_pm_op_in_progress &&
>  			    !ufshcd_eh_in_progress(hba) &&
>  			    ufshcd_is_exception_event(lrbp->ucd_rsp_ptr))
>  				/* Flushed in suspend */
> @@ -5916,7 +5916,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  {
>  	ufshcd_rpm_get_sync(hba);
>  	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
> -	    hba->is_sys_suspended) {
> +	    hba->is_wlu_sys_suspended) {
>  		enum ufs_pm_op pm_op;
>  
>  		/*
> @@ -5933,7 +5933,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  		if (!ufshcd_is_clkgating_allowed(hba))
>  			ufshcd_setup_clocks(hba, true);
>  		ufshcd_release(hba);
> -		pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
> +		pm_op = hba->is_wlu_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
>  		ufshcd_vops_resume(hba, pm_op);
>  	} else {
>  		ufshcd_hold(hba, false);
> @@ -5976,7 +5976,7 @@ static void ufshcd_recover_pm_error(struct ufs_hba *hba)
>  	struct request_queue *q;
>  	int ret;
>  
> -	hba->is_sys_suspended = false;
> +	hba->is_wlu_sys_suspended = false;
>  	/*
>  	 * Set RPM status of wlun device to RPM_ACTIVE,
>  	 * this also clears its runtime error.
> @@ -8784,7 +8784,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  	enum ufs_dev_pwr_mode req_dev_pwr_mode;
>  	enum uic_link_state req_link_state;
>  
> -	hba->pm_op_in_progress = true;
> +	hba->wlu_pm_op_in_progress = true;
>  	if (pm_op != UFS_SHUTDOWN_PM) {
>  		pm_lvl = pm_op == UFS_RUNTIME_PM ?
>  			 hba->rpm_lvl : hba->spm_lvl;
> @@ -8919,7 +8919,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  		hba->clk_gating.is_suspended = false;
>  		ufshcd_release(hba);
>  	}
> -	hba->pm_op_in_progress = false;
> +	hba->wlu_pm_op_in_progress = false;
>  	return ret;
>  }
>  
> @@ -8928,7 +8928,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  	int ret;
>  	enum uic_link_state old_link_state = hba->uic_link_state;
>  
> -	hba->pm_op_in_progress = true;
> +	hba->wlu_pm_op_in_progress = true;
>  
>  	/*
>  	 * Call vendor specific resume callback. As these callbacks may access
> @@ -9006,7 +9006,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  		ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret);
>  	hba->clk_gating.is_suspended = false;
>  	ufshcd_release(hba);
> -	hba->pm_op_in_progress = false;
> +	hba->wlu_pm_op_in_progress = false;
>  	return ret;
>  }
>  
> @@ -9072,7 +9072,7 @@ static int ufshcd_wl_suspend(struct device *dev)
>  
>  out:
>  	if (!ret)
> -		hba->is_sys_suspended = true;
> +		hba->is_wlu_sys_suspended = true;
>  	trace_ufshcd_wl_suspend(dev_name(dev), ret,
>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
> @@ -9100,7 +9100,7 @@ static int ufshcd_wl_resume(struct device *dev)
>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>  	if (!ret)
> -		hba->is_sys_suspended = false;
> +		hba->is_wlu_sys_suspended = false;
>  	up(&hba->host_sem);
>  	return ret;
>  }
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index c98d540..93aeeb3 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -752,7 +752,8 @@ struct ufs_hba {
>  	enum ufs_pm_level spm_lvl;
>  	struct device_attribute rpm_lvl_attr;
>  	struct device_attribute spm_lvl_attr;
> -	int pm_op_in_progress;
> +	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in progress */
> +	bool wlu_pm_op_in_progress;
>  
>  	/* Auto-Hibernate Idle Timer register value */
>  	u32 ahit;
> @@ -838,7 +839,8 @@ struct ufs_hba {
>  
>  	struct devfreq *devfreq;
>  	struct ufs_clk_scaling clk_scaling;
> -	bool is_sys_suspended;
> +	/* A flag to tell whether the UFS device W-LU is system suspended */
> +	bool is_wlu_sys_suspended;
>  
>  	enum bkops_status urgent_bkops_lvl;
>  	bool is_urgent_bkops_lvl_checked;
> -- 
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23 20:05   ` Bart Van Assche
@ 2021-06-23 20:57     ` Bart Van Assche
  2021-06-24  2:02       ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 20:57 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 6/23/21 1:05 PM, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
>> is_wlu_sys_suspended accordingly.
> 
> My understanding is that power management operations must be submitted
> to one particular UFS WLUN (hba->sdev_ufs_device). That makes the "wlu_"
> part of the new names redundant. In other words, I like the current
> names better than the new names. Unless if I missed something, consider
> dropping this patch.

Hi Can,

Reviewing later patches in this series made me realize that there are
two families of suspend/resume functions. One family of functions
operates at the platform level while the other family operates at the
SCSI LUN level. My comments about the suspend/resume functions are as
follows:
- It seems redundant to me to have system suspend support at the SCSI
  LUN level (__ufshcd_wl_suspend(hba, UFS_SYSTEM_PM)) and also at the
  platform level. Since the platform device is a parent of the SCSI
  WLUN, can system suspend/resume support be left out from
  ufshcd_wl_pm_ops (or in other words, remove the .freeze and .thaw
  callbacks)? Do we really need two calls from the power management
  subsystem into the UFS driver for every system suspend and every
  system resume?
- Because of the device links (device_link_add()), the ufschd_wl_*()
  RPM callbacks are invoked after all LUNs have been suspended. I would
  appreciate it if the "ufshcd_wl_" prefix would be changed into
  "ufshcd_lun_" since that would make it more clear that these callbacks
  are associated with all LUNs and not only with the WLUN through which
  power management commands are submitted.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 02/10] scsi: ufs: Add " Can Guo
  2021-06-23 12:33   ` Adrian Hunter
@ 2021-06-23 20:59   ` Bart Van Assche
  2021-06-24  2:07     ` Can Guo
  2021-06-24 17:35   ` Bart Van Assche
  2 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 20:59 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Satya Tangirala, Kiwoong Kim, open list

On 6/23/21 12:35 AM, Can Guo wrote:
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 93aeeb3..1e7fe73 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -754,6 +754,8 @@ struct ufs_hba {
>  	struct device_attribute spm_lvl_attr;
>  	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in progress */
>  	bool wlu_pm_op_in_progress;
> +	/* A flag to tell whether ufshcd_suspend/resume() is in progress */
> +	bool pm_op_in_progress;
>  
>  	/* Auto-Hibernate Idle Timer register value */
>  	u32 ahit;
> @@ -841,6 +843,8 @@ struct ufs_hba {
>  	struct ufs_clk_scaling clk_scaling;
>  	/* A flag to tell whether the UFS device W-LU is system suspended */
>  	bool is_wlu_sys_suspended;
> +	/* A flag to tell whether hba is system suspended */
> +	bool is_sys_suspended;
>  
>  	enum bkops_status urgent_bkops_lvl;
>  	bool is_urgent_bkops_lvl_checked;

It is not yet clear to me whether we really need these new member
variables. If these are retained, please rename pm_op_in_progress into
platform_pm_op_in_progress and is_sys_suspended into
platform_is_sys_suspended.

Thanks,

Bart.



^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops
  2021-06-23  7:35 ` [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops Can Guo
@ 2021-06-23 21:08   ` Bart Van Assche
  2021-06-24  2:11     ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 21:08 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 12:35 AM, Can Guo wrote:
> rpm_get_suppliers() is returning an error only if the error is negative.
> However, ufshcd_wl_resume() may return a positive error code, e.g., when
> hibern8 or SSU cmd fails. Make the positive return value a negative error
> code so that consumers are aware of any resume failure from their supplier.
> Make the same change to ufshcd_wl_suspend() just to keep symmetry.
> 
> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index abe5f2d..ee70522 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -8922,7 +8922,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  		ufshcd_release(hba);
>  	}
>  	hba->wlu_pm_op_in_progress = false;
> -	return ret;
> +	return ret <= 0 ? ret : -EINVAL;
>  }
>  
>  static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
> @@ -9009,7 +9009,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
>  	hba->clk_gating.is_suspended = false;
>  	ufshcd_release(hba);
>  	hba->wlu_pm_op_in_progress = false;
> -	return ret;
> +	return ret <= 0 ? ret : -EINVAL;
>  }

I think the above patch shows that indicating failure by either
returning a positive or a negative value is a booby trap. Please modify
ufshcd_send_request_sense() and ufshcd_set_dev_pwr_mode() such that
these return a value that is either zero or negative. Are there any
other functions than that need to be modified?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 04/10] scsi: ufs: Enable IRQ after enabling clocks in error handling preparation
  2021-06-23  7:35 ` [PATCH v4 04/10] scsi: ufs: Enable IRQ after enabling clocks in error handling preparation Can Guo
@ 2021-06-23 21:20   ` Bart Van Assche
  0 siblings, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 21:20 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 12:35 AM, Can Guo wrote:
> In error handling preparation, enable IRQ after enabling clocks in case
> unclocked register access happens.
> 
> Fixes: c72e79c0ad2bd ("scsi: ufs: Recover HBA runtime PM error in error handler")
> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index ee70522..5f837c4 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -5927,13 +5927,14 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  		 * can be OFF or in LPM.
>  		 */
>  		ufshcd_setup_hba_vreg(hba, true);
> -		ufshcd_enable_irq(hba);
>  		ufshcd_setup_vreg(hba, true);
>  		ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq);
>  		ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq2);
>  		ufshcd_hold(hba, false);
> -		if (!ufshcd_is_clkgating_allowed(hba))
> +		if (!ufshcd_is_clkgating_allowed(hba)) {
>  			ufshcd_setup_clocks(hba, true);
> +			ufshcd_enable_irq(hba);
> +		}
>  		ufshcd_release(hba);
>  		pm_op = hba->is_wlu_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
>  		ufshcd_vops_resume(hba, pm_op);

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 05/10] scsi: ufs: Remove a redundant tag check in ufshcd_queuecommand()
  2021-06-23  7:35 ` [PATCH v4 05/10] scsi: ufs: Remove a redundant tag check in ufshcd_queuecommand() Can Guo
@ 2021-06-23 21:24   ` Bart Van Assche
  0 siblings, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 21:24 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 12:35 AM, Can Guo wrote:
> Since commit a45f937110fa6b0c2c06a5d3ef026963a5759050 ("scsi: ufs: Optimize

Please shorten commit IDs to 12 characters.

> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 5f837c4..3695dd2 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2768,15 +2768,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  	WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
>  		(hba->clk_gating.state != CLKS_ON));
>  
> -	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
> -		if (hba->wlu_pm_op_in_progress)
> -			set_host_byte(cmd, DID_BAD_TARGET);
> -		else
> -			err = SCSI_MLQUEUE_HOST_BUSY;
> -		ufshcd_release(hba);
> -		goto out;
> -	}

I have never encountered code like the above in any other SCSI LLD. Anyway:

Reviewed-by: Bart Van Assche <bvanassche@acm.org>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 07/10] scsi: ufs: Simplify error handling preparation
  2021-06-23  7:35 ` [PATCH v4 07/10] scsi: ufs: Simplify error handling preparation Can Guo
@ 2021-06-23 21:30   ` Bart Van Assche
  0 siblings, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 21:30 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 12:35 AM, Can Guo wrote:
> -static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
> +static int ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  {
>  	/*
>  	 * It is not safe to perform error handling while suspend or resume is
>  	 * in progress. Hence the lock_system_sleep() call.
>  	 */
>  	lock_system_sleep();
> +	/*
> +	 * Exclusively call pm_runtime_get_sync(hba->dev) once, in case
> +	 * following ufshcd_rpm_get_sync() fails.
> +	 */
> +	pm_runtime_get_sync(hba->dev);
> +	if (pm_runtime_suspended(hba->dev) || hba->is_sys_suspended) {
> +		pm_runtime_put(hba->dev);
> +		unlock_system_sleep();
> +		return -EINVAL;
> +	}

There is code present in ufshcd_queuecommand() that may trigger data
corruption to prevent that the above pm_runtime_get_sync() call triggers
a deadlock. I think we need a better solution.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests
  2021-06-23  7:35 ` [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests Can Guo
@ 2021-06-23 21:33   ` Bart Van Assche
  2021-06-24  4:16     ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 21:33 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 12:35 AM, Can Guo wrote:
> @@ -2737,7 +2737,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  		 * err handler blocked for too long. So, just fail the scsi cmd
>  		 * sent from PM ops, err handler can recover PM error anyways.
>  		 */
> -		if (hba->wlu_pm_op_in_progress) {
> +		if (cmd->request->rq_flags & RQF_PM) {
>  			hba->force_reset = true;
>  			set_host_byte(cmd, DID_BAD_TARGET);
>  			cmd->scsi_done(cmd);

I'm still concerned that the above code may trigger data corruption. I
prefer that the above code is removed instead of being modified.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access
  2021-06-23  7:35 ` [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access Can Guo
@ 2021-06-23 21:51   ` Bart Van Assche
  2021-06-24  2:23     ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 21:51 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Adrian Hunter, Bean Huo, Stanley Chu,
	Keoseong Park, Gustavo A. R. Silva, Jaegeuk Kim, Kiwoong Kim,
	Satya Tangirala, open list

On 6/23/21 12:35 AM, Can Guo wrote:
> +int ufshcd_get_user_access(struct ufs_hba *hba)
> +__acquires(&hba->host_sem)
> +{
> +	down(&hba->host_sem);
> +	if (!ufshcd_is_user_access_allowed(hba)) {
> +		up(&hba->host_sem);
> +		return -EBUSY;
> +	}
> +	if (ufshcd_rpm_get_sync(hba)) {
> +		ufshcd_rpm_put_sync(hba);
> +		up(&hba->host_sem);
> +		return -EBUSY;
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(ufshcd_get_user_access);
> +
> +void ufshcd_put_user_access(struct ufs_hba *hba)
> +__releases(&hba->host_sem)
> +{
> +	ufshcd_rpm_put_sync(hba);
> +	up(&hba->host_sem);
> +}
> +EXPORT_SYMBOL_GPL(ufshcd_put_user_access);

Please indent __acquires() and __releases() annotations by one tab as is
done elsewhere in the kernel.

>  static inline bool ufshcd_is_user_access_allowed(struct ufs_hba *hba)
>  {
> -	return !hba->shutting_down;
> +	return !hba->shutting_down && !hba->is_sys_suspended &&
> +		!hba->is_wlu_sys_suspended &&
> +		hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL;
>  }

Is my understanding of the following correct?
- ufshcd_is_user_access_allowed() is not in the hot path and hence
  should not be inline.
- The hba->shutting_down member variable is set from inside a shutdown
  callback. Hence, the hba->shutting_down test can be left out since
  no UFS sysfs attributes are accessed after shutdown has started.
- During system suspend, user space software is paused before the device
  driver freeze callbacks are invoked. Hence, the hba->is_sys_suspended
  check can be left out.
- If a LUN is runtime suspended, it should be resumed if accessed from
  user space instead of failing user space accesses. In other words, the
  hba->is_wlu_sys_suspended check seems inappropriate to me.
- If the HBA is not in an operational state, user space accesses
  should be blocked until error handling has finished. After error
  handling has finished, the user space access should fail if and only
  if error handling failed.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23 20:42   ` Bjorn Andersson
@ 2021-06-23 22:41     ` Bart Van Assche
  2021-06-24  2:04     ` Can Guo
  1 sibling, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-23 22:41 UTC (permalink / raw)
  To: Bjorn Andersson, Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 6/23/21 1:42 PM, Bjorn Andersson wrote:
> On Wed 23 Jun 02:35 CDT 2021, Can Guo wrote:
>> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
>> is_wlu_sys_suspended accordingly.
> 
> This reflects what the change does, but the commit message is supposed
> to capture "why".

What's even better is to describe both: what has been changed and also
why a change has been made. See also
https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23 20:57     ` Bart Van Assche
@ 2021-06-24  2:02       ` Can Guo
  2021-06-24  2:34         ` Can Guo
  2021-06-24  6:04         ` Adrian Hunter
  0 siblings, 2 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:02 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

Hi Bart,

On 2021-06-24 04:57, Bart Van Assche wrote:
> On 6/23/21 1:05 PM, Bart Van Assche wrote:
>> On 6/23/21 12:35 AM, Can Guo wrote:
>>> Rename pm_op_in_progress and is_sys_suspended to 
>>> wlu_pm_op_in_progress and
>>> is_wlu_sys_suspended accordingly.
>> 
>> My understanding is that power management operations must be submitted
>> to one particular UFS WLUN (hba->sdev_ufs_device). That makes the 
>> "wlu_"
>> part of the new names redundant. In other words, I like the current
>> names better than the new names. Unless if I missed something, 
>> consider
>> dropping this patch.
> 
> Hi Can,
> 
> Reviewing later patches in this series made me realize that there are
> two families of suspend/resume functions. One family of functions
> operates at the platform level while the other family operates at the
> SCSI LUN level. My comments about the suspend/resume functions are as
> follows:
> - It seems redundant to me to have system suspend support at the SCSI
>   LUN level (__ufshcd_wl_suspend(hba, UFS_SYSTEM_PM)) and also at the
>   platform level. Since the platform device is a parent of the SCSI
>   WLUN, can system suspend/resume support be left out from
>   ufshcd_wl_pm_ops (or in other words, remove the .freeze and .thaw
>   callbacks)? Do we really need two calls from the power management
>   subsystem into the UFS driver for every system suspend and every
>   system resume?

Asutosh and Adrian should be the right persons to answer this, since
they've been working together on that huge change for 4 months -

https://lore.kernel.org/patchwork/patch/1417696/

In short, we need a dedicated suspend/resume ops for the UFS device
W-LU because one cannot send requests (not even PM requests) after the
device is runtime suspended - sending SSU cmds in hba suspend/resume
cannot pass through blk_queue_enter() as SSU cmd is sent to the UFS
device W-LU scsi device (by now it is runtime suspended) but not the
hba device.

Of course we can keep the old way and send the SSU cmd through a
request queue without block layer PM initialized (hba->cmd_queue for
example, by pointing cmd_queue->dev to the UFS device W-LU scsi device),
but that would look like a hack.

> - Because of the device links (device_link_add()), the ufschd_wl_*()
>   RPM callbacks are invoked after all LUNs have been suspended. I would
>   appreciate it if the "ufshcd_wl_" prefix would be changed into
>   "ufshcd_lun_" since that would make it more clear that these 
> callbacks
>   are associated with all LUNs and not only with the WLUN through which
>   power management commands are submitted.
> 

Sure, we will do that later.

Thanks,

Can Guo.

> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23 20:42   ` Bjorn Andersson
  2021-06-23 22:41     ` Bart Van Assche
@ 2021-06-24  2:04     ` Can Guo
  1 sibling, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:04 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Kiwoong Kim, Satya Tangirala, Bart Van Assche,
	open list:ARM/QUALCOMM SUPPORT, open list

Hi Bjorn,

On 2021-06-24 04:42, Bjorn Andersson wrote:
> On Wed 23 Jun 02:35 CDT 2021, Can Guo wrote:
> 
>> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress 
>> and
>> is_wlu_sys_suspended accordingly.
>> 
> 
> This reflects what the change does, but the commit message is supposed
> to capture "why".
> 

Sure, I will add the "why" in next ver.

Thanks,

Can Guo.

> Regards,
> Bjorn
> 
>> Signed-off-by: Can Guo <cang@codeaurora.org>
>> ---
>>  drivers/scsi/ufs/ufs-qcom.c |  2 +-
>>  drivers/scsi/ufs/ufshcd.c   | 30 +++++++++++++++---------------
>>  drivers/scsi/ufs/ufshcd.h   |  6 ++++--
>>  3 files changed, 20 insertions(+), 18 deletions(-)
>> 
>> diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
>> index 9b1d18d..fbe21e0 100644
>> --- a/drivers/scsi/ufs/ufs-qcom.c
>> +++ b/drivers/scsi/ufs/ufs-qcom.c
>> @@ -641,7 +641,7 @@ static int ufs_qcom_resume(struct ufs_hba *hba, 
>> enum ufs_pm_op pm_op)
>>  	if (err)
>>  		return err;
>> 
>> -	hba->is_sys_suspended = false;
>> +	hba->is_wlu_sys_suspended = false;
>>  	return 0;
>>  }
>> 
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index 25fe18a..c40ba1d 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -549,8 +549,8 @@ static void ufshcd_print_host_state(struct ufs_hba 
>> *hba)
>>  		hba->saved_err, hba->saved_uic_err);
>>  	dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n",
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>> -	dev_err(hba->dev, "PM in progress=%d, sys. suspended=%d\n",
>> -		hba->pm_op_in_progress, hba->is_sys_suspended);
>> +	dev_err(hba->dev, "wlu_pm_op_in_progress=%d, 
>> is_wlu_sys_suspended=%d\n",
>> +		hba->wlu_pm_op_in_progress, hba->is_wlu_sys_suspended);
>>  	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
>>  		hba->auto_bkops_enabled, hba->host->host_self_blocked);
>>  	dev_err(hba->dev, "Clk gate=%d\n", hba->clk_gating.state);
>> @@ -1999,7 +1999,7 @@ static void ufshcd_clk_scaling_start_busy(struct 
>> ufs_hba *hba)
>>  	if (!hba->clk_scaling.active_reqs++)
>>  		queue_resume_work = true;
>> 
>> -	if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) {
>> +	if (!hba->clk_scaling.is_enabled || hba->wlu_pm_op_in_progress) {
>>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
>>  		return;
>>  	}
>> @@ -2734,7 +2734,7 @@ static int ufshcd_queuecommand(struct Scsi_Host 
>> *host, struct scsi_cmnd *cmd)
>>  		 * err handler blocked for too long. So, just fail the scsi cmd
>>  		 * sent from PM ops, err handler can recover PM error anyways.
>>  		 */
>> -		if (hba->pm_op_in_progress) {
>> +		if (hba->wlu_pm_op_in_progress) {
>>  			hba->force_reset = true;
>>  			set_host_byte(cmd, DID_BAD_TARGET);
>>  			cmd->scsi_done(cmd);
>> @@ -2767,7 +2767,7 @@ static int ufshcd_queuecommand(struct Scsi_Host 
>> *host, struct scsi_cmnd *cmd)
>>  		(hba->clk_gating.state != CLKS_ON));
>> 
>>  	if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
>> -		if (hba->pm_op_in_progress)
>> +		if (hba->wlu_pm_op_in_progress)
>>  			set_host_byte(cmd, DID_BAD_TARGET);
>>  		else
>>  			err = SCSI_MLQUEUE_HOST_BUSY;
>> @@ -5116,7 +5116,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, 
>> struct ufshcd_lrb *lrbp)
>>  			 * solution could be to abort the system suspend if
>>  			 * UFS device needs urgent BKOPs.
>>  			 */
>> -			if (!hba->pm_op_in_progress &&
>> +			if (!hba->wlu_pm_op_in_progress &&
>>  			    !ufshcd_eh_in_progress(hba) &&
>>  			    ufshcd_is_exception_event(lrbp->ucd_rsp_ptr))
>>  				/* Flushed in suspend */
>> @@ -5916,7 +5916,7 @@ static void ufshcd_err_handling_prepare(struct 
>> ufs_hba *hba)
>>  {
>>  	ufshcd_rpm_get_sync(hba);
>>  	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) 
>> ||
>> -	    hba->is_sys_suspended) {
>> +	    hba->is_wlu_sys_suspended) {
>>  		enum ufs_pm_op pm_op;
>> 
>>  		/*
>> @@ -5933,7 +5933,7 @@ static void ufshcd_err_handling_prepare(struct 
>> ufs_hba *hba)
>>  		if (!ufshcd_is_clkgating_allowed(hba))
>>  			ufshcd_setup_clocks(hba, true);
>>  		ufshcd_release(hba);
>> -		pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
>> +		pm_op = hba->is_wlu_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
>>  		ufshcd_vops_resume(hba, pm_op);
>>  	} else {
>>  		ufshcd_hold(hba, false);
>> @@ -5976,7 +5976,7 @@ static void ufshcd_recover_pm_error(struct 
>> ufs_hba *hba)
>>  	struct request_queue *q;
>>  	int ret;
>> 
>> -	hba->is_sys_suspended = false;
>> +	hba->is_wlu_sys_suspended = false;
>>  	/*
>>  	 * Set RPM status of wlun device to RPM_ACTIVE,
>>  	 * this also clears its runtime error.
>> @@ -8784,7 +8784,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba 
>> *hba, enum ufs_pm_op pm_op)
>>  	enum ufs_dev_pwr_mode req_dev_pwr_mode;
>>  	enum uic_link_state req_link_state;
>> 
>> -	hba->pm_op_in_progress = true;
>> +	hba->wlu_pm_op_in_progress = true;
>>  	if (pm_op != UFS_SHUTDOWN_PM) {
>>  		pm_lvl = pm_op == UFS_RUNTIME_PM ?
>>  			 hba->rpm_lvl : hba->spm_lvl;
>> @@ -8919,7 +8919,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba 
>> *hba, enum ufs_pm_op pm_op)
>>  		hba->clk_gating.is_suspended = false;
>>  		ufshcd_release(hba);
>>  	}
>> -	hba->pm_op_in_progress = false;
>> +	hba->wlu_pm_op_in_progress = false;
>>  	return ret;
>>  }
>> 
>> @@ -8928,7 +8928,7 @@ static int __ufshcd_wl_resume(struct ufs_hba 
>> *hba, enum ufs_pm_op pm_op)
>>  	int ret;
>>  	enum uic_link_state old_link_state = hba->uic_link_state;
>> 
>> -	hba->pm_op_in_progress = true;
>> +	hba->wlu_pm_op_in_progress = true;
>> 
>>  	/*
>>  	 * Call vendor specific resume callback. As these callbacks may 
>> access
>> @@ -9006,7 +9006,7 @@ static int __ufshcd_wl_resume(struct ufs_hba 
>> *hba, enum ufs_pm_op pm_op)
>>  		ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret);
>>  	hba->clk_gating.is_suspended = false;
>>  	ufshcd_release(hba);
>> -	hba->pm_op_in_progress = false;
>> +	hba->wlu_pm_op_in_progress = false;
>>  	return ret;
>>  }
>> 
>> @@ -9072,7 +9072,7 @@ static int ufshcd_wl_suspend(struct device *dev)
>> 
>>  out:
>>  	if (!ret)
>> -		hba->is_sys_suspended = true;
>> +		hba->is_wlu_sys_suspended = true;
>>  	trace_ufshcd_wl_suspend(dev_name(dev), ret,
>>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>> @@ -9100,7 +9100,7 @@ static int ufshcd_wl_resume(struct device *dev)
>>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>>  	if (!ret)
>> -		hba->is_sys_suspended = false;
>> +		hba->is_wlu_sys_suspended = false;
>>  	up(&hba->host_sem);
>>  	return ret;
>>  }
>> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
>> index c98d540..93aeeb3 100644
>> --- a/drivers/scsi/ufs/ufshcd.h
>> +++ b/drivers/scsi/ufs/ufshcd.h
>> @@ -752,7 +752,8 @@ struct ufs_hba {
>>  	enum ufs_pm_level spm_lvl;
>>  	struct device_attribute rpm_lvl_attr;
>>  	struct device_attribute spm_lvl_attr;
>> -	int pm_op_in_progress;
>> +	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in 
>> progress */
>> +	bool wlu_pm_op_in_progress;
>> 
>>  	/* Auto-Hibernate Idle Timer register value */
>>  	u32 ahit;
>> @@ -838,7 +839,8 @@ struct ufs_hba {
>> 
>>  	struct devfreq *devfreq;
>>  	struct ufs_clk_scaling clk_scaling;
>> -	bool is_sys_suspended;
>> +	/* A flag to tell whether the UFS device W-LU is system suspended */
>> +	bool is_wlu_sys_suspended;
>> 
>>  	enum bkops_status urgent_bkops_lvl;
>>  	bool is_urgent_bkops_lvl_checked;
>> --
>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
>> Linux Foundation Collaborative Project.
>> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
  2021-06-23 12:33   ` Adrian Hunter
@ 2021-06-24  2:05     ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:05 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Satya Tangirala, Kiwoong Kim, Bart Van Assche, open list

Hi Adrian,

On 2021-06-23 20:33, Adrian Hunter wrote:
> On 23/06/21 10:35 am, Can Guo wrote:
>> Add flags pm_op_in_progress and is_sys_suspended to track the status 
>> of hba
>> runtime and system suspend/resume operations.
>> 
>> Signed-off-by: Can Guo <cang@codeaurora.org>
>> ---
>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++++++-
>>  drivers/scsi/ufs/ufshcd.h |  4 ++++
>>  2 files changed, 15 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index c40ba1d..abe5f2d 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -551,6 +551,8 @@ static void ufshcd_print_host_state(struct ufs_hba 
>> *hba)
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>>  	dev_err(hba->dev, "wlu_pm_op_in_progress=%d, 
>> is_wlu_sys_suspended=%d\n",
>>  		hba->wlu_pm_op_in_progress, hba->is_wlu_sys_suspended);
>> +	dev_err(hba->dev, "pm_op_in_progress=%d, is_sys_suspended=%d\n",
>> +		hba->pm_op_in_progress, hba->is_sys_suspended);
>>  	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
>>  		hba->auto_bkops_enabled, hba->host->host_self_blocked);
>>  	dev_err(hba->dev, "Clk gate=%d\n", hba->clk_gating.state);
>> @@ -9141,6 +9143,8 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>> 
>>  	if (!hba->is_powered)
>>  		return 0;
>> +
>> +	hba->pm_op_in_progress = true;
>>  	/*
>>  	 * Disable the host irq as host controller as there won't be any
>>  	 * host controller transaction expected till resume.
> 	 */
> 	ufshcd_disable_irq(hba);
> 	ret = ufshcd_setup_clocks(hba, false);
> 	if (ret) {
> 		ufshcd_enable_irq(hba);
> 
> Is "hba->pm_op_in_progress = false" needed here?
> 
> 		return ret;
> 	}
> 
> 

You are right, I missed it. Will add it in next ver. Thanks!

Regards,

Can Guo.

> 
>> @@ -9160,6 +9164,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>>  	ufshcd_vreg_set_lpm(hba);
>>  	/* Put the host controller in low power mode if possible */
>>  	ufshcd_hba_vreg_set_lpm(hba);
>> +	hba->pm_op_in_progress = false;
>>  	return ret;
>>  }
>> 
>> @@ -9179,6 +9184,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>>  	if (!hba->is_powered)
>>  		return 0;
>> 
>> +	hba->pm_op_in_progress = true;
>>  	ufshcd_hba_vreg_set_hpm(hba);
>>  	ret = ufshcd_vreg_set_hpm(hba);
>>  	if (ret)
>> @@ -9198,6 +9204,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>>  out:
>>  	if (ret)
>>  		ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
>> +	hba->pm_op_in_progress = false;
>>  	return ret;
>>  }
>> 
>> @@ -9222,6 +9229,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
>>  	trace_ufshcd_system_suspend(dev_name(hba->dev), ret,
>>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>> +	if (!ret)
>> +		hba->is_sys_suspended = true;
>>  	return ret;
>>  }
>>  EXPORT_SYMBOL(ufshcd_system_suspend);
>> @@ -9247,7 +9256,8 @@ int ufshcd_system_resume(struct ufs_hba *hba)
>>  	trace_ufshcd_system_resume(dev_name(hba->dev), ret,
>>  		ktime_to_us(ktime_sub(ktime_get(), start)),
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>> -
>> +	if (!ret)
>> +		hba->is_sys_suspended = false;
>>  	return ret;
>>  }
>>  EXPORT_SYMBOL(ufshcd_system_resume);
>> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
>> index 93aeeb3..1e7fe73 100644
>> --- a/drivers/scsi/ufs/ufshcd.h
>> +++ b/drivers/scsi/ufs/ufshcd.h
>> @@ -754,6 +754,8 @@ struct ufs_hba {
>>  	struct device_attribute spm_lvl_attr;
>>  	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in 
>> progress */
>>  	bool wlu_pm_op_in_progress;
>> +	/* A flag to tell whether ufshcd_suspend/resume() is in progress */
>> +	bool pm_op_in_progress;
>> 
>>  	/* Auto-Hibernate Idle Timer register value */
>>  	u32 ahit;
>> @@ -841,6 +843,8 @@ struct ufs_hba {
>>  	struct ufs_clk_scaling clk_scaling;
>>  	/* A flag to tell whether the UFS device W-LU is system suspended */
>>  	bool is_wlu_sys_suspended;
>> +	/* A flag to tell whether hba is system suspended */
>> +	bool is_sys_suspended;
>> 
>>  	enum bkops_status urgent_bkops_lvl;
>>  	bool is_urgent_bkops_lvl_checked;
>> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
  2021-06-23 20:59   ` Bart Van Assche
@ 2021-06-24  2:07     ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:07 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Satya Tangirala, Kiwoong Kim, open list

On 2021-06-24 04:59, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
>> index 93aeeb3..1e7fe73 100644
>> --- a/drivers/scsi/ufs/ufshcd.h
>> +++ b/drivers/scsi/ufs/ufshcd.h
>> @@ -754,6 +754,8 @@ struct ufs_hba {
>>  	struct device_attribute spm_lvl_attr;
>>  	/* A flag to tell whether __ufshcd_wl_suspend/resume() is in 
>> progress */
>>  	bool wlu_pm_op_in_progress;
>> +	/* A flag to tell whether ufshcd_suspend/resume() is in progress */
>> +	bool pm_op_in_progress;
>> 
>>  	/* Auto-Hibernate Idle Timer register value */
>>  	u32 ahit;
>> @@ -841,6 +843,8 @@ struct ufs_hba {
>>  	struct ufs_clk_scaling clk_scaling;
>>  	/* A flag to tell whether the UFS device W-LU is system suspended */
>>  	bool is_wlu_sys_suspended;
>> +	/* A flag to tell whether hba is system suspended */
>> +	bool is_sys_suspended;
>> 
>>  	enum bkops_status urgent_bkops_lvl;
>>  	bool is_urgent_bkops_lvl_checked;
> 
> It is not yet clear to me whether we really need these new member
> variables. If these are retained, please rename pm_op_in_progress into
> platform_pm_op_in_progress and is_sys_suspended into
> platform_is_sys_suspended.

Hi Bart,

These flags are informative when we debug some UFS issues and they
are used by later changes. Sure, I will modify the namings.

Thanks,

Can Guo.

> 
> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops
  2021-06-23 21:08   ` Bart Van Assche
@ 2021-06-24  2:11     ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:11 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-24 05:08, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> rpm_get_suppliers() is returning an error only if the error is 
>> negative.
>> However, ufshcd_wl_resume() may return a positive error code, e.g., 
>> when
>> hibern8 or SSU cmd fails. Make the positive return value a negative 
>> error
>> code so that consumers are aware of any resume failure from their 
>> supplier.
>> Make the same change to ufshcd_wl_suspend() just to keep symmetry.
>> 
>> Signed-off-by: Can Guo <cang@codeaurora.org>
>> ---
>>  drivers/scsi/ufs/ufshcd.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index abe5f2d..ee70522 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -8922,7 +8922,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba 
>> *hba, enum ufs_pm_op pm_op)
>>  		ufshcd_release(hba);
>>  	}
>>  	hba->wlu_pm_op_in_progress = false;
>> -	return ret;
>> +	return ret <= 0 ? ret : -EINVAL;
>>  }
>> 
>>  static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op 
>> pm_op)
>> @@ -9009,7 +9009,7 @@ static int __ufshcd_wl_resume(struct ufs_hba 
>> *hba, enum ufs_pm_op pm_op)
>>  	hba->clk_gating.is_suspended = false;
>>  	ufshcd_release(hba);
>>  	hba->wlu_pm_op_in_progress = false;
>> -	return ret;
>> +	return ret <= 0 ? ret : -EINVAL;
>>  }
> 
> I think the above patch shows that indicating failure by either
> returning a positive or a negative value is a booby trap. Please modify
> ufshcd_send_request_sense() and ufshcd_set_dev_pwr_mode() such that
> these return a value that is either zero or negative. Are there any
> other functions than that need to be modified?

Yes, there are more of them - both enter/exit_hibern8() and 
reset_and_restore()
can return positive values. I will modify all of them in next ver.

Thanks,

Can Guo.

> 
> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-23 14:30   ` Adrian Hunter
@ 2021-06-24  2:16     ` Can Guo
  2021-06-24  5:52       ` Adrian Hunter
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:16 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-23 22:30, Adrian Hunter wrote:
> On 23/06/21 10:35 am, Can Guo wrote:
>> To protect system suspend/resume from being disturbed by error 
>> handling,
>> instead of using host_sem, let error handler call lock_system_sleep() 
>> and
>> unlock_system_sleep() which achieve the same purpose. Remove the 
>> host_sem
>> used in suspend/resume paths to make the code more readable.
>> 
>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>> Signed-off-by: Can Guo <cang@codeaurora.org>
>> ---
>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>  1 file changed, 7 insertions(+), 5 deletions(-)
>> 
>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>> index 3695dd2..a09e4a2 100644
>> --- a/drivers/scsi/ufs/ufshcd.c
>> +++ b/drivers/scsi/ufs/ufshcd.c
>> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct 
>> ufs_hba *hba, bool suspend)
>> 
>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>  {
>> +	/*
>> +	 * It is not safe to perform error handling while suspend or resume 
>> is
>> +	 * in progress. Hence the lock_system_sleep() call.
>> +	 */
>> +	lock_system_sleep();
> 
> It looks to me like the system takes this lock quite early, even before
> freezing tasks, so if anything needs the error handler to run it will
> deadlock.

Hi Adrian,

UFS/hba system suspend/resume does not invoke or call error handling in 
a
synchronous way. So, whatever UFS errors (which schedules the error 
handler)
happens during suspend/resume, error handler will just wait here till 
system
suspend/resume release the lock. Hence no worries of deadlock here.

Thanks,

Can Guo.

> 
>>  	ufshcd_rpm_get_sync(hba);
>>  	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) 
>> ||
>>  	    hba->is_wlu_sys_suspended) {
>> @@ -5951,6 +5956,7 @@ static void ufshcd_err_handling_unprepare(struct 
>> ufs_hba *hba)
>>  		ufshcd_clk_scaling_suspend(hba, false);
>>  	ufshcd_clear_ua_wluns(hba);
>>  	ufshcd_rpm_put(hba);
>> +	unlock_system_sleep();
>>  }
>> 
>>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba 
>> *hba)
>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device 
>> *dev)
>>  	ktime_t start = ktime_get();
>> 
>>  	hba = shost_priv(sdev->host);
>> -	down(&hba->host_sem);
>> 
>>  	if (pm_runtime_suspended(dev))
>>  		goto out;
>> 
>>  	ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>> -	if (ret) {
>> +	if (ret)
>>  		dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  ret);
>> -		up(&hba->host_sem);
>> -	}
>> 
>>  out:
>>  	if (!ret)
>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device *dev)
>>  		hba->curr_dev_pwr_mode, hba->uic_link_state);
>>  	if (!ret)
>>  		hba->is_wlu_sys_suspended = false;
>> -	up(&hba->host_sem);
>>  	return ret;
>>  }
>>  #endif
>> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access
  2021-06-23 21:51   ` Bart Van Assche
@ 2021-06-24  2:23     ` Can Guo
  2021-06-24 22:25       ` Bart Van Assche
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:23 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Adrian Hunter, Bean Huo, Stanley Chu,
	Keoseong Park, Gustavo A. R. Silva, Jaegeuk Kim, Kiwoong Kim,
	Satya Tangirala, open list

Hi Bart,

On 2021-06-24 05:51, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> +int ufshcd_get_user_access(struct ufs_hba *hba)
>> +__acquires(&hba->host_sem)
>> +{
>> +	down(&hba->host_sem);
>> +	if (!ufshcd_is_user_access_allowed(hba)) {
>> +		up(&hba->host_sem);
>> +		return -EBUSY;
>> +	}
>> +	if (ufshcd_rpm_get_sync(hba)) {
>> +		ufshcd_rpm_put_sync(hba);
>> +		up(&hba->host_sem);
>> +		return -EBUSY;
>> +	}
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(ufshcd_get_user_access);
>> +
>> +void ufshcd_put_user_access(struct ufs_hba *hba)
>> +__releases(&hba->host_sem)
>> +{
>> +	ufshcd_rpm_put_sync(hba);
>> +	up(&hba->host_sem);
>> +}
>> +EXPORT_SYMBOL_GPL(ufshcd_put_user_access);
> 
> Please indent __acquires() and __releases() annotations by one tab as 
> is
> done elsewhere in the kernel.

OK.

> 
>>  static inline bool ufshcd_is_user_access_allowed(struct ufs_hba *hba)
>>  {
>> -	return !hba->shutting_down;
>> +	return !hba->shutting_down && !hba->is_sys_suspended &&
>> +		!hba->is_wlu_sys_suspended &&
>> +		hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL;
>>  }
> 
> Is my understanding of the following correct?
> - ufshcd_is_user_access_allowed() is not in the hot path and hence
>   should not be inline.

OK.

> - The hba->shutting_down member variable is set from inside a shutdown
>   callback. Hence, the hba->shutting_down test can be left out since
>   no UFS sysfs attributes are accessed after shutdown has started.

We see that user can still access UFS sysfs during shutdown if shutdown
is invoked by: reboot -f, hence the check.

> - During system suspend, user space software is paused before the 
> device
>   driver freeze callbacks are invoked. Hence, the hba->is_sys_suspended
>   check can be left out.

is_sys_suspended indicates that system resume failed (power/clk is OFF).

> - If a LUN is runtime suspended, it should be resumed if accessed from
>   user space instead of failing user space accesses. In other words, 
> the
>   hba->is_wlu_sys_suspended check seems inappropriate to me.

hba->is_wlu_sys_suspended indicates that wl system resume failed, device
is not operational.

> - If the HBA is not in an operational state, user space accesses
>   should be blocked until error handling has finished. After error
>   handling has finished, the user space access should fail if and only
>   if error handling failed.
> 

Yes, which is why ufshcd_get_user_access() acquires host_sem first and
checks the OPERATOINAL flag here. host_sem shall make sure that user
space accesses should be blocked until error handling has finished.

Thanks,

Can Guo.

> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-24  2:02       ` Can Guo
@ 2021-06-24  2:34         ` Can Guo
  2021-06-24  6:04         ` Adrian Hunter
  1 sibling, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  2:34 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

Typo fixed.

On 2021-06-24 10:02, Can Guo wrote:
> Hi Bart,
> 
> On 2021-06-24 04:57, Bart Van Assche wrote:
>> On 6/23/21 1:05 PM, Bart Van Assche wrote:
>>> On 6/23/21 12:35 AM, Can Guo wrote:
>>>> Rename pm_op_in_progress and is_sys_suspended to 
>>>> wlu_pm_op_in_progress and
>>>> is_wlu_sys_suspended accordingly.
>>> 
>>> My understanding is that power management operations must be 
>>> submitted
>>> to one particular UFS WLUN (hba->sdev_ufs_device). That makes the 
>>> "wlu_"
>>> part of the new names redundant. In other words, I like the current
>>> names better than the new names. Unless if I missed something, 
>>> consider
>>> dropping this patch.
>> 
>> Hi Can,
>> 
>> Reviewing later patches in this series made me realize that there are
>> two families of suspend/resume functions. One family of functions
>> operates at the platform level while the other family operates at the
>> SCSI LUN level. My comments about the suspend/resume functions are as
>> follows:
>> - It seems redundant to me to have system suspend support at the SCSI
>>   LUN level (__ufshcd_wl_suspend(hba, UFS_SYSTEM_PM)) and also at the
>>   platform level. Since the platform device is a parent of the SCSI
>>   WLUN, can system suspend/resume support be left out from
>>   ufshcd_wl_pm_ops (or in other words, remove the .freeze and .thaw
>>   callbacks)? Do we really need two calls from the power management
>>   subsystem into the UFS driver for every system suspend and every
>>   system resume?
> 
> Asutosh and Adrian should be the right persons to answer this, since
> they've been working together on that huge change for 4 months -
> 
> https://lore.kernel.org/patchwork/patch/1417696/
> 
> In short, we need a dedicated suspend/resume ops for the UFS device
> W-LU because one cannot send requests (not even PM requests) after the
> device is runtime suspended - sending SSU cmds in hba suspend/resume
> cannot pass through blk_queue_enter() as SSU cmd is sent to the UFS
> device W-LU scsi device (by now it is runtime suspended) but not the
> hba device.
> 
> Of course we can keep the old way and send the SSU cmd through a
> request queue without block layer PM initialized (hba->cmd_queue for
> example, by pointing cmd_queue->queuedata to the UFS device W-LU scsi
> device), but that would look like a hack.
> 
>> - Because of the device links (device_link_add()), the ufschd_wl_*()
>>   RPM callbacks are invoked after all LUNs have been suspended. I 
>> would
>>   appreciate it if the "ufshcd_wl_" prefix would be changed into
>>   "ufshcd_lun_" since that would make it more clear that these 
>> callbacks
>>   are associated with all LUNs and not only with the WLUN through 
>> which
>>   power management commands are submitted.
>> 
> 
> Sure, we will do that later.
> 
> Thanks,
> 
> Can Guo.
> 
>> Thanks,
>> 
>> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests
  2021-06-23 21:33   ` Bart Van Assche
@ 2021-06-24  4:16     ` Can Guo
  2021-06-24 16:57       ` Bart Van Assche
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-24  4:16 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

Hi Bart,

On 2021-06-24 05:33, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> @@ -2737,7 +2737,7 @@ static int ufshcd_queuecommand(struct Scsi_Host 
>> *host, struct scsi_cmnd *cmd)
>>  		 * err handler blocked for too long. So, just fail the scsi cmd
>>  		 * sent from PM ops, err handler can recover PM error anyways.
>>  		 */
>> -		if (hba->wlu_pm_op_in_progress) {
>> +		if (cmd->request->rq_flags & RQF_PM) {
>>  			hba->force_reset = true;
>>  			set_host_byte(cmd, DID_BAD_TARGET);
>>  			cmd->scsi_done(cmd);
> 
> I'm still concerned that the above code may trigger data corruption. I
> prefer that the above code is removed instead of being modified.

Removing the change will lead to deadlock when error handling prepare
calls pm_runtime_get_sync().

RQF_PM is only given to requests sent from power management operations,
during which the specific device/LU is suspending/resuming, meaning no
data transaction is ongoing. How can fast failing a PM request trigger
data corruption?

Thanks,

Can Guo.

> 
> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24  2:16     ` Can Guo
@ 2021-06-24  5:52       ` Adrian Hunter
  2021-06-24  6:12         ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Adrian Hunter @ 2021-06-24  5:52 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 24/06/21 5:16 am, Can Guo wrote:
> On 2021-06-23 22:30, Adrian Hunter wrote:
>> On 23/06/21 10:35 am, Can Guo wrote:
>>> To protect system suspend/resume from being disturbed by error handling,
>>> instead of using host_sem, let error handler call lock_system_sleep() and
>>> unlock_system_sleep() which achieve the same purpose. Remove the host_sem
>>> used in suspend/resume paths to make the code more readable.
>>>
>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>> ---
>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>> index 3695dd2..a09e4a2 100644
>>> --- a/drivers/scsi/ufs/ufshcd.c
>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>>>
>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>  {
>>> +    /*
>>> +     * It is not safe to perform error handling while suspend or resume is
>>> +     * in progress. Hence the lock_system_sleep() call.
>>> +     */
>>> +    lock_system_sleep();
>>
>> It looks to me like the system takes this lock quite early, even before
>> freezing tasks, so if anything needs the error handler to run it will
>> deadlock.
> 
> Hi Adrian,
> 
> UFS/hba system suspend/resume does not invoke or call error handling in a
> synchronous way. So, whatever UFS errors (which schedules the error handler)
> happens during suspend/resume, error handler will just wait here till system
> suspend/resume release the lock. Hence no worries of deadlock here.

It looks to me like the state can change to UFSHCD_STATE_EH_SCHEDULED_FATAL
and since user processes are not frozen, nor file systems sync'ed, everything
is going to deadlock.
i.e.
I/O is blocked waiting on error handling
error handling is blocked waiting on lock_system_sleep()
suspend is blocked waiting on I/O

> 
> Thanks,
> 
> Can Guo.
> 
>>
>>>      ufshcd_rpm_get_sync(hba);
>>>      if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
>>>          hba->is_wlu_sys_suspended) {
>>> @@ -5951,6 +5956,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>>>          ufshcd_clk_scaling_suspend(hba, false);
>>>      ufshcd_clear_ua_wluns(hba);
>>>      ufshcd_rpm_put(hba);
>>> +    unlock_system_sleep();
>>>  }
>>>
>>>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
>>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device *dev)
>>>      ktime_t start = ktime_get();
>>>
>>>      hba = shost_priv(sdev->host);
>>> -    down(&hba->host_sem);
>>>
>>>      if (pm_runtime_suspended(dev))
>>>          goto out;
>>>
>>>      ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>>> -    if (ret) {
>>> +    if (ret)
>>>          dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  ret);
>>> -        up(&hba->host_sem);
>>> -    }
>>>
>>>  out:
>>>      if (!ret)
>>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device *dev)
>>>          hba->curr_dev_pwr_mode, hba->uic_link_state);
>>>      if (!ret)
>>>          hba->is_wlu_sys_suspended = false;
>>> -    up(&hba->host_sem);
>>>      return ret;
>>>  }
>>>  #endif
>>>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-24  2:02       ` Can Guo
  2021-06-24  2:34         ` Can Guo
@ 2021-06-24  6:04         ` Adrian Hunter
  1 sibling, 0 replies; 58+ messages in thread
From: Adrian Hunter @ 2021-06-24  6:04 UTC (permalink / raw)
  To: Can Guo, Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 24/06/21 5:02 am, Can Guo wrote:
> Hi Bart,
> 
> On 2021-06-24 04:57, Bart Van Assche wrote:
>> On 6/23/21 1:05 PM, Bart Van Assche wrote:
>>> On 6/23/21 12:35 AM, Can Guo wrote:
>>>> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
>>>> is_wlu_sys_suspended accordingly.
>>>
>>> My understanding is that power management operations must be submitted
>>> to one particular UFS WLUN (hba->sdev_ufs_device). That makes the "wlu_"
>>> part of the new names redundant. In other words, I like the current
>>> names better than the new names. Unless if I missed something, consider
>>> dropping this patch.
>>
>> Hi Can,
>>
>> Reviewing later patches in this series made me realize that there are
>> two families of suspend/resume functions. One family of functions
>> operates at the platform level while the other family operates at the
>> SCSI LUN level. My comments about the suspend/resume functions are as
>> follows:
>> - It seems redundant to me to have system suspend support at the SCSI
>>   LUN level (__ufshcd_wl_suspend(hba, UFS_SYSTEM_PM)) and also at the
>>   platform level. Since the platform device is a parent of the SCSI
>>   WLUN, can system suspend/resume support be left out from
>>   ufshcd_wl_pm_ops (or in other words, remove the .freeze and .thaw
>>   callbacks)? Do we really need two calls from the power management
>>   subsystem into the UFS driver for every system suspend and every
>>   system resume?
> 
> Asutosh and Adrian should be the right persons to answer this, since
> they've been working together on that huge change for 4 months -
> 
> https://lore.kernel.org/patchwork/patch/1417696/
> 
> In short, we need a dedicated suspend/resume ops for the UFS device
> W-LU because one cannot send requests (not even PM requests) after the
> device is runtime suspended - sending SSU cmds in hba suspend/resume
> cannot pass through blk_queue_enter() as SSU cmd is sent to the UFS
> device W-LU scsi device (by now it is runtime suspended) but not the
> hba device.

Yes it is quite normal to have different PM callbacks for different
devices (e.g. host controller and device controlled by host controller),
and SCSI effectively expects it that way now.

.freeze and .thaw are necessary for suspend-to-disk

> 
> Of course we can keep the old way and send the SSU cmd through a
> request queue without block layer PM initialized (hba->cmd_queue for
> example, by pointing cmd_queue->dev to the UFS device W-LU scsi device),
> but that would look like a hack.
> 
>> - Because of the device links (device_link_add()), the ufschd_wl_*()
>>   RPM callbacks are invoked after all LUNs have been suspended. I would
>>   appreciate it if the "ufshcd_wl_" prefix would be changed into
>>   "ufshcd_lun_" since that would make it more clear that these callbacks
>>   are associated with all LUNs and not only with the WLUN through which
>>   power management commands are submitted.
>>
> 
> Sure, we will do that later.
> 
> Thanks,
> 
> Can Guo.
> 
>> Thanks,
>>
>> Bart.


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24  5:52       ` Adrian Hunter
@ 2021-06-24  6:12         ` Can Guo
  2021-06-24  6:23           ` Adrian Hunter
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-24  6:12 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-24 13:52, Adrian Hunter wrote:
> On 24/06/21 5:16 am, Can Guo wrote:
>> On 2021-06-23 22:30, Adrian Hunter wrote:
>>> On 23/06/21 10:35 am, Can Guo wrote:
>>>> To protect system suspend/resume from being disturbed by error 
>>>> handling,
>>>> instead of using host_sem, let error handler call 
>>>> lock_system_sleep() and
>>>> unlock_system_sleep() which achieve the same purpose. Remove the 
>>>> host_sem
>>>> used in suspend/resume paths to make the code more readable.
>>>> 
>>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>>> ---
>>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>> 
>>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>>> index 3695dd2..a09e4a2 100644
>>>> --- a/drivers/scsi/ufs/ufshcd.c
>>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>>> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct 
>>>> ufs_hba *hba, bool suspend)
>>>> 
>>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>>  {
>>>> +    /*
>>>> +     * It is not safe to perform error handling while suspend or 
>>>> resume is
>>>> +     * in progress. Hence the lock_system_sleep() call.
>>>> +     */
>>>> +    lock_system_sleep();
>>> 
>>> It looks to me like the system takes this lock quite early, even 
>>> before
>>> freezing tasks, so if anything needs the error handler to run it will
>>> deadlock.
>> 
>> Hi Adrian,
>> 
>> UFS/hba system suspend/resume does not invoke or call error handling 
>> in a
>> synchronous way. So, whatever UFS errors (which schedules the error 
>> handler)
>> happens during suspend/resume, error handler will just wait here till 
>> system
>> suspend/resume release the lock. Hence no worries of deadlock here.
> 
> It looks to me like the state can change to 
> UFSHCD_STATE_EH_SCHEDULED_FATAL
> and since user processes are not frozen, nor file systems sync'ed, 
> everything
> is going to deadlock.
> i.e.
> I/O is blocked waiting on error handling
> error handling is blocked waiting on lock_system_sleep()
> suspend is blocked waiting on I/O
> 

Hi Adrian,

First of all, enter_state(suspend_state_t state) uses 
mutex_trylock(&system_transition_mutex).
Second, even that happens, in ufshcd_queuecommand(), below logic will 
break the cycle, by
fast failing the PM request (below codes are from the code tip with this 
whole series applied).

         case UFSHCD_STATE_EH_SCHEDULED_FATAL:
                 /*
                  * ufshcd_rpm_get_sync() is used at error handling 
preparation
                  * stage. If a scsi cmd, e.g., the SSU cmd, is sent from 
the
                  * PM ops, it can never be finished if we let SCSI layer 
keep
                  * retrying it, which gets err handler stuck forever. 
Neither
                  * can we let the scsi cmd pass through, because UFS is 
in bad
                  * state, the scsi cmd may eventually time out, which 
will get
                  * err handler blocked for too long. So, just fail the 
scsi cmd
                  * sent from PM ops, err handler can recover PM error 
anyways.
                  */
                 if (cmd->request->rq_flags & RQF_PM) {
                         hba->force_reset = true;
                         set_host_byte(cmd, DID_BAD_TARGET);
                         cmd->scsi_done(cmd);
                         goto out;
                 }
                 fallthrough;
         case UFSHCD_STATE_RESET:

Thanks,

Can Guo.

>> 
>> Thanks,
>> 
>> Can Guo.
>> 
>>> 
>>>>      ufshcd_rpm_get_sync(hba);
>>>>      if 
>>>> (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
>>>>          hba->is_wlu_sys_suspended) {
>>>> @@ -5951,6 +5956,7 @@ static void 
>>>> ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>>>>          ufshcd_clk_scaling_suspend(hba, false);
>>>>      ufshcd_clear_ua_wluns(hba);
>>>>      ufshcd_rpm_put(hba);
>>>> +    unlock_system_sleep();
>>>>  }
>>>> 
>>>>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba 
>>>> *hba)
>>>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device 
>>>> *dev)
>>>>      ktime_t start = ktime_get();
>>>> 
>>>>      hba = shost_priv(sdev->host);
>>>> -    down(&hba->host_sem);
>>>> 
>>>>      if (pm_runtime_suspended(dev))
>>>>          goto out;
>>>> 
>>>>      ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>>>> -    if (ret) {
>>>> +    if (ret)
>>>>          dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  
>>>> ret);
>>>> -        up(&hba->host_sem);
>>>> -    }
>>>> 
>>>>  out:
>>>>      if (!ret)
>>>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device 
>>>> *dev)
>>>>          hba->curr_dev_pwr_mode, hba->uic_link_state);
>>>>      if (!ret)
>>>>          hba->is_wlu_sys_suspended = false;
>>>> -    up(&hba->host_sem);
>>>>      return ret;
>>>>  }
>>>>  #endif
>>>> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24  6:12         ` Can Guo
@ 2021-06-24  6:23           ` Adrian Hunter
  2021-06-24  6:31             ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Adrian Hunter @ 2021-06-24  6:23 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 24/06/21 9:12 am, Can Guo wrote:
> On 2021-06-24 13:52, Adrian Hunter wrote:
>> On 24/06/21 5:16 am, Can Guo wrote:
>>> On 2021-06-23 22:30, Adrian Hunter wrote:
>>>> On 23/06/21 10:35 am, Can Guo wrote:
>>>>> To protect system suspend/resume from being disturbed by error handling,
>>>>> instead of using host_sem, let error handler call lock_system_sleep() and
>>>>> unlock_system_sleep() which achieve the same purpose. Remove the host_sem
>>>>> used in suspend/resume paths to make the code more readable.
>>>>>
>>>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>>>> ---
>>>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>>>> index 3695dd2..a09e4a2 100644
>>>>> --- a/drivers/scsi/ufs/ufshcd.c
>>>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>>>> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>>>>>
>>>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>>>  {
>>>>> +    /*
>>>>> +     * It is not safe to perform error handling while suspend or resume is
>>>>> +     * in progress. Hence the lock_system_sleep() call.
>>>>> +     */
>>>>> +    lock_system_sleep();
>>>>
>>>> It looks to me like the system takes this lock quite early, even before
>>>> freezing tasks, so if anything needs the error handler to run it will
>>>> deadlock.
>>>
>>> Hi Adrian,
>>>
>>> UFS/hba system suspend/resume does not invoke or call error handling in a
>>> synchronous way. So, whatever UFS errors (which schedules the error handler)
>>> happens during suspend/resume, error handler will just wait here till system
>>> suspend/resume release the lock. Hence no worries of deadlock here.
>>
>> It looks to me like the state can change to UFSHCD_STATE_EH_SCHEDULED_FATAL
>> and since user processes are not frozen, nor file systems sync'ed, everything
>> is going to deadlock.
>> i.e.
>> I/O is blocked waiting on error handling
>> error handling is blocked waiting on lock_system_sleep()
>> suspend is blocked waiting on I/O
>>
> 
> Hi Adrian,
> 
> First of all, enter_state(suspend_state_t state) uses mutex_trylock(&system_transition_mutex).

Yes, in the case I am outlining it gets the mutex.

> Second, even that happens, in ufshcd_queuecommand(), below logic will break the cycle, by
> fast failing the PM request (below codes are from the code tip with this whole series applied).

It won't get that far because the suspend will be waiting to sync filesystems.
Filesystems will be waiting on I/O.
I/O will be waiting on the error handler.
The error handler will be waiting on system_transition_mutex.
But system_transition_mutex is already held by PM core.

> 
>         case UFSHCD_STATE_EH_SCHEDULED_FATAL:
>                 /*
>                  * ufshcd_rpm_get_sync() is used at error handling preparation
>                  * stage. If a scsi cmd, e.g., the SSU cmd, is sent from the
>                  * PM ops, it can never be finished if we let SCSI layer keep
>                  * retrying it, which gets err handler stuck forever. Neither
>                  * can we let the scsi cmd pass through, because UFS is in bad
>                  * state, the scsi cmd may eventually time out, which will get
>                  * err handler blocked for too long. So, just fail the scsi cmd
>                  * sent from PM ops, err handler can recover PM error anyways.
>                  */
>                 if (cmd->request->rq_flags & RQF_PM) {
>                         hba->force_reset = true;
>                         set_host_byte(cmd, DID_BAD_TARGET);
>                         cmd->scsi_done(cmd);
>                         goto out;
>                 }
>                 fallthrough;
>         case UFSHCD_STATE_RESET:
> 
> Thanks,
> 
> Can Guo.
> 
>>>
>>> Thanks,
>>>
>>> Can Guo.
>>>
>>>>
>>>>>      ufshcd_rpm_get_sync(hba);
>>>>>      if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
>>>>>          hba->is_wlu_sys_suspended) {
>>>>> @@ -5951,6 +5956,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>>>>>          ufshcd_clk_scaling_suspend(hba, false);
>>>>>      ufshcd_clear_ua_wluns(hba);
>>>>>      ufshcd_rpm_put(hba);
>>>>> +    unlock_system_sleep();
>>>>>  }
>>>>>
>>>>>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
>>>>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device *dev)
>>>>>      ktime_t start = ktime_get();
>>>>>
>>>>>      hba = shost_priv(sdev->host);
>>>>> -    down(&hba->host_sem);
>>>>>
>>>>>      if (pm_runtime_suspended(dev))
>>>>>          goto out;
>>>>>
>>>>>      ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>>>>> -    if (ret) {
>>>>> +    if (ret)
>>>>>          dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  ret);
>>>>> -        up(&hba->host_sem);
>>>>> -    }
>>>>>
>>>>>  out:
>>>>>      if (!ret)
>>>>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device *dev)
>>>>>          hba->curr_dev_pwr_mode, hba->uic_link_state);
>>>>>      if (!ret)
>>>>>          hba->is_wlu_sys_suspended = false;
>>>>> -    up(&hba->host_sem);
>>>>>      return ret;
>>>>>  }
>>>>>  #endif
>>>>>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24  6:23           ` Adrian Hunter
@ 2021-06-24  6:31             ` Can Guo
  2021-06-24 10:04               ` Adrian Hunter
  2021-06-24 17:11               ` Bart Van Assche
  0 siblings, 2 replies; 58+ messages in thread
From: Can Guo @ 2021-06-24  6:31 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-24 14:23, Adrian Hunter wrote:
> On 24/06/21 9:12 am, Can Guo wrote:
>> On 2021-06-24 13:52, Adrian Hunter wrote:
>>> On 24/06/21 5:16 am, Can Guo wrote:
>>>> On 2021-06-23 22:30, Adrian Hunter wrote:
>>>>> On 23/06/21 10:35 am, Can Guo wrote:
>>>>>> To protect system suspend/resume from being disturbed by error 
>>>>>> handling,
>>>>>> instead of using host_sem, let error handler call 
>>>>>> lock_system_sleep() and
>>>>>> unlock_system_sleep() which achieve the same purpose. Remove the 
>>>>>> host_sem
>>>>>> used in suspend/resume paths to make the code more readable.
>>>>>> 
>>>>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>>>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>>>>> ---
>>>>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>> 
>>>>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>>>>> index 3695dd2..a09e4a2 100644
>>>>>> --- a/drivers/scsi/ufs/ufshcd.c
>>>>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>>>>> @@ -5907,6 +5907,11 @@ static void 
>>>>>> ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>>>>>> 
>>>>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>>>>  {
>>>>>> +    /*
>>>>>> +     * It is not safe to perform error handling while suspend or 
>>>>>> resume is
>>>>>> +     * in progress. Hence the lock_system_sleep() call.
>>>>>> +     */
>>>>>> +    lock_system_sleep();
>>>>> 
>>>>> It looks to me like the system takes this lock quite early, even 
>>>>> before
>>>>> freezing tasks, so if anything needs the error handler to run it 
>>>>> will
>>>>> deadlock.
>>>> 
>>>> Hi Adrian,
>>>> 
>>>> UFS/hba system suspend/resume does not invoke or call error handling 
>>>> in a
>>>> synchronous way. So, whatever UFS errors (which schedules the error 
>>>> handler)
>>>> happens during suspend/resume, error handler will just wait here 
>>>> till system
>>>> suspend/resume release the lock. Hence no worries of deadlock here.
>>> 
>>> It looks to me like the state can change to 
>>> UFSHCD_STATE_EH_SCHEDULED_FATAL
>>> and since user processes are not frozen, nor file systems sync'ed, 
>>> everything
>>> is going to deadlock.
>>> i.e.
>>> I/O is blocked waiting on error handling
>>> error handling is blocked waiting on lock_system_sleep()
>>> suspend is blocked waiting on I/O
>>> 
>> 
>> Hi Adrian,
>> 
>> First of all, enter_state(suspend_state_t state) uses 
>> mutex_trylock(&system_transition_mutex).
> 
> Yes, in the case I am outlining it gets the mutex.
> 
>> Second, even that happens, in ufshcd_queuecommand(), below logic will 
>> break the cycle, by
>> fast failing the PM request (below codes are from the code tip with 
>> this whole series applied).
> 
> It won't get that far because the suspend will be waiting to sync 
> filesystems.
> Filesystems will be waiting on I/O.
> I/O will be waiting on the error handler.
> The error handler will be waiting on system_transition_mutex.
> But system_transition_mutex is already held by PM core.

Hi Adrian,

You are right.... I missed the action of syncing filesystems...

Using back host_sem in suspend_prepare()/resume_complete() won't have 
this
problem of deadlock, right?

Thanks,

Can Guo.

> 
>> 
>>         case UFSHCD_STATE_EH_SCHEDULED_FATAL:
>>                 /*
>>                  * ufshcd_rpm_get_sync() is used at error handling 
>> preparation
>>                  * stage. If a scsi cmd, e.g., the SSU cmd, is sent 
>> from the
>>                  * PM ops, it can never be finished if we let SCSI 
>> layer keep
>>                  * retrying it, which gets err handler stuck forever. 
>> Neither
>>                  * can we let the scsi cmd pass through, because UFS 
>> is in bad
>>                  * state, the scsi cmd may eventually time out, which 
>> will get
>>                  * err handler blocked for too long. So, just fail the 
>> scsi cmd
>>                  * sent from PM ops, err handler can recover PM error 
>> anyways.
>>                  */
>>                 if (cmd->request->rq_flags & RQF_PM) {
>>                         hba->force_reset = true;
>>                         set_host_byte(cmd, DID_BAD_TARGET);
>>                         cmd->scsi_done(cmd);
>>                         goto out;
>>                 }
>>                 fallthrough;
>>         case UFSHCD_STATE_RESET:
>> 
>> Thanks,
>> 
>> Can Guo.
>> 
>>>> 
>>>> Thanks,
>>>> 
>>>> Can Guo.
>>>> 
>>>>> 
>>>>>>      ufshcd_rpm_get_sync(hba);
>>>>>>      if 
>>>>>> (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) 
>>>>>> ||
>>>>>>          hba->is_wlu_sys_suspended) {
>>>>>> @@ -5951,6 +5956,7 @@ static void 
>>>>>> ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>>>>>>          ufshcd_clk_scaling_suspend(hba, false);
>>>>>>      ufshcd_clear_ua_wluns(hba);
>>>>>>      ufshcd_rpm_put(hba);
>>>>>> +    unlock_system_sleep();
>>>>>>  }
>>>>>> 
>>>>>>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba 
>>>>>> *hba)
>>>>>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device 
>>>>>> *dev)
>>>>>>      ktime_t start = ktime_get();
>>>>>> 
>>>>>>      hba = shost_priv(sdev->host);
>>>>>> -    down(&hba->host_sem);
>>>>>> 
>>>>>>      if (pm_runtime_suspended(dev))
>>>>>>          goto out;
>>>>>> 
>>>>>>      ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>>>>>> -    if (ret) {
>>>>>> +    if (ret)
>>>>>>          dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  
>>>>>> ret);
>>>>>> -        up(&hba->host_sem);
>>>>>> -    }
>>>>>> 
>>>>>>  out:
>>>>>>      if (!ret)
>>>>>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device 
>>>>>> *dev)
>>>>>>          hba->curr_dev_pwr_mode, hba->uic_link_state);
>>>>>>      if (!ret)
>>>>>>          hba->is_wlu_sys_suspended = false;
>>>>>> -    up(&hba->host_sem);
>>>>>>      return ret;
>>>>>>  }
>>>>>>  #endif
>>>>>> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24  6:31             ` Can Guo
@ 2021-06-24 10:04               ` Adrian Hunter
  2021-06-28  7:26                 ` Can Guo
  2021-06-24 17:11               ` Bart Van Assche
  1 sibling, 1 reply; 58+ messages in thread
From: Adrian Hunter @ 2021-06-24 10:04 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 24/06/21 9:31 am, Can Guo wrote:
> On 2021-06-24 14:23, Adrian Hunter wrote:
>> On 24/06/21 9:12 am, Can Guo wrote:
>>> On 2021-06-24 13:52, Adrian Hunter wrote:
>>>> On 24/06/21 5:16 am, Can Guo wrote:
>>>>> On 2021-06-23 22:30, Adrian Hunter wrote:
>>>>>> On 23/06/21 10:35 am, Can Guo wrote:
>>>>>>> To protect system suspend/resume from being disturbed by error handling,
>>>>>>> instead of using host_sem, let error handler call lock_system_sleep() and
>>>>>>> unlock_system_sleep() which achieve the same purpose. Remove the host_sem
>>>>>>> used in suspend/resume paths to make the code more readable.
>>>>>>>
>>>>>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>>>>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>>>>>> ---
>>>>>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>>>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>>>>>> index 3695dd2..a09e4a2 100644
>>>>>>> --- a/drivers/scsi/ufs/ufshcd.c
>>>>>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>>>>>> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>>>>>>>
>>>>>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>>>>>  {
>>>>>>> +    /*
>>>>>>> +     * It is not safe to perform error handling while suspend or resume is
>>>>>>> +     * in progress. Hence the lock_system_sleep() call.
>>>>>>> +     */
>>>>>>> +    lock_system_sleep();
>>>>>>
>>>>>> It looks to me like the system takes this lock quite early, even before
>>>>>> freezing tasks, so if anything needs the error handler to run it will
>>>>>> deadlock.
>>>>>
>>>>> Hi Adrian,
>>>>>
>>>>> UFS/hba system suspend/resume does not invoke or call error handling in a
>>>>> synchronous way. So, whatever UFS errors (which schedules the error handler)
>>>>> happens during suspend/resume, error handler will just wait here till system
>>>>> suspend/resume release the lock. Hence no worries of deadlock here.
>>>>
>>>> It looks to me like the state can change to UFSHCD_STATE_EH_SCHEDULED_FATAL
>>>> and since user processes are not frozen, nor file systems sync'ed, everything
>>>> is going to deadlock.
>>>> i.e.
>>>> I/O is blocked waiting on error handling
>>>> error handling is blocked waiting on lock_system_sleep()
>>>> suspend is blocked waiting on I/O
>>>>
>>>
>>> Hi Adrian,
>>>
>>> First of all, enter_state(suspend_state_t state) uses mutex_trylock(&system_transition_mutex).
>>
>> Yes, in the case I am outlining it gets the mutex.
>>
>>> Second, even that happens, in ufshcd_queuecommand(), below logic will break the cycle, by
>>> fast failing the PM request (below codes are from the code tip with this whole series applied).
>>
>> It won't get that far because the suspend will be waiting to sync filesystems.
>> Filesystems will be waiting on I/O.
>> I/O will be waiting on the error handler.
>> The error handler will be waiting on system_transition_mutex.
>> But system_transition_mutex is already held by PM core.
> 
> Hi Adrian,
> 
> You are right.... I missed the action of syncing filesystems...
> 
> Using back host_sem in suspend_prepare()/resume_complete() won't have this
> problem of deadlock, right?

I am not sure, but what was problem that the V3 patch was fixing?
Can you give an example?

> 
> Thanks,
> 
> Can Guo.
> 
>>
>>>
>>>         case UFSHCD_STATE_EH_SCHEDULED_FATAL:
>>>                 /*
>>>                  * ufshcd_rpm_get_sync() is used at error handling preparation
>>>                  * stage. If a scsi cmd, e.g., the SSU cmd, is sent from the
>>>                  * PM ops, it can never be finished if we let SCSI layer keep
>>>                  * retrying it, which gets err handler stuck forever. Neither
>>>                  * can we let the scsi cmd pass through, because UFS is in bad
>>>                  * state, the scsi cmd may eventually time out, which will get
>>>                  * err handler blocked for too long. So, just fail the scsi cmd
>>>                  * sent from PM ops, err handler can recover PM error anyways.
>>>                  */
>>>                 if (cmd->request->rq_flags & RQF_PM) {
>>>                         hba->force_reset = true;
>>>                         set_host_byte(cmd, DID_BAD_TARGET);
>>>                         cmd->scsi_done(cmd);
>>>                         goto out;
>>>                 }
>>>                 fallthrough;
>>>         case UFSHCD_STATE_RESET:
>>>
>>> Thanks,
>>>
>>> Can Guo.
>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>> Can Guo.
>>>>>
>>>>>>
>>>>>>>      ufshcd_rpm_get_sync(hba);
>>>>>>>      if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
>>>>>>>          hba->is_wlu_sys_suspended) {
>>>>>>> @@ -5951,6 +5956,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>>>>>>>          ufshcd_clk_scaling_suspend(hba, false);
>>>>>>>      ufshcd_clear_ua_wluns(hba);
>>>>>>>      ufshcd_rpm_put(hba);
>>>>>>> +    unlock_system_sleep();
>>>>>>>  }
>>>>>>>
>>>>>>>  static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
>>>>>>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct device *dev)
>>>>>>>      ktime_t start = ktime_get();
>>>>>>>
>>>>>>>      hba = shost_priv(sdev->host);
>>>>>>> -    down(&hba->host_sem);
>>>>>>>
>>>>>>>      if (pm_runtime_suspended(dev))
>>>>>>>          goto out;
>>>>>>>
>>>>>>>      ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>>>>>>> -    if (ret) {
>>>>>>> +    if (ret)
>>>>>>>          dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__,  ret);
>>>>>>> -        up(&hba->host_sem);
>>>>>>> -    }
>>>>>>>
>>>>>>>  out:
>>>>>>>      if (!ret)
>>>>>>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device *dev)
>>>>>>>          hba->curr_dev_pwr_mode, hba->uic_link_state);
>>>>>>>      if (!ret)
>>>>>>>          hba->is_wlu_sys_suspended = false;
>>>>>>> -    up(&hba->host_sem);
>>>>>>>      return ret;
>>>>>>>  }
>>>>>>>  #endif
>>>>>>>


^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests
  2021-06-24  4:16     ` Can Guo
@ 2021-06-24 16:57       ` Bart Van Assche
  0 siblings, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-24 16:57 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 9:16 PM, Can Guo wrote:
> On 2021-06-24 05:33, Bart Van Assche wrote:
>> On 6/23/21 12:35 AM, Can Guo wrote:
>>> @@ -2737,7 +2737,7 @@ static int ufshcd_queuecommand(struct Scsi_Host
>>> *host, struct scsi_cmnd *cmd)
>>>           * err handler blocked for too long. So, just fail the scsi cmd
>>>           * sent from PM ops, err handler can recover PM error anyways.
>>>           */
>>> -        if (hba->wlu_pm_op_in_progress) {
>>> +        if (cmd->request->rq_flags & RQF_PM) {
>>>              hba->force_reset = true;
>>>              set_host_byte(cmd, DID_BAD_TARGET);
>>>              cmd->scsi_done(cmd);
>>
>> I'm still concerned that the above code may trigger data corruption. I
>> prefer that the above code is removed instead of being modified.
> 
> Removing the change will lead to deadlock when error handling prepare
> calls pm_runtime_get_sync().
> 
> RQF_PM is only given to requests sent from power management operations,
> during which the specific device/LU is suspending/resuming, meaning no
> data transaction is ongoing. How can fast failing a PM request trigger
> data corruption?

Right, the above code only affects power management requests so there is
no risk for data corruption.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24  6:31             ` Can Guo
  2021-06-24 10:04               ` Adrian Hunter
@ 2021-06-24 17:11               ` Bart Van Assche
  2021-06-28  8:17                 ` Can Guo
  1 sibling, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-24 17:11 UTC (permalink / raw)
  To: Can Guo, Adrian Hunter
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/23/21 11:31 PM, Can Guo wrote:
> Using back host_sem in suspend_prepare()/resume_complete() won't have this
> problem of deadlock, right?

Although that would solve the deadlock discussed in this email thread, it
wouldn't solve the issue of potential adverse interactions of the UFS error
handler and the SCSI error handler running concurrently. How about using the
standard approach for invoking the UFS error handler instead of using a custom
mechanism, e.g. by using something like the (untested) patch below? This
approach guarantees that the UFS error handler is only activated after all
pending SCSI commands have failed or timed out and also guarantees that no new
SCSI commands will be queued while the UFS error handler is in progress (see
also scsi_host_queue_ready()).

Thanks,

Bart.

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 0ac1e15ac914..c6303ea9e5db 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -17,6 +17,8 @@
 #include <linux/blk-pm.h>
 #include <linux/blkdev.h>
 #include <scsi/scsi_driver.h>
+#include <scsi/scsi_transport.h>
+#include "../scsi_transport_api.h"
 #include "ufshcd.h"
 #include "ufs_quirks.h"
 #include "unipro.h"
@@ -233,7 +235,6 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up);
 static irqreturn_t ufshcd_intr(int irq, void *__hba);
 static int ufshcd_change_power_mode(struct ufs_hba *hba,
 			     struct ufs_pa_layer_attr *pwr_mode);
-static void ufshcd_schedule_eh_work(struct ufs_hba *hba);
 static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
 static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
@@ -2697,26 +2698,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)

 	switch (hba->ufshcd_state) {
 	case UFSHCD_STATE_OPERATIONAL:
-	case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
 		break;
-	case UFSHCD_STATE_EH_SCHEDULED_FATAL:
-		/*
-		 * pm_runtime_get_sync() is used at error handling preparation
-		 * stage. If a scsi cmd, e.g. the SSU cmd, is sent from hba's
-		 * PM ops, it can never be finished if we let SCSI layer keep
-		 * retrying it, which gets err handler stuck forever. Neither
-		 * can we let the scsi cmd pass through, because UFS is in bad
-		 * state, the scsi cmd may eventually time out, which will get
-		 * err handler blocked for too long. So, just fail the scsi cmd
-		 * sent from PM ops, err handler can recover PM error anyways.
-		 */
-		if (hba->pm_op_in_progress) {
-			hba->force_reset = true;
-			set_host_byte(cmd, DID_BAD_TARGET);
-			cmd->scsi_done(cmd);
-			goto out;
-		}
-		fallthrough;
 	case UFSHCD_STATE_RESET:
 		err = SCSI_MLQUEUE_HOST_BUSY;
 		goto out;
@@ -3954,6 +3936,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd)
 	u8 status;
 	int ret;
 	bool reenable_intr = false;
+	bool schedule_eh = false;

 	mutex_lock(&hba->uic_cmd_mutex);
 	init_completion(&uic_async_done);
@@ -4021,10 +4004,12 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd)
 		ufshcd_enable_intr(hba, UIC_COMMAND_COMPL);
 	if (ret) {
 		ufshcd_set_link_broken(hba);
-		ufshcd_schedule_eh_work(hba);
+		schedule_eh = true;
 	}
 out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	if (schedule_eh)
+		scsi_schedule_eh(hba->host);
 	mutex_unlock(&hba->uic_cmd_mutex);

 	return ret;
@@ -4085,8 +4070,6 @@ static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state new_state)
 		}
 		break;
 	case UFSHCD_STATE_RESET:
-	case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
-	case UFSHCD_STATE_EH_SCHEDULED_FATAL:
 		allowed = hba->ufshcd_state != UFSHCD_STATE_ERROR;
 		break;
 	case UFSHCD_STATE_ERROR:
@@ -5886,22 +5869,6 @@ static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
 	       (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK));
 }

-/* host lock must be held before calling this func */
-static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
-{
-	bool proceed;
-
-	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
-	    ufshcd_is_saved_err_fatal(hba))
-		proceed = ufshcd_set_state(hba,
-					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
-	else
-		proceed = ufshcd_set_state(hba,
-					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
-	if (proceed)
-		queue_work(hba->eh_wq, &hba->eh_work);
-}
-
 static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
 {
 	down_write(&hba->clk_scaling_lock);
@@ -5924,7 +5891,7 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)

 static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
 {
-	ufshcd_rpm_get_sync(hba);
+	pm_runtime_get_noresume(&hba->sdev_ufs_device->sdev_gendev);
 	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
 	    hba->is_sys_suspended) {
 		enum ufs_pm_op pm_op;
@@ -6035,11 +6002,12 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)

 /**
  * ufshcd_err_handler - handle UFS errors that require s/w attention
- * @work: pointer to work structure
+ * @host: SCSI host pointer
  */
-static void ufshcd_err_handler(struct work_struct *work)
+static void ufshcd_err_handler(struct Scsi_Host *host)
 {
-	struct ufs_hba *hba;
+	struct ufs_hba *hba = shost_priv(host);
+	struct completion *eh_compl = NULL;
 	unsigned long flags;
 	bool err_xfer = false;
 	bool err_tm = false;
@@ -6047,10 +6015,10 @@ static void ufshcd_err_handler(struct work_struct *work)
 	int tag;
 	bool needs_reset = false, needs_restore = false;

-	hba = container_of(work, struct ufs_hba, eh_work);
-
 	down(&hba->host_sem);
 	spin_lock_irqsave(hba->host->host_lock, flags);
+	/* Clear host_eh_scheduled which has been set by scsi_schedule_eh(). */
+	hba->host->host_eh_scheduled = 0;
 	if (ufshcd_err_handling_should_stop(hba)) {
 		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
@@ -6202,9 +6170,12 @@ static void ufshcd_err_handler(struct work_struct *work)
 			    __func__, hba->saved_err, hba->saved_uic_err);
 	}
 	ufshcd_clear_eh_in_progress(hba);
+	swap(hba->eh_compl, eh_compl);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	ufshcd_err_handling_unprepare(hba);
 	up(&hba->host_sem);
+	if (eh_compl)
+		complete(eh_compl);
 }

 /**
@@ -6361,7 +6332,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status)
 					 "host_regs: ");
 			ufshcd_print_pwr_info(hba);
 		}
-		ufshcd_schedule_eh_work(hba);
 		retval |= IRQ_HANDLED;
 	}
 	/*
@@ -6373,6 +6343,8 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status)
 	hba->errors = 0;
 	hba->uic_error = 0;
 	spin_unlock(hba->host->host_lock);
+	if (queue_eh_work)
+		scsi_schedule_eh(hba->host);
 	return retval;
 }

@@ -7045,8 +7017,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 		set_bit(tag, &hba->outstanding_reqs);
 		spin_lock_irqsave(host->host_lock, flags);
 		hba->force_reset = true;
-		ufshcd_schedule_eh_work(hba);
 		spin_unlock_irqrestore(host->host_lock, flags);
+		scsi_schedule_eh(hba->host);
 		goto out;
 	}

@@ -7172,7 +7144,8 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
  */
 static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
 {
-	int err = SUCCESS;
+	DECLARE_COMPLETION_ONSTACK(eh_compl);
+	int err = SUCCESS, res;
 	unsigned long flags;
 	struct ufs_hba *hba;

@@ -7180,11 +7153,15 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)

 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->force_reset = true;
-	ufshcd_schedule_eh_work(hba);
+	hba->eh_compl = &eh_compl;
 	dev_err(hba->dev, "%s: reset in progress - 1\n", __func__);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);

-	flush_work(&hba->eh_work);
+	scsi_schedule_eh(hba->host);
+	res = wait_for_completion_interruptible_timeout(&eh_compl, 180 * HZ);
+	WARN_ONCE(res <= 0,
+		  "wait_for_completion_interruptible_timeout() returned %d",
+		  res);

 	spin_lock_irqsave(hba->host->host_lock, flags);
 	if (hba->ufshcd_state == UFSHCD_STATE_ERROR)
@@ -8511,8 +8488,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
 	if (hba->is_powered) {
 		ufshcd_exit_clk_scaling(hba);
 		ufshcd_exit_clk_gating(hba);
-		if (hba->eh_wq)
-			destroy_workqueue(hba->eh_wq);
 		ufs_debugfs_hba_exit(hba);
 		ufshcd_variant_hba_exit(hba);
 		ufshcd_setup_vreg(hba, false);
@@ -9371,6 +9346,10 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba)
 	return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32));
 }

+static struct scsi_transport_template ufshcd_transport_template = {
+	.eh_strategy_handler = ufshcd_err_handler,
+};
+
 /**
  * ufshcd_alloc_host - allocate Host Bus Adapter (HBA)
  * @dev: pointer to device handle
@@ -9397,6 +9376,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
 		err = -ENOMEM;
 		goto out_error;
 	}
+	host->transportt = &ufshcd_transport_template;
 	hba = shost_priv(host);
 	hba->host = host;
 	hba->dev = dev;
@@ -9434,7 +9414,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	int err;
 	struct Scsi_Host *host = hba->host;
 	struct device *dev = hba->dev;
-	char eh_wq_name[sizeof("ufs_eh_wq_00")];

 	if (!mmio_base) {
 		dev_err(hba->dev,
@@ -9488,17 +9467,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)

 	hba->max_pwr_info.is_valid = false;

-	/* Initialize work queues */
-	snprintf(eh_wq_name, sizeof(eh_wq_name), "ufs_eh_wq_%d",
-		 hba->host->host_no);
-	hba->eh_wq = create_singlethread_workqueue(eh_wq_name);
-	if (!hba->eh_wq) {
-		dev_err(hba->dev, "%s: failed to create eh workqueue\n",
-				__func__);
-		err = -ENOMEM;
-		goto out_disable;
-	}
-	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
 	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);

 	sema_init(&hba->host_sem, 1);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index f2796ea25598..d4f7ab0171c6 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -482,18 +482,12 @@ struct ufs_stats {
  *	processing.
  * @UFSHCD_STATE_OPERATIONAL: The host controller is operational and can process
  *	SCSI commands.
- * @UFSHCD_STATE_EH_SCHEDULED_NON_FATAL: The error handler has been scheduled.
- *	SCSI commands may be submitted to the controller.
- * @UFSHCD_STATE_EH_SCHEDULED_FATAL: The error handler has been scheduled. Fail
- *	newly submitted SCSI commands with error code DID_BAD_TARGET.
  * @UFSHCD_STATE_ERROR: An unrecoverable error occurred, e.g. link recovery
  *	failed. Fail all SCSI commands with error code DID_ERROR.
  */
 enum ufshcd_state {
 	UFSHCD_STATE_RESET,
 	UFSHCD_STATE_OPERATIONAL,
-	UFSHCD_STATE_EH_SCHEDULED_NON_FATAL,
-	UFSHCD_STATE_EH_SCHEDULED_FATAL,
 	UFSHCD_STATE_ERROR,
 };

@@ -715,8 +709,7 @@ struct ufs_hba_monitor {
  * @is_powered: flag to check if HBA is powered
  * @shutting_down: flag to check if shutdown has been invoked
  * @host_sem: semaphore used to serialize concurrent contexts
- * @eh_wq: Workqueue that eh_work works on
- * @eh_work: Worker to handle UFS errors that require s/w attention
+ * @eh_compl: Signaled by the UFS error handler after error handling finished.
  * @eeh_work: Worker to handle exception events
  * @errors: HBA errors
  * @uic_error: UFS interconnect layer error status
@@ -817,9 +810,7 @@ struct ufs_hba {
 	bool shutting_down;
 	struct semaphore host_sem;

-	/* Work Queues */
-	struct workqueue_struct *eh_wq;
-	struct work_struct eh_work;
+	struct completion *eh_compl;
 	struct work_struct eeh_work;

 	/* HBA Errors */

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
  2021-06-23 20:05   ` Bart Van Assche
  2021-06-23 20:42   ` Bjorn Andersson
@ 2021-06-24 17:32   ` Bart Van Assche
  2021-06-24 23:42   ` Bart Van Assche
  3 siblings, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-24 17:32 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 6/23/21 12:35 AM, Can Guo wrote:
> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
> is_wlu_sys_suspended accordingly.

Can the pm_op_in_progress variable be removed if the UFS driver checks whether
q->rpm_status == RPM_SUSPENDING || q->rpm_status == RPM_RESUMING instead of
using pm_op_in_progress? The fewer state variables we maintain the lower the
chance that these are inconsistent or incorrect. See also block/blk-pm.c for
the code that sets q->rpm_status.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 02/10] scsi: ufs: Add " Can Guo
  2021-06-23 12:33   ` Adrian Hunter
  2021-06-23 20:59   ` Bart Van Assche
@ 2021-06-24 17:35   ` Bart Van Assche
  2021-06-28  7:11     ` Can Guo
  2 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-24 17:35 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Satya Tangirala, Kiwoong Kim, open list

On 6/23/21 12:35 AM, Can Guo wrote:
> @@ -9141,6 +9143,8 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>  
>  	if (!hba->is_powered)
>  		return 0;
> +
> +	hba->pm_op_in_progress = true;
>  	/*
>  	 * Disable the host irq as host controller as there won't be any
>  	 * host controller transaction expected till resume.
> @@ -9160,6 +9164,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>  	ufshcd_vreg_set_lpm(hba);
>  	/* Put the host controller in low power mode if possible */
>  	ufshcd_hba_vreg_set_lpm(hba);
> +	hba->pm_op_in_progress = false;
>  	return ret;
>  }
>  
> @@ -9179,6 +9184,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>  	if (!hba->is_powered)
>  		return 0;
>  
> +	hba->pm_op_in_progress = true;
>  	ufshcd_hba_vreg_set_hpm(hba);
>  	ret = ufshcd_vreg_set_hpm(hba);
>  	if (ret)
> @@ -9198,6 +9204,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>  out:
>  	if (ret)
>  		ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
> +	hba->pm_op_in_progress = false;
>  	return ret;
>  }

Has it been considered to check dev->power.runtime_status instead of
introducing the pm_op_in_progress variable?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access
  2021-06-24  2:23     ` Can Guo
@ 2021-06-24 22:25       ` Bart Van Assche
  2021-06-28  7:16         ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-24 22:25 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Adrian Hunter, Bean Huo, Stanley Chu,
	Keoseong Park, Gustavo A. R. Silva, Jaegeuk Kim, Kiwoong Kim,
	Satya Tangirala, open list

On 6/23/21 7:23 PM, Can Guo wrote:
> On 2021-06-24 05:51, Bart Van Assche wrote:
>> On 6/23/21 12:35 AM, Can Guo wrote:
>> - During system suspend, user space software is paused before the device
>>   driver freeze callbacks are invoked. Hence, the hba->is_sys_suspended
>>   check can be left out.
> 
> is_sys_suspended indicates that system resume failed (power/clk is OFF).
> 
>> - If a LUN is runtime suspended, it should be resumed if accessed from
>>   user space instead of failing user space accesses. In other words, the
>>   hba->is_wlu_sys_suspended check seems inappropriate to me.
> 
> hba->is_wlu_sys_suspended indicates that wl system resume failed, device
> is not operational.

Hi Can,

Thanks for the clarification. How about converting the above two answers
into comments inside ufshcd_is_user_access_allowed()?

Should ufshcd_is_user_access_allowed() perhaps be called after
ufshcd_rpm_get_sync() instead of before to prevent that the value of
hba->is_sys_suspended or hba->is_wlu_sys_suspended changes after having
been checked and before the UFS device is accessed?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
                     ` (2 preceding siblings ...)
  2021-06-24 17:32   ` Bart Van Assche
@ 2021-06-24 23:42   ` Bart Van Assche
  2021-06-28  7:01     ` Can Guo
  3 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-24 23:42 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team
  Cc: Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 6/23/21 12:35 AM, Can Guo wrote:
> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress and
> is_wlu_sys_suspended accordingly.

Can the is_wlu_sys_suspended member variable be removed by checking
dev->power.is_suspended where dev represents the WLUN?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-24 23:42   ` Bart Van Assche
@ 2021-06-28  7:01     ` Can Guo
  2021-06-28  7:35       ` Can Guo
  2021-06-28 17:07       ` Bart Van Assche
  0 siblings, 2 replies; 58+ messages in thread
From: Can Guo @ 2021-06-28  7:01 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 2021-06-25 07:42, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> Rename pm_op_in_progress and is_sys_suspended to wlu_pm_op_in_progress 
>> and
>> is_wlu_sys_suspended accordingly.
> 
> Can the is_wlu_sys_suspended member variable be removed by checking
> dev->power.is_suspended where dev represents the WLUN?
> 

No, PM set dev->power.is_suspended to "false" even the device failed 
resuming,
while is_wlu_sys_suspended can be used to tell that.

Thanks,

Can Guo.

> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 02/10] scsi: ufs: Add flags pm_op_in_progress and is_sys_suspended
  2021-06-24 17:35   ` Bart Van Assche
@ 2021-06-28  7:11     ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-28  7:11 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	Adrian Hunter, Satya Tangirala, Kiwoong Kim, open list

On 2021-06-25 01:35, Bart Van Assche wrote:
> On 6/23/21 12:35 AM, Can Guo wrote:
>> @@ -9141,6 +9143,8 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>> 
>>  	if (!hba->is_powered)
>>  		return 0;
>> +
>> +	hba->pm_op_in_progress = true;
>>  	/*
>>  	 * Disable the host irq as host controller as there won't be any
>>  	 * host controller transaction expected till resume.
>> @@ -9160,6 +9164,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
>>  	ufshcd_vreg_set_lpm(hba);
>>  	/* Put the host controller in low power mode if possible */
>>  	ufshcd_hba_vreg_set_lpm(hba);
>> +	hba->pm_op_in_progress = false;
>>  	return ret;
>>  }
>> 
>> @@ -9179,6 +9184,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>>  	if (!hba->is_powered)
>>  		return 0;
>> 
>> +	hba->pm_op_in_progress = true;
>>  	ufshcd_hba_vreg_set_hpm(hba);
>>  	ret = ufshcd_vreg_set_hpm(hba);
>>  	if (ret)
>> @@ -9198,6 +9204,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
>>  out:
>>  	if (ret)
>>  		ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
>> +	hba->pm_op_in_progress = false;
>>  	return ret;
>>  }
> 
> Has it been considered to check dev->power.runtime_status instead of
> introducing the pm_op_in_progress variable?

ufshcd_resume() is also used by system resume, while runtime_status only
tells about runtime resume. So does ufshcd_suspend().

Thanks,

Can Guo.

> 
> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access
  2021-06-24 22:25       ` Bart Van Assche
@ 2021-06-28  7:16         ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-28  7:16 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Adrian Hunter, Bean Huo, Stanley Chu,
	Keoseong Park, Gustavo A. R. Silva, Jaegeuk Kim, Kiwoong Kim,
	Satya Tangirala, open list

On 2021-06-25 06:25, Bart Van Assche wrote:
> On 6/23/21 7:23 PM, Can Guo wrote:
>> On 2021-06-24 05:51, Bart Van Assche wrote:
>>> On 6/23/21 12:35 AM, Can Guo wrote:
>>> - During system suspend, user space software is paused before the 
>>> device
>>>   driver freeze callbacks are invoked. Hence, the 
>>> hba->is_sys_suspended
>>>   check can be left out.
>> 
>> is_sys_suspended indicates that system resume failed (power/clk is 
>> OFF).
>> 
>>> - If a LUN is runtime suspended, it should be resumed if accessed 
>>> from
>>>   user space instead of failing user space accesses. In other words, 
>>> the
>>>   hba->is_wlu_sys_suspended check seems inappropriate to me.
>> 
>> hba->is_wlu_sys_suspended indicates that wl system resume failed, 
>> device
>> is not operational.
> 
> Hi Can,
> 
> Thanks for the clarification. How about converting the above two 
> answers
> into comments inside ufshcd_is_user_access_allowed()?
> 

Sure.

> Should ufshcd_is_user_access_allowed() perhaps be called after
> ufshcd_rpm_get_sync() instead of before to prevent that the value of
> hba->is_sys_suspended or hba->is_wlu_sys_suspended changes after having
> been checked and before the UFS device is accessed?
> 

is_sys_suspended and is_wlu_sys_suspended only represent system PM 
status,
not runtime PM status.

My understanding is that user threads are frozen before system PM 
starts,
so it does not matter we call  ufshcd_is_user_access_allowed() before or
after ufshcd_rpm_get_sync().

If my understanding is wrong, then we need to either call 
lock_system_sleep()
in get_user_access() or wrap 
ufshcd_suspend_prepare/ufshcd_resume_complete()
with host_sem.

Thanks,

Can Guo.

> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24 10:04               ` Adrian Hunter
@ 2021-06-28  7:26                 ` Can Guo
  2021-07-07 19:04                   ` Adrian Hunter
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-28  7:26 UTC (permalink / raw)
  To: Adrian Hunter
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-24 18:04, Adrian Hunter wrote:
> On 24/06/21 9:31 am, Can Guo wrote:
>> On 2021-06-24 14:23, Adrian Hunter wrote:
>>> On 24/06/21 9:12 am, Can Guo wrote:
>>>> On 2021-06-24 13:52, Adrian Hunter wrote:
>>>>> On 24/06/21 5:16 am, Can Guo wrote:
>>>>>> On 2021-06-23 22:30, Adrian Hunter wrote:
>>>>>>> On 23/06/21 10:35 am, Can Guo wrote:
>>>>>>>> To protect system suspend/resume from being disturbed by error 
>>>>>>>> handling,
>>>>>>>> instead of using host_sem, let error handler call 
>>>>>>>> lock_system_sleep() and
>>>>>>>> unlock_system_sleep() which achieve the same purpose. Remove the 
>>>>>>>> host_sem
>>>>>>>> used in suspend/resume paths to make the code more readable.
>>>>>>>> 
>>>>>>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>>>>>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>>>>>>> ---
>>>>>>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>>>>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/drivers/scsi/ufs/ufshcd.c 
>>>>>>>> b/drivers/scsi/ufs/ufshcd.c
>>>>>>>> index 3695dd2..a09e4a2 100644
>>>>>>>> --- a/drivers/scsi/ufs/ufshcd.c
>>>>>>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>>>>>>> @@ -5907,6 +5907,11 @@ static void 
>>>>>>>> ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>>>>>>>> 
>>>>>>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>>>>>>  {
>>>>>>>> +    /*
>>>>>>>> +     * It is not safe to perform error handling while suspend 
>>>>>>>> or resume is
>>>>>>>> +     * in progress. Hence the lock_system_sleep() call.
>>>>>>>> +     */
>>>>>>>> +    lock_system_sleep();
>>>>>>> 
>>>>>>> It looks to me like the system takes this lock quite early, even 
>>>>>>> before
>>>>>>> freezing tasks, so if anything needs the error handler to run it 
>>>>>>> will
>>>>>>> deadlock.
>>>>>> 
>>>>>> Hi Adrian,
>>>>>> 
>>>>>> UFS/hba system suspend/resume does not invoke or call error 
>>>>>> handling in a
>>>>>> synchronous way. So, whatever UFS errors (which schedules the 
>>>>>> error handler)
>>>>>> happens during suspend/resume, error handler will just wait here 
>>>>>> till system
>>>>>> suspend/resume release the lock. Hence no worries of deadlock 
>>>>>> here.
>>>>> 
>>>>> It looks to me like the state can change to 
>>>>> UFSHCD_STATE_EH_SCHEDULED_FATAL
>>>>> and since user processes are not frozen, nor file systems sync'ed, 
>>>>> everything
>>>>> is going to deadlock.
>>>>> i.e.
>>>>> I/O is blocked waiting on error handling
>>>>> error handling is blocked waiting on lock_system_sleep()
>>>>> suspend is blocked waiting on I/O
>>>>> 
>>>> 
>>>> Hi Adrian,
>>>> 
>>>> First of all, enter_state(suspend_state_t state) uses 
>>>> mutex_trylock(&system_transition_mutex).
>>> 
>>> Yes, in the case I am outlining it gets the mutex.
>>> 
>>>> Second, even that happens, in ufshcd_queuecommand(), below logic 
>>>> will break the cycle, by
>>>> fast failing the PM request (below codes are from the code tip with 
>>>> this whole series applied).
>>> 
>>> It won't get that far because the suspend will be waiting to sync 
>>> filesystems.
>>> Filesystems will be waiting on I/O.
>>> I/O will be waiting on the error handler.
>>> The error handler will be waiting on system_transition_mutex.
>>> But system_transition_mutex is already held by PM core.
>> 
>> Hi Adrian,
>> 
>> You are right.... I missed the action of syncing filesystems...
>> 
>> Using back host_sem in suspend_prepare()/resume_complete() won't have 
>> this
>> problem of deadlock, right?
> 
> I am not sure, but what was problem that the V3 patch was fixing?
> Can you give an example?

V3 was moving host_sem from wl_system_suspend/resume() to
ufshcd_suspend_prepare()/ufshcd_resume_complete(). It is to
make sure error handling does not run concurrenly with system
PM, since error handling is recovering/clearing runtime PM
errors of all the scsi devices under hba (in patch #8). Having the
error handling doing so (in patch 8) is because runtime PM framework
may save the runtime errors of the supplier to one or more consumers (
unlike the children - parent relationship), for example if wlu resume
fails, sda and/or other scsi devices may save the resume error, then
they will be left runtime suspended permanently.

Thanks,

Can Guo.

> 
>> 
>> Thanks,
>> 
>> Can Guo.
>> 
>>> 
>>>> 
>>>>         case UFSHCD_STATE_EH_SCHEDULED_FATAL:
>>>>                 /*
>>>>                  * ufshcd_rpm_get_sync() is used at error handling 
>>>> preparation
>>>>                  * stage. If a scsi cmd, e.g., the SSU cmd, is sent 
>>>> from the
>>>>                  * PM ops, it can never be finished if we let SCSI 
>>>> layer keep
>>>>                  * retrying it, which gets err handler stuck 
>>>> forever. Neither
>>>>                  * can we let the scsi cmd pass through, because UFS 
>>>> is in bad
>>>>                  * state, the scsi cmd may eventually time out, 
>>>> which will get
>>>>                  * err handler blocked for too long. So, just fail 
>>>> the scsi cmd
>>>>                  * sent from PM ops, err handler can recover PM 
>>>> error anyways.
>>>>                  */
>>>>                 if (cmd->request->rq_flags & RQF_PM) {
>>>>                         hba->force_reset = true;
>>>>                         set_host_byte(cmd, DID_BAD_TARGET);
>>>>                         cmd->scsi_done(cmd);
>>>>                         goto out;
>>>>                 }
>>>>                 fallthrough;
>>>>         case UFSHCD_STATE_RESET:
>>>> 
>>>> Thanks,
>>>> 
>>>> Can Guo.
>>>> 
>>>>>> 
>>>>>> Thanks,
>>>>>> 
>>>>>> Can Guo.
>>>>>> 
>>>>>>> 
>>>>>>>>      ufshcd_rpm_get_sync(hba);
>>>>>>>>      if 
>>>>>>>> (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) 
>>>>>>>> ||
>>>>>>>>          hba->is_wlu_sys_suspended) {
>>>>>>>> @@ -5951,6 +5956,7 @@ static void 
>>>>>>>> ufshcd_err_handling_unprepare(struct ufs_hba *hba)
>>>>>>>>          ufshcd_clk_scaling_suspend(hba, false);
>>>>>>>>      ufshcd_clear_ua_wluns(hba);
>>>>>>>>      ufshcd_rpm_put(hba);
>>>>>>>> +    unlock_system_sleep();
>>>>>>>>  }
>>>>>>>> 
>>>>>>>>  static inline bool ufshcd_err_handling_should_stop(struct 
>>>>>>>> ufs_hba *hba)
>>>>>>>> @@ -9053,16 +9059,13 @@ static int ufshcd_wl_suspend(struct 
>>>>>>>> device *dev)
>>>>>>>>      ktime_t start = ktime_get();
>>>>>>>> 
>>>>>>>>      hba = shost_priv(sdev->host);
>>>>>>>> -    down(&hba->host_sem);
>>>>>>>> 
>>>>>>>>      if (pm_runtime_suspended(dev))
>>>>>>>>          goto out;
>>>>>>>> 
>>>>>>>>      ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
>>>>>>>> -    if (ret) {
>>>>>>>> +    if (ret)
>>>>>>>>          dev_err(&sdev->sdev_gendev, "%s failed: %d\n", 
>>>>>>>> __func__,  ret);
>>>>>>>> -        up(&hba->host_sem);
>>>>>>>> -    }
>>>>>>>> 
>>>>>>>>  out:
>>>>>>>>      if (!ret)
>>>>>>>> @@ -9095,7 +9098,6 @@ static int ufshcd_wl_resume(struct device 
>>>>>>>> *dev)
>>>>>>>>          hba->curr_dev_pwr_mode, hba->uic_link_state);
>>>>>>>>      if (!ret)
>>>>>>>>          hba->is_wlu_sys_suspended = false;
>>>>>>>> -    up(&hba->host_sem);
>>>>>>>>      return ret;
>>>>>>>>  }
>>>>>>>>  #endif
>>>>>>>> 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-28  7:01     ` Can Guo
@ 2021-06-28  7:35       ` Can Guo
  2021-06-28 17:07       ` Bart Van Assche
  1 sibling, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-28  7:35 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Andy Gross, Bjorn Andersson, Alim Akhtar, Avri Altman,
	James E.J. Bottomley, Martin K. Petersen, Stanley Chu, Bean Huo,
	Jaegeuk Kim, Adrian Hunter, Kiwoong Kim, Satya Tangirala,
	open list:ARM/QUALCOMM SUPPORT, open list

On 2021-06-28 15:01, Can Guo wrote:
> On 2021-06-25 07:42, Bart Van Assche wrote:
>> On 6/23/21 12:35 AM, Can Guo wrote:
>>> Rename pm_op_in_progress and is_sys_suspended to 
>>> wlu_pm_op_in_progress and
>>> is_wlu_sys_suspended accordingly.
>> 
>> Can the is_wlu_sys_suspended member variable be removed by checking
>> dev->power.is_suspended where dev represents the WLUN?
>> 
> 
> No, PM set dev->power.is_suspended to "false" even the device failed 
> resuming,
> while is_wlu_sys_suspended can be used to tell that.

FYI,

892 /**
893  * device_resume - Execute "resume" callbacks for given device.
894  * @dev: Device to handle.
895  * @state: PM transition of the system being carried out.
896  * @async: If true, the device is being resumed asynchronously.
897  */
898 static int device_resume(struct device *dev, pm_message_t state, 
bool async)
...
967  End:
968 	error = dpm_run_callback(callback, dev, state, info);
969 	dev->power.is_suspended = false;
...

Can Guo.

> 
> Thanks,
> 
> Can Guo.
> 
>> Thanks,
>> 
>> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-24 17:11               ` Bart Van Assche
@ 2021-06-28  8:17                 ` Can Guo
  2021-06-28 17:31                   ` Bart Van Assche
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-28  8:17 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Adrian Hunter, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi,
	kernel-team, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

Hi Bart,

On 2021-06-25 01:11, Bart Van Assche wrote:
> On 6/23/21 11:31 PM, Can Guo wrote:
>> Using back host_sem in suspend_prepare()/resume_complete() won't have 
>> this
>> problem of deadlock, right?
> 
> Although that would solve the deadlock discussed in this email thread, 
> it
> wouldn't solve the issue of potential adverse interactions of the UFS 
> error
> handler and the SCSI error handler running concurrently.

I think I've explained it before, paste it here -

ufshcd_eh_host_reset_handler() invokes ufshcd_err_handler() and flushes 
it,
so SCSI error handler and UFS error handler can safely run together.

> How about using the
> standard approach for invoking the UFS error handler instead of using a 
> custom
> mechanism, e.g. by using something like the (untested) patch below? 
> This
> approach guarantees that the UFS error handler is only activated after 
> all
> pending SCSI commands have failed or timed out and also guarantees that 
> no new
> SCSI commands will be queued while the UFS error handler is in progress 
> (see
> also scsi_host_queue_ready()).

Per my understanding, SCSI error handling is scsi cmd based, meaning it 
only
works when certain SCSI cmds failed and are added to shost->eh_cmd_q (by 
calling
scsi_eh_scmd_add(struct scsi_cmnd *scmd)).

scsi_schedule_eh() ->
   scsi_error_handler() ->
     scsi_unjam_host()

where scsi_unjam_host() may or may NOT invoke scsi_eh_ready_devs(),
and scsi_eh_ready_devs() invokes ufshcd_eh_host_reset_handler().

2140 static void scsi_unjam_host(struct Scsi_Host *shost)
2141 {
2142 	unsigned long flags;
2143 	LIST_HEAD(eh_work_q);
2144 	LIST_HEAD(eh_done_q);
2145
2146 	spin_lock_irqsave(shost->host_lock, flags);
2147 	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
2148 	spin_unlock_irqrestore(shost->host_lock, flags);
...
2152 	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
2153 		scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
...

However, most UFS (UIC) errors happens during gear scaling, clk gating
and suspend/resume (due to power mode changes and/or hibern8 
enter/exit),
during which there is NO scsi cmds in UFS driver at all (because these
contexts start only when there is no ongoing data transactions). Thus,
scsi_unjam_host() won't even call scsi_eh_ready_devs() because
scsi_eh_get_sense() always returns TRUE in these cases (eh_work_q is 
empty).

Thanks,

Can Guo.

> 
> Thanks,
> 
> Bart.
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 0ac1e15ac914..c6303ea9e5db 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -17,6 +17,8 @@
>  #include <linux/blk-pm.h>
>  #include <linux/blkdev.h>
>  #include <scsi/scsi_driver.h>
> +#include <scsi/scsi_transport.h>
> +#include "../scsi_transport_api.h"
>  #include "ufshcd.h"
>  #include "ufs_quirks.h"
>  #include "unipro.h"
> @@ -233,7 +235,6 @@ static int ufshcd_scale_clks(struct ufs_hba *hba,
> bool scale_up);
>  static irqreturn_t ufshcd_intr(int irq, void *__hba);
>  static int ufshcd_change_power_mode(struct ufs_hba *hba,
>  			     struct ufs_pa_layer_attr *pwr_mode);
> -static void ufshcd_schedule_eh_work(struct ufs_hba *hba);
>  static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
>  static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
>  static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
> @@ -2697,26 +2698,7 @@ static int ufshcd_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *cmd)
> 
>  	switch (hba->ufshcd_state) {
>  	case UFSHCD_STATE_OPERATIONAL:
> -	case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
>  		break;
> -	case UFSHCD_STATE_EH_SCHEDULED_FATAL:
> -		/*
> -		 * pm_runtime_get_sync() is used at error handling preparation
> -		 * stage. If a scsi cmd, e.g. the SSU cmd, is sent from hba's
> -		 * PM ops, it can never be finished if we let SCSI layer keep
> -		 * retrying it, which gets err handler stuck forever. Neither
> -		 * can we let the scsi cmd pass through, because UFS is in bad
> -		 * state, the scsi cmd may eventually time out, which will get
> -		 * err handler blocked for too long. So, just fail the scsi cmd
> -		 * sent from PM ops, err handler can recover PM error anyways.
> -		 */
> -		if (hba->pm_op_in_progress) {
> -			hba->force_reset = true;
> -			set_host_byte(cmd, DID_BAD_TARGET);
> -			cmd->scsi_done(cmd);
> -			goto out;
> -		}
> -		fallthrough;
>  	case UFSHCD_STATE_RESET:
>  		err = SCSI_MLQUEUE_HOST_BUSY;
>  		goto out;
> @@ -3954,6 +3936,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba
> *hba, struct uic_command *cmd)
>  	u8 status;
>  	int ret;
>  	bool reenable_intr = false;
> +	bool schedule_eh = false;
> 
>  	mutex_lock(&hba->uic_cmd_mutex);
>  	init_completion(&uic_async_done);
> @@ -4021,10 +4004,12 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba
> *hba, struct uic_command *cmd)
>  		ufshcd_enable_intr(hba, UIC_COMMAND_COMPL);
>  	if (ret) {
>  		ufshcd_set_link_broken(hba);
> -		ufshcd_schedule_eh_work(hba);
> +		schedule_eh = true;
>  	}
>  out_unlock:
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> +	if (schedule_eh)
> +		scsi_schedule_eh(hba->host);
>  	mutex_unlock(&hba->uic_cmd_mutex);
> 
>  	return ret;
> @@ -4085,8 +4070,6 @@ static bool ufshcd_set_state(struct ufs_hba
> *hba, enum ufshcd_state new_state)
>  		}
>  		break;
>  	case UFSHCD_STATE_RESET:
> -	case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
> -	case UFSHCD_STATE_EH_SCHEDULED_FATAL:
>  		allowed = hba->ufshcd_state != UFSHCD_STATE_ERROR;
>  		break;
>  	case UFSHCD_STATE_ERROR:
> @@ -5886,22 +5869,6 @@ static inline bool
> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
>  	       (hba->saved_err & (INT_FATAL_ERRORS | 
> UFSHCD_UIC_HIBERN8_MASK));
>  }
> 
> -/* host lock must be held before calling this func */
> -static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
> -{
> -	bool proceed;
> -
> -	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> -	    ufshcd_is_saved_err_fatal(hba))
> -		proceed = ufshcd_set_state(hba,
> -					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
> -	else
> -		proceed = ufshcd_set_state(hba,
> -					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
> -	if (proceed)
> -		queue_work(hba->eh_wq, &hba->eh_work);
> -}
> -
>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
>  {
>  	down_write(&hba->clk_scaling_lock);
> @@ -5924,7 +5891,7 @@ static void ufshcd_clk_scaling_suspend(struct
> ufs_hba *hba, bool suspend)
> 
>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>  {
> -	ufshcd_rpm_get_sync(hba);
> +	pm_runtime_get_noresume(&hba->sdev_ufs_device->sdev_gendev);
>  	if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) 
> ||
>  	    hba->is_sys_suspended) {
>  		enum ufs_pm_op pm_op;
> @@ -6035,11 +6002,12 @@ static bool
> ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
> 
>  /**
>   * ufshcd_err_handler - handle UFS errors that require s/w attention
> - * @work: pointer to work structure
> + * @host: SCSI host pointer
>   */
> -static void ufshcd_err_handler(struct work_struct *work)
> +static void ufshcd_err_handler(struct Scsi_Host *host)
>  {
> -	struct ufs_hba *hba;
> +	struct ufs_hba *hba = shost_priv(host);
> +	struct completion *eh_compl = NULL;
>  	unsigned long flags;
>  	bool err_xfer = false;
>  	bool err_tm = false;
> @@ -6047,10 +6015,10 @@ static void ufshcd_err_handler(struct 
> work_struct *work)
>  	int tag;
>  	bool needs_reset = false, needs_restore = false;
> 
> -	hba = container_of(work, struct ufs_hba, eh_work);
> -
>  	down(&hba->host_sem);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> +	/* Clear host_eh_scheduled which has been set by scsi_schedule_eh(). 
> */
> +	hba->host->host_eh_scheduled = 0;
>  	if (ufshcd_err_handling_should_stop(hba)) {
>  		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
> @@ -6202,9 +6170,12 @@ static void ufshcd_err_handler(struct 
> work_struct *work)
>  			    __func__, hba->saved_err, hba->saved_uic_err);
>  	}
>  	ufshcd_clear_eh_in_progress(hba);
> +	swap(hba->eh_compl, eh_compl);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
>  	ufshcd_err_handling_unprepare(hba);
>  	up(&hba->host_sem);
> +	if (eh_compl)
> +		complete(eh_compl);
>  }
> 
>  /**
> @@ -6361,7 +6332,6 @@ static irqreturn_t ufshcd_check_errors(struct
> ufs_hba *hba, u32 intr_status)
>  					 "host_regs: ");
>  			ufshcd_print_pwr_info(hba);
>  		}
> -		ufshcd_schedule_eh_work(hba);
>  		retval |= IRQ_HANDLED;
>  	}
>  	/*
> @@ -6373,6 +6343,8 @@ static irqreturn_t ufshcd_check_errors(struct
> ufs_hba *hba, u32 intr_status)
>  	hba->errors = 0;
>  	hba->uic_error = 0;
>  	spin_unlock(hba->host->host_lock);
> +	if (queue_eh_work)
> +		scsi_schedule_eh(hba->host);
>  	return retval;
>  }
> 
> @@ -7045,8 +7017,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>  		set_bit(tag, &hba->outstanding_reqs);
>  		spin_lock_irqsave(host->host_lock, flags);
>  		hba->force_reset = true;
> -		ufshcd_schedule_eh_work(hba);
>  		spin_unlock_irqrestore(host->host_lock, flags);
> +		scsi_schedule_eh(hba->host);
>  		goto out;
>  	}
> 
> @@ -7172,7 +7144,8 @@ static int ufshcd_reset_and_restore(struct 
> ufs_hba *hba)
>   */
>  static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
>  {
> -	int err = SUCCESS;
> +	DECLARE_COMPLETION_ONSTACK(eh_compl);
> +	int err = SUCCESS, res;
>  	unsigned long flags;
>  	struct ufs_hba *hba;
> 
> @@ -7180,11 +7153,15 @@ static int ufshcd_eh_host_reset_handler(struct
> scsi_cmnd *cmd)
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	hba->force_reset = true;
> -	ufshcd_schedule_eh_work(hba);
> +	hba->eh_compl = &eh_compl;
>  	dev_err(hba->dev, "%s: reset in progress - 1\n", __func__);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> -	flush_work(&hba->eh_work);
> +	scsi_schedule_eh(hba->host);
> +	res = wait_for_completion_interruptible_timeout(&eh_compl, 180 * HZ);
> +	WARN_ONCE(res <= 0,
> +		  "wait_for_completion_interruptible_timeout() returned %d",
> +		  res);
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (hba->ufshcd_state == UFSHCD_STATE_ERROR)
> @@ -8511,8 +8488,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
>  	if (hba->is_powered) {
>  		ufshcd_exit_clk_scaling(hba);
>  		ufshcd_exit_clk_gating(hba);
> -		if (hba->eh_wq)
> -			destroy_workqueue(hba->eh_wq);
>  		ufs_debugfs_hba_exit(hba);
>  		ufshcd_variant_hba_exit(hba);
>  		ufshcd_setup_vreg(hba, false);
> @@ -9371,6 +9346,10 @@ static int ufshcd_set_dma_mask(struct ufs_hba 
> *hba)
>  	return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32));
>  }
> 
> +static struct scsi_transport_template ufshcd_transport_template = {
> +	.eh_strategy_handler = ufshcd_err_handler,
> +};
> +
>  /**
>   * ufshcd_alloc_host - allocate Host Bus Adapter (HBA)
>   * @dev: pointer to device handle
> @@ -9397,6 +9376,7 @@ int ufshcd_alloc_host(struct device *dev, struct
> ufs_hba **hba_handle)
>  		err = -ENOMEM;
>  		goto out_error;
>  	}
> +	host->transportt = &ufshcd_transport_template;
>  	hba = shost_priv(host);
>  	hba->host = host;
>  	hba->dev = dev;
> @@ -9434,7 +9414,6 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
>  	int err;
>  	struct Scsi_Host *host = hba->host;
>  	struct device *dev = hba->dev;
> -	char eh_wq_name[sizeof("ufs_eh_wq_00")];
> 
>  	if (!mmio_base) {
>  		dev_err(hba->dev,
> @@ -9488,17 +9467,6 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
> 
>  	hba->max_pwr_info.is_valid = false;
> 
> -	/* Initialize work queues */
> -	snprintf(eh_wq_name, sizeof(eh_wq_name), "ufs_eh_wq_%d",
> -		 hba->host->host_no);
> -	hba->eh_wq = create_singlethread_workqueue(eh_wq_name);
> -	if (!hba->eh_wq) {
> -		dev_err(hba->dev, "%s: failed to create eh workqueue\n",
> -				__func__);
> -		err = -ENOMEM;
> -		goto out_disable;
> -	}
> -	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
>  	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
> 
>  	sema_init(&hba->host_sem, 1);
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index f2796ea25598..d4f7ab0171c6 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -482,18 +482,12 @@ struct ufs_stats {
>   *	processing.
>   * @UFSHCD_STATE_OPERATIONAL: The host controller is operational and
> can process
>   *	SCSI commands.
> - * @UFSHCD_STATE_EH_SCHEDULED_NON_FATAL: The error handler has been 
> scheduled.
> - *	SCSI commands may be submitted to the controller.
> - * @UFSHCD_STATE_EH_SCHEDULED_FATAL: The error handler has been 
> scheduled. Fail
> - *	newly submitted SCSI commands with error code DID_BAD_TARGET.
>   * @UFSHCD_STATE_ERROR: An unrecoverable error occurred, e.g. link 
> recovery
>   *	failed. Fail all SCSI commands with error code DID_ERROR.
>   */
>  enum ufshcd_state {
>  	UFSHCD_STATE_RESET,
>  	UFSHCD_STATE_OPERATIONAL,
> -	UFSHCD_STATE_EH_SCHEDULED_NON_FATAL,
> -	UFSHCD_STATE_EH_SCHEDULED_FATAL,
>  	UFSHCD_STATE_ERROR,
>  };
> 
> @@ -715,8 +709,7 @@ struct ufs_hba_monitor {
>   * @is_powered: flag to check if HBA is powered
>   * @shutting_down: flag to check if shutdown has been invoked
>   * @host_sem: semaphore used to serialize concurrent contexts
> - * @eh_wq: Workqueue that eh_work works on
> - * @eh_work: Worker to handle UFS errors that require s/w attention
> + * @eh_compl: Signaled by the UFS error handler after error handling 
> finished.
>   * @eeh_work: Worker to handle exception events
>   * @errors: HBA errors
>   * @uic_error: UFS interconnect layer error status
> @@ -817,9 +810,7 @@ struct ufs_hba {
>  	bool shutting_down;
>  	struct semaphore host_sem;
> 
> -	/* Work Queues */
> -	struct workqueue_struct *eh_wq;
> -	struct work_struct eh_work;
> +	struct completion *eh_compl;
>  	struct work_struct eeh_work;
> 
>  	/* HBA Errors */

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended
  2021-06-28  7:01     ` Can Guo
  2021-06-28  7:35       ` Can Guo
@ 2021-06-28 17:07       ` Bart Van Assche
  1 sibling, 0 replies; 58+ messages in thread
From: Bart Van Assche @ 2021-06-28 17:07 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Can Guo, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi,
	kernel-team, Andy Gross, Bjorn Andersson, Alim Akhtar,
	Avri Altman, James E.J. Bottomley, Martin K. Petersen,
	Stanley Chu, Bean Huo, Jaegeuk Kim, Adrian Hunter, Kiwoong Kim,
	Satya Tangirala, open list:ARM/QUALCOMM SUPPORT, open list

On 6/28/21 12:01 AM, Can Guo wrote:
> On 2021-06-25 07:42, Bart Van Assche wrote:
>> On 6/23/21 12:35 AM, Can Guo wrote:
>>> Rename pm_op_in_progress and is_sys_suspended to
>>> wlu_pm_op_in_progress and
>>> is_wlu_sys_suspended accordingly.
>>
>> Can the is_wlu_sys_suspended member variable be removed by checking
>> dev->power.is_suspended where dev represents the WLUN?
>>
> 
> No, PM set dev->power.is_suspended to "false" even the device failed
> resuming,
> while is_wlu_sys_suspended can be used to tell that.

(+Rafael)

Hi Rafael,

In drivers/base/power/main.c we found the following code:

 End:
	error = dpm_run_callback(callback, dev, state, info);
	dev->power.is_suspended = false;

Is it a bug or a feature that dev->power.is_suspended is set to false if
dpm_run_callback() fails? I'm asking this because only clearing
dev->power.is_suspended if dpm_run_callback() returns 0 would allow to
simplify the UFS driver. It can happen for UFS devices that runtime
resume fails and if this fails we need to track this.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-28  8:17                 ` Can Guo
@ 2021-06-28 17:31                   ` Bart Van Assche
  2021-06-29  6:23                     ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-28 17:31 UTC (permalink / raw)
  To: Can Guo
  Cc: Adrian Hunter, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi,
	kernel-team, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/28/21 1:17 AM, Can Guo wrote:
> On 2021-06-25 01:11, Bart Van Assche wrote:
>> On 6/23/21 11:31 PM, Can Guo wrote:
>>> Using back host_sem in suspend_prepare()/resume_complete() won't have
>>> this problem of deadlock, right?
>>
>> Although that would solve the deadlock discussed in this email thread, it
>> wouldn't solve the issue of potential adverse interactions of the UFS
>> error handler and the SCSI error handler running concurrently.
> 
> I think I've explained it before, paste it here -
> 
> ufshcd_eh_host_reset_handler() invokes ufshcd_err_handler() and flushes it,
> so SCSI error handler and UFS error handler can safely run together.

That code path is the exception. Do you agree that the following three
functions all invoke the ufshcd_err_handler() function asynchronously?
* ufshcd_uic_pwr_ctrl()
* ufshcd_check_errors()
* ufshcd_abort()

>> How about using the
>> standard approach for invoking the UFS error handler instead of using
>> a custom
>> mechanism, e.g. by using something like the (untested) patch below? This
>> approach guarantees that the UFS error handler is only activated after
>> all
>> pending SCSI commands have failed or timed out and also guarantees
>> that no new
>> SCSI commands will be queued while the UFS error handler is in
>> progress (see
>> also scsi_host_queue_ready()).
> 
> Per my understanding, SCSI error handling is scsi cmd based, meaning it
> only works when certain SCSI cmds failed [ ... ]
That is not completely correct. The SCSI error handler is activated if
either all pending commands have failed or if it is scheduled
explicitly. Please take a look at the host_eh_scheduled member variable,
how it is used and also at scsi_schedule_eh(). The scsi_schedule_eh()
function was introduced in 2006 and that the ATA code uses it since then
to activate the SCSI error handler even if no commands are pending. See
also the patch "SCSI: make scsi_implement_eh() generic API for SCSI
transports".

> However, most UFS (UIC) errors happens during gear scaling, clk gating
> and suspend/resume (due to power mode changes and/or hibern8
> enter/exit), during which there is NO scsi cmds in UFS driver at all
> (because these contexts start only when there is no ongoing data
> transactions).

Activating the SCSI error handler if no SCSI commands are in progress is
supported by scsi_schedule_eh().

> Thus, scsi_unjam_host() won't even call scsi_eh_ready_devs() because
> scsi_eh_get_sense() always returns TRUE in these cases (eh_work_q is
> empty).

Please take another look at the patch in my previous message. There is a
scsi_transport_template instance in that patch. The eh_strategy_handler
defined in a SCSI transport template is called *instead* of
scsi_unjam_host(). In other words, scsi_unjam_host() won't be called if
my patch would be applied to the UFS driver.

Please let me know if you need more information.

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-28 17:31                   ` Bart Van Assche
@ 2021-06-29  6:23                     ` Can Guo
  2021-06-29 18:01                       ` Bart Van Assche
  0 siblings, 1 reply; 58+ messages in thread
From: Can Guo @ 2021-06-29  6:23 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Adrian Hunter, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi,
	kernel-team, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-29 01:31, Bart Van Assche wrote:
> On 6/28/21 1:17 AM, Can Guo wrote:
>> On 2021-06-25 01:11, Bart Van Assche wrote:
>>> On 6/23/21 11:31 PM, Can Guo wrote:
>>>> Using back host_sem in suspend_prepare()/resume_complete() won't 
>>>> have
>>>> this problem of deadlock, right?
>>> 
>>> Although that would solve the deadlock discussed in this email 
>>> thread, it
>>> wouldn't solve the issue of potential adverse interactions of the UFS
>>> error handler and the SCSI error handler running concurrently.
>> 
>> I think I've explained it before, paste it here -
>> 
>> ufshcd_eh_host_reset_handler() invokes ufshcd_err_handler() and 
>> flushes it,
>> so SCSI error handler and UFS error handler can safely run together.
> 
> That code path is the exception. Do you agree that the following three
> functions all invoke the ufshcd_err_handler() function asynchronously?
> * ufshcd_uic_pwr_ctrl()
> * ufshcd_check_errors()
> * ufshcd_abort()
> 

I agree, but I don't see what's wrong with that. Any context can invoke
ufs error handler asynchronously and ufs error handler prepare makes
sure error handler can work safely, i.e., stopping PM ops/gating/scaling
in error handler prepare makes sure no one shall call 
ufshcd_uic_pwr_ctrl()
ever again. And ufshcd_check_errors() and ufshcd_abort() are OK to run
concurrently with UFS error handler.

>>> How about using the
>>> standard approach for invoking the UFS error handler instead of using
>>> a custom
>>> mechanism, e.g. by using something like the (untested) patch below? 
>>> This
>>> approach guarantees that the UFS error handler is only activated 
>>> after
>>> all
>>> pending SCSI commands have failed or timed out and also guarantees
>>> that no new
>>> SCSI commands will be queued while the UFS error handler is in
>>> progress (see
>>> also scsi_host_queue_ready()).
>> 
>> Per my understanding, SCSI error handling is scsi cmd based, meaning 
>> it
>> only works when certain SCSI cmds failed [ ... ]
> That is not completely correct. The SCSI error handler is activated if
> either all pending commands have failed or if it is scheduled
> explicitly. Please take a look at the host_eh_scheduled member 
> variable,
> how it is used and also at scsi_schedule_eh(). The scsi_schedule_eh()
> function was introduced in 2006 and that the ATA code uses it since 
> then
> to activate the SCSI error handler even if no commands are pending. See
> also the patch "SCSI: make scsi_implement_eh() generic API for SCSI
> transports".
> 
>> However, most UFS (UIC) errors happens during gear scaling, clk gating
>> and suspend/resume (due to power mode changes and/or hibern8
>> enter/exit), during which there is NO scsi cmds in UFS driver at all
>> (because these contexts start only when there is no ongoing data
>> transactions).
> 
> Activating the SCSI error handler if no SCSI commands are in progress 
> is
> supported by scsi_schedule_eh().
> 
>> Thus, scsi_unjam_host() won't even call scsi_eh_ready_devs() because
>> scsi_eh_get_sense() always returns TRUE in these cases (eh_work_q is
>> empty).
> 
> Please take another look at the patch in my previous message. There is 
> a
> scsi_transport_template instance in that patch. The eh_strategy_handler
> defined in a SCSI transport template is called *instead* of
> scsi_unjam_host(). In other words, scsi_unjam_host() won't be called if
> my patch would be applied to the UFS driver.
> 
> Please let me know if you need more information.

Sorry that I missed the change of scsi_transport_template() in your 
previous
message. I can understand that you want to invoke UFS error hander by 
invoking
SCSI error handler, but I didn't go that far because I saw you changed
pm_runtime_get_sync() to pm_runtime_get_noresume() in ufs error handler 
prepare.
How can that change make sure that the device is not suspending or 
resuming
while error handler is running?

Thanks,

Can Guo.

> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-29  6:23                     ` Can Guo
@ 2021-06-29 18:01                       ` Bart Van Assche
  2021-06-29 21:50                         ` Can Guo
  0 siblings, 1 reply; 58+ messages in thread
From: Bart Van Assche @ 2021-06-29 18:01 UTC (permalink / raw)
  To: Can Guo
  Cc: Adrian Hunter, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi,
	kernel-team, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 6/28/21 11:23 PM, Can Guo wrote:
> On 2021-06-29 01:31, Bart Van Assche wrote:
>> On 6/28/21 1:17 AM, Can Guo wrote:
>>> On 2021-06-25 01:11, Bart Van Assche wrote:
>>>> On 6/23/21 11:31 PM, Can Guo wrote:
>>>>> Using back host_sem in suspend_prepare()/resume_complete()
>>>>> won't have this problem of deadlock, right?
>>>> 
>>>> Although that would solve the deadlock discussed in this email 
>>>> thread, it wouldn't solve the issue of potential adverse
>>>> interactions of the UFS error handler and the SCSI error
>>>> handler running concurrently.
>>> 
>>> I think I've explained it before, paste it here -
>>> 
>>> ufshcd_eh_host_reset_handler() invokes ufshcd_err_handler() and 
>>> flushes it, so SCSI error handler and UFS error handler can
>>> safely run together.
>> 
>> That code path is the exception. Do you agree that the following
>> three functions all invoke the ufshcd_err_handler() function
>> asynchronously? * ufshcd_uic_pwr_ctrl() * ufshcd_check_errors() *
>> ufshcd_abort()
> 
> I agree, but I don't see what's wrong with that. Any context can
> invoke ufs error handler asynchronously and ufs error handler prepare
> makes sure error handler can work safely, i.e., stopping PM
> ops/gating/scaling in error handler prepare makes sure no one shall
> call ufshcd_uic_pwr_ctrl() ever again. And ufshcd_check_errors() and
> ufshcd_abort() are OK to run concurrently with UFS error handler.

The current UFS error handling approach requires the following code in
ufshcd_queuecommand():

		if (hba->pm_op_in_progress) {
			hba->force_reset = true;
			set_host_byte(cmd, DID_BAD_TARGET);
			cmd->scsi_done(cmd);
			goto out;
		}

Removing that code is not possible with the current error handling
approach. My patch makes it possible to remove that code.

> Sorry that I missed the change of scsi_transport_template() in your 
> previous message. I can understand that you want to invoke UFS error
> hander by invoking SCSI error handler, but I didn't go that far
> because I saw you changed pm_runtime_get_sync() to
> pm_runtime_get_noresume() in ufs error handler prepare. How can that
> change make sure that the device is not suspending or resuming while
> error handler is running?

UFS power state transitions happen by submitting a SCSI command to a
WLUN. The SCSI error handler is only activated after all outstanding
SCSI commands for a SCSI host have failed or completed. I think this
guarantees for the UFS driver that eh_strategy_handler is not invoked
while a command submitted to a WLUN is changing the power state of the
UFS device. The following code from scsi_error.c only wakes up the error
handler if (shost->host_failed || shost->host_eh_scheduled) &&
shost->host_failed == scsi_host_busy(shost):

	if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0)
	    || shost->host_failed != scsi_host_busy(shost)) {
		schedule();
		continue;
	}
	/* Handle SCSI errors */

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-29 18:01                       ` Bart Van Assche
@ 2021-06-29 21:50                         ` Can Guo
  0 siblings, 0 replies; 58+ messages in thread
From: Can Guo @ 2021-06-29 21:50 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Adrian Hunter, asutoshd, nguyenb, hongwus, ziqichen, linux-scsi,
	kernel-team, Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 2021-06-30 02:01, Bart Van Assche wrote:
> On 6/28/21 11:23 PM, Can Guo wrote:
>> On 2021-06-29 01:31, Bart Van Assche wrote:
>>> On 6/28/21 1:17 AM, Can Guo wrote:
>>>> On 2021-06-25 01:11, Bart Van Assche wrote:
>>>>> On 6/23/21 11:31 PM, Can Guo wrote:
>>>>>> Using back host_sem in suspend_prepare()/resume_complete()
>>>>>> won't have this problem of deadlock, right?
>>>>> 
>>>>> Although that would solve the deadlock discussed in this email
>>>>> thread, it wouldn't solve the issue of potential adverse
>>>>> interactions of the UFS error handler and the SCSI error
>>>>> handler running concurrently.
>>>> 
>>>> I think I've explained it before, paste it here -
>>>> 
>>>> ufshcd_eh_host_reset_handler() invokes ufshcd_err_handler() and
>>>> flushes it, so SCSI error handler and UFS error handler can
>>>> safely run together.
>>> 
>>> That code path is the exception. Do you agree that the following
>>> three functions all invoke the ufshcd_err_handler() function
>>> asynchronously? * ufshcd_uic_pwr_ctrl() * ufshcd_check_errors() *
>>> ufshcd_abort()
>> 
>> I agree, but I don't see what's wrong with that. Any context can
>> invoke ufs error handler asynchronously and ufs error handler prepare
>> makes sure error handler can work safely, i.e., stopping PM
>> ops/gating/scaling in error handler prepare makes sure no one shall
>> call ufshcd_uic_pwr_ctrl() ever again. And ufshcd_check_errors() and
>> ufshcd_abort() are OK to run concurrently with UFS error handler.
> 
> The current UFS error handling approach requires the following code in
> ufshcd_queuecommand():
> 
> 		if (hba->pm_op_in_progress) {
> 			hba->force_reset = true;
> 			set_host_byte(cmd, DID_BAD_TARGET);
> 			cmd->scsi_done(cmd);
> 			goto out;
> 		}
> 
> Removing that code is not possible with the current error handling
> approach. My patch makes it possible to remove that code.
> 
>> Sorry that I missed the change of scsi_transport_template() in your
>> previous message. I can understand that you want to invoke UFS error
>> hander by invoking SCSI error handler, but I didn't go that far
>> because I saw you changed pm_runtime_get_sync() to
>> pm_runtime_get_noresume() in ufs error handler prepare. How can that
>> change make sure that the device is not suspending or resuming while
>> error handler is running?
> 
> UFS power state transitions happen by submitting a SCSI command to a
> WLUN. The SCSI error handler is only activated after all outstanding
> SCSI commands for a SCSI host have failed or completed. I think this
> guarantees for the UFS driver that eh_strategy_handler is not invoked
> while a command submitted to a WLUN is changing the power state of the
> UFS device. The following code from scsi_error.c only wakes up the 
> error
> handler if (shost->host_failed || shost->host_eh_scheduled) &&
> shost->host_failed == scsi_host_busy(shost):
> 
> 	if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0)
> 	    || shost->host_failed != scsi_host_busy(shost)) {
> 		schedule();
> 		continue;
> 	}
> 	/* Handle SCSI errors */
> 

It is not completely right - wl_suspend/resume() are much more twisted.

wl_suspend() may or may NOT send a SCSI cmd to WLUN, i.e., SSU cmd may
be skipped if spm/rpm_lvl is 0/1 and/or if bkops/wb is on-going (even 
when
rpm_lvl is not 0/1), while link can still be put to hibern8/off, then
power/clks can still be shutdown to save power.

wl_resume(), in case of rpm/spm_lvl == 5, does a full reset to UFS 
device,
without sending a SSU cmd to WLU to complete the power state transition.

So above checks (in scsi_error_handler()) cannot gaurantee that actual
power state transistions in UFS driver has ceased before start UFS error
handling.

Thanks,

Can Guo.

> Thanks,
> 
> Bart.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume
  2021-06-28  7:26                 ` Can Guo
@ 2021-07-07 19:04                   ` Adrian Hunter
  0 siblings, 0 replies; 58+ messages in thread
From: Adrian Hunter @ 2021-07-07 19:04 UTC (permalink / raw)
  To: Can Guo
  Cc: asutoshd, nguyenb, hongwus, ziqichen, linux-scsi, kernel-team,
	Alim Akhtar, Avri Altman, James E.J. Bottomley,
	Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
	open list

On 28/06/21 10:26 am, Can Guo wrote:
> On 2021-06-24 18:04, Adrian Hunter wrote:
>> On 24/06/21 9:31 am, Can Guo wrote:
>>> On 2021-06-24 14:23, Adrian Hunter wrote:
>>>> On 24/06/21 9:12 am, Can Guo wrote:
>>>>> On 2021-06-24 13:52, Adrian Hunter wrote:
>>>>>> On 24/06/21 5:16 am, Can Guo wrote:
>>>>>>> On 2021-06-23 22:30, Adrian Hunter wrote:
>>>>>>>> On 23/06/21 10:35 am, Can Guo wrote:
>>>>>>>>> To protect system suspend/resume from being disturbed by error handling,
>>>>>>>>> instead of using host_sem, let error handler call lock_system_sleep() and
>>>>>>>>> unlock_system_sleep() which achieve the same purpose. Remove the host_sem
>>>>>>>>> used in suspend/resume paths to make the code more readable.
>>>>>>>>>
>>>>>>>>> Suggested-by: Bart Van Assche <bvanassche@acm.org>
>>>>>>>>> Signed-off-by: Can Guo <cang@codeaurora.org>
>>>>>>>>> ---
>>>>>>>>>  drivers/scsi/ufs/ufshcd.c | 12 +++++++-----
>>>>>>>>>  1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>>>>>>>> index 3695dd2..a09e4a2 100644
>>>>>>>>> --- a/drivers/scsi/ufs/ufshcd.c
>>>>>>>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>>>>>>>> @@ -5907,6 +5907,11 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
>>>>>>>>>
>>>>>>>>>  static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
>>>>>>>>>  {
>>>>>>>>> +    /*
>>>>>>>>> +     * It is not safe to perform error handling while suspend or resume is
>>>>>>>>> +     * in progress. Hence the lock_system_sleep() call.
>>>>>>>>> +     */
>>>>>>>>> +    lock_system_sleep();
>>>>>>>>
>>>>>>>> It looks to me like the system takes this lock quite early, even before
>>>>>>>> freezing tasks, so if anything needs the error handler to run it will
>>>>>>>> deadlock.
>>>>>>>
>>>>>>> Hi Adrian,
>>>>>>>
>>>>>>> UFS/hba system suspend/resume does not invoke or call error handling in a
>>>>>>> synchronous way. So, whatever UFS errors (which schedules the error handler)
>>>>>>> happens during suspend/resume, error handler will just wait here till system
>>>>>>> suspend/resume release the lock. Hence no worries of deadlock here.
>>>>>>
>>>>>> It looks to me like the state can change to UFSHCD_STATE_EH_SCHEDULED_FATAL
>>>>>> and since user processes are not frozen, nor file systems sync'ed, everything
>>>>>> is going to deadlock.
>>>>>> i.e.
>>>>>> I/O is blocked waiting on error handling
>>>>>> error handling is blocked waiting on lock_system_sleep()
>>>>>> suspend is blocked waiting on I/O
>>>>>>
>>>>>
>>>>> Hi Adrian,
>>>>>
>>>>> First of all, enter_state(suspend_state_t state) uses mutex_trylock(&system_transition_mutex).
>>>>
>>>> Yes, in the case I am outlining it gets the mutex.
>>>>
>>>>> Second, even that happens, in ufshcd_queuecommand(), below logic will break the cycle, by
>>>>> fast failing the PM request (below codes are from the code tip with this whole series applied).
>>>>
>>>> It won't get that far because the suspend will be waiting to sync filesystems.
>>>> Filesystems will be waiting on I/O.
>>>> I/O will be waiting on the error handler.
>>>> The error handler will be waiting on system_transition_mutex.
>>>> But system_transition_mutex is already held by PM core.
>>>
>>> Hi Adrian,
>>>
>>> You are right.... I missed the action of syncing filesystems...
>>>
>>> Using back host_sem in suspend_prepare()/resume_complete() won't have this
>>> problem of deadlock, right?
>>
>> I am not sure, but what was problem that the V3 patch was fixing?
>> Can you give an example?
> 
> V3 was moving host_sem from wl_system_suspend/resume() to
> ufshcd_suspend_prepare()/ufshcd_resume_complete(). It is to
> make sure error handling does not run concurrenly with system
> PM, since error handling is recovering/clearing runtime PM
> errors of all the scsi devices under hba (in patch #8). Having the
> error handling doing so (in patch 8) is because runtime PM framework
> may save the runtime errors of the supplier to one or more consumers (
> unlike the children - parent relationship), for example if wlu resume
> fails, sda and/or other scsi devices may save the resume error, then
> they will be left runtime suspended permanently.

Sorry for the slow reply.  I was going to do some more investigation but
never found time.

I was wondering if it would be simpler to do the error recovery for
wl_system_suspend/resume() before exiting wl_system_suspend/resume().

Then it would be possible to do something along the lines:
	- prevent runtime suspend while the error handler is outstanding
	- at suspend, block queuing of the error handler work and flush it
	- at resume, allow queuing of the error handler work

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2021-07-07 19:04 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1624433711-9339-1-git-send-email-cang@codeaurora.org>
2021-06-23  7:35 ` [PATCH v4 01/10] scsi: ufs: Rename flags pm_op_in_progress and is_sys_suspended Can Guo
2021-06-23 20:05   ` Bart Van Assche
2021-06-23 20:57     ` Bart Van Assche
2021-06-24  2:02       ` Can Guo
2021-06-24  2:34         ` Can Guo
2021-06-24  6:04         ` Adrian Hunter
2021-06-23 20:42   ` Bjorn Andersson
2021-06-23 22:41     ` Bart Van Assche
2021-06-24  2:04     ` Can Guo
2021-06-24 17:32   ` Bart Van Assche
2021-06-24 23:42   ` Bart Van Assche
2021-06-28  7:01     ` Can Guo
2021-06-28  7:35       ` Can Guo
2021-06-28 17:07       ` Bart Van Assche
2021-06-23  7:35 ` [PATCH v4 02/10] scsi: ufs: Add " Can Guo
2021-06-23 12:33   ` Adrian Hunter
2021-06-24  2:05     ` Can Guo
2021-06-23 20:59   ` Bart Van Assche
2021-06-24  2:07     ` Can Guo
2021-06-24 17:35   ` Bart Van Assche
2021-06-28  7:11     ` Can Guo
2021-06-23  7:35 ` [PATCH v4 03/10] scsi: ufs: Update the return value of supplier pm ops Can Guo
2021-06-23 21:08   ` Bart Van Assche
2021-06-24  2:11     ` Can Guo
2021-06-23  7:35 ` [PATCH v4 04/10] scsi: ufs: Enable IRQ after enabling clocks in error handling preparation Can Guo
2021-06-23 21:20   ` Bart Van Assche
2021-06-23  7:35 ` [PATCH 05/10] scsi: ufs: Complete the cmd before returning in queuecommand Can Guo
2021-06-23  7:39   ` Can Guo
2021-06-23  7:35 ` [PATCH v4 05/10] scsi: ufs: Remove a redundant tag check in ufshcd_queuecommand() Can Guo
2021-06-23 21:24   ` Bart Van Assche
2021-06-23  7:35 ` [PATCH v4 06/10] scsi: ufs: Remove host_sem used in suspend/resume Can Guo
2021-06-23 14:30   ` Adrian Hunter
2021-06-24  2:16     ` Can Guo
2021-06-24  5:52       ` Adrian Hunter
2021-06-24  6:12         ` Can Guo
2021-06-24  6:23           ` Adrian Hunter
2021-06-24  6:31             ` Can Guo
2021-06-24 10:04               ` Adrian Hunter
2021-06-28  7:26                 ` Can Guo
2021-07-07 19:04                   ` Adrian Hunter
2021-06-24 17:11               ` Bart Van Assche
2021-06-28  8:17                 ` Can Guo
2021-06-28 17:31                   ` Bart Van Assche
2021-06-29  6:23                     ` Can Guo
2021-06-29 18:01                       ` Bart Van Assche
2021-06-29 21:50                         ` Can Guo
2021-06-23  7:35 ` [PATCH v4 07/10] scsi: ufs: Simplify error handling preparation Can Guo
2021-06-23 21:30   ` Bart Van Assche
2021-06-23  7:35 ` [PATCH v4 08/10] scsi: ufs: Update ufshcd_recover_pm_error() Can Guo
2021-06-23  7:35 ` [PATCH v4 09/10] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests Can Guo
2021-06-23 21:33   ` Bart Van Assche
2021-06-24  4:16     ` Can Guo
2021-06-24 16:57       ` Bart Van Assche
2021-06-23  7:35 ` [PATCH v4 10/10] scsi: ufs: Apply more limitations to user access Can Guo
2021-06-23 21:51   ` Bart Van Assche
2021-06-24  2:23     ` Can Guo
2021-06-24 22:25       ` Bart Van Assche
2021-06-28  7:16         ` Can Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).