* [PATCH v2 1/6] scsi: ufs: Differentiate status between hba pm ops and wl pm ops
[not found] <1621846046-22204-1-git-send-email-cang@codeaurora.org>
@ 2021-05-24 8:47 ` Can Guo
2021-05-24 8:47 ` [PATCH v2 2/6] scsi: ufs: Update the return value of supplier " Can Guo
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Can Guo @ 2021-05-24 8:47 UTC (permalink / raw)
To: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, cang
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
Adrian Hunter, Kiwoong Kim, Satya Tangirala, open list
Put pm_op_in_progress and is_sys_suspend flags back to ufshcd hba pm ops,
add two new flags, namely wl_pm_op_in_progress and is_wl_sys_suspended, to
track the UFS device W-LU pm ops. This helps us differentiate the status of
hba and wl pm ops when we need to do troubleshooting.
Signed-off-by: Can Guo <cang@codeaurora.org>
---
drivers/scsi/ufs/ufshcd.c | 42 ++++++++++++++++++++++++++++--------------
drivers/scsi/ufs/ufshcd.h | 4 +++-
2 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 2b7ad26..3836a0a 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -543,7 +543,9 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
hba->saved_err, hba->saved_uic_err);
dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n",
hba->curr_dev_pwr_mode, hba->uic_link_state);
- dev_err(hba->dev, "PM in progress=%d, sys. suspended=%d\n",
+ dev_err(hba->dev, "wl_pm_op_in_progress=%d, is_wl_sys_suspended=%d\n",
+ hba->wl_pm_op_in_progress, hba->is_wl_sys_suspended);
+ dev_err(hba->dev, "pm_op_in_progress=%d, is_sys_suspended=%d\n",
hba->pm_op_in_progress, hba->is_sys_suspended);
dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
hba->auto_bkops_enabled, hba->host->host_self_blocked);
@@ -1993,7 +1995,7 @@ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba)
if (!hba->clk_scaling.active_reqs++)
queue_resume_work = true;
- if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) {
+ if (!hba->clk_scaling.is_enabled || hba->wl_pm_op_in_progress) {
spin_unlock_irqrestore(hba->host->host_lock, flags);
return;
}
@@ -2728,7 +2730,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
* err handler blocked for too long. So, just fail the scsi cmd
* sent from PM ops, err handler can recover PM error anyways.
*/
- if (hba->pm_op_in_progress) {
+ if (hba->wl_pm_op_in_progress) {
hba->force_reset = true;
set_host_byte(cmd, DID_BAD_TARGET);
cmd->scsi_done(cmd);
@@ -2761,7 +2763,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
(hba->clk_gating.state != CLKS_ON));
if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
- if (hba->pm_op_in_progress)
+ if (hba->wl_pm_op_in_progress)
set_host_byte(cmd, DID_BAD_TARGET);
else
err = SCSI_MLQUEUE_HOST_BUSY;
@@ -5110,7 +5112,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
* solution could be to abort the system suspend if
* UFS device needs urgent BKOPs.
*/
- if (!hba->pm_op_in_progress &&
+ if (!hba->wl_pm_op_in_progress &&
!ufshcd_eh_in_progress(hba) &&
ufshcd_is_exception_event(lrbp->ucd_rsp_ptr))
/* Flushed in suspend */
@@ -5910,7 +5912,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
{
ufshcd_rpm_get_sync(hba);
if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
- hba->is_sys_suspended) {
+ hba->is_wl_sys_suspended) {
enum ufs_pm_op pm_op;
/*
@@ -5927,7 +5929,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
if (!ufshcd_is_clkgating_allowed(hba))
ufshcd_setup_clocks(hba, true);
ufshcd_release(hba);
- pm_op = hba->is_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
+ pm_op = hba->is_wl_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
ufshcd_vops_resume(hba, pm_op);
} else {
ufshcd_hold(hba, false);
@@ -5970,7 +5972,7 @@ static void ufshcd_recover_pm_error(struct ufs_hba *hba)
struct request_queue *q;
int ret;
- hba->is_sys_suspended = false;
+ hba->is_wl_sys_suspended = false;
/*
* Set RPM status of wlun device to RPM_ACTIVE,
* this also clears its runtime error.
@@ -8774,7 +8776,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
enum ufs_dev_pwr_mode req_dev_pwr_mode;
enum uic_link_state req_link_state;
- hba->pm_op_in_progress = true;
+ hba->wl_pm_op_in_progress = true;
if (pm_op != UFS_SHUTDOWN_PM) {
pm_lvl = pm_op == UFS_RUNTIME_PM ?
hba->rpm_lvl : hba->spm_lvl;
@@ -8909,7 +8911,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
hba->clk_gating.is_suspended = false;
ufshcd_release(hba);
}
- hba->pm_op_in_progress = false;
+ hba->wl_pm_op_in_progress = false;
return ret;
}
@@ -8918,7 +8920,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
int ret;
enum uic_link_state old_link_state = hba->uic_link_state;
- hba->pm_op_in_progress = true;
+ hba->wl_pm_op_in_progress = true;
/*
* Call vendor specific resume callback. As these callbacks may access
@@ -8996,7 +8998,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret);
hba->clk_gating.is_suspended = false;
ufshcd_release(hba);
- hba->pm_op_in_progress = false;
+ hba->wl_pm_op_in_progress = false;
return ret;
}
@@ -9062,7 +9064,7 @@ static int ufshcd_wl_suspend(struct device *dev)
out:
if (!ret)
- hba->is_sys_suspended = true;
+ hba->is_wl_sys_suspended = true;
trace_ufshcd_wl_suspend(dev_name(dev), ret,
ktime_to_us(ktime_sub(ktime_get(), start)),
hba->curr_dev_pwr_mode, hba->uic_link_state);
@@ -9090,7 +9092,7 @@ static int ufshcd_wl_resume(struct device *dev)
ktime_to_us(ktime_sub(ktime_get(), start)),
hba->curr_dev_pwr_mode, hba->uic_link_state);
if (!ret)
- hba->is_sys_suspended = false;
+ hba->is_wl_sys_suspended = false;
up(&hba->host_sem);
return ret;
}
@@ -9132,6 +9134,8 @@ static int ufshcd_suspend(struct ufs_hba *hba)
if (!hba->is_powered)
return 0;
+
+ hba->pm_op_in_progress = true;
/*
* Disable the host irq as host controller as there won't be any
* host controller transaction expected till resume.
@@ -9151,6 +9155,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
ufshcd_vreg_set_lpm(hba);
/* Put the host controller in low power mode if possible */
ufshcd_hba_vreg_set_lpm(hba);
+ hba->pm_op_in_progress = false;
return ret;
}
@@ -9171,6 +9176,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
if (!hba->is_powered)
return 0;
+ hba->pm_op_in_progress = true;
ufshcd_hba_vreg_set_hpm(hba);
ret = ufshcd_vreg_set_hpm(hba);
if (ret)
@@ -9190,6 +9196,7 @@ static int ufshcd_resume(struct ufs_hba *hba)
out:
if (ret)
ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret);
+ hba->pm_op_in_progress = false;
return ret;
}
@@ -9215,6 +9222,10 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
trace_ufshcd_system_suspend(dev_name(hba->dev), ret,
ktime_to_us(ktime_sub(ktime_get(), start)),
hba->curr_dev_pwr_mode, hba->uic_link_state);
+
+ if (!ret)
+ hba->is_sys_suspended = true;
+
return ret;
}
EXPORT_SYMBOL(ufshcd_system_suspend);
@@ -9241,6 +9252,9 @@ int ufshcd_system_resume(struct ufs_hba *hba)
ktime_to_us(ktime_sub(ktime_get(), start)),
hba->curr_dev_pwr_mode, hba->uic_link_state);
+ if (!ret)
+ hba->is_sys_suspended = false;
+
return ret;
}
EXPORT_SYMBOL(ufshcd_system_resume);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index d5325e8..fe7ee30 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -752,7 +752,8 @@ struct ufs_hba {
enum ufs_pm_level spm_lvl;
struct device_attribute rpm_lvl_attr;
struct device_attribute spm_lvl_attr;
- int pm_op_in_progress;
+ bool pm_op_in_progress;
+ bool wl_pm_op_in_progress;
/* Auto-Hibernate Idle Timer register value */
u32 ahit;
@@ -839,6 +840,7 @@ struct ufs_hba {
struct devfreq *devfreq;
struct ufs_clk_scaling clk_scaling;
bool is_sys_suspended;
+ bool is_wl_sys_suspended;
enum bkops_status urgent_bkops_lvl;
bool is_urgent_bkops_lvl_checked;
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 2/6] scsi: ufs: Update the return value of supplier pm ops
[not found] <1621846046-22204-1-git-send-email-cang@codeaurora.org>
2021-05-24 8:47 ` [PATCH v2 1/6] scsi: ufs: Differentiate status between hba pm ops and wl pm ops Can Guo
@ 2021-05-24 8:47 ` Can Guo
2021-05-24 8:47 ` [PATCH v2 3/6] scsi: ufs: Simplify error handling preparation Can Guo
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Can Guo @ 2021-05-24 8:47 UTC (permalink / raw)
To: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, cang
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
open list
rpm_get_suppliers() is returning an error only if the error is negative.
However, ufshcd_wl_resume() may return a positive error code, e.g., when
hibern8 or SSU cmd fails. Make the positive return value a negative error
code so that consumers are aware of any resume failure from their supplier.
Make the same change to ufshcd_wl_suspend() just to keep symmetry.
Signed-off-by: Can Guo <cang@codeaurora.org>
---
drivers/scsi/ufs/ufshcd.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 3836a0a..809d0cb 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -8912,7 +8912,7 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
ufshcd_release(hba);
}
hba->wl_pm_op_in_progress = false;
- return ret;
+ return ret <= 0 ? ret : -EINVAL;
}
static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
@@ -8999,7 +8999,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
hba->clk_gating.is_suspended = false;
ufshcd_release(hba);
hba->wl_pm_op_in_progress = false;
- return ret;
+ return ret <= 0 ? ret : -EINVAL;
}
static int ufshcd_wl_runtime_suspend(struct device *dev)
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 3/6] scsi: ufs: Simplify error handling preparation
[not found] <1621846046-22204-1-git-send-email-cang@codeaurora.org>
2021-05-24 8:47 ` [PATCH v2 1/6] scsi: ufs: Differentiate status between hba pm ops and wl pm ops Can Guo
2021-05-24 8:47 ` [PATCH v2 2/6] scsi: ufs: Update the return value of supplier " Can Guo
@ 2021-05-24 8:47 ` Can Guo
2021-05-24 8:47 ` [PATCH v2 4/6] scsi: ufs: Update ufshcd_recover_pm_error() Can Guo
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Can Guo @ 2021-05-24 8:47 UTC (permalink / raw)
To: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, cang
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
open list
Commit cb7e6f05fce67c965194ac04467e1ba7bc70b069 ("scsi: ufs: core: Enable
power management for wlun") moves UFS operations out of ufshcd_resume(), so
in error handling preparation, if ufshcd hba has failed to resume, there is
no point to re-enable IRQ/clk/pwr.
Signed-off-by: Can Guo <cang@codeaurora.org>
---
drivers/scsi/ufs/ufshcd.c | 61 ++++++++++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 24 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 809d0cb..301304b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2721,8 +2721,8 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
break;
case UFSHCD_STATE_EH_SCHEDULED_FATAL:
/*
- * pm_runtime_get_sync() is used at error handling preparation
- * stage. If a scsi cmd, e.g. the SSU cmd, is sent from hba's
+ * ufshcd_rpm_get_sync() is used at error handling preparation
+ * stage. If a scsi cmd, e.g., the SSU cmd, is sent from the
* PM ops, it can never be finished if we let SCSI layer keep
* retrying it, which gets err handler stuck forever. Neither
* can we let the scsi cmd pass through, because UFS is in bad
@@ -5908,28 +5908,33 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend)
}
}
-static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
+static int ufshcd_err_handling_prepare(struct ufs_hba *hba)
{
+ unsigned long flags;
+
+ /*
+ * Exclusively call pm_runtime_get_sync(hba->dev) once, in case
+ * following ufshcd_rpm_get_sync() fails.
+ */
+ pm_runtime_get_sync(hba->dev);
+ /*
+ * IRQ, clocks and powers are supposed to be ON by now, unless error
+ * happened to ufshcd_resume(), which is out of our control.
+ */
+ if (pm_runtime_suspended(hba->dev) || hba->is_sys_suspended) {
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ hba->ufshcd_state = UFSHCD_STATE_ERROR;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+ pm_runtime_put(hba->dev);
+ return -EINVAL;
+ }
+
ufshcd_rpm_get_sync(hba);
- if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
+ if (pm_runtime_suspended(&hba->sdev_ufs_device->sdev_gendev) ||
hba->is_wl_sys_suspended) {
- enum ufs_pm_op pm_op;
+ enum ufs_pm_op pm_op = hba->is_wl_sys_suspended ?
+ UFS_SYSTEM_PM : UFS_RUNTIME_PM;
- /*
- * Don't assume anything of resume, if
- * resume fails, irq and clocks can be OFF, and powers
- * can be OFF or in LPM.
- */
- ufshcd_setup_hba_vreg(hba, true);
- ufshcd_enable_irq(hba);
- ufshcd_setup_vreg(hba, true);
- ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq);
- ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq2);
- ufshcd_hold(hba, false);
- if (!ufshcd_is_clkgating_allowed(hba))
- ufshcd_setup_clocks(hba, true);
- ufshcd_release(hba);
- pm_op = hba->is_wl_sys_suspended ? UFS_SYSTEM_PM : UFS_RUNTIME_PM;
ufshcd_vops_resume(hba, pm_op);
} else {
ufshcd_hold(hba, false);
@@ -5943,6 +5948,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
down_write(&hba->clk_scaling_lock);
up_write(&hba->clk_scaling_lock);
cancel_work_sync(&hba->eeh_work);
+ return 0;
}
static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
@@ -5953,6 +5959,7 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
ufshcd_clk_scaling_suspend(hba, false);
ufshcd_clear_ua_wluns(hba);
ufshcd_rpm_put(hba);
+ pm_runtime_put(hba->dev);
}
static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
@@ -6044,12 +6051,17 @@ static void ufshcd_err_handler(struct work_struct *work)
up(&hba->host_sem);
return;
}
- ufshcd_set_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
- ufshcd_err_handling_prepare(hba);
+ if (ufshcd_err_handling_prepare(hba)) {
+ dev_err(hba->dev, "%s: error handling preparation failed\n",
+ __func__);
+ up(&hba->host_sem);
+ return;
+ }
/* Complete requests that have door-bell cleared by h/w */
ufshcd_complete_requests(hba);
spin_lock_irqsave(hba->host->host_lock, flags);
+ ufshcd_set_eh_in_progress(hba);
if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
hba->ufshcd_state = UFSHCD_STATE_RESET;
/*
@@ -8987,6 +8999,9 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
/* Enable Auto-Hibernate if configured */
ufshcd_auto_hibern8_enable(hba);
+
+ hba->clk_gating.is_suspended = false;
+ ufshcd_release(hba);
goto out;
set_old_link_state:
@@ -8996,8 +9011,6 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
out:
if (ret)
ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret);
- hba->clk_gating.is_suspended = false;
- ufshcd_release(hba);
hba->wl_pm_op_in_progress = false;
return ret <= 0 ? ret : -EINVAL;
}
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 4/6] scsi: ufs: Update ufshcd_recover_pm_error()
[not found] <1621846046-22204-1-git-send-email-cang@codeaurora.org>
` (2 preceding siblings ...)
2021-05-24 8:47 ` [PATCH v2 3/6] scsi: ufs: Simplify error handling preparation Can Guo
@ 2021-05-24 8:47 ` Can Guo
2021-05-24 8:47 ` [PATCH v2 5/6] scsi: ufs: Let host_sem cover the entire system suspend/resume Can Guo
2021-05-24 8:47 ` [PATCH v2 6/6] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests Can Guo
5 siblings, 0 replies; 8+ messages in thread
From: Can Guo @ 2021-05-24 8:47 UTC (permalink / raw)
To: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, cang
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
open list
Commit cb7e6f05fce67c965194ac04467e1ba7bc70b069 ("scsi: ufs: core: Enable
power management for wlun") makes UFS device W-LU scsi device the supplier
and the others as consumers. If the supplier's runtime pm ops fails or has
failed, not only supplier has the error saved to dev->power.runtime_error,
the error may (most likely) be saved by one or more consumers, see also
rpm_get_suppliers(). Update ufshcd_recover_pm_error() accordingly to clear
the runtime_error of the supplier and its consumers to get runtime PM back
to work on them.
Signed-off-by: Can Guo <cang@codeaurora.org>
---
drivers/scsi/ufs/ufshcd.c | 49 ++++++++++++++++++++---------------------------
1 file changed, 21 insertions(+), 28 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 301304b..5e6e3ac 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -242,6 +242,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba);
static int ufshcd_change_power_mode(struct ufs_hba *hba,
struct ufs_pa_layer_attr *pwr_mode);
static void ufshcd_schedule_eh_work(struct ufs_hba *hba);
+static void ufshcd_recover_pm_error(struct ufs_hba *hba);
static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on);
static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on);
static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
@@ -5951,12 +5952,14 @@ static int ufshcd_err_handling_prepare(struct ufs_hba *hba)
return 0;
}
-static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
+static void ufshcd_err_handling_unprepare(struct ufs_hba *hba, int reset_err)
{
ufshcd_scsi_unblock_requests(hba);
ufshcd_release(hba);
if (ufshcd_is_clkscaling_supported(hba))
ufshcd_clk_scaling_suspend(hba, false);
+ if (!reset_err)
+ ufshcd_recover_pm_error(hba);
ufshcd_clear_ua_wluns(hba);
ufshcd_rpm_put(hba);
pm_runtime_put(hba->dev);
@@ -5975,34 +5978,26 @@ static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba)
static void ufshcd_recover_pm_error(struct ufs_hba *hba)
{
struct Scsi_Host *shost = hba->host;
- struct scsi_device *sdev;
- struct request_queue *q;
+ struct scsi_device *sdev = hba->sdev_ufs_device;
+ struct scsi_target *starget = sdev->sdev_target;
int ret;
hba->is_wl_sys_suspended = false;
- /*
- * Set RPM status of wlun device to RPM_ACTIVE,
- * this also clears its runtime error.
- */
- ret = pm_runtime_set_active(&hba->sdev_ufs_device->sdev_gendev);
- /* hba device might have a runtime error otherwise */
- if (ret)
- ret = pm_runtime_set_active(hba->dev);
- /*
- * If wlun device had runtime error, we also need to resume those
- * consumer scsi devices in case any of them has failed to be
- * resumed due to supplier runtime resume failure. This is to unblock
- * blk_queue_enter in case there are bios waiting inside it.
- */
- if (!ret) {
- shost_for_each_device(sdev, shost) {
- q = sdev->request_queue;
- if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
- q->rpm_status == RPM_SUSPENDING))
- pm_request_resume(q->dev);
- }
+ /* Resume parent/target to clear path for pm_runtime_set_active() */
+ pm_runtime_get_sync(&starget->dev);
+ shost_for_each_device(sdev, shost) {
+ struct device *dev = &sdev->sdev_gendev;
+
+ pm_runtime_get_sync(dev);
+ /* Clear saved dev->power.runtime_error */
+ ret = pm_runtime_set_active(dev);
+ if (!ret)
+ /* runtime_error cleared, kick blk_queue_enter() */
+ blk_set_runtime_active(sdev->request_queue);
+ pm_runtime_put(dev);
}
+ pm_runtime_put(&starget->dev);
}
#else
static inline void ufshcd_recover_pm_error(struct ufs_hba *hba)
@@ -6036,7 +6031,7 @@ static void ufshcd_err_handler(struct work_struct *work)
unsigned long flags;
bool err_xfer = false;
bool err_tm = false;
- int err = 0, pmc_err;
+ int err = -1, pmc_err;
int tag;
bool needs_reset = false, needs_restore = false;
@@ -6189,8 +6184,6 @@ static void ufshcd_err_handler(struct work_struct *work)
if (err)
dev_err(hba->dev, "%s: reset and restore failed with err %d\n",
__func__, err);
- else
- ufshcd_recover_pm_error(hba);
spin_lock_irqsave(hba->host->host_lock, flags);
}
@@ -6204,7 +6197,7 @@ static void ufshcd_err_handler(struct work_struct *work)
}
ufshcd_clear_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
- ufshcd_err_handling_unprepare(hba);
+ ufshcd_err_handling_unprepare(hba, err);
up(&hba->host_sem);
}
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 5/6] scsi: ufs: Let host_sem cover the entire system suspend/resume
[not found] <1621846046-22204-1-git-send-email-cang@codeaurora.org>
` (3 preceding siblings ...)
2021-05-24 8:47 ` [PATCH v2 4/6] scsi: ufs: Update ufshcd_recover_pm_error() Can Guo
@ 2021-05-24 8:47 ` Can Guo
2021-05-24 16:56 ` Bart Van Assche
2021-05-24 8:47 ` [PATCH v2 6/6] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests Can Guo
5 siblings, 1 reply; 8+ messages in thread
From: Can Guo @ 2021-05-24 8:47 UTC (permalink / raw)
To: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, cang
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
open list
UFS error handling now is doing more than just re-probing, but also sending
scsi cmds, e.g., for clearing UACs, and recovering runtime PM error, which
may change runtime status of scsi devices. To protect system suspend/resume
from being disturbed by error handling, move the host_sem from wl pm ops
to ufshcd_suspend_prepare() and ufshcd_resume_complete().
Signed-off-by: Can Guo <cang@codeaurora.org>
---
drivers/scsi/ufs/ufshcd.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5e6e3ac..9a3bc04 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -9057,16 +9057,13 @@ static int ufshcd_wl_suspend(struct device *dev)
ktime_t start = ktime_get();
hba = shost_priv(sdev->host);
- down(&hba->host_sem);
if (pm_runtime_suspended(dev))
goto out;
ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM);
- if (ret) {
+ if (ret)
dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret);
- up(&hba->host_sem);
- }
out:
if (!ret)
@@ -9099,7 +9096,6 @@ static int ufshcd_wl_resume(struct device *dev)
hba->curr_dev_pwr_mode, hba->uic_link_state);
if (!ret)
hba->is_wl_sys_suspended = false;
- up(&hba->host_sem);
return ret;
}
#endif
@@ -9666,6 +9662,7 @@ void ufshcd_resume_complete(struct device *dev)
ufshcd_rpmb_rpm_put(hba);
hba->rpmb_complete_put = false;
}
+ up(&hba->host_sem);
}
EXPORT_SYMBOL_GPL(ufshcd_resume_complete);
@@ -9692,6 +9689,7 @@ int ufshcd_suspend_prepare(struct device *dev)
ufshcd_rpmb_rpm_get_sync(hba);
hba->rpmb_complete_put = true;
}
+ down(&hba->host_sem);
return 0;
}
EXPORT_SYMBOL_GPL(ufshcd_suspend_prepare);
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2 5/6] scsi: ufs: Let host_sem cover the entire system suspend/resume
2021-05-24 8:47 ` [PATCH v2 5/6] scsi: ufs: Let host_sem cover the entire system suspend/resume Can Guo
@ 2021-05-24 16:56 ` Bart Van Assche
2021-05-25 2:04 ` Can Guo
0 siblings, 1 reply; 8+ messages in thread
From: Bart Van Assche @ 2021-05-24 16:56 UTC (permalink / raw)
To: Can Guo, asutoshd, nguyenb, hongwus, linux-scsi, kernel-team
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
open list
On 5/24/21 1:47 AM, Can Guo wrote:
> UFS error handling now is doing more than just re-probing, but also sending
> scsi cmds, e.g., for clearing UACs, and recovering runtime PM error, which
> may change runtime status of scsi devices. To protect system suspend/resume
> from being disturbed by error handling, move the host_sem from wl pm ops
> to ufshcd_suspend_prepare() and ufshcd_resume_complete().
Other SCSI LLDs can perform error handling while system suspend/resume
is in progress. Why can't the UFS driver do this?
Additionally, please document what the purpose of host_sem is before
making any changes to how host_sem is used. The only documentation I
have found of host_sem is the following: "* @host_sem: semaphore used to
serialize concurrent contexts". To me that text is less than useful
since semaphores are almost always used to serialize concurrent code.
Thanks,
Bart.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 5/6] scsi: ufs: Let host_sem cover the entire system suspend/resume
2021-05-24 16:56 ` Bart Van Assche
@ 2021-05-25 2:04 ` Can Guo
0 siblings, 0 replies; 8+ messages in thread
From: Can Guo @ 2021-05-25 2:04 UTC (permalink / raw)
To: Bart Van Assche
Cc: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, Alim Akhtar,
Avri Altman, James E.J. Bottomley, Martin K. Petersen,
Stanley Chu, Bean Huo, Jaegeuk Kim, open list
Hi Bart,
On 2021-05-25 00:56, Bart Van Assche wrote:
> On 5/24/21 1:47 AM, Can Guo wrote:
>> UFS error handling now is doing more than just re-probing, but also
>> sending
>> scsi cmds, e.g., for clearing UACs, and recovering runtime PM error,
>> which
>> may change runtime status of scsi devices. To protect system
>> suspend/resume
>> from being disturbed by error handling, move the host_sem from wl pm
>> ops
>> to ufshcd_suspend_prepare() and ufshcd_resume_complete().
>
> Other SCSI LLDs can perform error handling while system suspend/resume
> is in progress. Why can't the UFS driver do this?
I don't know about other SCSI LLDs, but UFS error handling is basically
doing a re-probe/re-initialization to UFS device. Having UFS error
handling
running in parallel with system suspend/resume, neither of them will end
up well.
I didn't design all this, it is just happening, I am trying to fix it
and
semaphore works well for me. I am really glad to see someone cares about
error handling and fix it with better ideas (maybe using WQ_FREEZABLE)
later.
>
> Additionally, please document what the purpose of host_sem is before
> making any changes to how host_sem is used. The only documentation I
> have found of host_sem is the following: "* @host_sem: semaphore used
> to
> serialize concurrent contexts". To me that text is less than useful
> since semaphores are almost always used to serialize concurrent code.
>
Sure, host_sem is actually preventing cocurrency happens among any of
contexts, such as sysfs access, shutdown, error handling, system
suspend/resume and async probe, I will update its message in next
version.
Thanks,
Can Guo.
> Thanks,
>
> Bart.
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 6/6] scsi: ufs: Update the fast abort path in ufshcd_abort() for PM requests
[not found] <1621846046-22204-1-git-send-email-cang@codeaurora.org>
` (4 preceding siblings ...)
2021-05-24 8:47 ` [PATCH v2 5/6] scsi: ufs: Let host_sem cover the entire system suspend/resume Can Guo
@ 2021-05-24 8:47 ` Can Guo
5 siblings, 0 replies; 8+ messages in thread
From: Can Guo @ 2021-05-24 8:47 UTC (permalink / raw)
To: asutoshd, nguyenb, hongwus, linux-scsi, kernel-team, cang
Cc: Alim Akhtar, Avri Altman, James E.J. Bottomley,
Martin K. Petersen, Stanley Chu, Bean Huo, Jaegeuk Kim,
open list
If PM requests fail during runtime suspend/resume, RPM framework saves the
error to dev->power.runtime_error. Before the runtime_error gets cleared,
runtime PM on this specific device won't work again, leaving the device
in either suspended or active state permanently.
When task abort happens to a PM request sent during runtime suspend/resume,
even if it can be successfully aborted, RPM framework anyways saves the
(TIMEOUT) error. But we want more and we can do better - let error handling
recover and clear the runtime_error. So, let PM requests take the fast
abort path in ufshcd_abort().
Signed-off-by: Can Guo <cang@codeaurora.org>
---
drivers/scsi/ufs/ufshcd.c | 38 ++++++++++++++++++++++----------------
1 file changed, 22 insertions(+), 16 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 9a3bc04..8312b31 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2731,7 +2731,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
* err handler blocked for too long. So, just fail the scsi cmd
* sent from PM ops, err handler can recover PM error anyways.
*/
- if (hba->wl_pm_op_in_progress) {
+ if (cmd->request->rq_flags & RQF_PM) {
hba->force_reset = true;
set_host_byte(cmd, DID_BAD_TARGET);
cmd->scsi_done(cmd);
@@ -2764,7 +2764,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
(hba->clk_gating.state != CLKS_ON));
if (unlikely(test_bit(tag, &hba->outstanding_reqs))) {
- if (hba->wl_pm_op_in_progress)
+ if (cmd->request->rq_flags & RQF_PM)
set_host_byte(cmd, DID_BAD_TARGET);
else
err = SCSI_MLQUEUE_HOST_BUSY;
@@ -6982,11 +6982,14 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
int err = 0;
struct ufshcd_lrb *lrbp;
u32 reg;
+ bool need_eh = false;
host = cmd->device->host;
hba = shost_priv(host);
tag = cmd->request->tag;
lrbp = &hba->lrb[tag];
+
+ dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag);
if (!ufshcd_valid_tag(hba, tag)) {
dev_err(hba->dev,
"%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p",
@@ -7004,9 +7007,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
goto out;
}
- /* Print Transfer Request of aborted task */
- dev_info(hba->dev, "%s: Device abort task at tag %d\n", __func__, tag);
-
/*
* Print detailed info about aborted request.
* As more than one request might get aborted at the same time,
@@ -7034,21 +7034,21 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
}
/*
- * Task abort to the device W-LUN is illegal. When this command
- * will fail, due to spec violation, scsi err handling next step
- * will be to send LU reset which, again, is a spec violation.
- * To avoid these unnecessary/illegal steps, first we clean up
- * the lrb taken by this cmd and re-set it in outstanding_reqs,
- * then queue the eh_work and bail.
+ * This fast path guarantees the cmd always gets aborted successfully,
+ * meanwhile it invokes the error handler. It allows contexts, which
+ * are blocked by this cmd, to fail fast. It serves multiple purposes:
+ * #1 To avoid unnecessary/illagal abort attempts to the W-LU.
+ * #2 To avoid live lock between eh_work and specific contexts, i.e.,
+ * suspend/resume and eh_work itself.
+ * #3 To let eh_work recover runtime PM error in case abort happens
+ * to cmds sent from runtime suspend/resume ops.
*/
- if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
+ if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN ||
+ (cmd->request->rq_flags & RQF_PM)) {
ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
__ufshcd_transfer_req_compl(hba, (1UL << tag));
set_bit(tag, &hba->outstanding_reqs);
- spin_lock_irqsave(host->host_lock, flags);
- hba->force_reset = true;
- ufshcd_schedule_eh_work(hba);
- spin_unlock_irqrestore(host->host_lock, flags);
+ need_eh = true;
goto out;
}
@@ -7062,6 +7062,12 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
cleanup:
__ufshcd_transfer_req_compl(hba, (1UL << tag));
out:
+ if (cmd->request->rq_flags & RQF_PM || need_eh) {
+ spin_lock_irqsave(host->host_lock, flags);
+ hba->force_reset = true;
+ ufshcd_schedule_eh_work(hba);
+ spin_unlock_irqrestore(host->host_lock, flags);
+ }
err = SUCCESS;
} else {
dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.
^ permalink raw reply related [flat|nested] 8+ messages in thread