All of lore.kernel.org
 help / color / mirror / Atom feed
From: Can Guo <cang@codeaurora.org>
To: Bart Van Assche <bvanassche@acm.org>
Cc: "Martin K . Petersen" <martin.petersen@oracle.com>,
	linux-scsi@vger.kernel.org,
	"James E.J. Bottomley" <jejb@linux.ibm.com>,
	Stanley Chu <stanley.chu@mediatek.com>,
	Bean Huo <beanhuo@micron.com>, Avri Altman <avri.altman@wdc.com>,
	Jaegeuk Kim <jaegeuk@kernel.org>,
	Asutosh Das <asutoshd@codeaurora.org>
Subject: Re: [PATCH RFC 4/4] ufs: Make host controller state change handling more systematic
Date: Wed, 23 Jun 2021 16:08:36 +0800	[thread overview]
Message-ID: <f237403527eab4f180a3aabed2510d41@codeaurora.org> (raw)
In-Reply-To: <20210619005228.28569-5-bvanassche@acm.org>

Hi Bart,

On 2021-06-19 08:52, Bart Van Assche wrote:
> Introduce a function that reports whether or not a controller state 
> change
> is allowed instead of duplicating these checks in every context that
> changes the host controller state. This patch includes a behavior 
> change:
> ufshcd_link_recovery() no longer can change the host controller state 
> from
> UFSHCD_STATE_ERROR into UFSHCD_STATE_RESET.
> 
> Cc: Can Guo <cang@codeaurora.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/ufs/ufshcd.c | 59 ++++++++++++++++++++++++---------------
>  1 file changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index c213daec20f7..a10c8ec28468 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -4070,16 +4070,32 @@ static int ufshcd_uic_change_pwr_mode(struct
> ufs_hba *hba, u8 mode)
>  	return ret;
>  }
> 
> +static bool ufshcd_set_state(struct ufs_hba *hba, enum ufshcd_state 
> new_state)
> +{
> +	lockdep_assert_held(hba->host->host_lock);
> +
> +	if (old_state != UFSHCD_STATE_ERROR || new_state == 
> UFSHCD_STATE_ERROR)
> +		hba->ufshcd_state = new_state;
> +		return true;
> +	}
> +	return false;
> +}
> +
>  int ufshcd_link_recovery(struct ufs_hba *hba)
>  {
>  	int ret;
>  	unsigned long flags;
> +	bool proceed;
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	hba->ufshcd_state = UFSHCD_STATE_RESET;
> -	ufshcd_set_eh_in_progress(hba);
> +	proceed = ufshcd_set_state(hba, UFSHCD_STATE_RESET);
> +	if (proceed)
> +		ufshcd_set_eh_in_progress(hba);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> +	if (!proceed)
> +		return -EPERM;
> +
>  	/* Reset the attached device */
>  	ufshcd_device_reset(hba);
> 
> @@ -4087,7 +4103,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)

I want to remove this function since it is only used by ufs-mediatek.c
through ufshcd_vops_resume(). Althrough it does not race with error
handling as of now, by removing it we can reduce one more caller of
full reset. I will ask Stanley help comment on this.

> 
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (ret)
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>  	ufshcd_clear_eh_in_progress(hba);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
> @@ -5856,15 +5872,17 @@ static inline bool
> ufshcd_is_saved_err_fatal(struct ufs_hba *hba)
>  /* host lock must be held before calling this func */
>  static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba)
>  {
> -	/* handle fatal errors only when link is not in error state */
> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR) {
> -		if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> -		    ufshcd_is_saved_err_fatal(hba))
> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_FATAL;
> -		else
> -			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED_NON_FATAL;
> +	bool proceed;
> +
> +	if (hba->force_reset || ufshcd_is_link_broken(hba) ||
> +	    ufshcd_is_saved_err_fatal(hba))
> +		proceed = ufshcd_set_state(hba,
> +					   UFSHCD_STATE_EH_SCHEDULED_FATAL);
> +	else
> +		proceed = ufshcd_set_state(hba,
> +					   UFSHCD_STATE_EH_SCHEDULED_NON_FATAL);
> +	if (proceed)
>  		queue_work(hba->eh_wq, &hba->eh_work);
> -	}
>  }
> 
>  static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow)
> @@ -6017,8 +6035,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	down(&hba->host_sem);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	if (ufshcd_err_handling_should_stop(hba)) {
> -		if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		spin_unlock_irqrestore(hba->host->host_lock, flags);
>  		up(&hba->host_sem);
>  		return;
> @@ -6029,8 +6046,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
>  	/* Complete requests that have door-bell cleared by h/w */
>  	ufshcd_complete_requests(hba);
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
> -		hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);
>  	/*
>  	 * A full reset and restore might have happened after preparation
>  	 * is finished, double check whether we should stop.
> @@ -6163,8 +6179,7 @@ static void ufshcd_err_handler(struct work_struct 
> *work)
> 
>  skip_err_handling:
>  	if (!needs_reset) {
> -		if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -			hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +		ufshcd_set_state(hba, UFSHCD_STATE_OPERATIONAL);
>  		if (hba->saved_err || hba->saved_uic_err)
>  			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x 
> saved_uic_err 0x%x",
>  			    __func__, hba->saved_err, hba->saved_uic_err);
> @@ -7135,7 +7150,7 @@ static int ufshcd_reset_and_restore(struct 
> ufs_hba *hba)
>  	 */
>  	scsi_report_bus_reset(hba->host, 0);
>  	if (err) {
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> +		ufshcd_set_state(hba, UFSHCD_STATE_ERROR);
>  		hba->saved_err |= saved_err;
>  		hba->saved_uic_err |= saved_uic_err;
>  	}
> @@ -7951,7 +7966,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,
> bool init_dev_params)
>  	unsigned long flags;
>  	ktime_t start = ktime_get();
> 
> -	hba->ufshcd_state = UFSHCD_STATE_RESET;
> +	ufshcd_set_state(hba, UFSHCD_STATE_RESET);

1. We don't hold host lock here, but ufshcd_set_state() is expecting it.

2. Here we need to override the state to RESET anyways even state is 
ERROR,
because in ufshcd_reset_and_restore(), ufshcd_probe_hba() can be called 
5
times (retries == 5, see below code snippet). Otherwise, say on the 2nd
retry of ufshcd_probe_hba(), even if ufshcd_probe_hba() pass through 
without
an error (ret == 0), the ufshcd_set_state() (down below) cannot set the
state back to OPERATIONAL.

7042 	do {
7043 		/* Reset the attached device */
7044 		ufshcd_vops_device_reset(hba);
7045
7046 		err = ufshcd_host_reset_and_restore(hba);
7047 	} while (err && --retries);

Thanks,

Can Guo.

> 
>  	ret = ufshcd_link_startup(hba);
>  	if (ret)
> @@ -8024,10 +8039,8 @@ static int ufshcd_probe_hba(struct ufs_hba
> *hba, bool init_dev_params)
> 
>  out:
>  	spin_lock_irqsave(hba->host->host_lock, flags);
> -	if (ret)
> -		hba->ufshcd_state = UFSHCD_STATE_ERROR;
> -	else if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> -		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +	ufshcd_set_state(hba, ret ? UFSHCD_STATE_ERROR :
> +			 UFSHCD_STATE_OPERATIONAL);
>  	spin_unlock_irqrestore(hba->host->host_lock, flags);
> 
>  	trace_ufshcd_init(dev_name(hba->dev), ret,

  parent reply	other threads:[~2021-06-23  8:09 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-19  0:52 [PATCH RFC 0/4] UFS patches for Linux kernel v5.14 Bart Van Assche
2021-06-19  0:52 ` [PATCH RFC 1/4] ufs: Rename the second ufshcd_probe_hba() argument Bart Van Assche
2021-06-21  8:13   ` Avri Altman
2021-06-19  0:52 ` [PATCH RFC 2/4] ufs: Remove a check from ufshcd_queuecommand() Bart Van Assche
2021-06-21  8:22   ` Avri Altman
2021-06-21 17:31     ` Bart Van Assche
2021-06-19  0:52 ` [PATCH RFC 3/4] ufs: Improve static type checking for the host controller state Bart Van Assche
2021-06-21  8:26   ` Avri Altman
2021-06-23  7:42   ` Can Guo
2021-06-23 16:10     ` Bart Van Assche
2021-06-19  0:52 ` [PATCH RFC 4/4] ufs: Make host controller state change handling more systematic Bart Van Assche
2021-06-19  7:09   ` kernel test robot
2021-06-21  8:55   ` Avri Altman
2021-06-21 17:38     ` Bart Van Assche
2021-06-23  8:08   ` Can Guo [this message]
2021-06-23 12:02   ` Can Guo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f237403527eab4f180a3aabed2510d41@codeaurora.org \
    --to=cang@codeaurora.org \
    --cc=asutoshd@codeaurora.org \
    --cc=avri.altman@wdc.com \
    --cc=beanhuo@micron.com \
    --cc=bvanassche@acm.org \
    --cc=jaegeuk@kernel.org \
    --cc=jejb@linux.ibm.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=stanley.chu@mediatek.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.