All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2] mmc: debugfs: add error statistics
@ 2021-12-14 14:41 Shaik Sajida Bhanu
  2021-12-15 14:03 ` Adrian Hunter
  0 siblings, 1 reply; 11+ messages in thread
From: Shaik Sajida Bhanu @ 2021-12-14 14:41 UTC (permalink / raw)
  To: adrian.hunter, riteshh, asutoshd, ulf.hansson, agross,
	bjorn.andersson, linux-mmc, linux-arm-msm, linux-kernel
  Cc: stummala, vbadigan, quic_rampraka, quic_pragalla, sartgarg,
	nitirawa, sayalil, Shaik Sajida Bhanu

Add debugfs entries to query eMMC and SD card error statistics.
This feature is useful for debugging and testing.

Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
---

Changes since V1:
	-Removed sysfs entry for eMMC and SD card error statistics and added
	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
---
 drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/mmc/core/queue.c   |   2 +
 drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
 include/linux/mmc/host.h   |  37 ++++++++++++++++
 4 files changed, 186 insertions(+), 12 deletions(-)

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 3fdbc80..40210c34 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64 val)
 DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
 	"%llu\n");
 
+static int mmc_err_state_get(void *data, u64 *val)
+{
+	struct mmc_host *host = data;
+
+	if (!host)
+		return -EINVAL;
+
+	*val = host->err_state ? 1 : 0;
+
+	return 0;
+}
+
+static int mmc_err_state_clear(void *data, u64 val)
+{
+	struct mmc_host *host = data;
+
+	if (!host)
+		return -EINVAL;
+
+	host->err_state = false;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
+		mmc_err_state_clear, "%llu\n");
+
+static int mmc_err_stats_show(struct seq_file *file, void *data)
+{
+	struct mmc_host *host = (struct mmc_host *)file->private;
+
+	if (!host)
+		return -EINVAL;
+
+	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
+
+	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_CMD_CRC]);
+
+	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
+
+	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_DAT_CRC]);
+
+	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_ADMA]);
+
+	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_ADMA]);
+
+	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
+		   host->err_stats[MMC_ERR_TUNING]);
+
+	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
+		   host->err_stats[MMC_ERR_CMDQ_RED]);
+
+	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
+		   host->err_stats[MMC_ERR_CMDQ_GCE]);
+
+	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
+		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
+
+	seq_printf(file, "# Request Timedout:\t %d\n",
+		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
+
+	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
+		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
+
+	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
+		   host->err_stats[MMC_ERR_ICE_CFG]);
+
+	return 0;
+}
+
+static int mmc_err_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mmc_err_stats_show, inode->i_private);
+}
+
+static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
+				   size_t cnt, loff_t *ppos)
+{
+	struct mmc_host *host = filp->f_mapping->host->i_private;
+
+	if (!host)
+		return -EINVAL;
+
+	pr_debug("%s: Resetting MMC error statistics\n", __func__);
+	memset(host->err_stats, 0, sizeof(host->err_stats));
+
+	return cnt;
+}
+
+static const struct file_operations mmc_err_stats_fops = {
+	.open	= mmc_err_stats_open,
+	.read	= seq_read,
+	.write	= mmc_err_stats_write,
+};
+
 void mmc_add_host_debugfs(struct mmc_host *host)
 {
 	struct dentry *root;
@@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
 	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
 				   &mmc_clock_fops);
 
+	debugfs_create_file("err_state", 0600, root, host,
+		&mmc_err_state);
+	debugfs_create_file("err_stats", 0600, root, host,
+		&mmc_err_stats_fops);
+
 #ifdef CONFIG_FAIL_MMC_REQUEST
 	if (fail_request)
 		setup_fault_attr(&fail_default_attr, fail_request);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index b15c034..5243929 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
 	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
 	bool recovery_needed = false;
 
+	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
+
 	switch (issue_type) {
 	case MMC_ISSUE_ASYNC:
 	case MMC_ISSUE_DCMD:
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 07c6da1..d742051 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
 	if (host->ops->dump_vendor_regs)
 		host->ops->dump_vendor_regs(host);
 
+	mmc_debugfs_err_stats_enable(host->mmc);
 	SDHCI_DUMP("============================================\n");
 }
 EXPORT_SYMBOL_GPL(sdhci_dumpregs);
@@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
 		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
 		       mmc_hostname(host->mmc));
 		sdhci_dumpregs(host);
@@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
 
 	if (host->data || host->data_cmd ||
 	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
 		pr_err("%s: Timeout waiting for hardware interrupt.\n",
 		       mmc_hostname(host->mmc));
 		sdhci_dumpregs(host);
@@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 
 	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
 		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
-		if (intmask & SDHCI_INT_TIMEOUT)
+		if (intmask & SDHCI_INT_TIMEOUT) {
 			host->cmd->error = -ETIMEDOUT;
-		else
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+		} else {
 			host->cmd->error = -EILSEQ;
-
+			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+		}
 		/* Treat data command CRC error the same as data CRC error */
 		if (host->cmd->data &&
 		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
@@ -3266,6 +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
 			  -ETIMEDOUT :
 			  -EILSEQ;
 
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
 		if (sdhci_auto_cmd23(host, mrq)) {
 			mrq->sbc->error = err;
 			__sdhci_finish_mrq(host, mrq);
@@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
 				host->data_cmd = NULL;
 				data_cmd->error = -ETIMEDOUT;
+				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
 				__sdhci_finish_mrq(host, data_cmd->mrq);
 				return;
 			}
@@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 		return;
 	}
 
-	if (intmask & SDHCI_INT_DATA_TIMEOUT)
+	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
 		host->data->error = -ETIMEDOUT;
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+	}
 	else if (intmask & SDHCI_INT_DATA_END_BIT)
 		host->data->error = -EILSEQ;
 	else if ((intmask & SDHCI_INT_DATA_CRC) &&
 		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
-			!= MMC_BUS_TEST_R)
+			!= MMC_BUS_TEST_R) {
 		host->data->error = -EILSEQ;
+		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+	}
 	else if (intmask & SDHCI_INT_ADMA_ERROR) {
 		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
 		       intmask);
 		sdhci_adma_show_error(host);
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
 		host->data->error = -EIO;
 		if (host->ops->adma_workaround)
 			host->ops->adma_workaround(host, intmask);
@@ -3905,20 +3921,33 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
 	if (!host->cqe_on)
 		return false;
 
-	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
+	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
 		*cmd_error = -EILSEQ;
-	else if (intmask & SDHCI_INT_TIMEOUT)
+		if (intmask & SDHCI_INT_CRC) {
+			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+		}
+	} else if (intmask & SDHCI_INT_TIMEOUT) {
 		*cmd_error = -ETIMEDOUT;
-	else
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+	} else
 		*cmd_error = 0;
 
-	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
+	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
 		*data_error = -EILSEQ;
-	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
+		if (intmask & SDHCI_INT_DATA_CRC) {
+			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+		}
+	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
 		*data_error = -ETIMEDOUT;
-	else if (intmask & SDHCI_INT_ADMA_ERROR)
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
 		*data_error = -EIO;
-	else
+		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
+	} else
 		*data_error = 0;
 
 	/* Clear selected interrupts. */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7afb57c..c263f8f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
 
 struct mmc_host;
 
+enum mmc_err_stat {
+	MMC_ERR_CMD_TIMEOUT,
+	MMC_ERR_CMD_CRC,
+	MMC_ERR_DAT_TIMEOUT,
+	MMC_ERR_DAT_CRC,
+	MMC_ERR_AUTO_CMD,
+	MMC_ERR_ADMA,
+	MMC_ERR_TUNING,
+	MMC_ERR_CMDQ_RED,
+	MMC_ERR_CMDQ_GCE,
+	MMC_ERR_CMDQ_ICCE,
+	MMC_ERR_REQ_TIMEOUT,
+	MMC_ERR_CMDQ_REQ_TIMEOUT,
+	MMC_ERR_ICE_CFG,
+	MMC_ERR_MAX,
+};
+
 struct mmc_host_ops {
 	/*
 	 * It is optional for the host to implement pre_req and post_req in
@@ -500,6 +517,8 @@ struct mmc_host {
 
 	/* Host Software Queue support */
 	bool			hsq_enabled;
+	u32                     err_stats[MMC_ERR_MAX];
+	bool			err_state;
 
 	unsigned long		private[] ____cacheline_aligned;
 };
@@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
 	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
 }
 
+static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
+{
+	mmc->err_state = true;
+}
+
+static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
+		enum mmc_err_stat stat) {
+
+	/*
+	 * Ignore the command timeout errors observed during
+	 * the card init as those are expected.
+	 */
+	if (!mmc->err_state)
+		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
+
+	mmc->err_stats[stat] += 1;
+}
+
 int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
 int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member 
of Code Aurora Forum, hosted by The Linux Foundation


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] mmc: debugfs: add error statistics
  2021-12-14 14:41 [PATCH V2] mmc: debugfs: add error statistics Shaik Sajida Bhanu
@ 2021-12-15 14:03 ` Adrian Hunter
  2021-12-21  7:16   ` Sajida Bhanu (Temp) (QUIC)
  0 siblings, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2021-12-15 14:03 UTC (permalink / raw)
  To: Shaik Sajida Bhanu, riteshh, asutoshd, ulf.hansson, agross,
	bjorn.andersson, linux-mmc, linux-arm-msm, linux-kernel
  Cc: stummala, vbadigan, quic_rampraka, quic_pragalla, sartgarg,
	nitirawa, sayalil

On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
> Add debugfs entry to query eMMC and SD card errors statistics.
> This feature is useful for debug and testing
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> ---
> 
> Changes since V1:
> 	-Removed sysfs entry for eMMC and SD card error statistics and added
> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.

Thanks for doing this.

> ---
>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/mmc/core/queue.c   |   2 +
>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>  4 files changed, 186 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
> index 3fdbc80..40210c34 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64 val)
>  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>  	"%llu\n");
>  
> +static int mmc_err_state_get(void *data, u64 *val)
> +{
> +	struct mmc_host *host = data;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	*val = host->err_state ? 1 : 0;
> +
> +	return 0;
> +}
> +
> +static int mmc_err_state_clear(void *data, u64 val)
> +{
> +	struct mmc_host *host = data;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	host->err_state = false;

Is there much reason to disable err stats from userspace?

> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
> +		mmc_err_state_clear, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data)
> +{
> +	struct mmc_host *host = (struct mmc_host *)file->private;
> +
> +	if (!host)
> +		return -EINVAL;

I was thinking we needed a way to determine whether stats were being
collected because not all drivers would support it at least initially
e.g.

	if (!host->err_stats_enabled) {
		seq_printf(file, "Not supported by driver\n");
		return 0;
	}

> +
> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);

Maybe put the descriptions in an array and iterate e.g.

	const char *desc[MMC_ERR_MAX] = {
		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
		etc
	};
	int i;

	if (!host)
		return -EINVAL;

	for (i = 0; i < MMC_ERR_MAX; i++) {
		if (desc[i])
			seq_printf(file, "# %s:\t %d\n",
				   desc[i], host->err_stats[i]);
	}

> +
> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMD_CRC]);
> +
> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
> +
> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_DAT_CRC]);
> +
> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_ADMA]);
> +
> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_ADMA]);
> +
> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_TUNING]);
> +
> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
> +
> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
> +
> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
> +
> +	seq_printf(file, "# Request Timedout:\t %d\n",
> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
> +
> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
> +
> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_ICE_CFG]);
> +
> +	return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, mmc_err_stats_show, inode->i_private);
> +}
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> +				   size_t cnt, loff_t *ppos)
> +{
> +	struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
> +	memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> +	return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> +	.open	= mmc_err_stats_open,
> +	.read	= seq_read,
> +	.write	= mmc_err_stats_write,
> +};
> +
>  void mmc_add_host_debugfs(struct mmc_host *host)
>  {
>  	struct dentry *root;
> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>  				   &mmc_clock_fops);
>  
> +	debugfs_create_file("err_state", 0600, root, host,
> +		&mmc_err_state);
> +	debugfs_create_file("err_stats", 0600, root, host,
> +		&mmc_err_stats_fops);
> +
>  #ifdef CONFIG_FAIL_MMC_REQUEST
>  	if (fail_request)
>  		setup_fault_attr(&fail_default_attr, fail_request);
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index b15c034..5243929 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>  	bool recovery_needed = false;
>  
> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
> +
>  	switch (issue_type) {
>  	case MMC_ISSUE_ASYNC:
>  	case MMC_ISSUE_DCMD:
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c

I think the core changes should be a separate patch from sdhci.
I would probably split into 4:
	mmc core
	mmc block driver
	cqhci driver
	sdhci driver

> index 07c6da1..d742051 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	mmc_debugfs_err_stats_enable(host->mmc);

Why here and not in e.g. __sdhci_add_host() ?

>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3266,6 +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
>  
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
>  			__sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask);
> @@ -3905,20 +3921,33 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 7afb57c..c263f8f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,8 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];

If you make it u64 then we don't have to think about the value overflowing.

> +	bool			err_state;
>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
>  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> +	mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +
> +	/*
> +	 * Ignore the command timeout errors observed during
> +	 * the card init as those are expected.
> +	 */
> +	if (!mmc->err_state)
> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;

This would be better handled in the card init code somewhere, not here.

> +
> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
>  int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
>  int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH V2] mmc: debugfs: add error statistics
  2021-12-15 14:03 ` Adrian Hunter
@ 2021-12-21  7:16   ` Sajida Bhanu (Temp) (QUIC)
  2021-12-29  7:36     ` Sajida Bhanu (Temp) (QUIC)
  2022-01-03  9:50     ` Adrian Hunter
  0 siblings, 2 replies; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2021-12-21  7:16 UTC (permalink / raw)
  To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
	riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

Hi Adrian,

Thanks for the review.

Please find my comments inline.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Wednesday, December 15, 2021 7:33 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics

On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
> Add debugfs entry to query eMMC and SD card errors statistics.
> This feature is useful for debug and testing
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> ---
> 
> Changes since V1:
> 	-Removed sysfs entry for eMMC and SD card error statistics and added
> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.

Thanks for doing this.

> ---
>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/mmc/core/queue.c   |   2 +
>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>  4 files changed, 186 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
> index 3fdbc80..40210c34 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64 val)
>  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>  	"%llu\n");
>  
> +static int mmc_err_state_get(void *data, u64 *val)
> +{
> +	struct mmc_host *host = data;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	*val = host->err_state ? 1 : 0;
> +
> +	return 0;
> +}
> +
> +static int mmc_err_state_clear(void *data, u64 val)
> +{
> +	struct mmc_host *host = data;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	host->err_state = false;

Is there much reason to disable err stats from userspace?

>>>>> Yes. While debugging, we can check err_state: false means no errors have occurred at the driver level, and true means errors have occurred at the driver level. In that case we can then check err_stats[] for more details about the errors, such as data CRC, command CRC, etc.

> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
> +		mmc_err_state_clear, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data)
> +{
> +	struct mmc_host *host = (struct mmc_host *)file->private;
> +
> +	if (!host)
> +		return -EINVAL;

I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.

	if (!host->err_stats_enabled) {
		seq_printf(file, "Not supported by driver\n");
		return 0;
	}

>>>>>>> Do you mean declaring another variable (err_stats_enabled) and enabling it in probe?

> +
> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);

Maybe put the descriptions in an array and iterate e.g.

	const char *desc[MMC_ERR_MAX] = {
		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
		etc
	};
	int i;

	if (!host)
		return -EINVAL;

	for (i = 0; i < MMC_ERR_MAX; i++) {
		if (desc[i])
			seq_printf(file, "# %s:\t %d\n",
				   desc[i], host->err_stats[i]);
	}

>>>>>>> Sure 

> +
> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMD_CRC]);
> +
> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
> +
> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_DAT_CRC]);
> +
> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_ADMA]);
> +
> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_ADMA]);
> +
> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_TUNING]);
> +
> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
> +
> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
> +
> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
> +
> +	seq_printf(file, "# Request Timedout:\t %d\n",
> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
> +
> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
> +
> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_ICE_CFG]);
> +
> +	return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, mmc_err_stats_show, inode->i_private);
> +}
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> +				   size_t cnt, loff_t *ppos)
> +{
> +	struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
> +	memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> +	return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> +	.open	= mmc_err_stats_open,
> +	.read	= seq_read,
> +	.write	= mmc_err_stats_write,
> +};
> +
>  void mmc_add_host_debugfs(struct mmc_host *host)
>  {
>  	struct dentry *root;
> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>  				   &mmc_clock_fops);
>  
> +	debugfs_create_file("err_state", 0600, root, host,
> +		&mmc_err_state);
> +	debugfs_create_file("err_stats", 0600, root, host,
> +		&mmc_err_stats_fops);
> +
>  #ifdef CONFIG_FAIL_MMC_REQUEST
>  	if (fail_request)
>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 
> b15c034..5243929 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>  	bool recovery_needed = false;
>  
> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
> +
>  	switch (issue_type) {
>  	case MMC_ISSUE_ASYNC:
>  	case MMC_ISSUE_DCMD:
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c

I think the core changes should be a separate patch from sdhci.
I would probably split into 4:
	mmc core
	mmc block driver
	cqhci driver
	sdhci driver

>>>> Sure

> index 07c6da1..d742051 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	mmc_debugfs_err_stats_enable(host->mmc);

Why here and not in e.g. __sdhci_add_host() ?

>>>> If any errors happen at the driver level then we call sdhci_dumpregs(), right? (err_state being true means some errors happened at the driver level.) So it is better to call mmc_debugfs_err_stats_enable() here.

>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
> timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
> *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6 
> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
>  
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
>  			__sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 @@ 
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) 
> +{
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 
> 7afb57c..c263f8f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in 
> @@ -500,6 +517,8 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];

If you make it u64 then we don't have to think about the value overflowing.

>>> Sure

> +	bool			err_state;
>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
> DMA_FROM_DEVICE;  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc) 
> +{
> +	mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +
> +	/*
> +	 * Ignore the command timeout errors observed during
> +	 * the card init as those are excepted.
> +	 */
> +	if (!mmc->err_state)
> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;

This would be better handled in the card init code somewhere, not here.

>>>> Sure.

> +
> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
> **new_ext_csd);
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH V2] mmc: debugfs: add error statistics
  2021-12-21  7:16   ` Sajida Bhanu (Temp) (QUIC)
@ 2021-12-29  7:36     ` Sajida Bhanu (Temp) (QUIC)
  2022-01-03  9:50     ` Adrian Hunter
  1 sibling, 0 replies; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2021-12-29  7:36 UTC (permalink / raw)
  To: Sajida Bhanu (Temp) (QUIC),
	Adrian Hunter, riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

Gentle Reminder!!!

Thanks,
Sajida

-----Original Message-----
From: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com> 
Sent: Tuesday, December 21, 2021 12:46 PM
To: Adrian Hunter <adrian.hunter@intel.com>; Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: RE: [PATCH V2] mmc: debugfs: add error statistics

Hi Adrian,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com>
Sent: Wednesday, December 15, 2021 7:33 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics

On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
> Add debugfs entry to query eMMC and SD card errors statistics.
> This feature is useful for debug and testing
> 
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> ---
> 
> Changes since V1:
> 	-Removed sysfs entry for eMMC and SD card error statistics and added
> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.

Thanks for doing this.

> ---
>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>  drivers/mmc/core/queue.c   |   2 +
>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>  4 files changed, 186 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
> index 3fdbc80..40210c34 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>  	"%llu\n");
>  
> +static int mmc_err_state_get(void *data, u64 *val) {
> +	struct mmc_host *host = data;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	*val = host->err_state ? 1 : 0;
> +
> +	return 0;
> +}
> +
> +static int mmc_err_state_clear(void *data, u64 val) {
> +	struct mmc_host *host = data;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	host->err_state = false;

Is there much reason to disable err stats from userspace?

>>>>> Yes. While debugging we can check err_state: false means no errors happened at the driver level, and true means errors did happen at the driver level, in which case we can then check err_stats[] for more detail on the errors, such as data CRC, command CRC, etc.

> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
> +		mmc_err_state_clear, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
> +	struct mmc_host *host = (struct mmc_host *)file->private;
> +
> +	if (!host)
> +		return -EINVAL;

I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.

	if (!host->err_stats_enabled) {
		seq_printf(file, "Not supported by driver\n");
		return 0;
	}

>>>>>>> Do you mean declaring another variable (err_stats_enabled) and enabling it in probe?

> +
> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);

Maybe put the descriptions in an array and iterate e.g.

	const char *desc[MMC_ERR_MAX] = {
		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
		etc
	};
	int i;

	if (!host)
		return -EINVAL;

	for (i = 0; i < MMC_ERR_MAX; i++) {
		if (desc[i])
			seq_printf(file, "# %s:\t %d\n",
				   desc[i], host->err_stats[i]);
	}

>>>>>>> Sure

> +
> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMD_CRC]);
> +
> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
> +
> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_DAT_CRC]);
> +
> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_ADMA]);
> +
> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_ADMA]);
> +
> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
> +		   host->err_stats[MMC_ERR_TUNING]);
> +
> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
> +
> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
> +
> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
> +
> +	seq_printf(file, "# Request Timedout:\t %d\n",
> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
> +
> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
> +
> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
> +		   host->err_stats[MMC_ERR_ICE_CFG]);
> +
> +	return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file) 
> +{
> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> +				   size_t cnt, loff_t *ppos)
> +{
> +	struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> +	if (!host)
> +		return -EINVAL;
> +
> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
> +	memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> +	return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> +	.open	= mmc_err_stats_open,
> +	.read	= seq_read,
> +	.write	= mmc_err_stats_write,
> +};
> +
>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>  	struct dentry *root;
> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>  				   &mmc_clock_fops);
>  
> +	debugfs_create_file("err_state", 0600, root, host,
> +		&mmc_err_state);
> +	debugfs_create_file("err_stats", 0600, root, host,
> +		&mmc_err_stats_fops);
> +
>  #ifdef CONFIG_FAIL_MMC_REQUEST
>  	if (fail_request)
>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
> b15c034..5243929 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>  	bool recovery_needed = false;
>  
> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
> +
>  	switch (issue_type) {
>  	case MMC_ISSUE_ASYNC:
>  	case MMC_ISSUE_DCMD:
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c

I think the core changes should be a separate patch from sdhci.
I would probably split into 4:
	mmc core
	mmc block driver
	cqhci driver
	sdhci driver

>>>> Sure

> index 07c6da1..d742051 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>  	if (host->ops->dump_vendor_regs)
>  		host->ops->dump_vendor_regs(host);
>  
> +	mmc_debugfs_err_stats_enable(host->mmc);

Why here and not in e.g. __sdhci_add_host() ?

>>>> If any errors happen at the driver level then we call sdhci_dumpregs(), right? (err_state being true means some errors happened at the driver level.) So it is better to call mmc_debugfs_err_stats_enable() here.

>  	SDHCI_DUMP("============================================\n");
>  }
>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>  	spin_lock_irqsave(&host->lock, flags);
>  
>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
> timer_list *t)
>  
>  	if (host->data || host->data_cmd ||
>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>  		       mmc_hostname(host->mmc));
>  		sdhci_dumpregs(host);
> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
> *host, u32 intmask, u32 *intmask_p)
>  
>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> -		if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_TIMEOUT) {
>  			host->cmd->error = -ETIMEDOUT;
> -		else
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +		} else {
>  			host->cmd->error = -EILSEQ;
> -
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +		}
>  		/* Treat data command CRC error the same as data CRC error */
>  		if (host->cmd->data &&
>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
> +intmask, u32 *intmask_p)
>  			  -ETIMEDOUT :
>  			  -EILSEQ;
>  
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>  		if (sdhci_auto_cmd23(host, mrq)) {
>  			mrq->sbc->error = err;
>  			__sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  				host->data_cmd = NULL;
>  				data_cmd->error = -ETIMEDOUT;
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>  				return;
>  			}
> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>  		return;
>  	}
>  
> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		host->data->error = -ETIMEDOUT;
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	}
>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>  		host->data->error = -EILSEQ;
>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> -			!= MMC_BUS_TEST_R)
> +			!= MMC_BUS_TEST_R) {
>  		host->data->error = -EILSEQ;
> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +	}
>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>  		       intmask);
>  		sdhci_adma_show_error(host);
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>  		host->data->error = -EIO;
>  		if (host->ops->adma_workaround)
>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 @@ 
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>  	if (!host->cqe_on)
>  		return false;
>  
> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) 
> +{
>  		*cmd_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_TIMEOUT)
> +		if (intmask & SDHCI_INT_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> +		}
> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>  		*cmd_error = -ETIMEDOUT;
> -	else
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> +	} else
>  		*cmd_error = 0;
>  
> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>  		*data_error = -EILSEQ;
> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> +		if (intmask & SDHCI_INT_DATA_CRC) {
> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> +		}
> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>  		*data_error = -ETIMEDOUT;
> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>  		*data_error = -EIO;
> -	else
> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> +	} else
>  		*data_error = 0;
>  
>  	/* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 
> 7afb57c..c263f8f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>  
>  struct mmc_host;
>  
> +enum mmc_err_stat {
> +	MMC_ERR_CMD_TIMEOUT,
> +	MMC_ERR_CMD_CRC,
> +	MMC_ERR_DAT_TIMEOUT,
> +	MMC_ERR_DAT_CRC,
> +	MMC_ERR_AUTO_CMD,
> +	MMC_ERR_ADMA,
> +	MMC_ERR_TUNING,
> +	MMC_ERR_CMDQ_RED,
> +	MMC_ERR_CMDQ_GCE,
> +	MMC_ERR_CMDQ_ICCE,
> +	MMC_ERR_REQ_TIMEOUT,
> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
> +	MMC_ERR_ICE_CFG,
> +	MMC_ERR_MAX,
> +};
> +
>  struct mmc_host_ops {
>  	/*
>  	 * It is optional for the host to implement pre_req and post_req in 
> @@ -500,6 +517,8 @@ struct mmc_host {
>  
>  	/* Host Software Queue support */
>  	bool			hsq_enabled;
> +	u32                     err_stats[MMC_ERR_MAX];

If you make it u64 then we don't have to think about the value overflowing.

>>> Sure

> +	bool			err_state;
>  
>  	unsigned long		private[] ____cacheline_aligned;
>  };
> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
> DMA_FROM_DEVICE;  }
>  
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc) 
> +{
> +	mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> +		enum mmc_err_stat stat) {
> +
> +	/*
> +	 * Ignore the command timeout errors observed during
> +	 * the card init as those are excepted.
> +	 */
> +	if (!mmc->err_state)
> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;

This would be better handled in the card init code somewhere, not here.

>>>> Sure.

> +
> +	mmc->err_stats[stat] += 1;
> +}
> +
>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
> **new_ext_csd);
> 



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] mmc: debugfs: add error statistics
  2021-12-21  7:16   ` Sajida Bhanu (Temp) (QUIC)
  2021-12-29  7:36     ` Sajida Bhanu (Temp) (QUIC)
@ 2022-01-03  9:50     ` Adrian Hunter
  2022-01-04 15:02       ` Sajida Bhanu (Temp) (QUIC)
  1 sibling, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2022-01-03  9:50 UTC (permalink / raw)
  To: Sajida Bhanu (Temp) (QUIC), riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
> 
> Thanks for the review.
> 
> Please find the inline comments.

I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted.  Normally it is the other way around.

> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com> 
> Sent: Wednesday, December 15, 2021 7:33 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
> 
> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>> Add debugfs entry to query eMMC and SD card errors statistics.
>> This feature is useful for debug and testing
>>
>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>> ---
>>
>> Changes since V1:
>> 	-Removed sysfs entry for eMMC and SD card error statistics and added
>> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
> 
> Thanks for doing this.
> 
>> ---
>>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>  drivers/mmc/core/queue.c   |   2 +
>>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>>  4 files changed, 186 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
>> index 3fdbc80..40210c34 100644
>> --- a/drivers/mmc/core/debugfs.c
>> +++ b/drivers/mmc/core/debugfs.c
>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64 
>> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>  	"%llu\n");
>>  
>> +static int mmc_err_state_get(void *data, u64 *val) {
>> +	struct mmc_host *host = data;
>> +
>> +	if (!host)
>> +		return -EINVAL;
>> +
>> +	*val = host->err_state ? 1 : 0;
>> +
>> +	return 0;
>> +}
>> +
>> +static int mmc_err_state_clear(void *data, u64 val) {
>> +	struct mmc_host *host = data;
>> +
>> +	if (!host)
>> +		return -EINVAL;
>> +
>> +	host->err_state = false;
> 
> Is there much reason to disable err stats from userspace?
> 
>>>>>>> Yes. While debugging we can check err_state: false means no errors happened at the driver level, and true means errors did happen at the driver level, in which case we can then check err_stats[] for more detail on the errors, such as data CRC, command CRC, etc.

That is not exactly how it is programmed.  "err_state is false" means no errors have been recorded, not that no errors happened.

> 
>> +
>> +	return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>> +		mmc_err_state_clear, "%llu\n");
>> +
>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>> +	struct mmc_host *host = (struct mmc_host *)file->private;
>> +
>> +	if (!host)
>> +		return -EINVAL;
> 
> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
> 
> 	if (!host->err_stats_enabled) {
> 		seq_printf(file, "Not supported by driver\n");
> 		return 0;
> 	}
> 
>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?

Yes, although it is not clear if this is the same as what you want from err_state,
i.e. is err_state different from err_stats_enabled?

> 
>> +
>> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
> 
> Maybe put the descriptions in an array and iterate e.g.
> 
> 	const char *desc[MMC_ERR_MAX] = {
> 		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
> 		etc
> 	};
> 	int i;
> 
> 	if (!host)
> 		return -EINVAL;
> 
> 	for (i = 0; i < MMC_ERR_MAX; i++) {
> 		if (desc[i])
> 			seq_printf(file, "# %s:\t %d\n",
> 				   desc[i], host->err_stats[i]);
> 	}
> 
>>>>>>>> Sure 
> 
>> +
>> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMD_CRC]);
>> +
>> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>> +
>> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_DAT_CRC]);
>> +
>> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_ADMA]);
>> +
>> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_ADMA]);
>> +
>> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_TUNING]);
>> +
>> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
>> +
>> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
>> +
>> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
>> +
>> +	seq_printf(file, "# Request Timedout:\t %d\n",
>> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>> +
>> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>> +
>> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_ICE_CFG]);
>> +
>> +	return 0;
>> +}
>> +
>> +static int mmc_err_stats_open(struct inode *inode, struct file *file) 
>> +{
>> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
>> +
>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>> +				   size_t cnt, loff_t *ppos)
>> +{
>> +	struct mmc_host *host = filp->f_mapping->host->i_private;
>> +
>> +	if (!host)
>> +		return -EINVAL;
>> +
>> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
>> +	memset(host->err_stats, 0, sizeof(host->err_stats));
>> +
>> +	return cnt;
>> +}
>> +
>> +static const struct file_operations mmc_err_stats_fops = {
>> +	.open	= mmc_err_stats_open,
>> +	.read	= seq_read,
>> +	.write	= mmc_err_stats_write,
>> +};
>> +
>>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>>  	struct dentry *root;
>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>  				   &mmc_clock_fops);
>>  
>> +	debugfs_create_file("err_state", 0600, root, host,
>> +		&mmc_err_state);
>> +	debugfs_create_file("err_stats", 0600, root, host,
>> +		&mmc_err_stats_fops);
>> +
>>  #ifdef CONFIG_FAIL_MMC_REQUEST
>>  	if (fail_request)
>>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 
>> b15c034..5243929 100644
>> --- a/drivers/mmc/core/queue.c
>> +++ b/drivers/mmc/core/queue.c
>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>  	bool recovery_needed = false;
>>  
>> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>> +
>>  	switch (issue_type) {
>>  	case MMC_ISSUE_ASYNC:
>>  	case MMC_ISSUE_DCMD:
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> 
> I think the core changes should be a separate patch from sdhci.
> I would probably split into 4:
> 	mmc core
> 	mmc block driver
> 	cqhci driver
> 	sdhci driver
> 
>>>>> Sure
> 
>> index 07c6da1..d742051 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>  	if (host->ops->dump_vendor_regs)
>>  		host->ops->dump_vendor_regs(host);
>>  
>> +	mmc_debugfs_err_stats_enable(host->mmc);
> 
> Why here and not in e.g. __sdhci_add_host() ?
> 
> >>>>> If any errors happen at the driver level then we call sdhci_dumpregs(), right? (err_state being true means some errors happened at the driver level.)  So it is better to call mmc_debugfs_err_stats_enable() here.

Registers are not dumped for most errors.  Please move this to __sdhci_add_host().

> 
>>  	SDHCI_DUMP("============================================\n");
>>  }
>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>  	spin_lock_irqsave(&host->lock, flags);
>>  
>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
>> timer_list *t)
>>  
>>  	if (host->data || host->data_cmd ||
>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
>> *host, u32 intmask, u32 *intmask_p)
>>  
>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> -		if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>  			host->cmd->error = -ETIMEDOUT;
>> -		else
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +		} else {
>>  			host->cmd->error = -EILSEQ;
>> -
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +		}
>>  		/* Treat data command CRC error the same as data CRC error */
>>  		if (host->cmd->data &&
>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6 
>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>>  			  -ETIMEDOUT :
>>  			  -EILSEQ;
>>  
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>  			mrq->sbc->error = err;
>>  			__sdhci_finish_mrq(host, mrq);
>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  				host->data_cmd = NULL;
>>  				data_cmd->error = -ETIMEDOUT;
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>  				return;
>>  			}
>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  		return;
>>  	}
>>  
>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		host->data->error = -ETIMEDOUT;
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	}
>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>  		host->data->error = -EILSEQ;
>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> -			!= MMC_BUS_TEST_R)
>> +			!= MMC_BUS_TEST_R) {
>>  		host->data->error = -EILSEQ;
>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +	}
>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>  		       intmask);
>>  		sdhci_adma_show_error(host);
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>  		host->data->error = -EIO;
>>  		if (host->ops->adma_workaround)
>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 @@ 
>> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>  	if (!host->cqe_on)
>>  		return false;
>>  
>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) 
>> +{
>>  		*cmd_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +		}
>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>  		*cmd_error = -ETIMEDOUT;
>> -	else
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +	} else
>>  		*cmd_error = 0;
>>  
>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>  		*data_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +		}
>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		*data_error = -ETIMEDOUT;
>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		*data_error = -EIO;
>> -	else
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> +	} else
>>  		*data_error = 0;
>>  
>>  	/* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 
>> 7afb57c..c263f8f 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>  
>>  struct mmc_host;
>>  
>> +enum mmc_err_stat {
>> +	MMC_ERR_CMD_TIMEOUT,
>> +	MMC_ERR_CMD_CRC,
>> +	MMC_ERR_DAT_TIMEOUT,
>> +	MMC_ERR_DAT_CRC,
>> +	MMC_ERR_AUTO_CMD,
>> +	MMC_ERR_ADMA,
>> +	MMC_ERR_TUNING,
>> +	MMC_ERR_CMDQ_RED,
>> +	MMC_ERR_CMDQ_GCE,
>> +	MMC_ERR_CMDQ_ICCE,
>> +	MMC_ERR_REQ_TIMEOUT,
>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>> +	MMC_ERR_ICE_CFG,
>> +	MMC_ERR_MAX,
>> +};
>> +
>>  struct mmc_host_ops {
>>  	/*
>>  	 * It is optional for the host to implement pre_req and post_req in 
>> @@ -500,6 +517,8 @@ struct mmc_host {
>>  
>>  	/* Host Software Queue support */
>>  	bool			hsq_enabled;
>> +	u32                     err_stats[MMC_ERR_MAX];
> 
> If you make it u64 then we don't have to think about the value overflowing.
> 
>>>> Sure
> 
>> +	bool			err_state;
>>  
>>  	unsigned long		private[] ____cacheline_aligned;
>>  };
>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>> DMA_FROM_DEVICE;  }
>>  
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc) 
>> +{
>> +	mmc->err_state = true;
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> +		enum mmc_err_stat stat) {
>> +
>> +	/*
>> +	 * Ignore the command timeout errors observed during
>> +	 * the card init as those are expected.
>> +	 */
>> +	if (!mmc->err_state)
>> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
> 
> This would be better handled in the card init code somewhere, not here.
> 
>>>>> Sure.
> 
>> +
>> +	mmc->err_stats[stat] += 1;
>> +}
>> +
>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>> **new_ext_csd);
>>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH V2] mmc: debugfs: add error statistics
  2022-01-03  9:50     ` Adrian Hunter
@ 2022-01-04 15:02       ` Sajida Bhanu (Temp) (QUIC)
  2022-01-07  7:42         ` Adrian Hunter
  0 siblings, 1 reply; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2022-01-04 15:02 UTC (permalink / raw)
  To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
	riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

Hi Adrian,

Thanks for the review.

Please find the inline comments.

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Monday, January 3, 2022 3:20 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics

On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
> 
> Thanks for the review.
> 
> Please find the inline comments.

I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted.  Normally it is the other way around.

> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Wednesday, December 15, 2021 7:33 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; 
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
> nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
> 
> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>> Add debugfs entry to query eMMC and SD card errors statistics.
>> This feature is useful for debug and testing
>>
>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>> ---
>>
>> Changes since V1:
>> 	-Removed sysfs entry for eMMC and SD card error statistics and added
>> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
> 
> Thanks for doing this.
> 
>> ---
>>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>  drivers/mmc/core/queue.c   |   2 +
>>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>>  4 files changed, 186 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
>> index 3fdbc80..40210c34 100644
>> --- a/drivers/mmc/core/debugfs.c
>> +++ b/drivers/mmc/core/debugfs.c
>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>  	"%llu\n");
>>  
>> +static int mmc_err_state_get(void *data, u64 *val) {
>> +	struct mmc_host *host = data;
>> +
>> +	if (!host)
>> +		return -EINVAL;
>> +
>> +	*val = host->err_state ? 1 : 0;
>> +
>> +	return 0;
>> +}
>> +
>> +static int mmc_err_state_clear(void *data, u64 val) {
>> +	struct mmc_host *host = data;
>> +
>> +	if (!host)
>> +		return -EINVAL;
>> +
>> +	host->err_state = false;
> 
> Is there much reason to disable err stats from userspace?
> 
> >>>>>> Yes. While debugging we can check err_state: false means no errors happened at the driver level, and true means errors happened at the driver level, in which case we can check err_stats[] for more details on the error, such as data CRC, command CRC, etc.

That is not exactly how it is programmed.  "err_state is false" means no errors have been recorded, not that no errors happened.

>>>>>> If user wants to explicitly clear then he can use this.

> 
>> +
>> +	return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>> +		mmc_err_state_clear, "%llu\n");
>> +
>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>> +	struct mmc_host *host = (struct mmc_host *)file->private;
>> +
>> +	if (!host)
>> +		return -EINVAL;
> 
> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
> 
> 	if (!host->err_stats_enabled) {
> 		seq_printf(file, "Not supported by driver\n");
> 		return 0;
> 	}
> 
>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?

Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?

>>>>> Yes, err_state and err_stats_enabled are different.  err_state will be set if any errors happened at the driver level.
err_stats_enabled will be set if the err_stats feature is enabled; any vendor that wants to use the err_stats feature will set err_stats_enabled in their vendor-specific file.

> 
>> +
>> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
> 
> Maybe put the descriptions in an array and iterate e.g.
> 
> 	const char *desc[MMC_ERR_MAX] = {
> 		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
> 		etc
> 	};
> 	int i;
> 
> 	if (!host)
> 		return -EINVAL;
> 
> 	for (i = 0; i < MMC_ERR_MAX; i++) {
> 		if (desc[i])
> 			seq_printf(file, "# %s:\t %d\n",
> 				   desc[i], host->err_stats[i]);
> 	}
> 
>>>>>>>> Sure
> 
>> +
>> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMD_CRC]);
>> +
>> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>> +
>> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_DAT_CRC]);
>> +
>> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_ADMA]);
>> +
>> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_ADMA]);
>> +
>> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>> +		   host->err_stats[MMC_ERR_TUNING]);
>> +
>> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
>> +
>> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
>> +
>> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
>> +
>> +	seq_printf(file, "# Request Timedout:\t %d\n",
>> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>> +
>> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>> +
>> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>> +		   host->err_stats[MMC_ERR_ICE_CFG]);
>> +
>> +	return 0;
>> +}
>> +
>> +static int mmc_err_stats_open(struct inode *inode, struct file 
>> +*file) {
>> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
>> +
>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>> +				   size_t cnt, loff_t *ppos)
>> +{
>> +	struct mmc_host *host = filp->f_mapping->host->i_private;
>> +
>> +	if (!host)
>> +		return -EINVAL;
>> +
>> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
>> +	memset(host->err_stats, 0, sizeof(host->err_stats));
>> +
>> +	return cnt;
>> +}
>> +
>> +static const struct file_operations mmc_err_stats_fops = {
>> +	.open	= mmc_err_stats_open,
>> +	.read	= seq_read,
>> +	.write	= mmc_err_stats_write,
>> +};
>> +
>>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>>  	struct dentry *root;
>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>  				   &mmc_clock_fops);
>>  
>> +	debugfs_create_file("err_state", 0600, root, host,
>> +		&mmc_err_state);
>> +	debugfs_create_file("err_stats", 0600, root, host,
>> +		&mmc_err_stats_fops);
>> +
>>  #ifdef CONFIG_FAIL_MMC_REQUEST
>>  	if (fail_request)
>>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>> b15c034..5243929 100644
>> --- a/drivers/mmc/core/queue.c
>> +++ b/drivers/mmc/core/queue.c
>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>  	bool recovery_needed = false;
>>  
>> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>> +
>>  	switch (issue_type) {
>>  	case MMC_ISSUE_ASYNC:
>>  	case MMC_ISSUE_DCMD:
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> 
> I think the core changes should be a separate patch from sdhci.
> I would probably split into 4:
> 	mmc core
> 	mmc block driver
> 	cqhci driver
> 	sdhci driver
> 
>>>>> Sure
> 
>> index 07c6da1..d742051 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>  	if (host->ops->dump_vendor_regs)
>>  		host->ops->dump_vendor_regs(host);
>>  
>> +	mmc_debugfs_err_stats_enable(host->mmc);
> 
> Why here and not in e.g. __sdhci_add_host() ?
> 
> >>>>> If any errors happen at the driver level then we call sdhci_dumpregs(), right? (err_state being true means some errors happened at the driver level.)  So it is better to call mmc_debugfs_err_stats_enable() here.

Registers are not dumped for most errors.  Please move this to __sdhci_add_host().

>>>> err_state being true means errors happened at the driver level, and for most errors we dump the registers, so I think it is better to keep this call in sdhci_dumpregs() only.

> 
>>  	SDHCI_DUMP("============================================\n");
>>  }
>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>  	spin_lock_irqsave(&host->lock, flags);
>>  
>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
>> timer_list *t)
>>  
>>  	if (host->data || host->data_cmd ||
>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>  		       mmc_hostname(host->mmc));
>>  		sdhci_dumpregs(host);
>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
>> *host, u32 intmask, u32 *intmask_p)
>>  
>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> -		if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>  			host->cmd->error = -ETIMEDOUT;
>> -		else
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +		} else {
>>  			host->cmd->error = -EILSEQ;
>> -
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +		}
>>  		/* Treat data command CRC error the same as data CRC error */
>>  		if (host->cmd->data &&
>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>> +intmask, u32 *intmask_p)
>>  			  -ETIMEDOUT :
>>  			  -EILSEQ;
>>  
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>  			mrq->sbc->error = err;
>>  			__sdhci_finish_mrq(host, mrq);
>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  				host->data_cmd = NULL;
>>  				data_cmd->error = -ETIMEDOUT;
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>  				return;
>>  			}
>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>  		return;
>>  	}
>>  
>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		host->data->error = -ETIMEDOUT;
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	}
>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>  		host->data->error = -EILSEQ;
>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> -			!= MMC_BUS_TEST_R)
>> +			!= MMC_BUS_TEST_R) {
>>  		host->data->error = -EILSEQ;
>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +	}
>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>  		       intmask);
>>  		sdhci_adma_show_error(host);
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>  		host->data->error = -EIO;
>>  		if (host->ops->adma_workaround)
>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 
>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>  	if (!host->cqe_on)
>>  		return false;
>>  
>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | 
>> +SDHCI_INT_CRC)) {
>>  		*cmd_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>> +		if (intmask & SDHCI_INT_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> +		}
>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>  		*cmd_error = -ETIMEDOUT;
>> -	else
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> +	} else
>>  		*cmd_error = 0;
>>  
>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>  		*data_error = -EILSEQ;
>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> +		}
>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>  		*data_error = -ETIMEDOUT;
>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>  		*data_error = -EIO;
>> -	else
>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> +	} else
>>  		*data_error = 0;
>>  
>>  	/* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h 
>> index 7afb57c..c263f8f 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>  
>>  struct mmc_host;
>>  
>> +enum mmc_err_stat {
>> +	MMC_ERR_CMD_TIMEOUT,
>> +	MMC_ERR_CMD_CRC,
>> +	MMC_ERR_DAT_TIMEOUT,
>> +	MMC_ERR_DAT_CRC,
>> +	MMC_ERR_AUTO_CMD,
>> +	MMC_ERR_ADMA,
>> +	MMC_ERR_TUNING,
>> +	MMC_ERR_CMDQ_RED,
>> +	MMC_ERR_CMDQ_GCE,
>> +	MMC_ERR_CMDQ_ICCE,
>> +	MMC_ERR_REQ_TIMEOUT,
>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>> +	MMC_ERR_ICE_CFG,
>> +	MMC_ERR_MAX,
>> +};
>> +
>>  struct mmc_host_ops {
>>  	/*
>>  	 * It is optional for the host to implement pre_req and post_req in 
>> @@ -500,6 +517,8 @@ struct mmc_host {
>>  
>>  	/* Host Software Queue support */
>>  	bool			hsq_enabled;
>> +	u32                     err_stats[MMC_ERR_MAX];
> 
> If you make it u64 then we don't have to think about the value overflowing.
> 
>>>> Sure
> 
>> +	bool			err_state;
>>  
>>  	unsigned long		private[] ____cacheline_aligned;
>>  };
>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>> DMA_FROM_DEVICE;  }
>>  
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host 
>> +*mmc) {
>> +	mmc->err_state = true;
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> +		enum mmc_err_stat stat) {
>> +
>> +	/*
>> +	 * Ignore the command timeout errors observed during
>> +	 * the card init as those are expected.
>> +	 */
>> +	if (!mmc->err_state)
>> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
> 
> This would be better handled in the card init code somewhere, not here.
> 
>>>>> Sure.
> 
>> +
>> +	mmc->err_stats[stat] += 1;
>> +}
>> +
>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>> **new_ext_csd);
>>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] mmc: debugfs: add error statistics
  2022-01-04 15:02       ` Sajida Bhanu (Temp) (QUIC)
@ 2022-01-07  7:42         ` Adrian Hunter
  2022-01-10 13:11           ` Sajida Bhanu (Temp) (QUIC)
  0 siblings, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2022-01-07  7:42 UTC (permalink / raw)
  To: Sajida Bhanu (Temp) (QUIC), riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
> 
> Thanks for the review.
> 
> Please find the inline comments.
> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com> 
> Sent: Monday, January 3, 2022 3:20 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
> 
> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
> 
> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted.  Normally it is the other way around.
> 
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Wednesday, December 15, 2021 7:33 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
>> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; 
>> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
>> linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>> This feature is useful for debug and testing
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>> ---
>>>
>>> Changes since V1:
>>> 	-Removed sysfs entry for eMMC and SD card error statistics and added
>>> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>
>> Thanks for doing this.
>>
>>> ---
>>>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>>  drivers/mmc/core/queue.c   |   2 +
>>>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>>>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>>>  4 files changed, 186 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
>>> index 3fdbc80..40210c34 100644
>>> --- a/drivers/mmc/core/debugfs.c
>>> +++ b/drivers/mmc/core/debugfs.c
>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>>  	"%llu\n");
>>>  
>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>> +	struct mmc_host *host = data;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>> +
>>> +	*val = host->err_state ? 1 : 0;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>> +	struct mmc_host *host = data;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>> +
>>> +	host->err_state = false;
>>
>> Is there much reason to disable err stats from userspace?
>>
>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
> 
> That is not exactly how it is programmed.  "err_state is false" means no errors have been recorded, not that no errors happened.
> 
>>>>>>> If user wants to explicitly clear then he can use this.

Seems overcomplicated.  A user can just diff the old and new values:

cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats
...later...
cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats
diff /tmp/old-stats /tmp/new-stats
mv /tmp/new-stats /tmp/old-stats

I suggest just outputting the stats

> 
>>
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>> +		mmc_err_state_clear, "%llu\n");
>>> +
>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>> +	struct mmc_host *host = (struct mmc_host *)file->private;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>
>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>
>> 	if (!host->err_stats_enabled) {
>> 		seq_printf(file, "Not supported by driver\n");
>> 		return 0;
>> 	}
>>
>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
> 
> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
> 
>>>>>> Yes, err_state and err_stats_enabled both are different.  err_state will be set if any errors happened in driver level. 
>  err_stats_enabled will be set  if err_stats feature enabled,  if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
> 
>>
>>> +
>>> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>
>> Maybe put the descriptions in an array and iterate e.g.
>>
>> 	const char *desc[MMC_ERR_MAX] = {
>> 		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>> 		etc
>> 	};
>> 	int i;
>>
>> 	if (!host)
>> 		return -EINVAL;
>>
>> 	for (i = 0; i < MMC_ERR_MAX; i++) {
>> 		if (desc[i])
>> 			seq_printf(file, "# %s:\t %d\n",
>> 				   desc[1], host->err_stats[i]);
>> 	}
>>
>>>>>>>>> Sure
>>
>>> +
>>> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMD_CRC]);
>>> +
>>> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>> +
>>> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_DAT_CRC]);
>>> +
>>> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_TUNING]);
>>> +
>>> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
>>> +
>>> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
>>> +
>>> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>> +
>>> +	seq_printf(file, "# Request Timedout:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>> +
>>> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>> +
>>> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_ICE_CFG]);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int mmc_err_stats_open(struct inode *inode, struct file 
>>> +*file) {
>>> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
>>> +
>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>> +				   size_t cnt, loff_t *ppos)
>>> +{
>>> +	struct mmc_host *host = filp->f_mapping->host->i_private;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>> +
>>> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>> +	memset(host->err_stats, 0, sizeof(host->err_stats));
>>> +
>>> +	return cnt;
>>> +}
>>> +
>>> +static const struct file_operations mmc_err_stats_fops = {
>>> +	.open	= mmc_err_stats_open,
>>> +	.read	= seq_read,
>>> +	.write	= mmc_err_stats_write,
>>> +};
>>> +
>>>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>>>  	struct dentry *root;
>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>>  				   &mmc_clock_fops);
>>>  
>>> +	debugfs_create_file("err_state", 0600, root, host,
>>> +		&mmc_err_state);
>>> +	debugfs_create_file("err_stats", 0600, root, host,
>>> +		&mmc_err_stats_fops);
>>> +
>>>  #ifdef CONFIG_FAIL_MMC_REQUEST
>>>  	if (fail_request)
>>>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>> b15c034..5243929 100644
>>> --- a/drivers/mmc/core/queue.c
>>> +++ b/drivers/mmc/core/queue.c
>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>>  	bool recovery_needed = false;
>>>  
>>> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>> +
>>>  	switch (issue_type) {
>>>  	case MMC_ISSUE_ASYNC:
>>>  	case MMC_ISSUE_DCMD:
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>
>> I think the core changes should be a separate patch from sdhci.
>> I would probably split into 4:
>> 	mmc core
>> 	mmc block driver
>> 	cqhci driver
>> 	sdhci driver
>>
>>>>>> Sure
>>
>>> index 07c6da1..d742051 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>  	if (host->ops->dump_vendor_regs)
>>>  		host->ops->dump_vendor_regs(host);
>>>  
>>> +	mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Why here and not in e.g. __sdhci_add_host() ?
>>
>>>>>> If any errors happened  in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ).  So it is better to call mmc_debugfs_err_stats_enable() here.
> 
> Registers are not dumped for most errors.  Please move this to __sdhci_add_host().
> 
>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
> 
>>
>>>  	SDHCI_DUMP("============================================\n");
>>>  }
>>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>  	spin_lock_irqsave(&host->lock, flags);
>>>  
>>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>  		       mmc_hostname(host->mmc));
>>>  		sdhci_dumpregs(host);
>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
>>> timer_list *t)
>>>  
>>>  	if (host->data || host->data_cmd ||
>>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>  		       mmc_hostname(host->mmc));
>>>  		sdhci_dumpregs(host);
>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
>>> *host, u32 intmask, u32 *intmask_p)
>>>  
>>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> -		if (intmask & SDHCI_INT_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>>  			host->cmd->error = -ETIMEDOUT;
>>> -		else
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +		} else {
>>>  			host->cmd->error = -EILSEQ;
>>> -
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +		}
>>>  		/* Treat data command CRC error the same as data CRC error */
>>>  		if (host->cmd->data &&
>>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>>> +intmask, u32 *intmask_p)
>>>  			  -ETIMEDOUT :
>>>  			  -EILSEQ;
>>>  
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>>  			mrq->sbc->error = err;
>>>  			__sdhci_finish_mrq(host, mrq);
>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  				host->data_cmd = NULL;
>>>  				data_cmd->error = -ETIMEDOUT;
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>>  				return;
>>>  			}
>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>  		return;
>>>  	}
>>>  
>>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  		host->data->error = -ETIMEDOUT;
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +	}
>>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>  		host->data->error = -EILSEQ;
>>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> -			!= MMC_BUS_TEST_R)
>>> +			!= MMC_BUS_TEST_R) {
>>>  		host->data->error = -EILSEQ;
>>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +	}
>>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>  		       intmask);
>>>  		sdhci_adma_show_error(host);
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>  		host->data->error = -EIO;
>>>  		if (host->ops->adma_workaround)
>>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 
>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>  	if (!host->cqe_on)
>>>  		return false;
>>>  
>>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | 
>>> +SDHCI_INT_CRC)) {
>>>  		*cmd_error = -EILSEQ;
>>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_CRC) {
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +		}
>>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>>  		*cmd_error = -ETIMEDOUT;
>>> -	else
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +	} else
>>>  		*cmd_error = 0;
>>>  
>>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>  		*data_error = -EILSEQ;
>>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +		}
>>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  		*data_error = -ETIMEDOUT;
>>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>  		*data_error = -EIO;
>>> -	else
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> +	} else
>>>  		*data_error = 0;
>>>  
>>>  	/* Clear selected interrupts. */
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h 
>>> index 7afb57c..c263f8f 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>  
>>>  struct mmc_host;
>>>  
>>> +enum mmc_err_stat {
>>> +	MMC_ERR_CMD_TIMEOUT,
>>> +	MMC_ERR_CMD_CRC,
>>> +	MMC_ERR_DAT_TIMEOUT,
>>> +	MMC_ERR_DAT_CRC,
>>> +	MMC_ERR_AUTO_CMD,
>>> +	MMC_ERR_ADMA,
>>> +	MMC_ERR_TUNING,
>>> +	MMC_ERR_CMDQ_RED,
>>> +	MMC_ERR_CMDQ_GCE,
>>> +	MMC_ERR_CMDQ_ICCE,
>>> +	MMC_ERR_REQ_TIMEOUT,
>>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> +	MMC_ERR_ICE_CFG,
>>> +	MMC_ERR_MAX,
>>> +};
>>> +
>>>  struct mmc_host_ops {
>>>  	/*
>>>  	 * It is optional for the host to implement pre_req and post_req in 
>>> @@ -500,6 +517,8 @@ struct mmc_host {
>>>  
>>>  	/* Host Software Queue support */
>>>  	bool			hsq_enabled;
>>> +	u32                     err_stats[MMC_ERR_MAX];
>>
>> If you make it u64 then we don't have to think about the value overflowing.
>>
>>>>> Sure
>>
>>> +	bool			err_state;
>>>  
>>>  	unsigned long		private[] ____cacheline_aligned;
>>>  };
>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>>> DMA_FROM_DEVICE;  }
>>>  
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host 
>>> +*mmc) {
>>> +	mmc->err_state = true;
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> +		enum mmc_err_stat stat) {
>>> +
>>> +	/*
>>> +	 * Ignore the command timeout errors observed during
>>> +	 * the card init as those are excepted.
>>> +	 */
>>> +	if (!mmc->err_state)
>>> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>
>> This would be better handled in the card init code somewhere, not here.
>>
>>>>>> Sure.
>>
>>> +
>>> +	mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>>> **new_ext_csd);
>>>
>>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH V2] mmc: debugfs: add error statistics
  2022-01-07  7:42         ` Adrian Hunter
@ 2022-01-10 13:11           ` Sajida Bhanu (Temp) (QUIC)
  2022-01-10 13:29             ` Adrian Hunter
  0 siblings, 1 reply; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2022-01-10 13:11 UTC (permalink / raw)
  To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
	riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

Hi Adrian,

Thanks for the review.

Please find the inline comments

Thanks,
Sajida

-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Friday, January 7, 2022 1:13 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics

On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
> 
> Thanks for the review.
> 
> Please find the inline comments.
> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Monday, January 3, 2022 3:20 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; 
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
> nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
> 
> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
> 
> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted.  Normally it is the other way around.
> 
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Wednesday, December 15, 2021 7:33 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
>> ulf.hansson@linaro.org; agross@kernel.org; 
>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; 
>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>> This feature is useful for debug and testing
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>> ---
>>>
>>> Changes since V1:
>>> 	-Removed sysfs entry for eMMC and SD card error statistics and added
>>> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>
>> Thanks for doing this.
>>
>>> ---
>>>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>>  drivers/mmc/core/queue.c   |   2 +
>>>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>>>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>>>  4 files changed, 186 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
>>> index 3fdbc80..40210c34 100644
>>> --- a/drivers/mmc/core/debugfs.c
>>> +++ b/drivers/mmc/core/debugfs.c
>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>>  	"%llu\n");
>>>  
>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>> +	struct mmc_host *host = data;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>> +
>>> +	*val = host->err_state ? 1 : 0;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>> +	struct mmc_host *host = data;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>> +
>>> +	host->err_state = false;
>>
>> Is there much reason to disable err stats from userspace?
>>
>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
> 
> That is not exactly how it is programmed.  "err_state is false" means no errors have been recorded, not that no errors happened.
> 
>>>>>>> If user wants to explicitly clear then he can use this.

Seems overcomplicated.  A user can just diff the old and new values:

cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats
...later...
cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats
diff /tmp/old-stats /tmp/new-stats
mv /tmp/new-stats /tmp/old-stats

I suggest just outputting the stats

>>>>>>> Thanks for the suggestion, Adrian.
This way the user has to do a write to store the err_stats data in /tmp/old-stats, and then a read to get it back from /tmp/old-stats.

Our idea is that the user only needs to do a read to get the error stats info.

Please suggest which approach is okay.

Thanks,
Sajida	

> 
>>
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>> +		mmc_err_state_clear, "%llu\n");
>>> +
>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>> +	struct mmc_host *host = (struct mmc_host *)file->private;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>
>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>
>> 	if (!host->err_stats_enabled) {
>> 		seq_printf(file, "Not supported by driver\n");
>> 		return 0;
>> 	}
>>
>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
> 
> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
> 
>>>>>> Yes, err_state and err_stats_enabled both are different.  err_state will be set if any errors happened in driver level. 
>  err_stats_enabled will be set  if err_stats feature enabled,  if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
> 
>>
>>> +
>>> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>
>> Maybe put the descriptions in an array and iterate e.g.
>>
>> 	const char *desc[MMC_ERR_MAX] = {
>> 		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>> 		etc
>> 	};
>> 	int i;
>>
>> 	if (!host)
>> 		return -EINVAL;
>>
>> 	for (i = 0; i < MMC_ERR_MAX; i++) {
>> 		if (desc[i])
>> 			seq_printf(file, "# %s:\t %d\n",
>> 				   desc[1], host->err_stats[i]);
>> 	}
>>
>>>>>>>>> Sure
>>
>>> +
>>> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMD_CRC]);
>>> +
>>> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>> +
>>> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_DAT_CRC]);
>>> +
>>> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_TUNING]);
>>> +
>>> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
>>> +
>>> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
>>> +
>>> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>> +
>>> +	seq_printf(file, "# Request Timedout:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>> +
>>> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>> +
>>> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>> +		   host->err_stats[MMC_ERR_ICE_CFG]);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>> +*file) {
>>> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
>>> +
>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>> +				   size_t cnt, loff_t *ppos)
>>> +{
>>> +	struct mmc_host *host = filp->f_mapping->host->i_private;
>>> +
>>> +	if (!host)
>>> +		return -EINVAL;
>>> +
>>> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>> +	memset(host->err_stats, 0, sizeof(host->err_stats));
>>> +
>>> +	return cnt;
>>> +}
>>> +
>>> +static const struct file_operations mmc_err_stats_fops = {
>>> +	.open	= mmc_err_stats_open,
>>> +	.read	= seq_read,
>>> +	.write	= mmc_err_stats_write,
>>> +};
>>> +
>>>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>>>  	struct dentry *root;
>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>>  				   &mmc_clock_fops);
>>>  
>>> +	debugfs_create_file("err_state", 0600, root, host,
>>> +		&mmc_err_state);
>>> +	debugfs_create_file("err_stats", 0600, root, host,
>>> +		&mmc_err_stats_fops);
>>> +
>>>  #ifdef CONFIG_FAIL_MMC_REQUEST
>>>  	if (fail_request)
>>>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>> b15c034..5243929 100644
>>> --- a/drivers/mmc/core/queue.c
>>> +++ b/drivers/mmc/core/queue.c
>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>>  	bool recovery_needed = false;
>>>  
>>> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>> +
>>>  	switch (issue_type) {
>>>  	case MMC_ISSUE_ASYNC:
>>>  	case MMC_ISSUE_DCMD:
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>
>> I think the core changes should be a separate patch from sdhci.
>> I would probably split into 4:
>> 	mmc core
>> 	mmc block driver
>> 	cqhci driver
>> 	sdhci driver
>>
>>>>>> Sure
>>
>>> index 07c6da1..d742051 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>  	if (host->ops->dump_vendor_regs)
>>>  		host->ops->dump_vendor_regs(host);
>>>  
>>> +	mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Why here and not in e.g. __sdhci_add_host() ?
>>
>>>>>> If any errors happened  in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ).  So it is better to call mmc_debugfs_err_stats_enable() here.
> 
> Registers are not dumped for most errors.  Please move this to __sdhci_add_host().
> 
>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
> 
>>
>>>  	SDHCI_DUMP("============================================\n");
>>>  }
>>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>  	spin_lock_irqsave(&host->lock, flags);
>>>  
>>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>  		       mmc_hostname(host->mmc));
>>>  		sdhci_dumpregs(host);
>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
>>> timer_list *t)
>>>  
>>>  	if (host->data || host->data_cmd ||
>>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>  		       mmc_hostname(host->mmc));
>>>  		sdhci_dumpregs(host);
>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
>>> *host, u32 intmask, u32 *intmask_p)
>>>  
>>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> -		if (intmask & SDHCI_INT_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>>  			host->cmd->error = -ETIMEDOUT;
>>> -		else
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +		} else {
>>>  			host->cmd->error = -EILSEQ;
>>> -
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +		}
>>>  		/* Treat data command CRC error the same as data CRC error */
>>>  		if (host->cmd->data &&
>>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>>> +intmask, u32 *intmask_p)
>>>  			  -ETIMEDOUT :
>>>  			  -EILSEQ;
>>>  
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>>  			mrq->sbc->error = err;
>>>  			__sdhci_finish_mrq(host, mrq);
>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  				host->data_cmd = NULL;
>>>  				data_cmd->error = -ETIMEDOUT;
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>>  				return;
>>>  			}
>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>  		return;
>>>  	}
>>>  
>>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  		host->data->error = -ETIMEDOUT;
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +	}
>>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>  		host->data->error = -EILSEQ;
>>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> -			!= MMC_BUS_TEST_R)
>>> +			!= MMC_BUS_TEST_R) {
>>>  		host->data->error = -EILSEQ;
>>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +	}
>>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>  		       intmask);
>>>  		sdhci_adma_show_error(host);
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>  		host->data->error = -EIO;
>>>  		if (host->ops->adma_workaround)
>>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 
>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>  	if (!host->cqe_on)
>>>  		return false;
>>>  
>>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>> +SDHCI_INT_CRC)) {
>>>  		*cmd_error = -EILSEQ;
>>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_CRC) {
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> +		}
>>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>>  		*cmd_error = -ETIMEDOUT;
>>> -	else
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> +	} else
>>>  		*cmd_error = 0;
>>>  
>>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>  		*data_error = -EILSEQ;
>>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> +		}
>>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>  		*data_error = -ETIMEDOUT;
>>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>  		*data_error = -EIO;
>>> -	else
>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> +	} else
>>>  		*data_error = 0;
>>>  
>>>  	/* Clear selected interrupts. */
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h 
>>> index 7afb57c..c263f8f 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>  
>>>  struct mmc_host;
>>>  
>>> +enum mmc_err_stat {
>>> +	MMC_ERR_CMD_TIMEOUT,
>>> +	MMC_ERR_CMD_CRC,
>>> +	MMC_ERR_DAT_TIMEOUT,
>>> +	MMC_ERR_DAT_CRC,
>>> +	MMC_ERR_AUTO_CMD,
>>> +	MMC_ERR_ADMA,
>>> +	MMC_ERR_TUNING,
>>> +	MMC_ERR_CMDQ_RED,
>>> +	MMC_ERR_CMDQ_GCE,
>>> +	MMC_ERR_CMDQ_ICCE,
>>> +	MMC_ERR_REQ_TIMEOUT,
>>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> +	MMC_ERR_ICE_CFG,
>>> +	MMC_ERR_MAX,
>>> +};
>>> +
>>>  struct mmc_host_ops {
>>>  	/*
>>>  	 * It is optional for the host to implement pre_req and post_req 
>>> in @@ -500,6 +517,8 @@ struct mmc_host {
>>>  
>>>  	/* Host Software Queue support */
>>>  	bool			hsq_enabled;
>>> +	u32                     err_stats[MMC_ERR_MAX];
>>
>> If you make it u64 then we don't have to think about the value overflowing.
>>
>>>>> Sure
>>
>>> +	bool			err_state;
>>>  
>>>  	unsigned long		private[] ____cacheline_aligned;
>>>  };
>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>>> DMA_FROM_DEVICE;  }
>>>  
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>> +*mmc) {
>>> +	mmc->err_state = true;
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> +		enum mmc_err_stat stat) {
>>> +
>>> +	/*
>>> +	 * Ignore the command timeout errors observed during
>>> +	 * the card init as those are excepted.
>>> +	 */
>>> +	if (!mmc->err_state)
>>> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>
>> This would be better handled in the card init code somewhere, not here.
>>
>>>>>> Sure.
>>
>>> +
>>> +	mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>>> **new_ext_csd);
>>>
>>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] mmc: debugfs: add error statistics
  2022-01-10 13:11           ` Sajida Bhanu (Temp) (QUIC)
@ 2022-01-10 13:29             ` Adrian Hunter
  2022-01-10 14:59               ` Sajida Bhanu (Temp) (QUIC)
  0 siblings, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2022-01-10 13:29 UTC (permalink / raw)
  To: Sajida Bhanu (Temp) (QUIC), riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil

On 10/01/2022 15:11, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
> 
> Thanks for the review.
> 
> Please find the inline comments
> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com> 
> Sent: Friday, January 7, 2022 1:13 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
> 
> On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Monday, January 3, 2022 3:20 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
>> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; 
>> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
>> linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>>> Hi Adrian,
>>>
>>> Thanks for the review.
>>>
>>> Please find the inline comments.
>>
>> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted.  Normally it is the other way around.
>>
>>>
>>> Thanks,
>>> Sajida
>>>
>>> -----Original Message-----
>>> From: Adrian Hunter <adrian.hunter@intel.com>
>>> Sent: Wednesday, December 15, 2021 7:33 PM
>>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
>>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
>>> ulf.hansson@linaro.org; agross@kernel.org; 
>>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; 
>>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>>
>>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>>> This feature is useful for debug and testing
>>>>
>>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>>> ---
>>>>
>>>> Changes since V1:
>>>> 	-Removed sysfs entry for eMMC and SD card error statistics and added
>>>> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>>
>>> Thanks for doing this.
>>>
>>>> ---
>>>>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>>>  drivers/mmc/core/queue.c   |   2 +
>>>>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>>>>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>>>>  4 files changed, 186 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c 
>>>> index 3fdbc80..40210c34 100644
>>>> --- a/drivers/mmc/core/debugfs.c
>>>> +++ b/drivers/mmc/core/debugfs.c
>>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>>> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>>>  	"%llu\n");
>>>>  
>>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>>> +	struct mmc_host *host = data;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>> +
>>>> +	*val = host->err_state ? 1 : 0;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>>> +	struct mmc_host *host = data;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>> +
>>>> +	host->err_state = false;
>>>
>>> Is there much reason to disable err stats from userspace?
>>>
>>>>>>>> Yes, while debugging we can check err_state. If it is false, no errors happened at the driver level; if it is true, errors happened at the driver level, and we can then check err_stats[] for more details about the error, such as data CRC, command CRC, etc.
>>
>> That is not exactly how it is programmed.  "err_state is false" means no errors have been recorded, not that no errors happened.
>>
>>>>>>>> If user wants to explicitly clear then he can use this.
> 
> Seems overcomplicated.  A user can just diff the old and new values:
> 
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats
> ...later...
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats
> diff /tmp/old-stats /tmp/new-stats
> mv /tmp/new-stats /tmp/old-stats
> 
> I suggest just outputting the stats
> 
>>>>>>>> Thanks for the suggestion, Adrian.
> This way the user has to call write to store the err_stats data to /tmp/old-stats, and then has to call read to read /tmp/old-stats.

Only if you need to see what has changed

> 
> And our idea is that the user calls only read to get the error stats info.
> 
> Please suggest which approach is okay.

Please let's start with just outputting the stats.

> 
> Thanks,
> Sajida	
> 
>>
>>>
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>>> +		mmc_err_state_clear, "%llu\n");
>>>> +
>>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>>> +	struct mmc_host *host = (struct mmc_host *)file->private;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>
>>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>>
>>> 	if (!host->err_stats_enabled) {
>>> 		seq_printf(file, "Not supported by driver\n");
>>> 		return 0;
>>> 	}
>>>
>>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
>>
>> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>>
>>>>>>> Yes, err_state and err_stats_enabled both are different.  err_state will be set if any errors happened in driver level. 
>>  err_stats_enabled will be set  if err_stats feature enabled,  if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>>
>>>
>>>> +
>>>> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>>
>>> Maybe put the descriptions in an array and iterate e.g.
>>>
>>> 	const char *desc[MMC_ERR_MAX] = {
>>> 		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>>> 		etc
>>> 	};
>>> 	int i;
>>>
>>> 	if (!host)
>>> 		return -EINVAL;
>>>
>>> 	for (i = 0; i < MMC_ERR_MAX; i++) {
>>> 		if (desc[i])
>>> 			seq_printf(file, "# %s:\t %d\n",
>>> 				   desc[1], host->err_stats[i]);
>>> 	}
>>>
>>>>>>>>>> Sure
>>>
>>>> +
>>>> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMD_CRC]);
>>>> +
>>>> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>>> +
>>>> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_DAT_CRC]);
>>>> +
>>>> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_TUNING]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>>> +
>>>> +	seq_printf(file, "# Request Timedout:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>>> +
>>>> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_ICE_CFG]);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>>> +*file) {
>>>> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
>>>> +
>>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>>> +				   size_t cnt, loff_t *ppos)
>>>> +{
>>>> +	struct mmc_host *host = filp->f_mapping->host->i_private;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>> +
>>>> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>>> +	memset(host->err_stats, 0, sizeof(host->err_stats));
>>>> +
>>>> +	return cnt;
>>>> +}
>>>> +
>>>> +static const struct file_operations mmc_err_stats_fops = {
>>>> +	.open	= mmc_err_stats_open,
>>>> +	.read	= seq_read,
>>>> +	.write	= mmc_err_stats_write,
>>>> +};
>>>> +
>>>>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>>>>  	struct dentry *root;
>>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>>>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>>>  				   &mmc_clock_fops);
>>>>  
>>>> +	debugfs_create_file("err_state", 0600, root, host,
>>>> +		&mmc_err_state);
>>>> +	debugfs_create_file("err_stats", 0600, root, host,
>>>> +		&mmc_err_stats_fops);
>>>> +
>>>>  #ifdef CONFIG_FAIL_MMC_REQUEST
>>>>  	if (fail_request)
>>>>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
>>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>>> b15c034..5243929 100644
>>>> --- a/drivers/mmc/core/queue.c
>>>> +++ b/drivers/mmc/core/queue.c
>>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>>>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>>>  	bool recovery_needed = false;
>>>>  
>>>> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>>> +
>>>>  	switch (issue_type) {
>>>>  	case MMC_ISSUE_ASYNC:
>>>>  	case MMC_ISSUE_DCMD:
>>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>>
>>> I think the core changes should be a separate patch from sdhci.
>>> I would probably split into 4:
>>> 	mmc core
>>> 	mmc block driver
>>> 	cqhci driver
>>> 	sdhci driver
>>>
>>>>>>> Sure
>>>
>>>> index 07c6da1..d742051 100644
>>>> --- a/drivers/mmc/host/sdhci.c
>>>> +++ b/drivers/mmc/host/sdhci.c
>>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>>  	if (host->ops->dump_vendor_regs)
>>>>  		host->ops->dump_vendor_regs(host);
>>>>  
>>>> +	mmc_debugfs_err_stats_enable(host->mmc);
>>>
>>> Why here and not in e.g. __sdhci_add_host() ?
>>>
>>>>>>> If any errors happened  in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ).  So it is better to call mmc_debugfs_err_stats_enable() here.
>>
>> Registers are not dumped for most errors.  Please move this to __sdhci_add_host().
>>
>>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>>
>>>
>>>>  	SDHCI_DUMP("============================================\n");
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>>  	spin_lock_irqsave(&host->lock, flags);
>>>>  
>>>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>>  		       mmc_hostname(host->mmc));
>>>>  		sdhci_dumpregs(host);
>>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
>>>> timer_list *t)
>>>>  
>>>>  	if (host->data || host->data_cmd ||
>>>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>>  		       mmc_hostname(host->mmc));
>>>>  		sdhci_dumpregs(host);
>>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
>>>> *host, u32 intmask, u32 *intmask_p)
>>>>  
>>>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>>> -		if (intmask & SDHCI_INT_TIMEOUT)
>>>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>>>  			host->cmd->error = -ETIMEDOUT;
>>>> -		else
>>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> +		} else {
>>>>  			host->cmd->error = -EILSEQ;
>>>> -
>>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> +		}
>>>>  		/* Treat data command CRC error the same as data CRC error */
>>>>  		if (host->cmd->data &&
>>>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>>>> +intmask, u32 *intmask_p)
>>>>  			  -ETIMEDOUT :
>>>>  			  -EILSEQ;
>>>>  
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>>>  			mrq->sbc->error = err;
>>>>  			__sdhci_finish_mrq(host, mrq);
>>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>>  				host->data_cmd = NULL;
>>>>  				data_cmd->error = -ETIMEDOUT;
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>>>  				return;
>>>>  			}
>>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>>  		host->data->error = -ETIMEDOUT;
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> +	}
>>>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>>  		host->data->error = -EILSEQ;
>>>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>>> -			!= MMC_BUS_TEST_R)
>>>> +			!= MMC_BUS_TEST_R) {
>>>>  		host->data->error = -EILSEQ;
>>>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> +	}
>>>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>>  		       intmask);
>>>>  		sdhci_adma_show_error(host);
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>>  		host->data->error = -EIO;
>>>>  		if (host->ops->adma_workaround)
>>>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 
>>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>>  	if (!host->cqe_on)
>>>>  		return false;
>>>>  
>>>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>>> +SDHCI_INT_CRC)) {
>>>>  		*cmd_error = -EILSEQ;
>>>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>>>> +		if (intmask & SDHCI_INT_CRC) {
>>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> +		}
>>>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>>>  		*cmd_error = -ETIMEDOUT;
>>>> -	else
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> +	} else
>>>>  		*cmd_error = 0;
>>>>  
>>>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>>  		*data_error = -EILSEQ;
>>>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> +		}
>>>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>>  		*data_error = -ETIMEDOUT;
>>>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>>  		*data_error = -EIO;
>>>> -	else
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>> +	} else
>>>>  		*data_error = 0;
>>>>  
>>>>  	/* Clear selected interrupts. */
>>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h 
>>>> index 7afb57c..c263f8f 100644
>>>> --- a/include/linux/mmc/host.h
>>>> +++ b/include/linux/mmc/host.h
>>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>>  
>>>>  struct mmc_host;
>>>>  
>>>> +enum mmc_err_stat {
>>>> +	MMC_ERR_CMD_TIMEOUT,
>>>> +	MMC_ERR_CMD_CRC,
>>>> +	MMC_ERR_DAT_TIMEOUT,
>>>> +	MMC_ERR_DAT_CRC,
>>>> +	MMC_ERR_AUTO_CMD,
>>>> +	MMC_ERR_ADMA,
>>>> +	MMC_ERR_TUNING,
>>>> +	MMC_ERR_CMDQ_RED,
>>>> +	MMC_ERR_CMDQ_GCE,
>>>> +	MMC_ERR_CMDQ_ICCE,
>>>> +	MMC_ERR_REQ_TIMEOUT,
>>>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>>>> +	MMC_ERR_ICE_CFG,
>>>> +	MMC_ERR_MAX,
>>>> +};
>>>> +
>>>>  struct mmc_host_ops {
>>>>  	/*
>>>>  	 * It is optional for the host to implement pre_req and post_req 
>>>> in @@ -500,6 +517,8 @@ struct mmc_host {
>>>>  
>>>>  	/* Host Software Queue support */
>>>>  	bool			hsq_enabled;
>>>> +	u32                     err_stats[MMC_ERR_MAX];
>>>
>>> If you make it u64 then we don't have to think about the value overflowing.
>>>
>>>>>> Sure
>>>
>>>> +	bool			err_state;
>>>>  
>>>>  	unsigned long		private[] ____cacheline_aligned;
>>>>  };
>>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>>>> DMA_FROM_DEVICE;  }
>>>>  
>>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>>> +*mmc) {
>>>> +	mmc->err_state = true;
>>>> +}
>>>> +
>>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>>> +		enum mmc_err_stat stat) {
>>>> +
>>>> +	/*
>>>> +	 * Ignore the command timeout errors observed during
>>>> +	 * the card init as those are excepted.
>>>> +	 */
>>>> +	if (!mmc->err_state)
>>>> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>>
>>> This would be better handled in the card init code somewhere, not here.
>>>
>>>>>>> Sure.
>>>
>>>> +
>>>> +	mmc->err_stats[stat] += 1;
>>>> +}
>>>> +
>>>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>>>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>>>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>>>> **new_ext_csd);
>>>>
>>>
>>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH V2] mmc: debugfs: add error statistics
  2022-01-10 13:29             ` Adrian Hunter
@ 2022-01-10 14:59               ` Sajida Bhanu (Temp) (QUIC)
  0 siblings, 0 replies; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2022-01-10 14:59 UTC (permalink / raw)
  To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
	riteshh, Asutosh Das (asd),
	ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
	linux-kernel
  Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
	Pradeep Pragallapati (QUIC),
	sartgarg, nitirawa, sayalil



-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com> 
Sent: Monday, January 10, 2022 6:59 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics

On 10/01/2022 15:11, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
> 
> Thanks for the review.
> 
> Please find the inline comments
> 
> Thanks,
> Sajida
> 
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Friday, January 7, 2022 1:13 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; 
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; 
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
> nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
> 
> On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Monday, January 3, 2022 3:20 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
>> ulf.hansson@linaro.org; agross@kernel.org; 
>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; 
>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) 
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>>> Hi Adrian,
>>>
>>> Thanks for the review.
>>>
>>> Please find the inline comments.
>>
>> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted.  Normally it is the other way around.
>>
>>>
>>> Thanks,
>>> Sajida
>>>
>>> -----Original Message-----
>>> From: Adrian Hunter <adrian.hunter@intel.com>
>>> Sent: Wednesday, December 15, 2021 7:33 PM
>>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; 
>>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; 
>>> ulf.hansson@linaro.org; agross@kernel.org; 
>>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; 
>>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash 
>>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati 
>>> (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; 
>>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>>
>>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>>> This feature is useful for debug and testing
>>>>
>>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>>> ---
>>>>
>>>> Changes since V1:
>>>> 	-Removed sysfs entry for eMMC and SD card error statistics and added
>>>> 	 debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>>
>>> Thanks for doing this.
>>>
>>>> ---
>>>>  drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>>>  drivers/mmc/core/queue.c   |   2 +
>>>>  drivers/mmc/host/sdhci.c   |  53 ++++++++++++++++++-----
>>>>  include/linux/mmc/host.h   |  37 ++++++++++++++++
>>>>  4 files changed, 186 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/drivers/mmc/core/debugfs.c 
>>>> b/drivers/mmc/core/debugfs.c index 3fdbc80..40210c34 100644
>>>> --- a/drivers/mmc/core/debugfs.c
>>>> +++ b/drivers/mmc/core/debugfs.c
>>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>>> val)  DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>>>  	"%llu\n");
>>>>  
>>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>>> +	struct mmc_host *host = data;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>> +
>>>> +	*val = host->err_state ? 1 : 0;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>>> +	struct mmc_host *host = data;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>> +
>>>> +	host->err_state = false;
>>>
>>> Is there much reason to disable err stats from userspace?
>>>
>>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
>>
>> That is not exactly how it is programmed.  "err_state is false" means no errors have been recorded, not that no errors happened.
>>
>>>>>>>> If user wants to explicitly clear then he can use this.
> 
> Seems overcomplicated.  A user can just diff the old and new values:
> 
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats
> ...later...
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats
> diff /tmp/old-stats /tmp/new-stats
> mv /tmp/new-stats /tmp/old-stats
> 
> I suggest just outputting the stats
> 
>>>>>>>>> Thanks for the suggestion, Adrian.
> This way the user has to call write to store the err_stats data in /tmp/old-stats, and then call read to read /tmp/old-stats.

Only if you need to see what has changed

> 
> Our idea is that the user only needs to call read to get the error stats info.
> 
> Please suggest which approach is okay.

Please let's start with just outputting the stats.

>>>>> Sure. Thanks for the suggestion.
> 
> Thanks,
> Sajida	
> 
>>
>>>
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>>> +		mmc_err_state_clear, "%llu\n");
>>>> +
>>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>>> +	struct mmc_host *host = (struct mmc_host *)file->private;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>
>>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>>
>>> 	if (!host->err_stats_enabled) {
>>> 		seq_printf(file, "Not supported by driver\n");
>>> 		return 0;
>>> 	}
>>>
>>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
>>
>> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>>
>>>>>>> Yes, err_state and err_stats_enabled both are different.  err_state will be set if any errors happened in driver level. 
>>  err_stats_enabled will be set  if err_stats feature enabled,  if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>>
>>>
>>>> +
>>>> +	seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>>
>>> Maybe put the descriptions in an array and iterate e.g.
>>>
>>> 	const char *desc[MMC_ERR_MAX] = {
>>> 		[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>>> 		etc
>>> 	};
>>> 	int i;
>>>
>>> 	if (!host)
>>> 		return -EINVAL;
>>>
>>> 	for (i = 0; i < MMC_ERR_MAX; i++) {
>>> 		if (desc[i])
>>> 			seq_printf(file, "# %s:\t %d\n",
>>> 				   desc[i], host->err_stats[i]);
>>> 	}
>>>
>>>>>>>>>> Sure
>>>
>>>> +
>>>> +	seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMD_CRC]);
>>>> +
>>>> +	seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>>> +
>>>> +	seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_DAT_CRC]);
>>>> +
>>>> +	seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> +	seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> +	seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_TUNING]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_RED]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_GCE]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>>> +
>>>> +	seq_printf(file, "# Request Timedout:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>>> +
>>>> +	seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>>> +
>>>> +	seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>>> +		   host->err_stats[MMC_ERR_ICE_CFG]);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>>> +*file) {
>>>> +	return single_open(file, mmc_err_stats_show, inode->i_private); }
>>>> +
>>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>>> +				   size_t cnt, loff_t *ppos)
>>>> +{
>>>> +	struct mmc_host *host = filp->f_mapping->host->i_private;
>>>> +
>>>> +	if (!host)
>>>> +		return -EINVAL;
>>>> +
>>>> +	pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>>> +	memset(host->err_stats, 0, sizeof(host->err_stats));
>>>> +
>>>> +	return cnt;
>>>> +}
>>>> +
>>>> +static const struct file_operations mmc_err_stats_fops = {
>>>> +	.open	= mmc_err_stats_open,
>>>> +	.read	= seq_read,
>>>> +	.write	= mmc_err_stats_write,
>>>> +};
>>>> +
>>>>  void mmc_add_host_debugfs(struct mmc_host *host)  {
>>>>  	struct dentry *root;
>>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>>>  	debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>>>  				   &mmc_clock_fops);
>>>>  
>>>> +	debugfs_create_file("err_state", 0600, root, host,
>>>> +		&mmc_err_state);
>>>> +	debugfs_create_file("err_stats", 0600, root, host,
>>>> +		&mmc_err_stats_fops);
>>>> +
>>>>  #ifdef CONFIG_FAIL_MMC_REQUEST
>>>>  	if (fail_request)
>>>>  		setup_fault_attr(&fail_default_attr, fail_request); diff --git 
>>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>>> b15c034..5243929 100644
>>>> --- a/drivers/mmc/core/queue.c
>>>> +++ b/drivers/mmc/core/queue.c
>>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>>>  	enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>>>  	bool recovery_needed = false;
>>>>  
>>>> +	mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>>> +
>>>>  	switch (issue_type) {
>>>>  	case MMC_ISSUE_ASYNC:
>>>>  	case MMC_ISSUE_DCMD:
>>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>>
>>> I think the core changes should be a separate patch from sdhci.
>>> I would probably split into 4:
>>> 	mmc core
>>> 	mmc block driver
>>> 	cqhci driver
>>> 	sdhci driver
>>>
>>>>>>> Sure
>>>
>>>> index 07c6da1..d742051 100644
>>>> --- a/drivers/mmc/host/sdhci.c
>>>> +++ b/drivers/mmc/host/sdhci.c
>>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>>  	if (host->ops->dump_vendor_regs)
>>>>  		host->ops->dump_vendor_regs(host);
>>>>  
>>>> +	mmc_debugfs_err_stats_enable(host->mmc);
>>>
>>> Why here and not in e.g. __sdhci_add_host() ?
>>>
>>>>>>> If any errors happened  in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ).  So it is better to call mmc_debugfs_err_stats_enable() here.
>>
>> Registers are not dumped for most errors.  Please move this to __sdhci_add_host().
>>
>>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>>
>>>
>>>>  	SDHCI_DUMP("============================================\n");
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>>  	spin_lock_irqsave(&host->lock, flags);
>>>>  
>>>>  	if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>>  		pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>>  		       mmc_hostname(host->mmc));
>>>>  		sdhci_dumpregs(host);
>>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct 
>>>> timer_list *t)
>>>>  
>>>>  	if (host->data || host->data_cmd ||
>>>>  	    (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>>  		pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>>  		       mmc_hostname(host->mmc));
>>>>  		sdhci_dumpregs(host);
>>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host 
>>>> *host, u32 intmask, u32 *intmask_p)
>>>>  
>>>>  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>>  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>>> -		if (intmask & SDHCI_INT_TIMEOUT)
>>>> +		if (intmask & SDHCI_INT_TIMEOUT) {
>>>>  			host->cmd->error = -ETIMEDOUT;
>>>> -		else
>>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> +		} else {
>>>>  			host->cmd->error = -EILSEQ;
>>>> -
>>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> +		}
>>>>  		/* Treat data command CRC error the same as data CRC error */
>>>>  		if (host->cmd->data &&
>>>>  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ 
>>>> -3266,6
>>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 
>>>> +intmask, u32 *intmask_p)
>>>>  			  -ETIMEDOUT :
>>>>  			  -EILSEQ;
>>>>  
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>>  		if (sdhci_auto_cmd23(host, mrq)) {
>>>>  			mrq->sbc->error = err;
>>>>  			__sdhci_finish_mrq(host, mrq);
>>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>>  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>>  				host->data_cmd = NULL;
>>>>  				data_cmd->error = -ETIMEDOUT;
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>>  				__sdhci_finish_mrq(host, data_cmd->mrq);
>>>>  				return;
>>>>  			}
>>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>>  		return;
>>>>  	}
>>>>  
>>>> -	if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> +	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>>  		host->data->error = -ETIMEDOUT;
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> +	}
>>>>  	else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>>  		host->data->error = -EILSEQ;
>>>>  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>>  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>>> -			!= MMC_BUS_TEST_R)
>>>> +			!= MMC_BUS_TEST_R) {
>>>>  		host->data->error = -EILSEQ;
>>>> +		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> +	}
>>>>  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>>  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>>  		       intmask);
>>>>  		sdhci_adma_show_error(host);
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>>  		host->data->error = -EIO;
>>>>  		if (host->ops->adma_workaround)
>>>>  			host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 
>>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>>  	if (!host->cqe_on)
>>>>  		return false;
>>>>  
>>>> -	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>>> +	if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>>> +SDHCI_INT_CRC)) {
>>>>  		*cmd_error = -EILSEQ;
>>>> -	else if (intmask & SDHCI_INT_TIMEOUT)
>>>> +		if (intmask & SDHCI_INT_CRC) {
>>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> +		}
>>>> +	} else if (intmask & SDHCI_INT_TIMEOUT) {
>>>>  		*cmd_error = -ETIMEDOUT;
>>>> -	else
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> +	} else
>>>>  		*cmd_error = 0;
>>>>  
>>>> -	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>>> +	if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>>  		*data_error = -EILSEQ;
>>>> -	else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> +		if (intmask & SDHCI_INT_DATA_CRC) {
>>>> +			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> +					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> +				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> +		}
>>>> +	} else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>>  		*data_error = -ETIMEDOUT;
>>>> -	else if (intmask & SDHCI_INT_ADMA_ERROR)
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> +	} else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>>  		*data_error = -EIO;
>>>> -	else
>>>> +		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>> +	} else
>>>>  		*data_error = 0;
>>>>  
>>>>  	/* Clear selected interrupts. */
>>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h 
>>>> index 7afb57c..c263f8f 100644
>>>> --- a/include/linux/mmc/host.h
>>>> +++ b/include/linux/mmc/host.h
>>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>>  
>>>>  struct mmc_host;
>>>>  
>>>> +enum mmc_err_stat {
>>>> +	MMC_ERR_CMD_TIMEOUT,
>>>> +	MMC_ERR_CMD_CRC,
>>>> +	MMC_ERR_DAT_TIMEOUT,
>>>> +	MMC_ERR_DAT_CRC,
>>>> +	MMC_ERR_AUTO_CMD,
>>>> +	MMC_ERR_ADMA,
>>>> +	MMC_ERR_TUNING,
>>>> +	MMC_ERR_CMDQ_RED,
>>>> +	MMC_ERR_CMDQ_GCE,
>>>> +	MMC_ERR_CMDQ_ICCE,
>>>> +	MMC_ERR_REQ_TIMEOUT,
>>>> +	MMC_ERR_CMDQ_REQ_TIMEOUT,
>>>> +	MMC_ERR_ICE_CFG,
>>>> +	MMC_ERR_MAX,
>>>> +};
>>>> +
>>>>  struct mmc_host_ops {
>>>>  	/*
>>>>  	 * It is optional for the host to implement pre_req and post_req 
>>>> in @@ -500,6 +517,8 @@ struct mmc_host {
>>>>  
>>>>  	/* Host Software Queue support */
>>>>  	bool			hsq_enabled;
>>>> +	u32                     err_stats[MMC_ERR_MAX];
>>>
>>> If you make it u64 then we don't have to think about the value overflowing.
>>>
>>>>>> Sure
>>>
>>>> +	bool			err_state;
>>>>  
>>>>  	unsigned long		private[] ____cacheline_aligned;
>>>>  };
>>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>>  	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : 
>>>> DMA_FROM_DEVICE;  }
>>>>  
>>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>>> +*mmc) {
>>>> +	mmc->err_state = true;
>>>> +}
>>>> +
>>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>>> +		enum mmc_err_stat stat) {
>>>> +
>>>> +	/*
>>>> +	 * Ignore the command timeout errors observed during
>>>> +	 * the card init as those are excepted.
>>>> +	 */
>>>> +	if (!mmc->err_state)
>>>> +		mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>>
>>> This would be better handled in the card init code somewhere, not here.
>>>
>>>>>>> Sure.
>>>
>>>> +
>>>> +	mmc->err_stats[stat] += 1;
>>>> +}
>>>> +
>>>>  int mmc_send_tuning(struct mmc_host *host, u32 opcode, int 
>>>> *cmd_error);  int mmc_send_abort_tuning(struct mmc_host *host, u32 
>>>> opcode);  int mmc_get_ext_csd(struct mmc_card *card, u8 
>>>> **new_ext_csd);
>>>>
>>>
>>
> 


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH V2] mmc: debugfs: add error statistics
@ 2021-12-14 21:11 kernel test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-12-14 21:11 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 28711 bytes --]

CC: kbuild-all(a)lists.01.org
In-Reply-To: <1639492863-7053-1-git-send-email-quic_c_sbhanu@quicinc.com>
References: <1639492863-7053-1-git-send-email-quic_c_sbhanu@quicinc.com>
TO: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
TO: adrian.hunter(a)intel.com
TO: riteshh(a)codeaurora.org
TO: asutoshd(a)quicinc.com
TO: ulf.hansson(a)linaro.org
TO: agross(a)kernel.org
TO: bjorn.andersson(a)linaro.org
TO: linux-mmc(a)vger.kernel.org
TO: linux-arm-msm(a)vger.kernel.org
TO: linux-kernel(a)vger.kernel.org
CC: stummala(a)codeaurora.org

Hi Shaik,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.16-rc5 next-20211213]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Shaik-Sajida-Bhanu/mmc-debugfs-add-error-statistics/20211214-224314
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5472f14a37421d1bca3dddf33cabd3bd6dbefbbc
:::::: branch date: 6 hours ago
:::::: commit date: 6 hours ago
config: i386-randconfig-m021-20211214 (https://download.01.org/0day-ci/archive/20211215/202112150555.zkOS6PKM-lkp(a)intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0

If you fix the issue, kindly add the following tags as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

New smatch warnings:
drivers/mmc/host/sdhci.c:3251 sdhci_cmd_irq() warn: was && intended here instead of ||?
drivers/mmc/host/sdhci.c:3397 sdhci_data_irq() warn: was && intended here instead of ||?
drivers/mmc/host/sdhci.c:3927 sdhci_cqe_irq() warn: was && intended here instead of ||?

Old smatch warnings:
drivers/mmc/host/sdhci.c:3940 sdhci_cqe_irq() warn: was && intended here instead of ||?

vim +3251 drivers/mmc/host/sdhci.c

d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3212  
4bf780996669280 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3213  static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3214  {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3215  	/* Handle auto-CMD12 error */
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3216  	if (intmask & SDHCI_INT_AUTO_CMD_ERR && host->data_cmd) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3217  		struct mmc_request *mrq = host->data_cmd->mrq;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3218  		u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS);
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3219  		int data_err_bit = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3220  				   SDHCI_INT_DATA_TIMEOUT :
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3221  				   SDHCI_INT_DATA_CRC;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3222  
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3223  		/* Treat auto-CMD12 error the same as data error */
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3224  		if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3225  			*intmask_p |= data_err_bit;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3226  			return;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3227  		}
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3228  	}
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3229  
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3230  	if (!host->cmd) {
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3231  		/*
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3232  		 * SDHCI recovers from errors by resetting the cmd and data
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3233  		 * circuits.  Until that is done, there very well might be more
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3234  		 * interrupts, so ignore them in that case.
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3235  		 */
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3236  		if (host->pending_reset)
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3237  			return;
2e4456f08fa81b9 drivers/mmc/host/sdhci.c Marek Vasut        2015-11-18  3238  		pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n",
b67ac3f339c76df drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3239  		       mmc_hostname(host->mmc), (unsigned)intmask);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3240  		sdhci_dumpregs(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3241  		return;
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3242  	}
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3243  
ec014cbacf6229c drivers/mmc/host/sdhci.c Russell King       2016-01-26  3244  	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
ec014cbacf6229c drivers/mmc/host/sdhci.c Russell King       2016-01-26  3245  		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3246  		if (intmask & SDHCI_INT_TIMEOUT) {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman      2007-07-22  3247  			host->cmd->error = -ETIMEDOUT;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3248  			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3249  		} else {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman      2007-07-22  3250  			host->cmd->error = -EILSEQ;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 @3251  			if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3252  					host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3253  				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3254  		}
4bf780996669280 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3255  		/* Treat data command CRC error the same as data CRC error */
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3256  		if (host->cmd->data &&
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3257  		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3258  		     SDHCI_INT_CRC) {
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3259  			host->cmd = NULL;
4bf780996669280 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3260  			*intmask_p |= SDHCI_INT_DATA_CRC;
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3261  			return;
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3262  		}
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King       2016-01-26  3263  
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter      2019-04-05  3264  		__sdhci_finish_mrq(host, host->cmd->mrq);
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3265  		return;
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3266  	}
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3267  
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3268  	/* Handle auto-CMD23 error */
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3269  	if (intmask & SDHCI_INT_AUTO_CMD_ERR) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3270  		struct mmc_request *mrq = host->cmd->mrq;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3271  		u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS);
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3272  		int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3273  			  -ETIMEDOUT :
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3274  			  -EILSEQ;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3275  
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3276  		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
38929d4f0d811df drivers/mmc/host/sdhci.c ChanWoo Lee        2021-08-25  3277  		if (sdhci_auto_cmd23(host, mrq)) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3278  			mrq->sbc->error = err;
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter      2019-04-05  3279  			__sdhci_finish_mrq(host, mrq);
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3280  			return;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3281  		}
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3282  	}
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter      2018-11-15  3283  
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3284  	if (intmask & SDHCI_INT_RESPONSE)
43b58b36b7e6554 drivers/mmc/host/sdhci.c Pierre Ossman      2007-07-25  3285  		sdhci_finish_command(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3286  }
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3287  
08621b18a15ee21 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3288  static void sdhci_adma_show_error(struct sdhci_host *host)
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3289  {
1c3d5f6ddcb915c drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3290  	void *desc = host->adma_table;
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3291  	dma_addr_t dma = host->adma_addr;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3292  
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3293  	sdhci_dumpregs(host);
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3294  
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3295  	while (true) {
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3296  		struct sdhci_adma2_64_desc *dma_desc = desc;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3297  
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3298  		if (host->flags & SDHCI_USE_64_BIT_DMA)
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3299  			SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3300  			    (unsigned long long)dma,
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3301  			    le32_to_cpu(dma_desc->addr_hi),
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3302  			    le32_to_cpu(dma_desc->addr_lo),
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3303  			    le16_to_cpu(dma_desc->len),
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3304  			    le16_to_cpu(dma_desc->cmd));
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3305  		else
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3306  			SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3307  			    (unsigned long long)dma,
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3308  			    le32_to_cpu(dma_desc->addr_lo),
0545230f1764bc6 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3309  			    le16_to_cpu(dma_desc->len),
0545230f1764bc6 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3310  			    le16_to_cpu(dma_desc->cmd));
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3311  
76fe379acaeb857 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3312  		desc += host->desc_sz;
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3313  		dma += host->desc_sz;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3314  
0545230f1764bc6 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3315  		if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3316  			break;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3317  	}
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3318  }
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3319  
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3320  static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3321  {
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S         2012-01-06  3322  	u32 command;
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3323  
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3324  	/*
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3325  	 * CMD19 generates _only_ Buffer Read Ready interrupt if
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3326  	 * use sdhci_send_tuning.
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3327  	 * Need to exclude this case: PIO mode and use mmc_send_tuning,
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3328  	 * If not, sdhci_transfer_pio will never be called, make the
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3329  	 * SDHCI_INT_DATA_AVAIL always there, stuck in irq storm.
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3330  	 */
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen         2021-08-13  3331  	if (intmask & SDHCI_INT_DATA_AVAIL && !host->data) {
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S         2012-01-06  3332  		command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND));
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S         2012-01-06  3333  		if (command == MMC_SEND_TUNING_BLOCK ||
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S         2012-01-06  3334  		    command == MMC_SEND_TUNING_BLOCK_HS200) {
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath       2011-05-05  3335  			host->tuning_done = 1;
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath       2011-05-05  3336  			wake_up(&host->buf_ready_int);
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath       2011-05-05  3337  			return;
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath       2011-05-05  3338  		}
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath       2011-05-05  3339  	}
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath       2011-05-05  3340  
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3341  	if (!host->data) {
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3342  		struct mmc_command *data_cmd = host->data_cmd;
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3343  
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3344  		/*
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3345  		 * The "data complete" interrupt is also used to
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3346  		 * indicate that a busy state has ended. See comment
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3347  		 * above in sdhci_cmd_irq().
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3348  		 */
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3349  		if (data_cmd && (data_cmd->flags & MMC_RSP_BUSY)) {
c5abd5e8998e20c drivers/mmc/host/sdhci.c Matthieu CASTET    2014-08-14  3350  			if (intmask & SDHCI_INT_DATA_TIMEOUT) {
69b962a65a54769 drivers/mmc/host/sdhci.c Adrian Hunter      2016-11-02  3351  				host->data_cmd = NULL;
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3352  				data_cmd->error = -ETIMEDOUT;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3353  				mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter      2019-04-05  3354  				__sdhci_finish_mrq(host, data_cmd->mrq);
c5abd5e8998e20c drivers/mmc/host/sdhci.c Matthieu CASTET    2014-08-14  3355  				return;
c5abd5e8998e20c drivers/mmc/host/sdhci.c Matthieu CASTET    2014-08-14  3356  			}
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3357  			if (intmask & SDHCI_INT_DATA_END) {
69b962a65a54769 drivers/mmc/host/sdhci.c Adrian Hunter      2016-11-02  3358  				host->data_cmd = NULL;
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min         2014-08-30  3359  				/*
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min         2014-08-30  3360  				 * Some cards handle busy-end interrupt
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min         2014-08-30  3361  				 * before the command completed, so make
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min         2014-08-30  3362  				 * sure we do things in the proper order.
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min         2014-08-30  3363  				 */
ea96802384cd062 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3364  				if (host->cmd == data_cmd)
ea96802384cd062 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3365  					return;
ea96802384cd062 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3366  
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter      2019-04-05  3367  				__sdhci_finish_mrq(host, data_cmd->mrq);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3368  				return;
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3369  			}
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman      2008-07-25  3370  		}
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3371  
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3372  		/*
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3373  		 * SDHCI recovers from errors by resetting the cmd and data
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3374  		 * circuits. Until that is done, there very well might be more
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3375  		 * interrupts, so ignore them in that case.
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3376  		 */
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3377  		if (host->pending_reset)
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3378  			return;
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3379  
2e4456f08fa81b9 drivers/mmc/host/sdhci.c Marek Vasut        2015-11-18  3380  		pr_err("%s: Got data interrupt 0x%08x even though no data operation was in progress.\n",
b67ac3f339c76df drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3381  		       mmc_hostname(host->mmc), (unsigned)intmask);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3382  		sdhci_dumpregs(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3383  
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3384  		return;
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3385  	}
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3386  
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3387  	if (intmask & SDHCI_INT_DATA_TIMEOUT) {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman      2007-07-22  3388  		host->data->error = -ETIMEDOUT;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3389  		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3390  	}
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee          2010-12-15  3391  	else if (intmask & SDHCI_INT_DATA_END_BIT)
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee          2010-12-15  3392  		host->data->error = -EILSEQ;
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee          2010-12-15  3393  	else if ((intmask & SDHCI_INT_DATA_CRC) &&
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee          2010-12-15  3394  		SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3395  			!= MMC_BUS_TEST_R) {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman      2007-07-22  3396  		host->data->error = -EILSEQ;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 @3397  		if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3398  				host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3399  			mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3400  	}
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3401  	else if (intmask & SDHCI_INT_ADMA_ERROR) {
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3402  		pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King       2019-09-22  3403  		       intmask);
08621b18a15ee21 drivers/mmc/host/sdhci.c Adrian Hunter      2014-11-04  3404  		sdhci_adma_show_error(host);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14  3405  		mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
2134a922c6e75c7 drivers/mmc/host/sdhci.c Pierre Ossman      2008-06-28  3406  		host->data->error = -EIO;
a4071fbbb9edbc5 drivers/mmc/host/sdhci.c Haijun Zhang       2012-12-04  3407  		if (host->ops->adma_workaround)
a4071fbbb9edbc5 drivers/mmc/host/sdhci.c Haijun Zhang       2012-12-04  3408  			host->ops->adma_workaround(host, intmask);
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks          2009-06-14  3409  	}
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3410  
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman      2007-07-22  3411  	if (host->data->error)
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3412  		sdhci_finish_data(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3413  	else {
a406f5a3b68ee1d drivers/mmc/sdhci.c      Pierre Ossman      2006-07-02  3414  		if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL))
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3415  			sdhci_transfer_pio(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3416  
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman      2007-05-13  3417  		/*
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman      2007-05-13  3418  		 * We currently don't do anything fancy with DMA
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman      2007-05-13  3419  		 * boundaries, but as we can't disable the feature
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman      2007-05-13  3420  		 * we need to at least restart the transfer.
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3421  		 *
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3422  		 * According to the spec sdhci_readl(host, SDHCI_DMA_ADDRESS)
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3423  		 * should return a valid address to continue from, but as
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3424  		 * some controllers are faulty, don't trust them.
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman      2007-05-13  3425  		 */
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3426  		if (intmask & SDHCI_INT_DMA_END) {
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang      2018-08-30  3427  			dma_addr_t dmastart, dmanow;
bd9b902798ab14d drivers/mmc/host/sdhci.c Linus Walleij      2018-01-29  3428  
bd9b902798ab14d drivers/mmc/host/sdhci.c Linus Walleij      2018-01-29  3429  			dmastart = sdhci_sdma_address(host);
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3430  			dmanow = dmastart + host->data->bytes_xfered;
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3431  			/*
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3432  			 * Force update to the next DMA block boundary.
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3433  			 */
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3434  			dmanow = (dmanow &
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang      2018-08-30  3435  				~((dma_addr_t)SDHCI_DEFAULT_BOUNDARY_SIZE - 1)) +
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3436  				SDHCI_DEFAULT_BOUNDARY_SIZE;
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3437  			host->data->bytes_xfered = dmanow - dmastart;
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang      2018-08-30  3438  			DBG("DMA base %pad, transferred 0x%06x bytes, next %pad\n",
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang      2018-08-30  3439  			    &dmastart, host->data->bytes_xfered, &dmanow);
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang      2018-08-30  3440  			sdhci_set_sdma_addr(host, dmanow);
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni        2011-04-12  3441  		}
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman      2007-05-13  3442  
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3443  		if (intmask & SDHCI_INT_DATA_END) {
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter      2016-06-29  3444  			if (host->cmd == host->data_cmd) {
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3445  				/*
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3446  				 * Data managed to finish before the
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3447  				 * command completed. Make sure we do
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3448  				 * things in the proper order.
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3449  				 */
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3450  				host->data_early = 1;
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3451  			} else {
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3452  				sdhci_finish_data(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3453  			}
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3454  		}
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3455  	}
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman      2007-08-12  3456  }
d129bceb1d44ed3 drivers/mmc/sdhci.c      Pierre Ossman      2006-03-24  3457  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-01-10 15:01 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-14 14:41 [PATCH V2] mmc: debugfs: add error statistics Shaik Sajida Bhanu
2021-12-15 14:03 ` Adrian Hunter
2021-12-21  7:16   ` Sajida Bhanu (Temp) (QUIC)
2021-12-29  7:36     ` Sajida Bhanu (Temp) (QUIC)
2022-01-03  9:50     ` Adrian Hunter
2022-01-04 15:02       ` Sajida Bhanu (Temp) (QUIC)
2022-01-07  7:42         ` Adrian Hunter
2022-01-10 13:11           ` Sajida Bhanu (Temp) (QUIC)
2022-01-10 13:29             ` Adrian Hunter
2022-01-10 14:59               ` Sajida Bhanu (Temp) (QUIC)
2021-12-14 21:11 kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.