* [PATCH V2] mmc: debugfs: add error statistics
@ 2021-12-14 14:41 Shaik Sajida Bhanu
2021-12-15 14:03 ` Adrian Hunter
0 siblings, 1 reply; 11+ messages in thread
From: Shaik Sajida Bhanu @ 2021-12-14 14:41 UTC (permalink / raw)
To: adrian.hunter, riteshh, asutoshd, ulf.hansson, agross,
bjorn.andersson, linux-mmc, linux-arm-msm, linux-kernel
Cc: stummala, vbadigan, quic_rampraka, quic_pragalla, sartgarg,
nitirawa, sayalil, Shaik Sajida Bhanu
Add debugfs entry to query eMMC and SD card errors statistics.
This feature is useful for debug and testing
Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
---
Changes since V1:
-Removed sysfs entry for eMMC and SD card error statistics and added
debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
---
drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
drivers/mmc/core/queue.c | 2 +
drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
include/linux/mmc/host.h | 37 ++++++++++++++++
4 files changed, 186 insertions(+), 12 deletions(-)
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 3fdbc80..40210c34 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64 val)
DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
"%llu\n");
+static int mmc_err_state_get(void *data, u64 *val)
+{
+ struct mmc_host *host = data;
+
+ if (!host)
+ return -EINVAL;
+
+ *val = host->err_state ? 1 : 0;
+
+ return 0;
+}
+
+static int mmc_err_state_clear(void *data, u64 val)
+{
+ struct mmc_host *host = data;
+
+ if (!host)
+ return -EINVAL;
+
+ host->err_state = false;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
+ mmc_err_state_clear, "%llu\n");
+
+static int mmc_err_stats_show(struct seq_file *file, void *data)
+{
+ struct mmc_host *host = (struct mmc_host *)file->private;
+
+ if (!host)
+ return -EINVAL;
+
+ seq_printf(file, "# Command Timeout Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_CMD_TIMEOUT]);
+
+ seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_CMD_CRC]);
+
+ seq_printf(file, "# Data Timeout Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_DAT_TIMEOUT]);
+
+ seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_DAT_CRC]);
+
+ seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_ADMA]);
+
+ seq_printf(file, "# ADMA Error Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_ADMA]);
+
+ seq_printf(file, "# Tuning Error Occurred:\t %d\n",
+ host->err_stats[MMC_ERR_TUNING]);
+
+ seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
+ host->err_stats[MMC_ERR_CMDQ_RED]);
+
+ seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
+ host->err_stats[MMC_ERR_CMDQ_GCE]);
+
+ seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
+ host->err_stats[MMC_ERR_CMDQ_ICCE]);
+
+ seq_printf(file, "# Request Timedout:\t %d\n",
+ host->err_stats[MMC_ERR_REQ_TIMEOUT]);
+
+ seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
+ host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
+
+ seq_printf(file, "# ICE Config Errors:\t\t %d\n",
+ host->err_stats[MMC_ERR_ICE_CFG]);
+
+ return 0;
+}
+
+static int mmc_err_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, mmc_err_stats_show, inode->i_private);
+}
+
+static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct mmc_host *host = filp->f_mapping->host->i_private;
+
+ if (!host)
+ return -EINVAL;
+
+ pr_debug("%s: Resetting MMC error statistics\n", __func__);
+ memset(host->err_stats, 0, sizeof(host->err_stats));
+
+ return cnt;
+}
+
+static const struct file_operations mmc_err_stats_fops = {
+ .open = mmc_err_stats_open,
+ .read = seq_read,
+ .write = mmc_err_stats_write,
+};
+
void mmc_add_host_debugfs(struct mmc_host *host)
{
struct dentry *root;
@@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
&mmc_clock_fops);
+ debugfs_create_file("err_state", 0600, root, host,
+ &mmc_err_state);
+ debugfs_create_file("err_stats", 0600, root, host,
+ &mmc_err_stats_fops);
+
#ifdef CONFIG_FAIL_MMC_REQUEST
if (fail_request)
setup_fault_attr(&fail_default_attr, fail_request);
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index b15c034..5243929 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
bool recovery_needed = false;
+ mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
+
switch (issue_type) {
case MMC_ISSUE_ASYNC:
case MMC_ISSUE_DCMD:
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 07c6da1..d742051 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
if (host->ops->dump_vendor_regs)
host->ops->dump_vendor_regs(host);
+ mmc_debugfs_err_stats_enable(host->mmc);
SDHCI_DUMP("============================================\n");
}
EXPORT_SYMBOL_GPL(sdhci_dumpregs);
@@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
spin_lock_irqsave(&host->lock, flags);
if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
mmc_hostname(host->mmc));
sdhci_dumpregs(host);
@@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
if (host->data || host->data_cmd ||
(host->cmd && sdhci_data_line_cmd(host->cmd))) {
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
pr_err("%s: Timeout waiting for hardware interrupt.\n",
mmc_hostname(host->mmc));
sdhci_dumpregs(host);
@@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
- if (intmask & SDHCI_INT_TIMEOUT)
+ if (intmask & SDHCI_INT_TIMEOUT) {
host->cmd->error = -ETIMEDOUT;
- else
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+ } else {
host->cmd->error = -EILSEQ;
-
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+ }
/* Treat data command CRC error the same as data CRC error */
if (host->cmd->data &&
(intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
@@ -3266,6 +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
-ETIMEDOUT :
-EILSEQ;
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
if (sdhci_auto_cmd23(host, mrq)) {
mrq->sbc->error = err;
__sdhci_finish_mrq(host, mrq);
@@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
if (intmask & SDHCI_INT_DATA_TIMEOUT) {
host->data_cmd = NULL;
data_cmd->error = -ETIMEDOUT;
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
__sdhci_finish_mrq(host, data_cmd->mrq);
return;
}
@@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
return;
}
- if (intmask & SDHCI_INT_DATA_TIMEOUT)
+ if (intmask & SDHCI_INT_DATA_TIMEOUT) {
host->data->error = -ETIMEDOUT;
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+ }
else if (intmask & SDHCI_INT_DATA_END_BIT)
host->data->error = -EILSEQ;
else if ((intmask & SDHCI_INT_DATA_CRC) &&
SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
- != MMC_BUS_TEST_R)
+ != MMC_BUS_TEST_R) {
host->data->error = -EILSEQ;
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+ }
else if (intmask & SDHCI_INT_ADMA_ERROR) {
pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
intmask);
sdhci_adma_show_error(host);
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
host->data->error = -EIO;
if (host->ops->adma_workaround)
host->ops->adma_workaround(host, intmask);
@@ -3905,20 +3921,33 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
if (!host->cqe_on)
return false;
- if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
+ if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
*cmd_error = -EILSEQ;
- else if (intmask & SDHCI_INT_TIMEOUT)
+ if (intmask & SDHCI_INT_CRC) {
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
+ }
+ } else if (intmask & SDHCI_INT_TIMEOUT) {
*cmd_error = -ETIMEDOUT;
- else
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
+ } else
*cmd_error = 0;
- if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
+ if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
*data_error = -EILSEQ;
- else if (intmask & SDHCI_INT_DATA_TIMEOUT)
+ if (intmask & SDHCI_INT_DATA_CRC) {
+ if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
+ host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
+ }
+ } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
*data_error = -ETIMEDOUT;
- else if (intmask & SDHCI_INT_ADMA_ERROR)
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
+ } else if (intmask & SDHCI_INT_ADMA_ERROR) {
*data_error = -EIO;
- else
+ mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
+ } else
*data_error = 0;
/* Clear selected interrupts. */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7afb57c..c263f8f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
struct mmc_host;
+enum mmc_err_stat {
+ MMC_ERR_CMD_TIMEOUT,
+ MMC_ERR_CMD_CRC,
+ MMC_ERR_DAT_TIMEOUT,
+ MMC_ERR_DAT_CRC,
+ MMC_ERR_AUTO_CMD,
+ MMC_ERR_ADMA,
+ MMC_ERR_TUNING,
+ MMC_ERR_CMDQ_RED,
+ MMC_ERR_CMDQ_GCE,
+ MMC_ERR_CMDQ_ICCE,
+ MMC_ERR_REQ_TIMEOUT,
+ MMC_ERR_CMDQ_REQ_TIMEOUT,
+ MMC_ERR_ICE_CFG,
+ MMC_ERR_MAX,
+};
+
struct mmc_host_ops {
/*
* It is optional for the host to implement pre_req and post_req in
@@ -500,6 +517,8 @@ struct mmc_host {
/* Host Software Queue support */
bool hsq_enabled;
+ u32 err_stats[MMC_ERR_MAX];
+ bool err_state;
unsigned long private[] ____cacheline_aligned;
};
@@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
}
+static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
+{
+ mmc->err_state = true;
+}
+
+static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
+ enum mmc_err_stat stat) {
+
+ /*
+ * Ignore the command timeout errors observed during
+ * the card init as those are excepted.
+ */
+ if (!mmc->err_state)
+ mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
+
+ mmc->err_stats[stat] += 1;
+}
+
int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH V2] mmc: debugfs: add error statistics
2021-12-14 14:41 [PATCH V2] mmc: debugfs: add error statistics Shaik Sajida Bhanu
@ 2021-12-15 14:03 ` Adrian Hunter
2021-12-21 7:16 ` Sajida Bhanu (Temp) (QUIC)
0 siblings, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2021-12-15 14:03 UTC (permalink / raw)
To: Shaik Sajida Bhanu, riteshh, asutoshd, ulf.hansson, agross,
bjorn.andersson, linux-mmc, linux-arm-msm, linux-kernel
Cc: stummala, vbadigan, quic_rampraka, quic_pragalla, sartgarg,
nitirawa, sayalil
On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
> Add debugfs entry to query eMMC and SD card errors statistics.
> This feature is useful for debug and testing
>
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> ---
>
> Changes since V1:
> -Removed sysfs entry for eMMC and SD card error statistics and added
> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
Thanks for doing this.
> ---
> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
> drivers/mmc/core/queue.c | 2 +
> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
> include/linux/mmc/host.h | 37 ++++++++++++++++
> 4 files changed, 186 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
> index 3fdbc80..40210c34 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64 val)
> DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
> "%llu\n");
>
> +static int mmc_err_state_get(void *data, u64 *val)
> +{
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + *val = host->err_state ? 1 : 0;
> +
> + return 0;
> +}
> +
> +static int mmc_err_state_clear(void *data, u64 val)
> +{
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + host->err_state = false;
Is there much reason to disable err stats from userspace?
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
> + mmc_err_state_clear, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data)
> +{
> + struct mmc_host *host = (struct mmc_host *)file->private;
> +
> + if (!host)
> + return -EINVAL;
I was thinking we needed a way to determine whether stats were being
collected because not all drivers would support it at least initially
e.g.
if (!host->err_stats_enabled) {
seq_printf(file, "Not supported by driver\n");
return 0;
}
> +
> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
Maybe put the descriptions in an array and iterate e.g.
const char *desc[MMC_ERR_MAX] = {
[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
etc
};
int i;
if (!host)
return -EINVAL;
for (i = 0; i < MMC_ERR_MAX; i++) {
if (desc[i])
seq_printf(file, "# %s:\t %d\n",
desc[1], host->err_stats[i]);
}
> +
> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_CMD_CRC]);
> +
> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
> +
> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_DAT_CRC]);
> +
> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_ADMA]);
> +
> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_ADMA]);
> +
> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_TUNING]);
> +
> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_RED]);
> +
> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_GCE]);
> +
> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
> +
> + seq_printf(file, "# Request Timedout:\t %d\n",
> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
> +
> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
> +
> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_ICE_CFG]);
> +
> + return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, mmc_err_stats_show, inode->i_private);
> +}
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> +{
> + struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> + if (!host)
> + return -EINVAL;
> +
> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
> + memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> + return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> + .open = mmc_err_stats_open,
> + .read = seq_read,
> + .write = mmc_err_stats_write,
> +};
> +
> void mmc_add_host_debugfs(struct mmc_host *host)
> {
> struct dentry *root;
> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
> &mmc_clock_fops);
>
> + debugfs_create_file("err_state", 0600, root, host,
> + &mmc_err_state);
> + debugfs_create_file("err_stats", 0600, root, host,
> + &mmc_err_stats_fops);
> +
> #ifdef CONFIG_FAIL_MMC_REQUEST
> if (fail_request)
> setup_fault_attr(&fail_default_attr, fail_request);
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index b15c034..5243929 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
> bool recovery_needed = false;
>
> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
> +
> switch (issue_type) {
> case MMC_ISSUE_ASYNC:
> case MMC_ISSUE_DCMD:
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
I think the core changes should be a separate patch from sdhci.
I would probably split into 4:
mmc core
mmc block driver
cqhci driver
sdhci driver
> index 07c6da1..d742051 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + mmc_debugfs_err_stats_enable(host->mmc);
Why here and not in e.g. __sdhci_add_host() ?
> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
> @@ -3266,6 +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> -ETIMEDOUT :
> -EILSEQ;
>
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> __sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask);
> @@ -3905,20 +3921,33 @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC)) {
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 7afb57c..c263f8f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,8 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
If you make it u64 then we don't have to think about the value overflowing.
> + bool err_state;
>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> + mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +
> + /*
> + * Ignore the command timeout errors observed during
> + * the card init as those are excepted.
> + */
> + if (!mmc->err_state)
> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
This would be better handled in the card init code somewhere, not here.
> +
> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
> int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
> int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH V2] mmc: debugfs: add error statistics
2021-12-15 14:03 ` Adrian Hunter
@ 2021-12-21 7:16 ` Sajida Bhanu (Temp) (QUIC)
2021-12-29 7:36 ` Sajida Bhanu (Temp) (QUIC)
2022-01-03 9:50 ` Adrian Hunter
0 siblings, 2 replies; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2021-12-21 7:16 UTC (permalink / raw)
To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
Hi Adrian,
Thanks for the review.
Please find the inline comments.
Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com>
Sent: Wednesday, December 15, 2021 7:33 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
> Add debugfs entry to query eMMC and SD card errors statistics.
> This feature is useful for debug and testing
>
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> ---
>
> Changes since V1:
> -Removed sysfs entry for eMMC and SD card error statistics and added
> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
Thanks for doing this.
> ---
> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
> drivers/mmc/core/queue.c | 2 +
> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
> include/linux/mmc/host.h | 37 ++++++++++++++++
> 4 files changed, 186 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
> index 3fdbc80..40210c34 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
> "%llu\n");
>
> +static int mmc_err_state_get(void *data, u64 *val) {
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + *val = host->err_state ? 1 : 0;
> +
> + return 0;
> +}
> +
> +static int mmc_err_state_clear(void *data, u64 val) {
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + host->err_state = false;
Is there much reason to disable err stats from userspace?
>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
> + mmc_err_state_clear, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
> + struct mmc_host *host = (struct mmc_host *)file->private;
> +
> + if (!host)
> + return -EINVAL;
I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
if (!host->err_stats_enabled) {
seq_printf(file, "Not supported by driver\n");
return 0;
}
>>>>>>>You mean declare another variable (err_stats_enabled) and enable it in probe?
> +
> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
Maybe put the descriptions in an array and iterate e.g.
const char *desc[MMC_ERR_MAX] = {
[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
etc
};
int i;
if (!host)
return -EINVAL;
for (i = 0; i < MMC_ERR_MAX; i++) {
if (desc[i])
seq_printf(file, "# %s:\t %d\n",
desc[1], host->err_stats[i]);
}
>>>>>>> Sure
> +
> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_CMD_CRC]);
> +
> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
> +
> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_DAT_CRC]);
> +
> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_ADMA]);
> +
> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_ADMA]);
> +
> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_TUNING]);
> +
> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_RED]);
> +
> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_GCE]);
> +
> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
> +
> + seq_printf(file, "# Request Timedout:\t %d\n",
> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
> +
> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
> +
> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_ICE_CFG]);
> +
> + return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, mmc_err_stats_show, inode->i_private); }
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> +{
> + struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> + if (!host)
> + return -EINVAL;
> +
> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
> + memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> + return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> + .open = mmc_err_stats_open,
> + .read = seq_read,
> + .write = mmc_err_stats_write,
> +};
> +
> void mmc_add_host_debugfs(struct mmc_host *host) {
> struct dentry *root;
> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
> &mmc_clock_fops);
>
> + debugfs_create_file("err_state", 0600, root, host,
> + &mmc_err_state);
> + debugfs_create_file("err_stats", 0600, root, host,
> + &mmc_err_stats_fops);
> +
> #ifdef CONFIG_FAIL_MMC_REQUEST
> if (fail_request)
> setup_fault_attr(&fail_default_attr, fail_request); diff --git
> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
> b15c034..5243929 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
> bool recovery_needed = false;
>
> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
> +
> switch (issue_type) {
> case MMC_ISSUE_ASYNC:
> case MMC_ISSUE_DCMD:
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
I think the core changes should be a separate patch from sdhci.
I would probably split into 4:
mmc core
mmc block driver
cqhci driver
sdhci driver
>>>> Sure
> index 07c6da1..d742051 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + mmc_debugfs_err_stats_enable(host->mmc);
Why here and not in e.g. __sdhci_add_host() ?
>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
> timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
> *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
> -ETIMEDOUT :
> -EILSEQ;
>
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> __sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 @@
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +{
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index
> 7afb57c..c263f8f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,8 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
If you make it u64 then we don't have to think about the value overflowing.
>>> Sure
> + bool err_state;
>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
> DMA_FROM_DEVICE; }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> + mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +
> + /*
> + * Ignore the command timeout errors observed during
> + * the card init as those are excepted.
> + */
> + if (!mmc->err_state)
> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
This would be better handled in the card init code somewhere, not here.
>>>> Sure.
> +
> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
> **new_ext_csd);
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH V2] mmc: debugfs: add error statistics
2021-12-21 7:16 ` Sajida Bhanu (Temp) (QUIC)
@ 2021-12-29 7:36 ` Sajida Bhanu (Temp) (QUIC)
2022-01-03 9:50 ` Adrian Hunter
1 sibling, 0 replies; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2021-12-29 7:36 UTC (permalink / raw)
To: Sajida Bhanu (Temp) (QUIC),
Adrian Hunter, riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
Gentle Reminder!!!
Thanks,
Sajida
-----Original Message-----
From: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>
Sent: Tuesday, December 21, 2021 12:46 PM
To: Adrian Hunter <adrian.hunter@intel.com>; Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: RE: [PATCH V2] mmc: debugfs: add error statistics
Hi Adrian,
Thanks for the review.
Please find the inline comments.
Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com>
Sent: Wednesday, December 15, 2021 7:33 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
> Add debugfs entry to query eMMC and SD card errors statistics.
> This feature is useful for debug and testing
>
> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
> ---
>
> Changes since V1:
> -Removed sysfs entry for eMMC and SD card error statistics and added
> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
Thanks for doing this.
> ---
> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
> drivers/mmc/core/queue.c | 2 +
> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
> include/linux/mmc/host.h | 37 ++++++++++++++++
> 4 files changed, 186 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
> index 3fdbc80..40210c34 100644
> --- a/drivers/mmc/core/debugfs.c
> +++ b/drivers/mmc/core/debugfs.c
> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
> "%llu\n");
>
> +static int mmc_err_state_get(void *data, u64 *val) {
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + *val = host->err_state ? 1 : 0;
> +
> + return 0;
> +}
> +
> +static int mmc_err_state_clear(void *data, u64 val) {
> + struct mmc_host *host = data;
> +
> + if (!host)
> + return -EINVAL;
> +
> + host->err_state = false;
Is there much reason to disable err stats from userspace?
>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
> +
> + return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
> + mmc_err_state_clear, "%llu\n");
> +
> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
> + struct mmc_host *host = (struct mmc_host *)file->private;
> +
> + if (!host)
> + return -EINVAL;
I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
if (!host->err_stats_enabled) {
seq_printf(file, "Not supported by driver\n");
return 0;
}
>>>>>>>You mean declare another variable (err_stats_enabled) and enable it in probe?
> +
> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
Maybe put the descriptions in an array and iterate e.g.
const char *desc[MMC_ERR_MAX] = {
[MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
etc
};
int i;
if (!host)
return -EINVAL;
for (i = 0; i < MMC_ERR_MAX; i++) {
if (desc[i])
seq_printf(file, "# %s:\t %d\n",
desc[1], host->err_stats[i]);
}
>>>>>>> Sure
> +
> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_CMD_CRC]);
> +
> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
> +
> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_DAT_CRC]);
> +
> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_ADMA]);
> +
> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_ADMA]);
> +
> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
> + host->err_stats[MMC_ERR_TUNING]);
> +
> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_RED]);
> +
> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_GCE]);
> +
> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
> +
> + seq_printf(file, "# Request Timedout:\t %d\n",
> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
> +
> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
> +
> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
> + host->err_stats[MMC_ERR_ICE_CFG]);
> +
> + return 0;
> +}
> +
> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, mmc_err_stats_show, inode->i_private); }
> +
> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> +{
> + struct mmc_host *host = filp->f_mapping->host->i_private;
> +
> + if (!host)
> + return -EINVAL;
> +
> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
> + memset(host->err_stats, 0, sizeof(host->err_stats));
> +
> + return cnt;
> +}
> +
> +static const struct file_operations mmc_err_stats_fops = {
> + .open = mmc_err_stats_open,
> + .read = seq_read,
> + .write = mmc_err_stats_write,
> +};
> +
> void mmc_add_host_debugfs(struct mmc_host *host) {
> struct dentry *root;
> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
> &mmc_clock_fops);
>
> + debugfs_create_file("err_state", 0600, root, host,
> + &mmc_err_state);
> + debugfs_create_file("err_stats", 0600, root, host,
> + &mmc_err_stats_fops);
> +
> #ifdef CONFIG_FAIL_MMC_REQUEST
> if (fail_request)
> setup_fault_attr(&fail_default_attr, fail_request); diff --git
> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
> b15c034..5243929 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
> bool recovery_needed = false;
>
> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
> +
> switch (issue_type) {
> case MMC_ISSUE_ASYNC:
> case MMC_ISSUE_DCMD:
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
I think the core changes should be a separate patch from sdhci.
I would probably split into 4:
mmc core
mmc block driver
cqhci driver
sdhci driver
>>>> Sure
> index 07c6da1..d742051 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
> if (host->ops->dump_vendor_regs)
> host->ops->dump_vendor_regs(host);
>
> + mmc_debugfs_err_stats_enable(host->mmc);
Why here and not in e.g. __sdhci_add_host() ?
>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
> SDHCI_DUMP("============================================\n");
> }
> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
> spin_lock_irqsave(&host->lock, flags);
>
> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
> timer_list *t)
>
> if (host->data || host->data_cmd ||
> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
> pr_err("%s: Timeout waiting for hardware interrupt.\n",
> mmc_hostname(host->mmc));
> sdhci_dumpregs(host);
> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
> *host, u32 intmask, u32 *intmask_p)
>
> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
> - if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_TIMEOUT) {
> host->cmd->error = -ETIMEDOUT;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else {
> host->cmd->error = -EILSEQ;
> -
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> /* Treat data command CRC error the same as data CRC error */
> if (host->cmd->data &&
> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
> +intmask, u32 *intmask_p)
> -ETIMEDOUT :
> -EILSEQ;
>
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
> if (sdhci_auto_cmd23(host, mrq)) {
> mrq->sbc->error = err;
> __sdhci_finish_mrq(host, mrq);
> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data_cmd = NULL;
> data_cmd->error = -ETIMEDOUT;
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> __sdhci_finish_mrq(host, data_cmd->mrq);
> return;
> }
> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
> return;
> }
>
> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> host->data->error = -ETIMEDOUT;
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + }
> else if (intmask & SDHCI_INT_DATA_END_BIT)
> host->data->error = -EILSEQ;
> else if ((intmask & SDHCI_INT_DATA_CRC) &&
> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
> - != MMC_BUS_TEST_R)
> + != MMC_BUS_TEST_R) {
> host->data->error = -EILSEQ;
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> else if (intmask & SDHCI_INT_ADMA_ERROR) {
> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
> intmask);
> sdhci_adma_show_error(host);
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> host->data->error = -EIO;
> if (host->ops->adma_workaround)
> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 @@
> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
> if (!host->cqe_on)
> return false;
>
> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
> +{
> *cmd_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_TIMEOUT)
> + if (intmask & SDHCI_INT_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
> + }
> + } else if (intmask & SDHCI_INT_TIMEOUT) {
> *cmd_error = -ETIMEDOUT;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
> + } else
> *cmd_error = 0;
>
> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
> *data_error = -EILSEQ;
> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
> + if (intmask & SDHCI_INT_DATA_CRC) {
> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
> + }
> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
> *data_error = -ETIMEDOUT;
> - else if (intmask & SDHCI_INT_ADMA_ERROR)
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
> *data_error = -EIO;
> - else
> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
> + } else
> *data_error = 0;
>
> /* Clear selected interrupts. */
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index
> 7afb57c..c263f8f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>
> struct mmc_host;
>
> +enum mmc_err_stat {
> + MMC_ERR_CMD_TIMEOUT,
> + MMC_ERR_CMD_CRC,
> + MMC_ERR_DAT_TIMEOUT,
> + MMC_ERR_DAT_CRC,
> + MMC_ERR_AUTO_CMD,
> + MMC_ERR_ADMA,
> + MMC_ERR_TUNING,
> + MMC_ERR_CMDQ_RED,
> + MMC_ERR_CMDQ_GCE,
> + MMC_ERR_CMDQ_ICCE,
> + MMC_ERR_REQ_TIMEOUT,
> + MMC_ERR_CMDQ_REQ_TIMEOUT,
> + MMC_ERR_ICE_CFG,
> + MMC_ERR_MAX,
> +};
> +
> struct mmc_host_ops {
> /*
> * It is optional for the host to implement pre_req and post_req in
> @@ -500,6 +517,8 @@ struct mmc_host {
>
> /* Host Software Queue support */
> bool hsq_enabled;
> + u32 err_stats[MMC_ERR_MAX];
If you make it u64 then we don't have to think about the value overflowing.
>>> Sure
> + bool err_state;
>
> unsigned long private[] ____cacheline_aligned;
> };
> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
> DMA_FROM_DEVICE; }
>
> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
> +{
> + mmc->err_state = true;
> +}
> +
> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
> + enum mmc_err_stat stat) {
> +
> + /*
> + * Ignore the command timeout errors observed during
> + * the card init as those are excepted.
> + */
> + if (!mmc->err_state)
> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
This would be better handled in the card init code somewhere, not here.
>>>> Sure.
> +
> + mmc->err_stats[stat] += 1;
> +}
> +
> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
> **new_ext_csd);
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2] mmc: debugfs: add error statistics
2021-12-21 7:16 ` Sajida Bhanu (Temp) (QUIC)
2021-12-29 7:36 ` Sajida Bhanu (Temp) (QUIC)
@ 2022-01-03 9:50 ` Adrian Hunter
2022-01-04 15:02 ` Sajida Bhanu (Temp) (QUIC)
1 sibling, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2022-01-03 9:50 UTC (permalink / raw)
To: Sajida Bhanu (Temp) (QUIC), riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
>
> Thanks for the review.
>
> Please find the inline comments.
I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted. Normally it is the other way around.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Wednesday, December 15, 2021 7:33 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>
> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>> Add debugfs entry to query eMMC and SD card errors statistics.
>> This feature is useful for debug and testing
>>
>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>> ---
>>
>> Changes since V1:
>> -Removed sysfs entry for eMMC and SD card error statistics and added
>> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>
> Thanks for doing this.
>
>> ---
>> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>> drivers/mmc/core/queue.c | 2 +
>> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
>> include/linux/mmc/host.h | 37 ++++++++++++++++
>> 4 files changed, 186 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
>> index 3fdbc80..40210c34 100644
>> --- a/drivers/mmc/core/debugfs.c
>> +++ b/drivers/mmc/core/debugfs.c
>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>> "%llu\n");
>>
>> +static int mmc_err_state_get(void *data, u64 *val) {
>> + struct mmc_host *host = data;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + *val = host->err_state ? 1 : 0;
>> +
>> + return 0;
>> +}
>> +
>> +static int mmc_err_state_clear(void *data, u64 val) {
>> + struct mmc_host *host = data;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + host->err_state = false;
>
> Is there much reason to disable err stats from userspace?
>
>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
That is not exectly how it is programmed. "err_state is false" means no errors have been recorded, not that no errors happended.
>
>> +
>> + return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>> + mmc_err_state_clear, "%llu\n");
>> +
>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>> + struct mmc_host *host = (struct mmc_host *)file->private;
>> +
>> + if (!host)
>> + return -EINVAL;
>
> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>
> if (!host->err_stats_enabled) {
> seq_printf(file, "Not supported by driver\n");
> return 0;
> }
>
>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
Yes, although it is not clear if this is the same as what you want from err_state,
i.e. is err_state different from err_stats_enabled?
>
>> +
>> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>
> Maybe put the descriptions in an array and iterate e.g.
>
> const char *desc[MMC_ERR_MAX] = {
> [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
> etc
> };
> int i;
>
> if (!host)
> return -EINVAL;
>
> for (i = 0; i < MMC_ERR_MAX; i++) {
> if (desc[i])
> seq_printf(file, "# %s:\t %d\n",
> desc[1], host->err_stats[i]);
> }
>
>>>>>>>> Sure
>
>> +
>> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_CMD_CRC]);
>> +
>> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>> +
>> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_DAT_CRC]);
>> +
>> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_ADMA]);
>> +
>> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_ADMA]);
>> +
>> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_TUNING]);
>> +
>> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_RED]);
>> +
>> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_GCE]);
>> +
>> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
>> +
>> + seq_printf(file, "# Request Timedout:\t %d\n",
>> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>> +
>> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>> +
>> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_ICE_CFG]);
>> +
>> + return 0;
>> +}
>> +
>> +static int mmc_err_stats_open(struct inode *inode, struct file *file)
>> +{
>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>> +
>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>> + size_t cnt, loff_t *ppos)
>> +{
>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>> +
>> + return cnt;
>> +}
>> +
>> +static const struct file_operations mmc_err_stats_fops = {
>> + .open = mmc_err_stats_open,
>> + .read = seq_read,
>> + .write = mmc_err_stats_write,
>> +};
>> +
>> void mmc_add_host_debugfs(struct mmc_host *host) {
>> struct dentry *root;
>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>> &mmc_clock_fops);
>>
>> + debugfs_create_file("err_state", 0600, root, host,
>> + &mmc_err_state);
>> + debugfs_create_file("err_stats", 0600, root, host,
>> + &mmc_err_stats_fops);
>> +
>> #ifdef CONFIG_FAIL_MMC_REQUEST
>> if (fail_request)
>> setup_fault_attr(&fail_default_attr, fail_request); diff --git
>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>> b15c034..5243929 100644
>> --- a/drivers/mmc/core/queue.c
>> +++ b/drivers/mmc/core/queue.c
>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>> bool recovery_needed = false;
>>
>> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>> +
>> switch (issue_type) {
>> case MMC_ISSUE_ASYNC:
>> case MMC_ISSUE_DCMD:
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>
> I think the core changes should be a separate patch from sdhci.
> I would probably split into 4:
> mmc core
> mmc block driver
> cqhci driver
> sdhci driver
>
>>>>> Sure
>
>> index 07c6da1..d742051 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>> if (host->ops->dump_vendor_regs)
>> host->ops->dump_vendor_regs(host);
>>
>> + mmc_debugfs_err_stats_enable(host->mmc);
>
> Why here and not in e.g. __sdhci_add_host() ?
>
>>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
Registers are not dumped for most errors. Please move this to __sdhci_add_host().
>
>> SDHCI_DUMP("============================================\n");
>> }
>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>> spin_lock_irqsave(&host->lock, flags);
>>
>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
>> timer_list *t)
>>
>> if (host->data || host->data_cmd ||
>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
>> *host, u32 intmask, u32 *intmask_p)
>>
>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> - if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_TIMEOUT) {
>> host->cmd->error = -ETIMEDOUT;
>> - else
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else {
>> host->cmd->error = -EILSEQ;
>> -
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> /* Treat data command CRC error the same as data CRC error */
>> if (host->cmd->data &&
>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
>> -ETIMEDOUT :
>> -EILSEQ;
>>
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>> if (sdhci_auto_cmd23(host, mrq)) {
>> mrq->sbc->error = err;
>> __sdhci_finish_mrq(host, mrq);
>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data_cmd = NULL;
>> data_cmd->error = -ETIMEDOUT;
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> __sdhci_finish_mrq(host, data_cmd->mrq);
>> return;
>> }
>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> return;
>> }
>>
>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data->error = -ETIMEDOUT;
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + }
>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>> host->data->error = -EILSEQ;
>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> - != MMC_BUS_TEST_R)
>> + != MMC_BUS_TEST_R) {
>> host->data->error = -EILSEQ;
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>> intmask);
>> sdhci_adma_show_error(host);
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> host->data->error = -EIO;
>> if (host->ops->adma_workaround)
>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33 @@
>> bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>> if (!host->cqe_on)
>> return false;
>>
>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> +{
>> *cmd_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>> *cmd_error = -ETIMEDOUT;
>> - else
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else
>> *cmd_error = 0;
>>
>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>> *data_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> *data_error = -ETIMEDOUT;
>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> *data_error = -EIO;
>> - else
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> + } else
>> *data_error = 0;
>>
>> /* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index
>> 7afb57c..c263f8f 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>
>> struct mmc_host;
>>
>> +enum mmc_err_stat {
>> + MMC_ERR_CMD_TIMEOUT,
>> + MMC_ERR_CMD_CRC,
>> + MMC_ERR_DAT_TIMEOUT,
>> + MMC_ERR_DAT_CRC,
>> + MMC_ERR_AUTO_CMD,
>> + MMC_ERR_ADMA,
>> + MMC_ERR_TUNING,
>> + MMC_ERR_CMDQ_RED,
>> + MMC_ERR_CMDQ_GCE,
>> + MMC_ERR_CMDQ_ICCE,
>> + MMC_ERR_REQ_TIMEOUT,
>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>> + MMC_ERR_ICE_CFG,
>> + MMC_ERR_MAX,
>> +};
>> +
>> struct mmc_host_ops {
>> /*
>> * It is optional for the host to implement pre_req and post_req in
>> @@ -500,6 +517,8 @@ struct mmc_host {
>>
>> /* Host Software Queue support */
>> bool hsq_enabled;
>> + u32 err_stats[MMC_ERR_MAX];
>
> If you make it u64 then we don't have to think about the value overflowing.
>
>>>> Sure
>
>> + bool err_state;
>>
>> unsigned long private[] ____cacheline_aligned;
>> };
>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>> DMA_FROM_DEVICE; }
>>
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host *mmc)
>> +{
>> + mmc->err_state = true;
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> + enum mmc_err_stat stat) {
>> +
>> + /*
>> + * Ignore the command timeout errors observed during
>> + * the card init as those are excepted.
>> + */
>> + if (!mmc->err_state)
>> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>
> This would be better handled in the card init code somewhere, not here.
>
>>>>> Sure.
>
>> +
>> + mmc->err_stats[stat] += 1;
>> +}
>> +
>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>> **new_ext_csd);
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH V2] mmc: debugfs: add error statistics
2022-01-03 9:50 ` Adrian Hunter
@ 2022-01-04 15:02 ` Sajida Bhanu (Temp) (QUIC)
2022-01-07 7:42 ` Adrian Hunter
0 siblings, 1 reply; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2022-01-04 15:02 UTC (permalink / raw)
To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
Hi Adrian,
Thanks for the review.
Please find the inline comments.
Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com>
Sent: Monday, January 3, 2022 3:20 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
>
> Thanks for the review.
>
> Please find the inline comments.
I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted. Normally it is the other way around.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Wednesday, December 15, 2021 7:33 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org;
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org;
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
> nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>
> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>> Add debugfs entry to query eMMC and SD card errors statistics.
>> This feature is useful for debug and testing
>>
>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>> ---
>>
>> Changes since V1:
>> -Removed sysfs entry for eMMC and SD card error statistics and added
>> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>
> Thanks for doing this.
>
>> ---
>> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>> drivers/mmc/core/queue.c | 2 +
>> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
>> include/linux/mmc/host.h | 37 ++++++++++++++++
>> 4 files changed, 186 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
>> index 3fdbc80..40210c34 100644
>> --- a/drivers/mmc/core/debugfs.c
>> +++ b/drivers/mmc/core/debugfs.c
>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>> "%llu\n");
>>
>> +static int mmc_err_state_get(void *data, u64 *val) {
>> + struct mmc_host *host = data;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + *val = host->err_state ? 1 : 0;
>> +
>> + return 0;
>> +}
>> +
>> +static int mmc_err_state_clear(void *data, u64 val) {
>> + struct mmc_host *host = data;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + host->err_state = false;
>
> Is there much reason to disable err stats from userspace?
>
>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
That is not exectly how it is programmed. "err_state is false" means no errors have been recorded, not that no errors happended.
>>>>>> If user wants to explicitly clear then he can use this.
>
>> +
>> + return 0;
>> +}
>> +
>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>> + mmc_err_state_clear, "%llu\n");
>> +
>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>> + struct mmc_host *host = (struct mmc_host *)file->private;
>> +
>> + if (!host)
>> + return -EINVAL;
>
> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>
> if (!host->err_stats_enabled) {
> seq_printf(file, "Not supported by driver\n");
> return 0;
> }
>
>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>>>>> Yes, err_state and err_stats_enabled both are different. err_state will be set if any errors happened in driver level.
err_stats_enabled will be set if err_stats feature enabled, if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>
>> +
>> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>
> Maybe put the descriptions in an array and iterate e.g.
>
> const char *desc[MMC_ERR_MAX] = {
> [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
> etc
> };
> int i;
>
> if (!host)
> return -EINVAL;
>
> for (i = 0; i < MMC_ERR_MAX; i++) {
> if (desc[i])
> seq_printf(file, "# %s:\t %d\n",
> desc[1], host->err_stats[i]);
> }
>
>>>>>>>> Sure
>
>> +
>> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_CMD_CRC]);
>> +
>> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>> +
>> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_DAT_CRC]);
>> +
>> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_ADMA]);
>> +
>> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_ADMA]);
>> +
>> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>> + host->err_stats[MMC_ERR_TUNING]);
>> +
>> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_RED]);
>> +
>> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_GCE]);
>> +
>> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
>> +
>> + seq_printf(file, "# Request Timedout:\t %d\n",
>> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>> +
>> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>> +
>> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>> + host->err_stats[MMC_ERR_ICE_CFG]);
>> +
>> + return 0;
>> +}
>> +
>> +static int mmc_err_stats_open(struct inode *inode, struct file
>> +*file) {
>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>> +
>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>> + size_t cnt, loff_t *ppos)
>> +{
>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>> +
>> + if (!host)
>> + return -EINVAL;
>> +
>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>> +
>> + return cnt;
>> +}
>> +
>> +static const struct file_operations mmc_err_stats_fops = {
>> + .open = mmc_err_stats_open,
>> + .read = seq_read,
>> + .write = mmc_err_stats_write,
>> +};
>> +
>> void mmc_add_host_debugfs(struct mmc_host *host) {
>> struct dentry *root;
>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>> &mmc_clock_fops);
>>
>> + debugfs_create_file("err_state", 0600, root, host,
>> + &mmc_err_state);
>> + debugfs_create_file("err_stats", 0600, root, host,
>> + &mmc_err_stats_fops);
>> +
>> #ifdef CONFIG_FAIL_MMC_REQUEST
>> if (fail_request)
>> setup_fault_attr(&fail_default_attr, fail_request); diff --git
>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>> b15c034..5243929 100644
>> --- a/drivers/mmc/core/queue.c
>> +++ b/drivers/mmc/core/queue.c
>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>> bool recovery_needed = false;
>>
>> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>> +
>> switch (issue_type) {
>> case MMC_ISSUE_ASYNC:
>> case MMC_ISSUE_DCMD:
>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>
> I think the core changes should be a separate patch from sdhci.
> I would probably split into 4:
> mmc core
> mmc block driver
> cqhci driver
> sdhci driver
>
>>>>> Sure
>
>> index 07c6da1..d742051 100644
>> --- a/drivers/mmc/host/sdhci.c
>> +++ b/drivers/mmc/host/sdhci.c
>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>> if (host->ops->dump_vendor_regs)
>> host->ops->dump_vendor_regs(host);
>>
>> + mmc_debugfs_err_stats_enable(host->mmc);
>
> Why here and not in e.g. __sdhci_add_host() ?
>
>>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
Registers are not dumped for most errors. Please move this to __sdhci_add_host().
>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>
>> SDHCI_DUMP("============================================\n");
>> }
>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>> spin_lock_irqsave(&host->lock, flags);
>>
>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
>> timer_list *t)
>>
>> if (host->data || host->data_cmd ||
>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>> mmc_hostname(host->mmc));
>> sdhci_dumpregs(host);
>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
>> *host, u32 intmask, u32 *intmask_p)
>>
>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>> - if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_TIMEOUT) {
>> host->cmd->error = -ETIMEDOUT;
>> - else
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else {
>> host->cmd->error = -EILSEQ;
>> -
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> /* Treat data command CRC error the same as data CRC error */
>> if (host->cmd->data &&
>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>> +intmask, u32 *intmask_p)
>> -ETIMEDOUT :
>> -EILSEQ;
>>
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>> if (sdhci_auto_cmd23(host, mrq)) {
>> mrq->sbc->error = err;
>> __sdhci_finish_mrq(host, mrq);
>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data_cmd = NULL;
>> data_cmd->error = -ETIMEDOUT;
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> __sdhci_finish_mrq(host, data_cmd->mrq);
>> return;
>> }
>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>> return;
>> }
>>
>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> host->data->error = -ETIMEDOUT;
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + }
>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>> host->data->error = -EILSEQ;
>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>> - != MMC_BUS_TEST_R)
>> + != MMC_BUS_TEST_R) {
>> host->data->error = -EILSEQ;
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>> intmask);
>> sdhci_adma_show_error(host);
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> host->data->error = -EIO;
>> if (host->ops->adma_workaround)
>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33
>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>> if (!host->cqe_on)
>> return false;
>>
>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>> +SDHCI_INT_CRC)) {
>> *cmd_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_TIMEOUT)
>> + if (intmask & SDHCI_INT_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>> + }
>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>> *cmd_error = -ETIMEDOUT;
>> - else
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>> + } else
>> *cmd_error = 0;
>>
>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>> *data_error = -EILSEQ;
>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>> + if (intmask & SDHCI_INT_DATA_CRC) {
>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>> + }
>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>> *data_error = -ETIMEDOUT;
>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>> *data_error = -EIO;
>> - else
>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>> + } else
>> *data_error = 0;
>>
>> /* Clear selected interrupts. */
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>> index 7afb57c..c263f8f 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>
>> struct mmc_host;
>>
>> +enum mmc_err_stat {
>> + MMC_ERR_CMD_TIMEOUT,
>> + MMC_ERR_CMD_CRC,
>> + MMC_ERR_DAT_TIMEOUT,
>> + MMC_ERR_DAT_CRC,
>> + MMC_ERR_AUTO_CMD,
>> + MMC_ERR_ADMA,
>> + MMC_ERR_TUNING,
>> + MMC_ERR_CMDQ_RED,
>> + MMC_ERR_CMDQ_GCE,
>> + MMC_ERR_CMDQ_ICCE,
>> + MMC_ERR_REQ_TIMEOUT,
>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>> + MMC_ERR_ICE_CFG,
>> + MMC_ERR_MAX,
>> +};
>> +
>> struct mmc_host_ops {
>> /*
>> * It is optional for the host to implement pre_req and post_req in
>> @@ -500,6 +517,8 @@ struct mmc_host {
>>
>> /* Host Software Queue support */
>> bool hsq_enabled;
>> + u32 err_stats[MMC_ERR_MAX];
>
> If you make it u64 then we don't have to think about the value overflowing.
>
>>>> Sure
>
>> + bool err_state;
>>
>> unsigned long private[] ____cacheline_aligned;
>> };
>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>> DMA_FROM_DEVICE; }
>>
>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>> +*mmc) {
>> + mmc->err_state = true;
>> +}
>> +
>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>> + enum mmc_err_stat stat) {
>> +
>> + /*
>> + * Ignore the command timeout errors observed during
>> + * the card init as those are excepted.
>> + */
>> + if (!mmc->err_state)
>> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>
> This would be better handled in the card init code somewhere, not here.
>
>>>>> Sure.
>
>> +
>> + mmc->err_stats[stat] += 1;
>> +}
>> +
>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>> **new_ext_csd);
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2] mmc: debugfs: add error statistics
2022-01-04 15:02 ` Sajida Bhanu (Temp) (QUIC)
@ 2022-01-07 7:42 ` Adrian Hunter
2022-01-10 13:11 ` Sajida Bhanu (Temp) (QUIC)
0 siblings, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2022-01-07 7:42 UTC (permalink / raw)
To: Sajida Bhanu (Temp) (QUIC), riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
>
> Thanks for the review.
>
> Please find the inline comments.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Monday, January 3, 2022 3:20 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>
> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
>
> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted. Normally it is the other way around.
>
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Wednesday, December 15, 2021 7:33 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
>> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org;
>> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org;
>> linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>> This feature is useful for debug and testing
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>> ---
>>>
>>> Changes since V1:
>>> -Removed sysfs entry for eMMC and SD card error statistics and added
>>> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>
>> Thanks for doing this.
>>
>>> ---
>>> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>> drivers/mmc/core/queue.c | 2 +
>>> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
>>> include/linux/mmc/host.h | 37 ++++++++++++++++
>>> 4 files changed, 186 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
>>> index 3fdbc80..40210c34 100644
>>> --- a/drivers/mmc/core/debugfs.c
>>> +++ b/drivers/mmc/core/debugfs.c
>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>> "%llu\n");
>>>
>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>> + struct mmc_host *host = data;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>> +
>>> + *val = host->err_state ? 1 : 0;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>> + struct mmc_host *host = data;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>> +
>>> + host->err_state = false;
>>
>> Is there much reason to disable err stats from userspace?
>>
>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
>
> That is not exectly how it is programmed. "err_state is false" means no errors have been recorded, not that no errors happended.
>
>>>>>>> If user wants to explicitly clear then he can use this.
Seems over compilicated. A user can just diff the old and new values:
cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats
...later...
cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats
diff /tmp/old-stats /tmp/new-stats
mv /tmp/new-stats /tmp/old-stats
I suggest just outputting the stats
>
>>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>> + mmc_err_state_clear, "%llu\n");
>>> +
>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>> + struct mmc_host *host = (struct mmc_host *)file->private;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>
>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>
>> if (!host->err_stats_enabled) {
>> seq_printf(file, "Not supported by driver\n");
>> return 0;
>> }
>>
>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
>
> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>
>>>>>> Yes, err_state and err_stats_enabled both are different. err_state will be set if any errors happened in driver level.
> err_stats_enabled will be set if err_stats feature enabled, if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>
>>
>>> +
>>> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>
>> Maybe put the descriptions in an array and iterate e.g.
>>
>> const char *desc[MMC_ERR_MAX] = {
>> [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>> etc
>> };
>> int i;
>>
>> if (!host)
>> return -EINVAL;
>>
>> for (i = 0; i < MMC_ERR_MAX; i++) {
>> if (desc[i])
>> seq_printf(file, "# %s:\t %d\n",
>> desc[1], host->err_stats[i]);
>> }
>>
>>>>>>>>> Sure
>>
>>> +
>>> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_CMD_CRC]);
>>> +
>>> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>> +
>>> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_DAT_CRC]);
>>> +
>>> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_TUNING]);
>>> +
>>> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_RED]);
>>> +
>>> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_GCE]);
>>> +
>>> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>> +
>>> + seq_printf(file, "# Request Timedout:\t %d\n",
>>> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>> +
>>> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>> +
>>> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_ICE_CFG]);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>> +*file) {
>>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>>> +
>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>> + size_t cnt, loff_t *ppos)
>>> +{
>>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>> +
>>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>>> +
>>> + return cnt;
>>> +}
>>> +
>>> +static const struct file_operations mmc_err_stats_fops = {
>>> + .open = mmc_err_stats_open,
>>> + .read = seq_read,
>>> + .write = mmc_err_stats_write,
>>> +};
>>> +
>>> void mmc_add_host_debugfs(struct mmc_host *host) {
>>> struct dentry *root;
>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>> &mmc_clock_fops);
>>>
>>> + debugfs_create_file("err_state", 0600, root, host,
>>> + &mmc_err_state);
>>> + debugfs_create_file("err_stats", 0600, root, host,
>>> + &mmc_err_stats_fops);
>>> +
>>> #ifdef CONFIG_FAIL_MMC_REQUEST
>>> if (fail_request)
>>> setup_fault_attr(&fail_default_attr, fail_request); diff --git
>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>> b15c034..5243929 100644
>>> --- a/drivers/mmc/core/queue.c
>>> +++ b/drivers/mmc/core/queue.c
>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>> bool recovery_needed = false;
>>>
>>> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>> +
>>> switch (issue_type) {
>>> case MMC_ISSUE_ASYNC:
>>> case MMC_ISSUE_DCMD:
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>
>> I think the core changes should be a separate patch from sdhci.
>> I would probably split into 4:
>> mmc core
>> mmc block driver
>> cqhci driver
>> sdhci driver
>>
>>>>>> Sure
>>
>>> index 07c6da1..d742051 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>> if (host->ops->dump_vendor_regs)
>>> host->ops->dump_vendor_regs(host);
>>>
>>> + mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Why here and not in e.g. __sdhci_add_host() ?
>>
>>>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
>
> Registers are not dumped for most errors. Please move this to __sdhci_add_host().
>
>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>
>>
>>> SDHCI_DUMP("============================================\n");
>>> }
>>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>> spin_lock_irqsave(&host->lock, flags);
>>>
>>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
>>> timer_list *t)
>>>
>>> if (host->data || host->data_cmd ||
>>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
>>> *host, u32 intmask, u32 *intmask_p)
>>>
>>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> - if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_TIMEOUT) {
>>> host->cmd->error = -ETIMEDOUT;
>>> - else
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else {
>>> host->cmd->error = -EILSEQ;
>>> -
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> /* Treat data command CRC error the same as data CRC error */
>>> if (host->cmd->data &&
>>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>>> +intmask, u32 *intmask_p)
>>> -ETIMEDOUT :
>>> -EILSEQ;
>>>
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>> if (sdhci_auto_cmd23(host, mrq)) {
>>> mrq->sbc->error = err;
>>> __sdhci_finish_mrq(host, mrq);
>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data_cmd = NULL;
>>> data_cmd->error = -ETIMEDOUT;
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> __sdhci_finish_mrq(host, data_cmd->mrq);
>>> return;
>>> }
>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> return;
>>> }
>>>
>>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data->error = -ETIMEDOUT;
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + }
>>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>>> host->data->error = -EILSEQ;
>>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> - != MMC_BUS_TEST_R)
>>> + != MMC_BUS_TEST_R) {
>>> host->data->error = -EILSEQ;
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>> intmask);
>>> sdhci_adma_show_error(host);
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> host->data->error = -EIO;
>>> if (host->ops->adma_workaround)
>>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33
>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>> if (!host->cqe_on)
>>> return false;
>>>
>>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>> +SDHCI_INT_CRC)) {
>>> *cmd_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>>> *cmd_error = -ETIMEDOUT;
>>> - else
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else
>>> *cmd_error = 0;
>>>
>>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>> *data_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> *data_error = -ETIMEDOUT;
>>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> *data_error = -EIO;
>>> - else
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> + } else
>>> *data_error = 0;
>>>
>>> /* Clear selected interrupts. */
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>> index 7afb57c..c263f8f 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>
>>> struct mmc_host;
>>>
>>> +enum mmc_err_stat {
>>> + MMC_ERR_CMD_TIMEOUT,
>>> + MMC_ERR_CMD_CRC,
>>> + MMC_ERR_DAT_TIMEOUT,
>>> + MMC_ERR_DAT_CRC,
>>> + MMC_ERR_AUTO_CMD,
>>> + MMC_ERR_ADMA,
>>> + MMC_ERR_TUNING,
>>> + MMC_ERR_CMDQ_RED,
>>> + MMC_ERR_CMDQ_GCE,
>>> + MMC_ERR_CMDQ_ICCE,
>>> + MMC_ERR_REQ_TIMEOUT,
>>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> + MMC_ERR_ICE_CFG,
>>> + MMC_ERR_MAX,
>>> +};
>>> +
>>> struct mmc_host_ops {
>>> /*
>>> * It is optional for the host to implement pre_req and post_req in
>>> @@ -500,6 +517,8 @@ struct mmc_host {
>>>
>>> /* Host Software Queue support */
>>> bool hsq_enabled;
>>> + u32 err_stats[MMC_ERR_MAX];
>>
>> If you make it u64 then we don't have to think about the value overflowing.
>>
>>>>> Sure
>>
>>> + bool err_state;
>>>
>>> unsigned long private[] ____cacheline_aligned;
>>> };
>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>> DMA_FROM_DEVICE; }
>>>
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>> +*mmc) {
>>> + mmc->err_state = true;
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> + enum mmc_err_stat stat) {
>>> +
>>> + /*
>>> + * Ignore the command timeout errors observed during
>>> + * the card init as those are excepted.
>>> + */
>>> + if (!mmc->err_state)
>>> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>
>> This would be better handled in the card init code somewhere, not here.
>>
>>>>>> Sure.
>>
>>> +
>>> + mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>>> **new_ext_csd);
>>>
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH V2] mmc: debugfs: add error statistics
2022-01-07 7:42 ` Adrian Hunter
@ 2022-01-10 13:11 ` Sajida Bhanu (Temp) (QUIC)
2022-01-10 13:29 ` Adrian Hunter
0 siblings, 1 reply; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2022-01-10 13:11 UTC (permalink / raw)
To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
Hi Adrian,
Thanks for the review.
Please find the inline comments
Thanks,
Sajida
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com>
Sent: Friday, January 7, 2022 1:13 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
>
> Thanks for the review.
>
> Please find the inline comments.
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Monday, January 3, 2022 3:20 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org;
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org;
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
> nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>
> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
>
> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted. Normally it is the other way around.
>
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Wednesday, December 15, 2021 7:33 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
>> ulf.hansson@linaro.org; agross@kernel.org;
>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org;
>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>> This feature is useful for debug and testing
>>>
>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>> ---
>>>
>>> Changes since V1:
>>> -Removed sysfs entry for eMMC and SD card error statistics and added
>>> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>
>> Thanks for doing this.
>>
>>> ---
>>> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>> drivers/mmc/core/queue.c | 2 +
>>> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
>>> include/linux/mmc/host.h | 37 ++++++++++++++++
>>> 4 files changed, 186 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
>>> index 3fdbc80..40210c34 100644
>>> --- a/drivers/mmc/core/debugfs.c
>>> +++ b/drivers/mmc/core/debugfs.c
>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>> "%llu\n");
>>>
>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>> + struct mmc_host *host = data;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>> +
>>> + *val = host->err_state ? 1 : 0;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>> + struct mmc_host *host = data;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>> +
>>> + host->err_state = false;
>>
>> Is there much reason to disable err stats from userspace?
>>
>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
>
> That is not exectly how it is programmed. "err_state is false" means no errors have been recorded, not that no errors happended.
>
>>>>>>> If user wants to explicitly clear then he can use this.
Seems over compilicated. A user can just diff the old and new values:
cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats ...later...
cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats diff /tmp/old-stats /tmp/new-stats mv /tmp/new-stats /tmp/old-stats
I suggest just outputting the stats
>>>>>>> Thanks for the suggestion Adrain.
This way user has to call write to store the err_stats data to /tmp/old-stats and user has to call read to read /tmp/old-stats.
And our idea is user call only read to get error stats info.
Please suggest me which is okay.
Thanks,
Sajida
>
>>
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>> + mmc_err_state_clear, "%llu\n");
>>> +
>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>> + struct mmc_host *host = (struct mmc_host *)file->private;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>
>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>
>> if (!host->err_stats_enabled) {
>> seq_printf(file, "Not supported by driver\n");
>> return 0;
>> }
>>
>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
>
> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>
>>>>>> Yes, err_state and err_stats_enabled both are different. err_state will be set if any errors happened in driver level.
> err_stats_enabled will be set if err_stats feature enabled, if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>
>>
>>> +
>>> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>
>> Maybe put the descriptions in an array and iterate e.g.
>>
>> const char *desc[MMC_ERR_MAX] = {
>> [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>> etc
>> };
>> int i;
>>
>> if (!host)
>> return -EINVAL;
>>
>> for (i = 0; i < MMC_ERR_MAX; i++) {
>> if (desc[i])
>> seq_printf(file, "# %s:\t %d\n",
>> desc[1], host->err_stats[i]);
>> }
>>
>>>>>>>>> Sure
>>
>>> +
>>> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_CMD_CRC]);
>>> +
>>> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>> +
>>> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_DAT_CRC]);
>>> +
>>> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_ADMA]);
>>> +
>>> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>> + host->err_stats[MMC_ERR_TUNING]);
>>> +
>>> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_RED]);
>>> +
>>> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_GCE]);
>>> +
>>> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>> +
>>> + seq_printf(file, "# Request Timedout:\t %d\n",
>>> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>> +
>>> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>> +
>>> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>> + host->err_stats[MMC_ERR_ICE_CFG]);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>> +*file) {
>>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>>> +
>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>> + size_t cnt, loff_t *ppos)
>>> +{
>>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>>> +
>>> + if (!host)
>>> + return -EINVAL;
>>> +
>>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>>> +
>>> + return cnt;
>>> +}
>>> +
>>> +static const struct file_operations mmc_err_stats_fops = {
>>> + .open = mmc_err_stats_open,
>>> + .read = seq_read,
>>> + .write = mmc_err_stats_write,
>>> +};
>>> +
>>> void mmc_add_host_debugfs(struct mmc_host *host) {
>>> struct dentry *root;
>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>> &mmc_clock_fops);
>>>
>>> + debugfs_create_file("err_state", 0600, root, host,
>>> + &mmc_err_state);
>>> + debugfs_create_file("err_stats", 0600, root, host,
>>> + &mmc_err_stats_fops);
>>> +
>>> #ifdef CONFIG_FAIL_MMC_REQUEST
>>> if (fail_request)
>>> setup_fault_attr(&fail_default_attr, fail_request); diff --git
>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>> b15c034..5243929 100644
>>> --- a/drivers/mmc/core/queue.c
>>> +++ b/drivers/mmc/core/queue.c
>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>> bool recovery_needed = false;
>>>
>>> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>> +
>>> switch (issue_type) {
>>> case MMC_ISSUE_ASYNC:
>>> case MMC_ISSUE_DCMD:
>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>
>> I think the core changes should be a separate patch from sdhci.
>> I would probably split into 4:
>> mmc core
>> mmc block driver
>> cqhci driver
>> sdhci driver
>>
>>>>>> Sure
>>
>>> index 07c6da1..d742051 100644
>>> --- a/drivers/mmc/host/sdhci.c
>>> +++ b/drivers/mmc/host/sdhci.c
>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>> if (host->ops->dump_vendor_regs)
>>> host->ops->dump_vendor_regs(host);
>>>
>>> + mmc_debugfs_err_stats_enable(host->mmc);
>>
>> Why here and not in e.g. __sdhci_add_host() ?
>>
>>>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
>
> Registers are not dumped for most errors. Please move this to __sdhci_add_host().
>
>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>
>>
>>> SDHCI_DUMP("============================================\n");
>>> }
>>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>> spin_lock_irqsave(&host->lock, flags);
>>>
>>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
>>> timer_list *t)
>>>
>>> if (host->data || host->data_cmd ||
>>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>> mmc_hostname(host->mmc));
>>> sdhci_dumpregs(host);
>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
>>> *host, u32 intmask, u32 *intmask_p)
>>>
>>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>> - if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_TIMEOUT) {
>>> host->cmd->error = -ETIMEDOUT;
>>> - else
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else {
>>> host->cmd->error = -EILSEQ;
>>> -
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> /* Treat data command CRC error the same as data CRC error */
>>> if (host->cmd->data &&
>>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>>> +intmask, u32 *intmask_p)
>>> -ETIMEDOUT :
>>> -EILSEQ;
>>>
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>> if (sdhci_auto_cmd23(host, mrq)) {
>>> mrq->sbc->error = err;
>>> __sdhci_finish_mrq(host, mrq);
>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data_cmd = NULL;
>>> data_cmd->error = -ETIMEDOUT;
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> __sdhci_finish_mrq(host, data_cmd->mrq);
>>> return;
>>> }
>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>> return;
>>> }
>>>
>>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> host->data->error = -ETIMEDOUT;
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + }
>>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>>> host->data->error = -EILSEQ;
>>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>> - != MMC_BUS_TEST_R)
>>> + != MMC_BUS_TEST_R) {
>>> host->data->error = -EILSEQ;
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>> intmask);
>>> sdhci_adma_show_error(host);
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> host->data->error = -EIO;
>>> if (host->ops->adma_workaround)
>>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33
>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>> if (!host->cqe_on)
>>> return false;
>>>
>>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>> +SDHCI_INT_CRC)) {
>>> *cmd_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_TIMEOUT)
>>> + if (intmask & SDHCI_INT_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>> + }
>>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>>> *cmd_error = -ETIMEDOUT;
>>> - else
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>> + } else
>>> *cmd_error = 0;
>>>
>>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>> *data_error = -EILSEQ;
>>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>> + if (intmask & SDHCI_INT_DATA_CRC) {
>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>> + }
>>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>> *data_error = -ETIMEDOUT;
>>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>> *data_error = -EIO;
>>> - else
>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>> + } else
>>> *data_error = 0;
>>>
>>> /* Clear selected interrupts. */
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>> index 7afb57c..c263f8f 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>
>>> struct mmc_host;
>>>
>>> +enum mmc_err_stat {
>>> + MMC_ERR_CMD_TIMEOUT,
>>> + MMC_ERR_CMD_CRC,
>>> + MMC_ERR_DAT_TIMEOUT,
>>> + MMC_ERR_DAT_CRC,
>>> + MMC_ERR_AUTO_CMD,
>>> + MMC_ERR_ADMA,
>>> + MMC_ERR_TUNING,
>>> + MMC_ERR_CMDQ_RED,
>>> + MMC_ERR_CMDQ_GCE,
>>> + MMC_ERR_CMDQ_ICCE,
>>> + MMC_ERR_REQ_TIMEOUT,
>>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>>> + MMC_ERR_ICE_CFG,
>>> + MMC_ERR_MAX,
>>> +};
>>> +
>>> struct mmc_host_ops {
>>> /*
>>> * It is optional for the host to implement pre_req and post_req
>>> in @@ -500,6 +517,8 @@ struct mmc_host {
>>>
>>> /* Host Software Queue support */
>>> bool hsq_enabled;
>>> + u32 err_stats[MMC_ERR_MAX];
>>
>> If you make it u64 then we don't have to think about the value overflowing.
>>
>>>>> Sure
>>
>>> + bool err_state;
>>>
>>> unsigned long private[] ____cacheline_aligned;
>>> };
>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>> DMA_FROM_DEVICE; }
>>>
>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>> +*mmc) {
>>> + mmc->err_state = true;
>>> +}
>>> +
>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>> + enum mmc_err_stat stat) {
>>> +
>>> + /*
>>> + * Ignore the command timeout errors observed during
>>> + * the card init as those are excepted.
>>> + */
>>> + if (!mmc->err_state)
>>> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>
>> This would be better handled in the card init code somewhere, not here.
>>
>>>>>> Sure.
>>
>>> +
>>> + mmc->err_stats[stat] += 1;
>>> +}
>>> +
>>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>>> **new_ext_csd);
>>>
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2] mmc: debugfs: add error statistics
2022-01-10 13:11 ` Sajida Bhanu (Temp) (QUIC)
@ 2022-01-10 13:29 ` Adrian Hunter
2022-01-10 14:59 ` Sajida Bhanu (Temp) (QUIC)
0 siblings, 1 reply; 11+ messages in thread
From: Adrian Hunter @ 2022-01-10 13:29 UTC (permalink / raw)
To: Sajida Bhanu (Temp) (QUIC), riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
On 10/01/2022 15:11, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
>
> Thanks for the review.
>
> Please find the inline comments
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Friday, January 7, 2022 1:13 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>
> On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Monday, January 3, 2022 3:20 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
>> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org;
>> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org;
>> linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>>> Hi Adrian,
>>>
>>> Thanks for the review.
>>>
>>> Please find the inline comments.
>>
>> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted. Normally it is the other way around.
>>
>>>
>>> Thanks,
>>> Sajida
>>>
>>> -----Original Message-----
>>> From: Adrian Hunter <adrian.hunter@intel.com>
>>> Sent: Wednesday, December 15, 2021 7:33 PM
>>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
>>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
>>> ulf.hansson@linaro.org; agross@kernel.org;
>>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org;
>>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
>>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
>>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
>>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>>
>>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>>> This feature is useful for debug and testing
>>>>
>>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>>> ---
>>>>
>>>> Changes since V1:
>>>> -Removed sysfs entry for eMMC and SD card error statistics and added
>>>> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>>
>>> Thanks for doing this.
>>>
>>>> ---
>>>> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>>> drivers/mmc/core/queue.c | 2 +
>>>> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
>>>> include/linux/mmc/host.h | 37 ++++++++++++++++
>>>> 4 files changed, 186 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
>>>> index 3fdbc80..40210c34 100644
>>>> --- a/drivers/mmc/core/debugfs.c
>>>> +++ b/drivers/mmc/core/debugfs.c
>>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>>> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>>> "%llu\n");
>>>>
>>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>>> + struct mmc_host *host = data;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>> +
>>>> + *val = host->err_state ? 1 : 0;
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>>> + struct mmc_host *host = data;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>> +
>>>> + host->err_state = false;
>>>
>>> Is there much reason to disable err stats from userspace?
>>>
>>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
>>
>> That is not exectly how it is programmed. "err_state is false" means no errors have been recorded, not that no errors happended.
>>
>>>>>>>> If user wants to explicitly clear then he can use this.
>
> Seems over compilicated. A user can just diff the old and new values:
>
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats ...later...
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats diff /tmp/old-stats /tmp/new-stats mv /tmp/new-stats /tmp/old-stats
>
> I suggest just outputting the stats
>
>>>>>>>> Thanks for the suggestion Adrain.
> This way user has to call write to store the err_stats data to /tmp/old-stats and user has to call read to read /tmp/old-stats.
Only if you need to see what has changed
>
> And our idea is user call only read to get error stats info.
>
> Please suggest me which is okay.
Please let's start with just outputting the stats.
>
> Thanks,
> Sajida
>
>>
>>>
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>>> + mmc_err_state_clear, "%llu\n");
>>>> +
>>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>>> + struct mmc_host *host = (struct mmc_host *)file->private;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>
>>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>>
>>> if (!host->err_stats_enabled) {
>>> seq_printf(file, "Not supported by driver\n");
>>> return 0;
>>> }
>>>
>>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
>>
>> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>>
>>>>>>> Yes, err_state and err_stats_enabled both are different. err_state will be set if any errors happened in driver level.
>> err_stats_enabled will be set if err_stats feature enabled, if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>>
>>>
>>>> +
>>>> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>>
>>> Maybe put the descriptions in an array and iterate e.g.
>>>
>>> const char *desc[MMC_ERR_MAX] = {
>>> [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>>> etc
>>> };
>>> int i;
>>>
>>> if (!host)
>>> return -EINVAL;
>>>
>>> for (i = 0; i < MMC_ERR_MAX; i++) {
>>> if (desc[i])
>>> seq_printf(file, "# %s:\t %d\n",
>>> desc[1], host->err_stats[i]);
>>> }
>>>
>>>>>>>>>> Sure
>>>
>>>> +
>>>> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMD_CRC]);
>>>> +
>>>> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>>> +
>>>> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_DAT_CRC]);
>>>> +
>>>> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_TUNING]);
>>>> +
>>>> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_RED]);
>>>> +
>>>> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_GCE]);
>>>> +
>>>> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>>> +
>>>> + seq_printf(file, "# Request Timedout:\t %d\n",
>>>> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>>> +
>>>> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>>> +
>>>> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_ICE_CFG]);
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>>> +*file) {
>>>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>>>> +
>>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>>> + size_t cnt, loff_t *ppos)
>>>> +{
>>>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>> +
>>>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>>>> +
>>>> + return cnt;
>>>> +}
>>>> +
>>>> +static const struct file_operations mmc_err_stats_fops = {
>>>> + .open = mmc_err_stats_open,
>>>> + .read = seq_read,
>>>> + .write = mmc_err_stats_write,
>>>> +};
>>>> +
>>>> void mmc_add_host_debugfs(struct mmc_host *host) {
>>>> struct dentry *root;
>>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>>> &mmc_clock_fops);
>>>>
>>>> + debugfs_create_file("err_state", 0600, root, host,
>>>> + &mmc_err_state);
>>>> + debugfs_create_file("err_stats", 0600, root, host,
>>>> + &mmc_err_stats_fops);
>>>> +
>>>> #ifdef CONFIG_FAIL_MMC_REQUEST
>>>> if (fail_request)
>>>> setup_fault_attr(&fail_default_attr, fail_request); diff --git
>>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>>> b15c034..5243929 100644
>>>> --- a/drivers/mmc/core/queue.c
>>>> +++ b/drivers/mmc/core/queue.c
>>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>>> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>>> bool recovery_needed = false;
>>>>
>>>> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>>> +
>>>> switch (issue_type) {
>>>> case MMC_ISSUE_ASYNC:
>>>> case MMC_ISSUE_DCMD:
>>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>>
>>> I think the core changes should be a separate patch from sdhci.
>>> I would probably split into 4:
>>> mmc core
>>> mmc block driver
>>> cqhci driver
>>> sdhci driver
>>>
>>>>>>> Sure
>>>
>>>> index 07c6da1..d742051 100644
>>>> --- a/drivers/mmc/host/sdhci.c
>>>> +++ b/drivers/mmc/host/sdhci.c
>>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>> if (host->ops->dump_vendor_regs)
>>>> host->ops->dump_vendor_regs(host);
>>>>
>>>> + mmc_debugfs_err_stats_enable(host->mmc);
>>>
>>> Why here and not in e.g. __sdhci_add_host() ?
>>>
>>>>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
>>
>> Registers are not dumped for most errors. Please move this to __sdhci_add_host().
>>
>>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>>
>>>
>>>> SDHCI_DUMP("============================================\n");
>>>> }
>>>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>> spin_lock_irqsave(&host->lock, flags);
>>>>
>>>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>> mmc_hostname(host->mmc));
>>>> sdhci_dumpregs(host);
>>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
>>>> timer_list *t)
>>>>
>>>> if (host->data || host->data_cmd ||
>>>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>> mmc_hostname(host->mmc));
>>>> sdhci_dumpregs(host);
>>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
>>>> *host, u32 intmask, u32 *intmask_p)
>>>>
>>>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>>> - if (intmask & SDHCI_INT_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_TIMEOUT) {
>>>> host->cmd->error = -ETIMEDOUT;
>>>> - else
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> + } else {
>>>> host->cmd->error = -EILSEQ;
>>>> -
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> + }
>>>> /* Treat data command CRC error the same as data CRC error */
>>>> if (host->cmd->data &&
>>>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@ -3266,6
>>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>>>> +intmask, u32 *intmask_p)
>>>> -ETIMEDOUT :
>>>> -EILSEQ;
>>>>
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>> if (sdhci_auto_cmd23(host, mrq)) {
>>>> mrq->sbc->error = err;
>>>> __sdhci_finish_mrq(host, mrq);
>>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>> host->data_cmd = NULL;
>>>> data_cmd->error = -ETIMEDOUT;
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> __sdhci_finish_mrq(host, data_cmd->mrq);
>>>> return;
>>>> }
>>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>> return;
>>>> }
>>>>
>>>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>> host->data->error = -ETIMEDOUT;
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> + }
>>>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>> host->data->error = -EILSEQ;
>>>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>>> - != MMC_BUS_TEST_R)
>>>> + != MMC_BUS_TEST_R) {
>>>> host->data->error = -EILSEQ;
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> + }
>>>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>> intmask);
>>>> sdhci_adma_show_error(host);
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>> host->data->error = -EIO;
>>>> if (host->ops->adma_workaround)
>>>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33
>>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>> if (!host->cqe_on)
>>>> return false;
>>>>
>>>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>>> +SDHCI_INT_CRC)) {
>>>> *cmd_error = -EILSEQ;
>>>> - else if (intmask & SDHCI_INT_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_CRC) {
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> + }
>>>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>>>> *cmd_error = -ETIMEDOUT;
>>>> - else
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> + } else
>>>> *cmd_error = 0;
>>>>
>>>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>> *data_error = -EILSEQ;
>>>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_DATA_CRC) {
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> + }
>>>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>> *data_error = -ETIMEDOUT;
>>>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>> *data_error = -EIO;
>>>> - else
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>> + } else
>>>> *data_error = 0;
>>>>
>>>> /* Clear selected interrupts. */
>>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>>> index 7afb57c..c263f8f 100644
>>>> --- a/include/linux/mmc/host.h
>>>> +++ b/include/linux/mmc/host.h
>>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>>
>>>> struct mmc_host;
>>>>
>>>> +enum mmc_err_stat {
>>>> + MMC_ERR_CMD_TIMEOUT,
>>>> + MMC_ERR_CMD_CRC,
>>>> + MMC_ERR_DAT_TIMEOUT,
>>>> + MMC_ERR_DAT_CRC,
>>>> + MMC_ERR_AUTO_CMD,
>>>> + MMC_ERR_ADMA,
>>>> + MMC_ERR_TUNING,
>>>> + MMC_ERR_CMDQ_RED,
>>>> + MMC_ERR_CMDQ_GCE,
>>>> + MMC_ERR_CMDQ_ICCE,
>>>> + MMC_ERR_REQ_TIMEOUT,
>>>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>>>> + MMC_ERR_ICE_CFG,
>>>> + MMC_ERR_MAX,
>>>> +};
>>>> +
>>>> struct mmc_host_ops {
>>>> /*
>>>> * It is optional for the host to implement pre_req and post_req
>>>> in @@ -500,6 +517,8 @@ struct mmc_host {
>>>>
>>>> /* Host Software Queue support */
>>>> bool hsq_enabled;
>>>> + u32 err_stats[MMC_ERR_MAX];
>>>
>>> If you make it u64 then we don't have to think about the value overflowing.
>>>
>>>>>> Sure
>>>
>>>> + bool err_state;
>>>>
>>>> unsigned long private[] ____cacheline_aligned;
>>>> };
>>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>>> DMA_FROM_DEVICE; }
>>>>
>>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>>> +*mmc) {
>>>> + mmc->err_state = true;
>>>> +}
>>>> +
>>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>>> + enum mmc_err_stat stat) {
>>>> +
>>>> + /*
>>>> + * Ignore the command timeout errors observed during
>>>> + * the card init as those are excepted.
>>>> + */
>>>> + if (!mmc->err_state)
>>>> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>>
>>> This would be better handled in the card init code somewhere, not here.
>>>
>>>>>>> Sure.
>>>
>>>> +
>>>> + mmc->err_stats[stat] += 1;
>>>> +}
>>>> +
>>>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>>>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>>>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>>>> **new_ext_csd);
>>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH V2] mmc: debugfs: add error statistics
2022-01-10 13:29 ` Adrian Hunter
@ 2022-01-10 14:59 ` Sajida Bhanu (Temp) (QUIC)
0 siblings, 0 replies; 11+ messages in thread
From: Sajida Bhanu (Temp) (QUIC) @ 2022-01-10 14:59 UTC (permalink / raw)
To: Adrian Hunter, Sajida Bhanu (Temp) (QUIC),
riteshh, Asutosh Das (asd),
ulf.hansson, agross, bjorn.andersson, linux-mmc, linux-arm-msm,
linux-kernel
Cc: stummala, vbadigan, Ram Prakash Gupta (QUIC),
Pradeep Pragallapati (QUIC),
sartgarg, nitirawa, sayalil
-----Original Message-----
From: Adrian Hunter <adrian.hunter@intel.com>
Sent: Monday, January 10, 2022 6:59 PM
To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>; riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>; ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org; nitirawa@codeaurora.org; sayalil@codeaurora.org
Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
On 10/01/2022 15:11, Sajida Bhanu (Temp) (QUIC) wrote:
> Hi Adrian,
>
> Thanks for the review.
>
> Please find the inline comments
>
> Thanks,
> Sajida
>
> -----Original Message-----
> From: Adrian Hunter <adrian.hunter@intel.com>
> Sent: Friday, January 7, 2022 1:13 PM
> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
> ulf.hansson@linaro.org; agross@kernel.org; bjorn.andersson@linaro.org;
> linux-mmc@vger.kernel.org; linux-arm-msm@vger.kernel.org;
> linux-kernel@vger.kernel.org
> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
> nitirawa@codeaurora.org; sayalil@codeaurora.org
> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>
> On 04/01/2022 17:02, Sajida Bhanu (Temp) (QUIC) wrote:
>> Hi Adrian,
>>
>> Thanks for the review.
>>
>> Please find the inline comments.
>>
>> Thanks,
>> Sajida
>>
>> -----Original Message-----
>> From: Adrian Hunter <adrian.hunter@intel.com>
>> Sent: Monday, January 3, 2022 3:20 PM
>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
>> ulf.hansson@linaro.org; agross@kernel.org;
>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org;
>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati (QUIC)
>> <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>
>> On 21/12/2021 09:16, Sajida Bhanu (Temp) (QUIC) wrote:
>>> Hi Adrian,
>>>
>>> Thanks for the review.
>>>
>>> Please find the inline comments.
>>
>> I find the way the inline comments are done a bit difficult to follow, since what I wrote is not quoted, and what you wrote is quoted. Normally it is the other way around.
>>
>>>
>>> Thanks,
>>> Sajida
>>>
>>> -----Original Message-----
>>> From: Adrian Hunter <adrian.hunter@intel.com>
>>> Sent: Wednesday, December 15, 2021 7:33 PM
>>> To: Sajida Bhanu (Temp) (QUIC) <quic_c_sbhanu@quicinc.com>;
>>> riteshh@codeaurora.org; Asutosh Das (asd) <asutoshd@quicinc.com>;
>>> ulf.hansson@linaro.org; agross@kernel.org;
>>> bjorn.andersson@linaro.org; linux-mmc@vger.kernel.org;
>>> linux-arm-msm@vger.kernel.org; linux-kernel@vger.kernel.org
>>> Cc: stummala@codeaurora.org; vbadigan@codeaurora.org; Ram Prakash
>>> Gupta (QUIC) <quic_rampraka@quicinc.com>; Pradeep Pragallapati
>>> (QUIC) <quic_pragalla@quicinc.com>; sartgarg@codeaurora.org;
>>> nitirawa@codeaurora.org; sayalil@codeaurora.org
>>> Subject: Re: [PATCH V2] mmc: debugfs: add error statistics
>>>
>>> On 14/12/2021 16:41, Shaik Sajida Bhanu wrote:
>>>> Add debugfs entry to query eMMC and SD card errors statistics.
>>>> This feature is useful for debug and testing
>>>>
>>>> Signed-off-by: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
>>>> ---
>>>>
>>>> Changes since V1:
>>>> -Removed sysfs entry for eMMC and SD card error statistics and added
>>>> debugfs entry as suggested by Adrian Hunter and Ulf Hansson.
>>>
>>> Thanks for doing this.
>>>
>>>> ---
>>>> drivers/mmc/core/debugfs.c | 106 +++++++++++++++++++++++++++++++++++++++++++++
>>>> drivers/mmc/core/queue.c | 2 +
>>>> drivers/mmc/host/sdhci.c | 53 ++++++++++++++++++-----
>>>> include/linux/mmc/host.h | 37 ++++++++++++++++
>>>> 4 files changed, 186 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/drivers/mmc/core/debugfs.c
>>>> b/drivers/mmc/core/debugfs.c index 3fdbc80..40210c34 100644
>>>> --- a/drivers/mmc/core/debugfs.c
>>>> +++ b/drivers/mmc/core/debugfs.c
>>>> @@ -223,6 +223,107 @@ static int mmc_clock_opt_set(void *data, u64
>>>> val) DEFINE_DEBUGFS_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
>>>> "%llu\n");
>>>>
>>>> +static int mmc_err_state_get(void *data, u64 *val) {
>>>> + struct mmc_host *host = data;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>> +
>>>> + *val = host->err_state ? 1 : 0;
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_state_clear(void *data, u64 val) {
>>>> + struct mmc_host *host = data;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>> +
>>>> + host->err_state = false;
>>>
>>> Is there much reason to disable err stats from userspace?
>>>
>>>>>>>> Yes , while debugging we can go and check err_state , It is false means no errors happened in driver level and true means errors happened in driver level and then we can go and check err_stats[] to know more on error details like data CRC , command CRC etc.
>>
>> That is not exectly how it is programmed. "err_state is false" means no errors have been recorded, not that no errors happended.
>>
>>>>>>>> If user wants to explicitly clear then he can use this.
>
> Seems over compilicated. A user can just diff the old and new values:
>
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/old-stats ...later...
> cat /sys/kernel/debug/mmc0/err_stats > /tmp/new-stats diff
> /tmp/old-stats /tmp/new-stats mv /tmp/new-stats /tmp/old-stats
>
> I suggest just outputting the stats
>
>>>>>>>> Thanks for the suggestion Adrain.
> This way user has to call write to store the err_stats data to /tmp/old-stats and user has to call read to read /tmp/old-stats.
Only if you need to see what has changed
>
> And our idea is user call only read to get error stats info.
>
> Please suggest me which is okay.
Please let's start with just outputting the stats.
>>>>> Sure. Thanks for the suggestion.
>
> Thanks,
> Sajida
>
>>
>>>
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +DEFINE_SIMPLE_ATTRIBUTE(mmc_err_state, mmc_err_state_get,
>>>> + mmc_err_state_clear, "%llu\n");
>>>> +
>>>> +static int mmc_err_stats_show(struct seq_file *file, void *data) {
>>>> + struct mmc_host *host = (struct mmc_host *)file->private;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>
>>> I was thinking we needed a way to determine whether stats were being collected because not all drivers would support it at least initially e.g.
>>>
>>> if (!host->err_stats_enabled) {
>>> seq_printf(file, "Not supported by driver\n");
>>> return 0;
>>> }
>>>
>>>>>>>>>> You mean declare another variable (err_stats_enabled) and enable it in probe?
>>
>> Yes, although it is not clear if this is the same as what you want from err_state, i.e. is err_state different from err_stats_enabled?
>>
>>>>>>> Yes, err_state and err_stats_enabled both are different. err_state will be set if any errors happened in driver level.
>> err_stats_enabled will be set if err_stats feature enabled, if any vendor wants to use err_stats feature they will set this err_stats_enabled in their vendor specific file.
>>
>>>
>>>> +
>>>> + seq_printf(file, "# Command Timeout Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMD_TIMEOUT]);
>>>
>>> Maybe put the descriptions in an array and iterate e.g.
>>>
>>> const char *desc[MMC_ERR_MAX] = {
>>> [MMC_ERR_CMD_TIMEOUT] = "Command Timeout Occurred",
>>> etc
>>> };
>>> int i;
>>>
>>> if (!host)
>>> return -EINVAL;
>>>
>>> for (i = 0; i < MMC_ERR_MAX; i++) {
>>> if (desc[i])
>>> seq_printf(file, "# %s:\t %d\n",
>>> desc[1], host->err_stats[i]);
>>> }
>>>
>>>>>>>>>> Sure
>>>
>>>> +
>>>> + seq_printf(file, "# Command CRC Errors Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMD_CRC]);
>>>> +
>>>> + seq_printf(file, "# Data Timeout Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_DAT_TIMEOUT]);
>>>> +
>>>> + seq_printf(file, "# Data CRC Errors Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_DAT_CRC]);
>>>> +
>>>> + seq_printf(file, "# Auto-Cmd Error Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> + seq_printf(file, "# ADMA Error Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_ADMA]);
>>>> +
>>>> + seq_printf(file, "# Tuning Error Occurred:\t %d\n",
>>>> + host->err_stats[MMC_ERR_TUNING]);
>>>> +
>>>> + seq_printf(file, "# CMDQ RED Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_RED]);
>>>> +
>>>> + seq_printf(file, "# CMDQ GCE Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_GCE]);
>>>> +
>>>> + seq_printf(file, "# CMDQ ICCE Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_ICCE]);
>>>> +
>>>> + seq_printf(file, "# Request Timedout:\t %d\n",
>>>> + host->err_stats[MMC_ERR_REQ_TIMEOUT]);
>>>> +
>>>> + seq_printf(file, "# CMDQ Request Timedout:\t %d\n",
>>>> + host->err_stats[MMC_ERR_CMDQ_REQ_TIMEOUT]);
>>>> +
>>>> + seq_printf(file, "# ICE Config Errors:\t\t %d\n",
>>>> + host->err_stats[MMC_ERR_ICE_CFG]);
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int mmc_err_stats_open(struct inode *inode, struct file
>>>> +*file) {
>>>> + return single_open(file, mmc_err_stats_show, inode->i_private); }
>>>> +
>>>> +static ssize_t mmc_err_stats_write(struct file *filp, const char __user *ubuf,
>>>> + size_t cnt, loff_t *ppos)
>>>> +{
>>>> + struct mmc_host *host = filp->f_mapping->host->i_private;
>>>> +
>>>> + if (!host)
>>>> + return -EINVAL;
>>>> +
>>>> + pr_debug("%s: Resetting MMC error statistics\n", __func__);
>>>> + memset(host->err_stats, 0, sizeof(host->err_stats));
>>>> +
>>>> + return cnt;
>>>> +}
>>>> +
>>>> +static const struct file_operations mmc_err_stats_fops = {
>>>> + .open = mmc_err_stats_open,
>>>> + .read = seq_read,
>>>> + .write = mmc_err_stats_write,
>>>> +};
>>>> +
>>>> void mmc_add_host_debugfs(struct mmc_host *host) {
>>>> struct dentry *root;
>>>> @@ -236,6 +337,11 @@ void mmc_add_host_debugfs(struct mmc_host *host)
>>>> debugfs_create_file_unsafe("clock", S_IRUSR | S_IWUSR, root, host,
>>>> &mmc_clock_fops);
>>>>
>>>> + debugfs_create_file("err_state", 0600, root, host,
>>>> + &mmc_err_state);
>>>> + debugfs_create_file("err_stats", 0600, root, host,
>>>> + &mmc_err_stats_fops);
>>>> +
>>>> #ifdef CONFIG_FAIL_MMC_REQUEST
>>>> if (fail_request)
>>>> setup_fault_attr(&fail_default_attr, fail_request); diff --git
>>>> a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index
>>>> b15c034..5243929 100644
>>>> --- a/drivers/mmc/core/queue.c
>>>> +++ b/drivers/mmc/core/queue.c
>>>> @@ -100,6 +100,8 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req)
>>>> enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
>>>> bool recovery_needed = false;
>>>>
>>>> + mmc_debugfs_err_stats_inc(host, MMC_ERR_CMDQ_REQ_TIMEOUT);
>>>> +
>>>> switch (issue_type) {
>>>> case MMC_ISSUE_ASYNC:
>>>> case MMC_ISSUE_DCMD:
>>>> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
>>>
>>> I think the core changes should be a separate patch from sdhci.
>>> I would probably split into 4:
>>> mmc core
>>> mmc block driver
>>> cqhci driver
>>> sdhci driver
>>>
>>>>>>> Sure
>>>
>>>> index 07c6da1..d742051 100644
>>>> --- a/drivers/mmc/host/sdhci.c
>>>> +++ b/drivers/mmc/host/sdhci.c
>>>> @@ -113,6 +113,7 @@ void sdhci_dumpregs(struct sdhci_host *host)
>>>> if (host->ops->dump_vendor_regs)
>>>> host->ops->dump_vendor_regs(host);
>>>>
>>>> + mmc_debugfs_err_stats_enable(host->mmc);
>>>
>>> Why here and not in e.g. __sdhci_add_host() ?
>>>
>>>>>>> If any errors happened in driver level then we will call sdhci_dumpregs() right( err_state true means some errors happened in driver level ). So it is better to call mmc_debugfs_err_stats_enable() here.
>>
>> Registers are not dumped for most errors. Please move this to __sdhci_add_host().
>>
>>>>>> err_state is true means errors happened in driver level and for most of the errors we are dumping the registers, so I am thinking it is better to have this call in sdhci_dumpregs() only.
>>
>>>
>>>> SDHCI_DUMP("============================================\n");
>>>> }
>>>> EXPORT_SYMBOL_GPL(sdhci_dumpregs);
>>>> @@ -3159,6 +3160,7 @@ static void sdhci_timeout_timer(struct timer_list *t)
>>>> spin_lock_irqsave(&host->lock, flags);
>>>>
>>>> if (host->cmd && !sdhci_data_line_cmd(host->cmd)) {
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>> pr_err("%s: Timeout waiting for hardware cmd interrupt.\n",
>>>> mmc_hostname(host->mmc));
>>>> sdhci_dumpregs(host);
>>>> @@ -3181,6 +3183,7 @@ static void sdhci_timeout_data_timer(struct
>>>> timer_list *t)
>>>>
>>>> if (host->data || host->data_cmd ||
>>>> (host->cmd && sdhci_data_line_cmd(host->cmd))) {
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_REQ_TIMEOUT);
>>>> pr_err("%s: Timeout waiting for hardware interrupt.\n",
>>>> mmc_hostname(host->mmc));
>>>> sdhci_dumpregs(host);
>>>> @@ -3240,11 +3243,15 @@ static void sdhci_cmd_irq(struct sdhci_host
>>>> *host, u32 intmask, u32 *intmask_p)
>>>>
>>>> if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
>>>> SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
>>>> - if (intmask & SDHCI_INT_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_TIMEOUT) {
>>>> host->cmd->error = -ETIMEDOUT;
>>>> - else
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> + } else {
>>>> host->cmd->error = -EILSEQ;
>>>> -
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> + }
>>>> /* Treat data command CRC error the same as data CRC error */
>>>> if (host->cmd->data &&
>>>> (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) == @@
>>>> -3266,6
>>>> +3273,7 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32
>>>> +intmask, u32 *intmask_p)
>>>> -ETIMEDOUT :
>>>> -EILSEQ;
>>>>
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
>>>> if (sdhci_auto_cmd23(host, mrq)) {
>>>> mrq->sbc->error = err;
>>>> __sdhci_finish_mrq(host, mrq);
>>>> @@ -3342,6 +3350,7 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>> if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>> host->data_cmd = NULL;
>>>> data_cmd->error = -ETIMEDOUT;
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> __sdhci_finish_mrq(host, data_cmd->mrq);
>>>> return;
>>>> }
>>>> @@ -3375,18 +3384,25 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
>>>> return;
>>>> }
>>>>
>>>> - if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>> host->data->error = -ETIMEDOUT;
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> + }
>>>> else if (intmask & SDHCI_INT_DATA_END_BIT)
>>>> host->data->error = -EILSEQ;
>>>> else if ((intmask & SDHCI_INT_DATA_CRC) &&
>>>> SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
>>>> - != MMC_BUS_TEST_R)
>>>> + != MMC_BUS_TEST_R) {
>>>> host->data->error = -EILSEQ;
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> + }
>>>> else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>> pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
>>>> intmask);
>>>> sdhci_adma_show_error(host);
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>> host->data->error = -EIO;
>>>> if (host->ops->adma_workaround)
>>>> host->ops->adma_workaround(host, intmask); @@ -3905,20 +3921,33
>>>> @@ bool sdhci_cqe_irq(struct sdhci_host *host, u32 intmask, int *cmd_error,
>>>> if (!host->cqe_on)
>>>> return false;
>>>>
>>>> - if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC))
>>>> + if (intmask & (SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
>>>> +SDHCI_INT_CRC)) {
>>>> *cmd_error = -EILSEQ;
>>>> - else if (intmask & SDHCI_INT_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_CRC) {
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
>>>> + }
>>>> + } else if (intmask & SDHCI_INT_TIMEOUT) {
>>>> *cmd_error = -ETIMEDOUT;
>>>> - else
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
>>>> + } else
>>>> *cmd_error = 0;
>>>>
>>>> - if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
>>>> + if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC)) {
>>>> *data_error = -EILSEQ;
>>>> - else if (intmask & SDHCI_INT_DATA_TIMEOUT)
>>>> + if (intmask & SDHCI_INT_DATA_CRC) {
>>>> + if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
>>>> + host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
>>>> + }
>>>> + } else if (intmask & SDHCI_INT_DATA_TIMEOUT) {
>>>> *data_error = -ETIMEDOUT;
>>>> - else if (intmask & SDHCI_INT_ADMA_ERROR)
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
>>>> + } else if (intmask & SDHCI_INT_ADMA_ERROR) {
>>>> *data_error = -EIO;
>>>> - else
>>>> + mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
>>>> + } else
>>>> *data_error = 0;
>>>>
>>>> /* Clear selected interrupts. */
>>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>>> index 7afb57c..c263f8f 100644
>>>> --- a/include/linux/mmc/host.h
>>>> +++ b/include/linux/mmc/host.h
>>>> @@ -93,6 +93,23 @@ struct mmc_clk_phase_map {
>>>>
>>>> struct mmc_host;
>>>>
>>>> +enum mmc_err_stat {
>>>> + MMC_ERR_CMD_TIMEOUT,
>>>> + MMC_ERR_CMD_CRC,
>>>> + MMC_ERR_DAT_TIMEOUT,
>>>> + MMC_ERR_DAT_CRC,
>>>> + MMC_ERR_AUTO_CMD,
>>>> + MMC_ERR_ADMA,
>>>> + MMC_ERR_TUNING,
>>>> + MMC_ERR_CMDQ_RED,
>>>> + MMC_ERR_CMDQ_GCE,
>>>> + MMC_ERR_CMDQ_ICCE,
>>>> + MMC_ERR_REQ_TIMEOUT,
>>>> + MMC_ERR_CMDQ_REQ_TIMEOUT,
>>>> + MMC_ERR_ICE_CFG,
>>>> + MMC_ERR_MAX,
>>>> +};
>>>> +
>>>> struct mmc_host_ops {
>>>> /*
>>>> * It is optional for the host to implement pre_req and post_req
>>>> in @@ -500,6 +517,8 @@ struct mmc_host {
>>>>
>>>> /* Host Software Queue support */
>>>> bool hsq_enabled;
>>>> + u32 err_stats[MMC_ERR_MAX];
>>>
>>> If you make it u64 then we don't have to think about the value overflowing.
>>>
>>>>>> Sure
>>>
>>>> + bool err_state;
>>>>
>>>> unsigned long private[] ____cacheline_aligned;
>>>> };
>>>> @@ -635,6 +654,24 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
>>>> return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE :
>>>> DMA_FROM_DEVICE; }
>>>>
>>>> +static inline void mmc_debugfs_err_stats_enable(struct mmc_host
>>>> +*mmc) {
>>>> + mmc->err_state = true;
>>>> +}
>>>> +
>>>> +static inline void mmc_debugfs_err_stats_inc(struct mmc_host *mmc,
>>>> + enum mmc_err_stat stat) {
>>>> +
>>>> + /*
>>>> + * Ignore the command timeout errors observed during
>>>> + * the card init as those are excepted.
>>>> + */
>>>> + if (!mmc->err_state)
>>>> + mmc->err_stats[MMC_ERR_CMD_TIMEOUT] = 0;
>>>
>>> This would be better handled in the card init code somewhere, not here.
>>>
>>>>>>> Sure.
>>>
>>>> +
>>>> + mmc->err_stats[stat] += 1;
>>>> +}
>>>> +
>>>> int mmc_send_tuning(struct mmc_host *host, u32 opcode, int
>>>> *cmd_error); int mmc_send_abort_tuning(struct mmc_host *host, u32
>>>> opcode); int mmc_get_ext_csd(struct mmc_card *card, u8
>>>> **new_ext_csd);
>>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2] mmc: debugfs: add error statistics
@ 2021-12-14 21:11 kernel test robot
0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2021-12-14 21:11 UTC (permalink / raw)
To: kbuild
[-- Attachment #1: Type: text/plain, Size: 28711 bytes --]
CC: kbuild-all(a)lists.01.org
In-Reply-To: <1639492863-7053-1-git-send-email-quic_c_sbhanu@quicinc.com>
References: <1639492863-7053-1-git-send-email-quic_c_sbhanu@quicinc.com>
TO: Shaik Sajida Bhanu <quic_c_sbhanu@quicinc.com>
TO: adrian.hunter(a)intel.com
TO: riteshh(a)codeaurora.org
TO: asutoshd(a)quicinc.com
TO: ulf.hansson(a)linaro.org
TO: agross(a)kernel.org
TO: bjorn.andersson(a)linaro.org
TO: linux-mmc(a)vger.kernel.org
TO: linux-arm-msm(a)vger.kernel.org
TO: linux-kernel(a)vger.kernel.org
CC: stummala(a)codeaurora.org
Hi Shaik,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on linus/master]
[also build test WARNING on v5.16-rc5 next-20211213]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Shaik-Sajida-Bhanu/mmc-debugfs-add-error-statistics/20211214-224314
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 5472f14a37421d1bca3dddf33cabd3bd6dbefbbc
:::::: branch date: 6 hours ago
:::::: commit date: 6 hours ago
config: i386-randconfig-m021-20211214 (https://download.01.org/0day-ci/archive/20211215/202112150555.zkOS6PKM-lkp(a)intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
New smatch warnings:
drivers/mmc/host/sdhci.c:3251 sdhci_cmd_irq() warn: was && intended here instead of ||?
drivers/mmc/host/sdhci.c:3397 sdhci_data_irq() warn: was && intended here instead of ||?
drivers/mmc/host/sdhci.c:3927 sdhci_cqe_irq() warn: was && intended here instead of ||?
Old smatch warnings:
drivers/mmc/host/sdhci.c:3940 sdhci_cqe_irq() warn: was && intended here instead of ||?
vim +3251 drivers/mmc/host/sdhci.c
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3212
4bf780996669280 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3213 static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *intmask_p)
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3214 {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3215 /* Handle auto-CMD12 error */
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3216 if (intmask & SDHCI_INT_AUTO_CMD_ERR && host->data_cmd) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3217 struct mmc_request *mrq = host->data_cmd->mrq;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3218 u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS);
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3219 int data_err_bit = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3220 SDHCI_INT_DATA_TIMEOUT :
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3221 SDHCI_INT_DATA_CRC;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3222
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3223 /* Treat auto-CMD12 error the same as data error */
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3224 if (!mrq->sbc && (host->flags & SDHCI_AUTO_CMD12)) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3225 *intmask_p |= data_err_bit;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3226 return;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3227 }
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3228 }
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3229
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3230 if (!host->cmd) {
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3231 /*
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3232 * SDHCI recovers from errors by resetting the cmd and data
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3233 * circuits. Until that is done, there very well might be more
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3234 * interrupts, so ignore them in that case.
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3235 */
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3236 if (host->pending_reset)
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3237 return;
2e4456f08fa81b9 drivers/mmc/host/sdhci.c Marek Vasut 2015-11-18 3238 pr_err("%s: Got command interrupt 0x%08x even though no command operation was in progress.\n",
b67ac3f339c76df drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3239 mmc_hostname(host->mmc), (unsigned)intmask);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3240 sdhci_dumpregs(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3241 return;
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3242 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3243
ec014cbacf6229c drivers/mmc/host/sdhci.c Russell King 2016-01-26 3244 if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
ec014cbacf6229c drivers/mmc/host/sdhci.c Russell King 2016-01-26 3245 SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3246 if (intmask & SDHCI_INT_TIMEOUT) {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman 2007-07-22 3247 host->cmd->error = -ETIMEDOUT;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3248 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3249 } else {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman 2007-07-22 3250 host->cmd->error = -EILSEQ;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 @3251 if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3252 host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3253 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_CRC);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3254 }
4bf780996669280 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3255 /* Treat data command CRC error the same as data CRC error */
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3256 if (host->cmd->data &&
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3257 (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3258 SDHCI_INT_CRC) {
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3259 host->cmd = NULL;
4bf780996669280 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3260 *intmask_p |= SDHCI_INT_DATA_CRC;
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3261 return;
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3262 }
71fcbda0fcddd08 drivers/mmc/host/sdhci.c Russell King 2016-01-26 3263
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter 2019-04-05 3264 __sdhci_finish_mrq(host, host->cmd->mrq);
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3265 return;
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3266 }
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3267
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3268 /* Handle auto-CMD23 error */
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3269 if (intmask & SDHCI_INT_AUTO_CMD_ERR) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3270 struct mmc_request *mrq = host->cmd->mrq;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3271 u16 auto_cmd_status = sdhci_readw(host, SDHCI_AUTO_CMD_STATUS);
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3272 int err = (auto_cmd_status & SDHCI_AUTO_CMD_TIMEOUT) ?
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3273 -ETIMEDOUT :
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3274 -EILSEQ;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3275
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3276 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_AUTO_CMD);
38929d4f0d811df drivers/mmc/host/sdhci.c ChanWoo Lee 2021-08-25 3277 if (sdhci_auto_cmd23(host, mrq)) {
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3278 mrq->sbc->error = err;
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter 2019-04-05 3279 __sdhci_finish_mrq(host, mrq);
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3280 return;
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3281 }
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3282 }
af849c86109d792 drivers/mmc/host/sdhci.c Adrian Hunter 2018-11-15 3283
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3284 if (intmask & SDHCI_INT_RESPONSE)
43b58b36b7e6554 drivers/mmc/host/sdhci.c Pierre Ossman 2007-07-25 3285 sdhci_finish_command(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3286 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3287
08621b18a15ee21 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3288 static void sdhci_adma_show_error(struct sdhci_host *host)
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3289 {
1c3d5f6ddcb915c drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3290 void *desc = host->adma_table;
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3291 dma_addr_t dma = host->adma_addr;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3292
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3293 sdhci_dumpregs(host);
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3294
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3295 while (true) {
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3296 struct sdhci_adma2_64_desc *dma_desc = desc;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3297
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3298 if (host->flags & SDHCI_USE_64_BIT_DMA)
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3299 SDHCI_DUMP("%08llx: DMA 0x%08x%08x, LEN 0x%04x, Attr=0x%02x\n",
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3300 (unsigned long long)dma,
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3301 le32_to_cpu(dma_desc->addr_hi),
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3302 le32_to_cpu(dma_desc->addr_lo),
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3303 le16_to_cpu(dma_desc->len),
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3304 le16_to_cpu(dma_desc->cmd));
e57a5f61eae7e14 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3305 else
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3306 SDHCI_DUMP("%08llx: DMA 0x%08x, LEN 0x%04x, Attr=0x%02x\n",
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3307 (unsigned long long)dma,
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3308 le32_to_cpu(dma_desc->addr_lo),
0545230f1764bc6 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3309 le16_to_cpu(dma_desc->len),
0545230f1764bc6 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3310 le16_to_cpu(dma_desc->cmd));
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3311
76fe379acaeb857 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3312 desc += host->desc_sz;
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3313 dma += host->desc_sz;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3314
0545230f1764bc6 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3315 if (dma_desc->cmd & cpu_to_le16(ADMA2_END))
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3316 break;
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3317 }
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3318 }
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3319
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3320 static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3321 {
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S 2012-01-06 3322 u32 command;
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3323
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3324 /*
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3325 * CMD19 generates _only_ Buffer Read Ready interrupt if
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3326 * use sdhci_send_tuning.
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3327 * Need to exclude this case: PIO mode and use mmc_send_tuning,
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3328 * If not, sdhci_transfer_pio will never be called, make the
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3329 * SDHCI_INT_DATA_AVAIL always there, stuck in irq storm.
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3330 */
f4ff24f8a7c144e drivers/mmc/host/sdhci.c Haibo Chen 2021-08-13 3331 if (intmask & SDHCI_INT_DATA_AVAIL && !host->data) {
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S 2012-01-06 3332 command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND));
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S 2012-01-06 3333 if (command == MMC_SEND_TUNING_BLOCK ||
069c9f142822d55 drivers/mmc/host/sdhci.c Girish K S 2012-01-06 3334 command == MMC_SEND_TUNING_BLOCK_HS200) {
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath 2011-05-05 3335 host->tuning_done = 1;
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath 2011-05-05 3336 wake_up(&host->buf_ready_int);
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath 2011-05-05 3337 return;
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath 2011-05-05 3338 }
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath 2011-05-05 3339 }
b513ea250eb7c36 drivers/mmc/host/sdhci.c Arindam Nath 2011-05-05 3340
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3341 if (!host->data) {
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3342 struct mmc_command *data_cmd = host->data_cmd;
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3343
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3344 /*
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3345 * The "data complete" interrupt is also used to
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3346 * indicate that a busy state has ended. See comment
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3347 * above in sdhci_cmd_irq().
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3348 */
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3349 if (data_cmd && (data_cmd->flags & MMC_RSP_BUSY)) {
c5abd5e8998e20c drivers/mmc/host/sdhci.c Matthieu CASTET 2014-08-14 3350 if (intmask & SDHCI_INT_DATA_TIMEOUT) {
69b962a65a54769 drivers/mmc/host/sdhci.c Adrian Hunter 2016-11-02 3351 host->data_cmd = NULL;
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3352 data_cmd->error = -ETIMEDOUT;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3353 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_CMD_TIMEOUT);
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter 2019-04-05 3354 __sdhci_finish_mrq(host, data_cmd->mrq);
c5abd5e8998e20c drivers/mmc/host/sdhci.c Matthieu CASTET 2014-08-14 3355 return;
c5abd5e8998e20c drivers/mmc/host/sdhci.c Matthieu CASTET 2014-08-14 3356 }
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3357 if (intmask & SDHCI_INT_DATA_END) {
69b962a65a54769 drivers/mmc/host/sdhci.c Adrian Hunter 2016-11-02 3358 host->data_cmd = NULL;
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min 2014-08-30 3359 /*
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min 2014-08-30 3360 * Some cards handle busy-end interrupt
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min 2014-08-30 3361 * before the command completed, so make
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min 2014-08-30 3362 * sure we do things in the proper order.
e99783a45220a2c drivers/mmc/host/sdhci.c Chanho Min 2014-08-30 3363 */
ea96802384cd062 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3364 if (host->cmd == data_cmd)
ea96802384cd062 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3365 return;
ea96802384cd062 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3366
19d2f695f4e8279 drivers/mmc/host/sdhci.c Adrian Hunter 2019-04-05 3367 __sdhci_finish_mrq(host, data_cmd->mrq);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3368 return;
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3369 }
e809517f6fa5803 drivers/mmc/host/sdhci.c Pierre Ossman 2008-07-25 3370 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3371
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3372 /*
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3373 * SDHCI recovers from errors by resetting the cmd and data
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3374 * circuits. Until that is done, there very well might be more
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3375 * interrupts, so ignore them in that case.
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3376 */
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3377 if (host->pending_reset)
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3378 return;
ed1563de0bc90e0 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3379
2e4456f08fa81b9 drivers/mmc/host/sdhci.c Marek Vasut 2015-11-18 3380 pr_err("%s: Got data interrupt 0x%08x even though no data operation was in progress.\n",
b67ac3f339c76df drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3381 mmc_hostname(host->mmc), (unsigned)intmask);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3382 sdhci_dumpregs(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3383
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3384 return;
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3385 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3386
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3387 if (intmask & SDHCI_INT_DATA_TIMEOUT) {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman 2007-07-22 3388 host->data->error = -ETIMEDOUT;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3389 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_TIMEOUT);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3390 }
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee 2010-12-15 3391 else if (intmask & SDHCI_INT_DATA_END_BIT)
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee 2010-12-15 3392 host->data->error = -EILSEQ;
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee 2010-12-15 3393 else if ((intmask & SDHCI_INT_DATA_CRC) &&
22113efd0049131 drivers/mmc/host/sdhci.c Aries Lee 2010-12-15 3394 SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND))
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3395 != MMC_BUS_TEST_R) {
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman 2007-07-22 3396 host->data->error = -EILSEQ;
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 @3397 if (host->cmd->opcode != MMC_SEND_TUNING_BLOCK ||
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3398 host->cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200)
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3399 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_DAT_CRC);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3400 }
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3401 else if (intmask & SDHCI_INT_ADMA_ERROR) {
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3402 pr_err("%s: ADMA error: 0x%08x\n", mmc_hostname(host->mmc),
d1c536e3177390d drivers/mmc/host/sdhci.c Russell King 2019-09-22 3403 intmask);
08621b18a15ee21 drivers/mmc/host/sdhci.c Adrian Hunter 2014-11-04 3404 sdhci_adma_show_error(host);
53118c31c49fbda drivers/mmc/host/sdhci.c Shaik Sajida Bhanu 2021-12-14 3405 mmc_debugfs_err_stats_inc(host->mmc, MMC_ERR_ADMA);
2134a922c6e75c7 drivers/mmc/host/sdhci.c Pierre Ossman 2008-06-28 3406 host->data->error = -EIO;
a4071fbbb9edbc5 drivers/mmc/host/sdhci.c Haijun Zhang 2012-12-04 3407 if (host->ops->adma_workaround)
a4071fbbb9edbc5 drivers/mmc/host/sdhci.c Haijun Zhang 2012-12-04 3408 host->ops->adma_workaround(host, intmask);
6882a8c071d609f drivers/mmc/host/sdhci.c Ben Dooks 2009-06-14 3409 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3410
17b0429dde9ab60 drivers/mmc/host/sdhci.c Pierre Ossman 2007-07-22 3411 if (host->data->error)
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3412 sdhci_finish_data(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3413 else {
a406f5a3b68ee1d drivers/mmc/sdhci.c Pierre Ossman 2006-07-02 3414 if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL))
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3415 sdhci_transfer_pio(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3416
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman 2007-05-13 3417 /*
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman 2007-05-13 3418 * We currently don't do anything fancy with DMA
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman 2007-05-13 3419 * boundaries, but as we can't disable the feature
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman 2007-05-13 3420 * we need to at least restart the transfer.
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3421 *
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3422 * According to the spec sdhci_readl(host, SDHCI_DMA_ADDRESS)
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3423 * should return a valid address to continue from, but as
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3424 * some controllers are faulty, don't trust them.
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman 2007-05-13 3425 */
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3426 if (intmask & SDHCI_INT_DMA_END) {
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang 2018-08-30 3427 dma_addr_t dmastart, dmanow;
bd9b902798ab14d drivers/mmc/host/sdhci.c Linus Walleij 2018-01-29 3428
bd9b902798ab14d drivers/mmc/host/sdhci.c Linus Walleij 2018-01-29 3429 dmastart = sdhci_sdma_address(host);
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3430 dmanow = dmastart + host->data->bytes_xfered;
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3431 /*
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3432 * Force update to the next DMA block boundary.
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3433 */
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3434 dmanow = (dmanow &
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang 2018-08-30 3435 ~((dma_addr_t)SDHCI_DEFAULT_BOUNDARY_SIZE - 1)) +
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3436 SDHCI_DEFAULT_BOUNDARY_SIZE;
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3437 host->data->bytes_xfered = dmanow - dmastart;
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang 2018-08-30 3438 DBG("DMA base %pad, transferred 0x%06x bytes, next %pad\n",
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang 2018-08-30 3439 &dmastart, host->data->bytes_xfered, &dmanow);
917a0c52d6c3b47 drivers/mmc/host/sdhci.c Chunyan Zhang 2018-08-30 3440 sdhci_set_sdma_addr(host, dmanow);
f6a03cbf43e5862 drivers/mmc/host/sdhci.c Mikko Vinni 2011-04-12 3441 }
6ba736a10e4ae63 drivers/mmc/host/sdhci.c Pierre Ossman 2007-05-13 3442
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3443 if (intmask & SDHCI_INT_DATA_END) {
7c89a3d9082c316 drivers/mmc/host/sdhci.c Adrian Hunter 2016-06-29 3444 if (host->cmd == host->data_cmd) {
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3445 /*
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3446 * Data managed to finish before the
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3447 * command completed. Make sure we do
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3448 * things in the proper order.
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3449 */
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3450 host->data_early = 1;
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3451 } else {
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3452 sdhci_finish_data(host);
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3453 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3454 }
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3455 }
e538fbe83e374a3 drivers/mmc/host/sdhci.c Pierre Ossman 2007-08-12 3456 }
d129bceb1d44ed3 drivers/mmc/sdhci.c Pierre Ossman 2006-03-24 3457
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all(a)lists.01.org
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2022-01-10 15:01 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-14 14:41 [PATCH V2] mmc: debugfs: add error statistics Shaik Sajida Bhanu
2021-12-15 14:03 ` Adrian Hunter
2021-12-21 7:16 ` Sajida Bhanu (Temp) (QUIC)
2021-12-29 7:36 ` Sajida Bhanu (Temp) (QUIC)
2022-01-03 9:50 ` Adrian Hunter
2022-01-04 15:02 ` Sajida Bhanu (Temp) (QUIC)
2022-01-07 7:42 ` Adrian Hunter
2022-01-10 13:11 ` Sajida Bhanu (Temp) (QUIC)
2022-01-10 13:29 ` Adrian Hunter
2022-01-10 14:59 ` Sajida Bhanu (Temp) (QUIC)
2021-12-14 21:11 kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.