linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
@ 2019-10-23  5:29 Can Guo
  2019-10-23  7:25 ` Winkler, Tomas
  2019-10-23  9:33 ` kbuild test robot
  0 siblings, 2 replies; 9+ messages in thread
From: Can Guo @ 2019-10-23  5:29 UTC (permalink / raw)
  To: asutoshd, nguyenb, rnayak, linux-scsi, kernel-team, saravanak,
	salyzyn, cang
  Cc: Alim Akhtar, Avri Altman, Pedro Sousa, James E.J. Bottomley,
	Martin K. Petersen, Evan Green, Janek Kotas, Stanley Chu,
	Bean Huo, Tomas Winkler, Subhash Jadavani, Arnd Bergmann,
	Bjorn Andersson, open list

Add the necessary infrastructure to keep timestamp history
of commands, events and other useful info for debugging
complex issues. This helps in diagnosing events leading
upto failure.

Signed-off-by: Can Guo <cang@codeaurora.org>
---
 drivers/scsi/ufs/Kconfig  |  12 +++
 drivers/scsi/ufs/ufshcd.c | 214 +++++++++++++++++++++++++++++++++++++++-------
 drivers/scsi/ufs/ufshcd.h |  24 +++++-
 3 files changed, 218 insertions(+), 32 deletions(-)

diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 0b845ab..afc70cb 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -50,6 +50,18 @@ config SCSI_UFSHCD
 	  However, do not compile this as a module if your root file system
 	  (the one containing the directory /) is located on a UFS device.
 
+config SCSI_UFSHCD_CMD_LOGGING
+	bool "Universal Flash Storage host controller driver layer command logging support"
+	depends on SCSI_UFSHCD
+	help
+	  This selects the UFS host controller driver layer command logging.
+	  UFS host controller driver layer command logging records all the
+	  command information sent from UFS host controller for debugging
+	  purpose.
+
+	  Select this if you want above mentioned debug information captured.
+	  If unsure, say N.
+
 config SCSI_UFSHCD_PCI
 	tristate "PCI bus based UFS Controller support"
 	depends on SCSI_UFSHCD && PCI
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index c28c144..f3faa85 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -91,6 +91,9 @@
 /* default delay of autosuspend: 2000 ms */
 #define RPM_AUTOSUSPEND_DELAY_MS 2000
 
+/* Maximum command logging entries */
+#define UFSHCD_MAX_CMD_LOGGING	20
+
 #define ufshcd_toggle_vreg(_dev, _vreg, _on)				\
 	({                                                              \
 		int _ret;                                               \
@@ -328,14 +331,135 @@ static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag,
 			&descp->input_param1);
 }
 
-static void ufshcd_add_command_trace(struct ufs_hba *hba,
-		unsigned int tag, const char *str)
+static inline void ufshcd_add_command_trace(struct ufs_hba *hba,
+			struct ufshcd_cmd_log_entry *entry)
+{
+	if (trace_ufshcd_command_enabled()) {
+		u32 intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
+
+		trace_ufshcd_command(dev_name(hba->dev), entry->str, entry->tag,
+				     entry->doorbell, entry->transfer_len, intr,
+				     entry->lba, entry->cmd_id);
+	}
+}
+
+#ifdef CONFIG_SCSI_UFSHCD_CMD_LOGGING
+static void ufshcd_cmd_log_init(struct ufs_hba *hba)
+{
+	/* Allocate log entries */
+	if (!hba->cmd_log.entries) {
+		hba->cmd_log.entries = kcalloc(UFSHCD_MAX_CMD_LOGGING,
+			sizeof(struct ufshcd_cmd_log_entry), GFP_KERNEL);
+		if (!hba->cmd_log.entries)
+			return;
+		dev_dbg(hba->dev, "%s: cmd_log.entries initialized\n",
+				__func__);
+	}
+}
+
+static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
+			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
+			     sector_t lba, int transfer_len)
+{
+	struct ufshcd_cmd_log_entry *entry;
+
+	if (!hba->cmd_log.entries)
+		return;
+
+	entry = &hba->cmd_log.entries[hba->cmd_log.pos];
+	entry->lun = lun;
+	entry->str = str;
+	entry->cmd_type = cmd_type;
+	entry->cmd_id = cmd_id;
+	entry->lba = lba;
+	entry->transfer_len = transfer_len;
+	entry->idn = idn;
+	entry->doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	entry->tag = tag;
+	entry->tstamp = ktime_get();
+	entry->outstanding_reqs = hba->outstanding_reqs;
+	entry->seq_num = hba->cmd_log.seq_num;
+	hba->cmd_log.seq_num++;
+	hba->cmd_log.pos = (hba->cmd_log.pos + 1) % UFSHCD_MAX_CMD_LOGGING;
+
+	ufshcd_add_command_trace(hba, entry);
+}
+
+static void ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
+	unsigned int tag, u8 cmd_id, u8 idn)
+{
+	__ufshcd_cmd_log(hba, str, cmd_type, tag, cmd_id, idn, 0, 0, 0);
+}
+
+static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8 cmd_id)
+{
+	ufshcd_cmd_log(hba, str, "dme", 0, cmd_id, 0);
+}
+
+static void ufshcd_print_cmd_log(struct ufs_hba *hba)
+{
+	int i;
+	int pos;
+	struct ufshcd_cmd_log_entry *p;
+
+	if (!hba->cmd_log.entries)
+		return;
+
+	pos = hba->cmd_log.pos;
+	for (i = 0; i < UFSHCD_MAX_CMD_LOGGING; i++) {
+		p = &hba->cmd_log.entries[pos];
+		pos = (pos + 1) % UFSHCD_MAX_CMD_LOGGING;
+
+		if (ktime_to_us(p->tstamp)) {
+			pr_err("%s: %s: seq_no=%u lun=0x%x cmd_id=0x%02x lba=0x%llx txfer_len=%d tag=%u, doorbell=0x%x outstanding=0x%x idn=%d time=%lld us\n",
+				p->cmd_type, p->str, p->seq_num,
+				p->lun, p->cmd_id, (unsigned long long)p->lba,
+				p->transfer_len, p->tag, p->doorbell,
+				p->outstanding_reqs, p->idn,
+				ktime_to_us(p->tstamp));
+				usleep_range(1000, 1100);
+		}
+	}
+}
+#else
+static void ufshcd_cmd_log_init(struct ufs_hba *hba)
+{
+}
+
+static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
+			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
+			     sector_t lba, int transfer_len)
+{
+	struct ufshcd_cmd_log_entry entry;
+
+	entry.str = str;
+	entry.lba = lba;
+	entry.cmd_id = cmd_id;
+	entry.transfer_len = transfer_len;
+	entry.doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	entry.tag = tag;
+
+	ufshcd_add_command_trace(hba, &entry);
+}
+
+static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8 cmd_id)
+{
+}
+
+static void ufshcd_print_cmd_log(struct ufs_hba *hba)
+{
+}
+#endif
+
+static inline void ufshcd_cond_add_cmd_trace(struct ufs_hba *hba,
+					unsigned int tag, const char *str)
 {
-	sector_t lba = -1;
-	u8 opcode = 0;
-	u32 intr, doorbell;
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
-	int transfer_len = -1;
+	char *cmd_type = NULL;
+	u8 opcode = 0;
+	u8 cmd_id = 0, idn = 0;
+	sector_t lba = 0;
+	int transfer_len = 0;
 
 	if (!trace_ufshcd_command_enabled()) {
 		/* trace UPIU W/O tracing command */
@@ -361,10 +485,23 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba,
 		}
 	}
 
-	intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
-	doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	trace_ufshcd_command(dev_name(hba->dev), str, tag,
-				doorbell, transfer_len, intr, lba, opcode);
+	if (lrbp->cmd && ((lrbp->command_type == UTP_CMD_TYPE_SCSI) ||
+			  (lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE))) {
+		cmd_type = "scsi";
+		cmd_id = (u8)(*lrbp->cmd->cmnd);
+	} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) {
+		if (hba->dev_cmd.type == DEV_CMD_TYPE_NOP) {
+			cmd_type = "nop";
+			cmd_id = 0;
+		} else if (hba->dev_cmd.type == DEV_CMD_TYPE_QUERY) {
+			cmd_type = "query";
+			cmd_id = hba->dev_cmd.query.request.upiu_req.opcode;
+			idn = hba->dev_cmd.query.request.upiu_req.idn;
+		}
+	}
+
+	__ufshcd_cmd_log(hba, (char *) str, cmd_type, tag, cmd_id, idn,
+			 lrbp->lun, lba, transfer_len);
 }
 
 static void ufshcd_print_clk_freqs(struct ufs_hba *hba)
@@ -1886,7 +2023,8 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 	ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
 	/* Make sure that doorbell is committed immediately */
 	wmb();
-	ufshcd_add_command_trace(hba, task_tag, "send");
+	ufshcd_cond_add_cmd_trace(hba, task_tag,
+			hba->lrb[task_tag].cmd ? "scsi_send" : "dev_cmd_send");
 }
 
 /**
@@ -2001,6 +2139,7 @@ static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba)
 
 	hba->active_uic_cmd = uic_cmd;
 
+	ufshcd_dme_cmd_log(hba, "dme_send", hba->active_uic_cmd->command);
 	/* Write Args */
 	ufshcd_writel(hba, uic_cmd->argument1, REG_UIC_COMMAND_ARG_1);
 	ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2);
@@ -2031,6 +2170,8 @@ static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba)
 	else
 		ret = -ETIMEDOUT;
 
+	ufshcd_dme_cmd_log(hba, "dme_cmpl_1", hba->active_uic_cmd->command);
+
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->active_uic_cmd = NULL;
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
@@ -3797,11 +3938,14 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd)
 			cmd->command, status);
 		ret = (status != PWR_OK) ? status : -1;
 	}
+	ufshcd_dme_cmd_log(hba, "dme_cmpl_2", hba->active_uic_cmd->command);
+
 out:
 	if (ret) {
 		ufshcd_print_host_state(hba);
 		ufshcd_print_pwr_info(hba);
 		ufshcd_print_host_regs(hba);
+		ufshcd_print_cmd_log(hba);
 	}
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
@@ -4712,6 +4856,7 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
 	int result = 0;
 	int scsi_status;
 	int ocs;
+	bool print_prdt;
 
 	/* overall command status of utrd */
 	ocs = ufshcd_get_tr_ocs(lrbp);
@@ -4787,8 +4932,11 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
 		break;
 	} /* end of switch */
 
-	if (host_byte(result) != DID_OK)
-		ufshcd_print_trs(hba, 1 << lrbp->task_tag, true);
+	if (host_byte(result) != DID_OK) {
+		print_prdt = (ocs == OCS_INVALID_PRDT_ATTR ||
+				ocs == OCS_MISMATCH_DATA_BUF_SIZE);
+		ufshcd_print_trs(hba, 1 << lrbp->task_tag, print_prdt);
+	}
 	return result;
 }
 
@@ -4828,7 +4976,7 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 		lrbp = &hba->lrb[index];
 		cmd = lrbp->cmd;
 		if (cmd) {
-			ufshcd_add_command_trace(hba, index, "complete");
+			ufshcd_cond_add_cmd_trace(hba, index, "scsi_cmpl");
 			result = ufshcd_transfer_rsp_status(hba, lrbp);
 			scsi_dma_unmap(cmd);
 			cmd->result = result;
@@ -4841,8 +4989,8 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 		} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
 			lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
 			if (hba->dev_cmd.complete) {
-				ufshcd_add_command_trace(hba, index,
-						"dev_complete");
+				ufshcd_cond_add_cmd_trace(hba, index,
+						"dev_cmd_cmpl");
 				complete(hba->dev_cmd.complete);
 			}
 		}
@@ -5307,6 +5455,23 @@ static void ufshcd_err_handler(struct work_struct *work)
 		if (!ret)
 			goto skip_err_handling;
 	}
+
+	/*
+	 * Dump controller state before resetting. Transfer requests state
+	 * will be dump as part of the request completion.
+	 */
+	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
+		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
+			__func__, hba->saved_err, hba->saved_uic_err);
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		ufshcd_print_host_regs(hba);
+		ufshcd_print_host_state(hba);
+		ufshcd_print_pwr_info(hba);
+		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
+		ufshcd_print_cmd_log(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+	}
+
 	if ((hba->saved_err & INT_FATAL_ERRORS) ||
 	    (hba->saved_err & UFSHCD_UIC_HIBERN8_MASK) ||
 	    ((hba->saved_err & UIC_ERROR) &&
@@ -5523,21 +5688,6 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
 
 			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
 
-			/* dump controller state before resetting */
-			if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
-				bool pr_prdt = !!(hba->saved_err &
-						SYSTEM_BUS_FATAL_ERROR);
-
-				dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err 0x%x\n",
-					__func__, hba->saved_err,
-					hba->saved_uic_err);
-
-				ufshcd_print_host_regs(hba);
-				ufshcd_print_pwr_info(hba);
-				ufshcd_print_tmrs(hba, hba->outstanding_tasks);
-				ufshcd_print_trs(hba, hba->outstanding_reqs,
-							pr_prdt);
-			}
 			schedule_work(&hba->eh_work);
 		}
 	}
@@ -8425,6 +8575,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	 */
 	ufshcd_set_ufs_dev_active(hba);
 
+	ufshcd_cmd_log_init(hba);
+
 	async_schedule(ufshcd_async_scan, hba);
 	ufs_sysfs_add_nodes(hba->dev);
 
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e0fe247..97cc5b0 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -414,7 +414,7 @@ struct ufs_init_prefetch {
 	u32 icc_level;
 };
 
-#define UFS_ERR_REG_HIST_LENGTH 8
+#define UIC_ERR_REG_HIST_LENGTH 20
 /**
  * struct ufs_err_reg_hist - keeps history of errors
  * @pos: index to indicate cyclic buffer position
@@ -471,6 +471,27 @@ struct ufs_stats {
 	struct ufs_err_reg_hist task_abort;
 };
 
+struct ufshcd_cmd_log_entry {
+	char *str;	/* context like "send", "complete" */
+	char *cmd_type;	/* "scsi", "query", "nop", "dme" */
+	u8 lun;
+	u8 cmd_id;
+	sector_t lba;
+	int transfer_len;
+	u8 idn;		/* used only for query idn */
+	u32 doorbell;
+	u32 outstanding_reqs;
+	u32 seq_num;
+	unsigned int tag;
+	ktime_t tstamp;
+};
+
+struct ufshcd_cmd_log {
+	struct ufshcd_cmd_log_entry *entries;
+	int pos;
+	u32 seq_num;
+};
+
 /**
  * struct ufs_hba - per adapter private structure
  * @mmio_base: UFSHCI base register address
@@ -692,6 +713,7 @@ struct ufs_hba {
 	struct ufs_pwr_mode_info max_pwr_info;
 
 	struct ufs_clk_gating clk_gating;
+	struct ufshcd_cmd_log cmd_log;
 	/* Control to enable/disable host capabilities */
 	u32 caps;
 	/* Allow dynamic clk gating */
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2019-10-23  5:29 [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure Can Guo
@ 2019-10-23  7:25 ` Winkler, Tomas
  2019-10-23 10:33   ` Avri Altman
  2019-10-23  9:33 ` kbuild test robot
  1 sibling, 1 reply; 9+ messages in thread
From: Winkler, Tomas @ 2019-10-23  7:25 UTC (permalink / raw)
  To: Can Guo, asutoshd, nguyenb, rnayak, linux-scsi, kernel-team,
	saravanak, salyzyn
  Cc: Alim Akhtar, Avri Altman, Pedro Sousa, James E.J. Bottomley,
	Martin K. Petersen, Evan Green, Janek Kotas, Stanley Chu,
	Bean Huo, Subhash Jadavani, Arnd Bergmann, Bjorn Andersson,
	open list

> Add the necessary infrastructure to keep timestamp history of commands,
> events and other useful info for debugging complex issues. This helps in
> diagnosing events leading upto failure.

Why not use tracepoints, for that?
Thanks
Tomas

> Signed-off-by: Can Guo <cang@codeaurora.org>
> ---
>  drivers/scsi/ufs/Kconfig  |  12 +++
>  drivers/scsi/ufs/ufshcd.c | 214
> +++++++++++++++++++++++++++++++++++++++-------
>  drivers/scsi/ufs/ufshcd.h |  24 +++++-
>  3 files changed, 218 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index
> 0b845ab..afc70cb 100644
> --- a/drivers/scsi/ufs/Kconfig
> +++ b/drivers/scsi/ufs/Kconfig
> @@ -50,6 +50,18 @@ config SCSI_UFSHCD
>  	  However, do not compile this as a module if your root file system
>  	  (the one containing the directory /) is located on a UFS device.
> 
> +config SCSI_UFSHCD_CMD_LOGGING
> +	bool "Universal Flash Storage host controller driver layer command
> logging support"
> +	depends on SCSI_UFSHCD
> +	help
> +	  This selects the UFS host controller driver layer command logging.
> +	  UFS host controller driver layer command logging records all the
> +	  command information sent from UFS host controller for debugging
> +	  purpose.
> +
> +	  Select this if you want above mentioned debug information captured.
> +	  If unsure, say N.
> +
>  config SCSI_UFSHCD_PCI
>  	tristate "PCI bus based UFS Controller support"
>  	depends on SCSI_UFSHCD && PCI
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index
> c28c144..f3faa85 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -91,6 +91,9 @@
>  /* default delay of autosuspend: 2000 ms */  #define
> RPM_AUTOSUSPEND_DELAY_MS 2000
> 
> +/* Maximum command logging entries */
> +#define UFSHCD_MAX_CMD_LOGGING	20
> +
>  #define ufshcd_toggle_vreg(_dev, _vreg, _on)				\
>  	({                                                              \
>  		int _ret;                                               \
> @@ -328,14 +331,135 @@ static void ufshcd_add_tm_upiu_trace(struct
> ufs_hba *hba, unsigned int tag,
>  			&descp->input_param1);
>  }
> 
> -static void ufshcd_add_command_trace(struct ufs_hba *hba,
> -		unsigned int tag, const char *str)
> +static inline void ufshcd_add_command_trace(struct ufs_hba *hba,
> +			struct ufshcd_cmd_log_entry *entry)
> +{
> +	if (trace_ufshcd_command_enabled()) {
> +		u32 intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
> +
> +		trace_ufshcd_command(dev_name(hba->dev), entry->str,
> entry->tag,
> +				     entry->doorbell, entry->transfer_len, intr,
> +				     entry->lba, entry->cmd_id);
> +	}
> +}
> +
> +#ifdef CONFIG_SCSI_UFSHCD_CMD_LOGGING
> +static void ufshcd_cmd_log_init(struct ufs_hba *hba) {
> +	/* Allocate log entries */
> +	if (!hba->cmd_log.entries) {
> +		hba->cmd_log.entries =
> kcalloc(UFSHCD_MAX_CMD_LOGGING,
> +			sizeof(struct ufshcd_cmd_log_entry), GFP_KERNEL);
> +		if (!hba->cmd_log.entries)
> +			return;
> +		dev_dbg(hba->dev, "%s: cmd_log.entries initialized\n",
> +				__func__);
> +	}
> +}
> +
> +static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
> +			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
> +			     sector_t lba, int transfer_len) {
> +	struct ufshcd_cmd_log_entry *entry;
> +
> +	if (!hba->cmd_log.entries)
> +		return;
> +
> +	entry = &hba->cmd_log.entries[hba->cmd_log.pos];
> +	entry->lun = lun;
> +	entry->str = str;
> +	entry->cmd_type = cmd_type;
> +	entry->cmd_id = cmd_id;
> +	entry->lba = lba;
> +	entry->transfer_len = transfer_len;
> +	entry->idn = idn;
> +	entry->doorbell = ufshcd_readl(hba,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +	entry->tag = tag;
> +	entry->tstamp = ktime_get();
> +	entry->outstanding_reqs = hba->outstanding_reqs;
> +	entry->seq_num = hba->cmd_log.seq_num;
> +	hba->cmd_log.seq_num++;
> +	hba->cmd_log.pos = (hba->cmd_log.pos + 1) %
> UFSHCD_MAX_CMD_LOGGING;
> +
> +	ufshcd_add_command_trace(hba, entry);
> +}
> +
> +static void ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
> +	unsigned int tag, u8 cmd_id, u8 idn)
> +{
> +	__ufshcd_cmd_log(hba, str, cmd_type, tag, cmd_id, idn, 0, 0, 0); }
> +
> +static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8
> +cmd_id) {
> +	ufshcd_cmd_log(hba, str, "dme", 0, cmd_id, 0); }
> +
> +static void ufshcd_print_cmd_log(struct ufs_hba *hba) {
> +	int i;
> +	int pos;
> +	struct ufshcd_cmd_log_entry *p;
> +
> +	if (!hba->cmd_log.entries)
> +		return;
> +
> +	pos = hba->cmd_log.pos;
> +	for (i = 0; i < UFSHCD_MAX_CMD_LOGGING; i++) {
> +		p = &hba->cmd_log.entries[pos];
> +		pos = (pos + 1) % UFSHCD_MAX_CMD_LOGGING;
> +
> +		if (ktime_to_us(p->tstamp)) {
> +			pr_err("%s: %s: seq_no=%u lun=0x%x cmd_id=0x%02x
> lba=0x%llx txfer_len=%d tag=%u, doorbell=0x%x outstanding=0x%x idn=%d
> time=%lld us\n",
> +				p->cmd_type, p->str, p->seq_num,
> +				p->lun, p->cmd_id, (unsigned long long)p->lba,
> +				p->transfer_len, p->tag, p->doorbell,
> +				p->outstanding_reqs, p->idn,
> +				ktime_to_us(p->tstamp));
> +				usleep_range(1000, 1100);
> +		}
> +	}
> +}
> +#else
> +static void ufshcd_cmd_log_init(struct ufs_hba *hba) { }
> +
> +static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
> +			     unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
> +			     sector_t lba, int transfer_len) {
> +	struct ufshcd_cmd_log_entry entry;
> +
> +	entry.str = str;
> +	entry.lba = lba;
> +	entry.cmd_id = cmd_id;
> +	entry.transfer_len = transfer_len;
> +	entry.doorbell = ufshcd_readl(hba,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +	entry.tag = tag;
> +
> +	ufshcd_add_command_trace(hba, &entry); }
> +
> +static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8
> +cmd_id) { }
> +
> +static void ufshcd_print_cmd_log(struct ufs_hba *hba) { } #endif
> +
> +static inline void ufshcd_cond_add_cmd_trace(struct ufs_hba *hba,
> +					unsigned int tag, const char *str)
>  {
> -	sector_t lba = -1;
> -	u8 opcode = 0;
> -	u32 intr, doorbell;
>  	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
> -	int transfer_len = -1;
> +	char *cmd_type = NULL;
> +	u8 opcode = 0;
> +	u8 cmd_id = 0, idn = 0;
> +	sector_t lba = 0;
> +	int transfer_len = 0;
> 
>  	if (!trace_ufshcd_command_enabled()) {
>  		/* trace UPIU W/O tracing command */
> @@ -361,10 +485,23 @@ static void ufshcd_add_command_trace(struct
> ufs_hba *hba,
>  		}
>  	}
> 
> -	intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
> -	doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -	trace_ufshcd_command(dev_name(hba->dev), str, tag,
> -				doorbell, transfer_len, intr, lba, opcode);
> +	if (lrbp->cmd && ((lrbp->command_type == UTP_CMD_TYPE_SCSI) ||
> +			  (lrbp->command_type ==
> UTP_CMD_TYPE_UFS_STORAGE))) {
> +		cmd_type = "scsi";
> +		cmd_id = (u8)(*lrbp->cmd->cmnd);
> +	} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) {
> +		if (hba->dev_cmd.type == DEV_CMD_TYPE_NOP) {
> +			cmd_type = "nop";
> +			cmd_id = 0;
> +		} else if (hba->dev_cmd.type == DEV_CMD_TYPE_QUERY) {
> +			cmd_type = "query";
> +			cmd_id = hba-
> >dev_cmd.query.request.upiu_req.opcode;
> +			idn = hba->dev_cmd.query.request.upiu_req.idn;
> +		}
> +	}
> +
> +	__ufshcd_cmd_log(hba, (char *) str, cmd_type, tag, cmd_id, idn,
> +			 lrbp->lun, lba, transfer_len);
>  }
> 
>  static void ufshcd_print_clk_freqs(struct ufs_hba *hba) @@ -1886,7 +2023,8
> @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
>  	ufshcd_writel(hba, 1 << task_tag,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
>  	/* Make sure that doorbell is committed immediately */
>  	wmb();
> -	ufshcd_add_command_trace(hba, task_tag, "send");
> +	ufshcd_cond_add_cmd_trace(hba, task_tag,
> +			hba->lrb[task_tag].cmd ? "scsi_send" :
> "dev_cmd_send");
>  }
> 
>  /**
> @@ -2001,6 +2139,7 @@ static inline u8 ufshcd_get_upmcrs(struct ufs_hba
> *hba)
> 
>  	hba->active_uic_cmd = uic_cmd;
> 
> +	ufshcd_dme_cmd_log(hba, "dme_send", hba->active_uic_cmd-
> >command);
>  	/* Write Args */
>  	ufshcd_writel(hba, uic_cmd->argument1,
> REG_UIC_COMMAND_ARG_1);
>  	ufshcd_writel(hba, uic_cmd->argument2,
> REG_UIC_COMMAND_ARG_2); @@ -2031,6 +2170,8 @@ static inline u8
> ufshcd_get_upmcrs(struct ufs_hba *hba)
>  	else
>  		ret = -ETIMEDOUT;
> 
> +	ufshcd_dme_cmd_log(hba, "dme_cmpl_1", hba->active_uic_cmd-
> >command);
> +
>  	spin_lock_irqsave(hba->host->host_lock, flags);
>  	hba->active_uic_cmd = NULL;
>  	spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -3797,11
> +3938,14 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct
> uic_command *cmd)
>  			cmd->command, status);
>  		ret = (status != PWR_OK) ? status : -1;
>  	}
> +	ufshcd_dme_cmd_log(hba, "dme_cmpl_2", hba->active_uic_cmd-
> >command);
> +
>  out:
>  	if (ret) {
>  		ufshcd_print_host_state(hba);
>  		ufshcd_print_pwr_info(hba);
>  		ufshcd_print_host_regs(hba);
> +		ufshcd_print_cmd_log(hba);
>  	}
> 
>  	spin_lock_irqsave(hba->host->host_lock, flags); @@ -4712,6 +4856,7
> @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
>  	int result = 0;
>  	int scsi_status;
>  	int ocs;
> +	bool print_prdt;
> 
>  	/* overall command status of utrd */
>  	ocs = ufshcd_get_tr_ocs(lrbp);
> @@ -4787,8 +4932,11 @@ static void ufshcd_slave_destroy(struct scsi_device
> *sdev)
>  		break;
>  	} /* end of switch */
> 
> -	if (host_byte(result) != DID_OK)
> -		ufshcd_print_trs(hba, 1 << lrbp->task_tag, true);
> +	if (host_byte(result) != DID_OK) {
> +		print_prdt = (ocs == OCS_INVALID_PRDT_ATTR ||
> +				ocs == OCS_MISMATCH_DATA_BUF_SIZE);
> +		ufshcd_print_trs(hba, 1 << lrbp->task_tag, print_prdt);
> +	}
>  	return result;
>  }
> 
> @@ -4828,7 +4976,7 @@ static void __ufshcd_transfer_req_compl(struct
> ufs_hba *hba,
>  		lrbp = &hba->lrb[index];
>  		cmd = lrbp->cmd;
>  		if (cmd) {
> -			ufshcd_add_command_trace(hba, index, "complete");
> +			ufshcd_cond_add_cmd_trace(hba, index, "scsi_cmpl");
>  			result = ufshcd_transfer_rsp_status(hba, lrbp);
>  			scsi_dma_unmap(cmd);
>  			cmd->result = result;
> @@ -4841,8 +4989,8 @@ static void __ufshcd_transfer_req_compl(struct
> ufs_hba *hba,
>  		} else if (lrbp->command_type ==
> UTP_CMD_TYPE_DEV_MANAGE ||
>  			lrbp->command_type ==
> UTP_CMD_TYPE_UFS_STORAGE) {
>  			if (hba->dev_cmd.complete) {
> -				ufshcd_add_command_trace(hba, index,
> -						"dev_complete");
> +				ufshcd_cond_add_cmd_trace(hba, index,
> +						"dev_cmd_cmpl");
>  				complete(hba->dev_cmd.complete);
>  			}
>  		}
> @@ -5307,6 +5455,23 @@ static void ufshcd_err_handler(struct work_struct
> *work)
>  		if (!ret)
>  			goto skip_err_handling;
>  	}
> +
> +	/*
> +	 * Dump controller state before resetting. Transfer requests state
> +	 * will be dump as part of the request completion.
> +	 */
> +	if (hba->saved_err & (INT_FATAL_ERRORS | UIC_ERROR)) {
> +		dev_err(hba->dev, "%s: saved_err 0x%x saved_uic_err
> 0x%x\n",
> +			__func__, hba->saved_err, hba->saved_uic_err);
> +		spin_unlock_irqrestore(hba->host->host_lock, flags);
> +		ufshcd_print_host_regs(hba);
> +		ufshcd_print_host_state(hba);
> +		ufshcd_print_pwr_info(hba);
> +		ufshcd_print_tmrs(hba, hba->outstanding_tasks);
> +		ufshcd_print_cmd_log(hba);
> +		spin_lock_irqsave(hba->host->host_lock, flags);
> +	}
> +
>  	if ((hba->saved_err & INT_FATAL_ERRORS) ||
>  	    (hba->saved_err & UFSHCD_UIC_HIBERN8_MASK) ||
>  	    ((hba->saved_err & UIC_ERROR) &&
> @@ -5523,21 +5688,6 @@ static void ufshcd_check_errors(struct ufs_hba
> *hba)
> 
>  			hba->ufshcd_state = UFSHCD_STATE_EH_SCHEDULED;
> 
> -			/* dump controller state before resetting */
> -			if (hba->saved_err & (INT_FATAL_ERRORS |
> UIC_ERROR)) {
> -				bool pr_prdt = !!(hba->saved_err &
> -						SYSTEM_BUS_FATAL_ERROR);
> -
> -				dev_err(hba->dev, "%s: saved_err 0x%x
> saved_uic_err 0x%x\n",
> -					__func__, hba->saved_err,
> -					hba->saved_uic_err);
> -
> -				ufshcd_print_host_regs(hba);
> -				ufshcd_print_pwr_info(hba);
> -				ufshcd_print_tmrs(hba, hba-
> >outstanding_tasks);
> -				ufshcd_print_trs(hba, hba->outstanding_reqs,
> -							pr_prdt);
> -			}
>  			schedule_work(&hba->eh_work);
>  		}
>  	}
> @@ -8425,6 +8575,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem
> *mmio_base, unsigned int irq)
>  	 */
>  	ufshcd_set_ufs_dev_active(hba);
> 
> +	ufshcd_cmd_log_init(hba);
> +
>  	async_schedule(ufshcd_async_scan, hba);
>  	ufs_sysfs_add_nodes(hba->dev);
> 
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index
> e0fe247..97cc5b0 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -414,7 +414,7 @@ struct ufs_init_prefetch {
>  	u32 icc_level;
>  };
> 
> -#define UFS_ERR_REG_HIST_LENGTH 8
> +#define UIC_ERR_REG_HIST_LENGTH 20
>  /**
>   * struct ufs_err_reg_hist - keeps history of errors
>   * @pos: index to indicate cyclic buffer position @@ -471,6 +471,27 @@ struct
> ufs_stats {
>  	struct ufs_err_reg_hist task_abort;
>  };
> 
> +struct ufshcd_cmd_log_entry {
> +	char *str;	/* context like "send", "complete" */
> +	char *cmd_type;	/* "scsi", "query", "nop", "dme" */
> +	u8 lun;
> +	u8 cmd_id;
> +	sector_t lba;
> +	int transfer_len;
> +	u8 idn;		/* used only for query idn */
> +	u32 doorbell;
> +	u32 outstanding_reqs;
> +	u32 seq_num;
> +	unsigned int tag;
> +	ktime_t tstamp;
> +};
> +
> +struct ufshcd_cmd_log {
> +	struct ufshcd_cmd_log_entry *entries;
> +	int pos;
> +	u32 seq_num;
> +};
> +
>  /**
>   * struct ufs_hba - per adapter private structure
>   * @mmio_base: UFSHCI base register address @@ -692,6 +713,7 @@ struct
> ufs_hba {
>  	struct ufs_pwr_mode_info max_pwr_info;
> 
>  	struct ufs_clk_gating clk_gating;
> +	struct ufshcd_cmd_log cmd_log;
>  	/* Control to enable/disable host capabilities */
>  	u32 caps;
>  	/* Allow dynamic clk gating */
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2019-10-23  5:29 [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure Can Guo
  2019-10-23  7:25 ` Winkler, Tomas
@ 2019-10-23  9:33 ` kbuild test robot
  1 sibling, 0 replies; 9+ messages in thread
From: kbuild test robot @ 2019-10-23  9:33 UTC (permalink / raw)
  To: Can Guo
  Cc: kbuild-all, asutoshd, nguyenb, rnayak, linux-scsi, kernel-team,
	saravanak, salyzyn, cang, Alim Akhtar, Avri Altman, Pedro Sousa,
	James E.J. Bottomley, Martin K. Petersen, Evan Green,
	Janek Kotas, Stanley Chu, Bean Huo, Tomas Winkler,
	Subhash Jadavani, Arnd Bergmann, Bjorn Andersson, open list

[-- Attachment #1: Type: text/plain, Size: 2634 bytes --]

Hi Can,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on scsi/for-next]
[cannot apply to v5.4-rc4 next-20191023]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Can-Guo/scsi-ufs-Add-command-logging-infrastructure/20191023-151035
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: x86_64-rhel (attached as .config)
compiler: gcc-7 (Debian 7.4.0-14) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from drivers/scsi/ufs/ufshcd.c:45:0:
>> drivers/scsi/ufs/ufshcd.h:426:10: error: 'UFS_ERR_REG_HIST_LENGTH' undeclared here (not in a function); did you mean 'UIC_ERR_REG_HIST_LENGTH'?
     u32 reg[UFS_ERR_REG_HIST_LENGTH];
             ^~~~~~~~~~~~~~~~~~~~~~~
             UIC_ERR_REG_HIST_LENGTH

vim +426 drivers/scsi/ufs/ufshcd.h

3a4bf06d1f6d7d Yaniv Gardi 2014-09-25  416  
7425e502f13960 Can Guo     2019-10-22  417  #define UIC_ERR_REG_HIST_LENGTH 20
ff8e20c6624921 Dolev Raviv 2016-12-22  418  /**
d3c615bf586f27 Stanley Chu 2019-07-10  419   * struct ufs_err_reg_hist - keeps history of errors
ff8e20c6624921 Dolev Raviv 2016-12-22  420   * @pos: index to indicate cyclic buffer position
ff8e20c6624921 Dolev Raviv 2016-12-22  421   * @reg: cyclic buffer for registers value
ff8e20c6624921 Dolev Raviv 2016-12-22  422   * @tstamp: cyclic buffer for time stamp
ff8e20c6624921 Dolev Raviv 2016-12-22  423   */
48d5b9732580bc Stanley Chu 2019-07-10  424  struct ufs_err_reg_hist {
ff8e20c6624921 Dolev Raviv 2016-12-22  425  	int pos;
48d5b9732580bc Stanley Chu 2019-07-10 @426  	u32 reg[UFS_ERR_REG_HIST_LENGTH];
48d5b9732580bc Stanley Chu 2019-07-10  427  	ktime_t tstamp[UFS_ERR_REG_HIST_LENGTH];
ff8e20c6624921 Dolev Raviv 2016-12-22  428  };
ff8e20c6624921 Dolev Raviv 2016-12-22  429  

:::::: The code at line 426 was first introduced by commit
:::::: 48d5b9732580bca18fed9ef20da8aa210c646755 scsi: ufs: Change names related to error history

:::::: TO: Stanley Chu <stanley.chu@mediatek.com>
:::::: CC: Martin K. Petersen <martin.petersen@oracle.com>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 43803 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2019-10-23  7:25 ` Winkler, Tomas
@ 2019-10-23 10:33   ` Avri Altman
  2019-10-29  2:37     ` cang
  0 siblings, 1 reply; 9+ messages in thread
From: Avri Altman @ 2019-10-23 10:33 UTC (permalink / raw)
  To: Winkler, Tomas, Can Guo, asutoshd, nguyenb, rnayak, linux-scsi,
	kernel-team, saravanak, salyzyn
  Cc: Alim Akhtar, Pedro Sousa, James E.J. Bottomley,
	Martin K. Petersen, Evan Green, Janek Kotas, Stanley Chu,
	Bean Huo, Subhash Jadavani, Arnd Bergmann, Bjorn Andersson,
	open list

 
> 
> > Add the necessary infrastructure to keep timestamp history of
> > commands, events and other useful info for debugging complex issues.
> > This helps in diagnosing events leading upto failure.
> 
> Why not use tracepoints, for that?
Ack on Tomas's comment.
Are there any pieces of information that you need not provided by the upiu tracer?

Thanks,
Avri

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2019-10-23 10:33   ` Avri Altman
@ 2019-10-29  2:37     ` cang
  2019-10-29  5:46       ` Bjorn Andersson
  0 siblings, 1 reply; 9+ messages in thread
From: cang @ 2019-10-29  2:37 UTC (permalink / raw)
  To: Avri Altman
  Cc: Winkler, Tomas, asutoshd, nguyenb, rnayak, linux-scsi,
	kernel-team, saravanak, salyzyn, Alim Akhtar, Pedro Sousa,
	James E.J. Bottomley, Martin K. Petersen, Evan Green,
	Janek Kotas, Stanley Chu, Bean Huo, Subhash Jadavani,
	Arnd Bergmann, Bjorn Andersson, open list

On 2019-10-23 18:33, Avri Altman wrote:
>> 
>> > Add the necessary infrastructure to keep timestamp history of
>> > commands, events and other useful info for debugging complex issues.
>> > This helps in diagnosing events leading upto failure.
>> 
>> Why not use tracepoints, for that?
> Ack on Tomas's comment.
> Are there any pieces of information that you need not provided by the
> upiu tracer?
> 
> Thanks,
> Avri

In extreme cases, when the UFS runs into bad state, system may crash. 
There may not be a chance to collect trace. If trace is not collected 
and failure is hard to be reproduced, some command logs prints would be 
very helpful to help understand what was going on before we run into 
failure.

Thanks,
Can Guo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2019-10-29  2:37     ` cang
@ 2019-10-29  5:46       ` Bjorn Andersson
  2020-01-16 23:03         ` Asutosh Das (asd)
  0 siblings, 1 reply; 9+ messages in thread
From: Bjorn Andersson @ 2019-10-29  5:46 UTC (permalink / raw)
  To: cang
  Cc: Avri Altman, Winkler, Tomas, asutoshd, nguyenb, rnayak,
	linux-scsi, kernel-team, saravanak, salyzyn, Alim Akhtar,
	Pedro Sousa, James E.J. Bottomley, Martin K. Petersen,
	Evan Green, Janek Kotas, Stanley Chu, Bean Huo, Subhash Jadavani,
	Arnd Bergmann, open list

On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:

> On 2019-10-23 18:33, Avri Altman wrote:
> > > 
> > > > Add the necessary infrastructure to keep timestamp history of
> > > > commands, events and other useful info for debugging complex issues.
> > > > This helps in diagnosing events leading upto failure.
> > > 
> > > Why not use tracepoints, for that?
> > Ack on Tomas's comment.
> > Are there any pieces of information that you need not provided by the
> > upiu tracer?
> > 
> > Thanks,
> > Avri
> 
> In extreme cases, when the UFS runs into bad state, system may crash. There
> may not be a chance to collect trace. If trace is not collected and failure
> is hard to be reproduced, some command logs prints would be very helpful to
> help understand what was going on before we run into failure.
> 

This is a common problem shared among many/all subsystems, so it's
better to rely on a generic solution for this; such as using tracepoints
dumped into pstore/ramoops.

Regards,
Bjorn

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2019-10-29  5:46       ` Bjorn Andersson
@ 2020-01-16 23:03         ` Asutosh Das (asd)
  2020-01-17  3:43           ` Bart Van Assche
  0 siblings, 1 reply; 9+ messages in thread
From: Asutosh Das (asd) @ 2020-01-16 23:03 UTC (permalink / raw)
  To: Bjorn Andersson, cang
  Cc: Avri Altman, Winkler, Tomas, nguyenb, rnayak, linux-scsi,
	kernel-team, saravanak, salyzyn, Alim Akhtar, Pedro Sousa,
	James E.J. Bottomley, Martin K. Petersen, Evan Green,
	Janek Kotas, Stanley Chu, Bean Huo, Subhash Jadavani,
	Arnd Bergmann, open list

On 10/28/2019 10:46 PM, Bjorn Andersson wrote:
> On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:
> 
>> On 2019-10-23 18:33, Avri Altman wrote:
>>>>
>>>>> Add the necessary infrastructure to keep timestamp history of
>>>>> commands, events and other useful info for debugging complex issues.
>>>>> This helps in diagnosing events leading upto failure.
>>>>
>>>> Why not use tracepoints, for that?
>>> Ack on Tomas's comment.
>>> Are there any pieces of information that you need not provided by the
>>> upiu tracer?
>>>
>>> Thanks,
>>> Avri
>>
>> In extreme cases, when the UFS runs into bad state, system may crash. There
>> may not be a chance to collect trace. If trace is not collected and failure
>> is hard to be reproduced, some command logs prints would be very helpful to
>> help understand what was going on before we run into failure.
>>
> 
> This is a common problem shared among many/all subsystems, so it's
> better to rely on a generic solution for this; such as using tracepoints
> dumped into pstore/ramoops.
> 
> Regards,
> Bjorn
> 

Reviving this discussion.

Another issue with using ftrace is that several subsystems use it.
Consider a situation in which we store a history of 64 commands, 
responses and some other required info in ftrace.

Say there's a command that's stuck for seconds until the software times 
out. In this case, the other ftrace events are still enabled and keep 
writing to ftrace buffer thus overwriting some/most of the ufs's command 
history; thus the history is more or less lost. And there's a need to 
reproduce the issue with other tracing disabled.

So what we need is a client specific logging mechanism, which is 
lightweight as ftrace and can be parsed from ramdumps.

I'm open to ideas but ftrace in its current form may not be suitable for 
this.

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2020-01-16 23:03         ` Asutosh Das (asd)
@ 2020-01-17  3:43           ` Bart Van Assche
  2020-01-20 18:46             ` Asutosh Das (asd)
  0 siblings, 1 reply; 9+ messages in thread
From: Bart Van Assche @ 2020-01-17  3:43 UTC (permalink / raw)
  To: Asutosh Das (asd), Bjorn Andersson, cang
  Cc: Avri Altman, Winkler, Tomas, nguyenb, rnayak, linux-scsi,
	kernel-team, saravanak, salyzyn, Alim Akhtar, Pedro Sousa,
	James E.J. Bottomley, Martin K. Petersen, Evan Green,
	Janek Kotas, Stanley Chu, Bean Huo, Subhash Jadavani,
	Arnd Bergmann, open list

On 2020-01-16 15:03, Asutosh Das (asd) wrote:
> On 10/28/2019 10:46 PM, Bjorn Andersson wrote:
>> On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:
>>
>>> On 2019-10-23 18:33, Avri Altman wrote:
>>>>>
>>>>>> Add the necessary infrastructure to keep timestamp history of
>>>>>> commands, events and other useful info for debugging complex issues.
>>>>>> This helps in diagnosing events leading upto failure.
>>>>>
>>>>> Why not use tracepoints, for that?
>>>> Ack on Tomas's comment.
>>>> Are there any pieces of information that you need not provided by the
>>>> upiu tracer?
>>>>
>>>> Thanks,
>>>> Avri
>>>
>>> In extreme cases, when the UFS runs into bad state, system may crash.
>>> There
>>> may not be a chance to collect trace. If trace is not collected and
>>> failure
>>> is hard to be reproduced, some command logs prints would be very
>>> helpful to
>>> help understand what was going on before we run into failure.
>>>
>>
>> This is a common problem shared among many/all subsystems, so it's
>> better to rely on a generic solution for this; such as using tracepoints
>> dumped into pstore/ramoops.
>>
>> Regards,
>> Bjorn
>>
> 
> Reviving this discussion.
> 
> Another issue with using ftrace is that several subsystems use it.
> Consider a situation in which we store a history of 64 commands,
> responses and some other required info in ftrace.
> 
> Say there's a command that's stuck for seconds until the software times
> out. In this case, the other ftrace events are still enabled and keep
> writing to ftrace buffer thus overwriting some/most of the ufs's command
> history; thus the history is more or less lost. And there's a need to
> reproduce the issue with other tracing disabled.
> 
> So what we need is a client specific logging mechanism, which is
> lightweight as ftrace and can be parsed from ramdumps.
> 
> I'm open to ideas but ftrace in its current form may not be suitable for
> this.

Hi Asutosh,

Are you aware that it is already possible today to figure out which SCSI
commands are pending? Are you familiar with the files under
/sys/kernel/debug/block/? An example:

$ (cd /sys/kernel/debug/block && grep -aH . */*/busy)
nvme0n1/hctx7/busy:0000000006f07009 {.op=READ, .cmd_flags=META|PRIO,
.rq_flags=DONTPREP|IO_STAT, .state=in_flight, .tag=275, .internal_tag=-1}

Bart.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure
  2020-01-17  3:43           ` Bart Van Assche
@ 2020-01-20 18:46             ` Asutosh Das (asd)
  0 siblings, 0 replies; 9+ messages in thread
From: Asutosh Das (asd) @ 2020-01-20 18:46 UTC (permalink / raw)
  To: Bart Van Assche, Bjorn Andersson, cang
  Cc: Avri Altman, Winkler, Tomas, nguyenb, rnayak, linux-scsi,
	kernel-team, saravanak, salyzyn, Alim Akhtar, Pedro Sousa,
	James E.J. Bottomley, Martin K. Petersen, Evan Green,
	Janek Kotas, Stanley Chu, Bean Huo, Subhash Jadavani,
	Arnd Bergmann, open list

On 1/16/2020 7:43 PM, Bart Van Assche wrote:
> On 2020-01-16 15:03, Asutosh Das (asd) wrote:
>> On 10/28/2019 10:46 PM, Bjorn Andersson wrote:
>>> On Mon 28 Oct 19:37 PDT 2019, cang@codeaurora.org wrote:
>>>
>>>> On 2019-10-23 18:33, Avri Altman wrote:
>>>>>>
>>>>>>> Add the necessary infrastructure to keep timestamp history of
>>>>>>> commands, events and other useful info for debugging complex issues.
>>>>>>> This helps in diagnosing events leading upto failure.
>>>>>>
>>>>>> Why not use tracepoints, for that?
>>>>> Ack on Tomas's comment.
>>>>> Are there any pieces of information that you need not provided by the
>>>>> upiu tracer?
>>>>>
>>>>> Thanks,
>>>>> Avri
>>>>
>>>> In extreme cases, when the UFS runs into bad state, system may crash.
>>>> There
>>>> may not be a chance to collect trace. If trace is not collected and
>>>> failure
>>>> is hard to be reproduced, some command logs prints would be very
>>>> helpful to
>>>> help understand what was going on before we run into failure.
>>>>
>>>
>>> This is a common problem shared among many/all subsystems, so it's
>>> better to rely on a generic solution for this; such as using tracepoints
>>> dumped into pstore/ramoops.
>>>
>>> Regards,
>>> Bjorn
>>>
>>
>> Reviving this discussion.
>>
>> Another issue with using ftrace is that several subsystems use it.
>> Consider a situation in which we store a history of 64 commands,
>> responses and some other required info in ftrace.
>>
>> Say there's a command that's stuck for seconds until the software times
>> out. In this case, the other ftrace events are still enabled and keep
>> writing to ftrace buffer thus overwriting some/most of the ufs's command
>> history; thus the history is more or less lost. And there's a need to
>> reproduce the issue with other tracing disabled.
>>
>> So what we need is a client specific logging mechanism, which is
>> lightweight as ftrace and can be parsed from ramdumps.
>>
>> I'm open to ideas but ftrace in its current form may not be suitable for
>> this.
> 
> Hi Asutosh,
> 
> Are you aware that it is already possible today to figure out which SCSI
> commands are pending? Are you familiar with the files under
> /sys/kernel/debug/block/? An example:
> 
> $ (cd /sys/kernel/debug/block && grep -aH . */*/busy)
> nvme0n1/hctx7/busy:0000000006f07009 {.op=READ, .cmd_flags=META|PRIO,
> .rq_flags=DONTPREP|IO_STAT, .state=in_flight, .tag=275, .internal_tag=-1}
> 
> Bart.
> 
> 

Hi Bart,

No I was not aware of this. Thanks for letting me know.

However, it doesn't look like this stores the _history_ of commands that 
were already executed.

To put it in perspective, below is the output of the current command 
logging -:

[     461.142124354s] dme: dme_send: seq_no=  738272 lun=0 cmd_id=0x018 
lba=        0 txfer_len=       0 tag= 0 doorbell=0x0000 
outstanding=0x0000 idn= 0
[     461.142384823s] dme: dme_cmpl_2: seq_no=  738273 lun=0 
cmd_id=0x018 lba=        0 txfer_len=       0 tag= 0 doorbell=0x0000 
outstanding=0x0000 idn= 0
[     461.142453052s] scsi: scsi_send: seq_no=  738274 lun=0 
cmd_id=0x028 lba=  1360888 txfer_len=   77824 tag= 0 doorbell=0x0001 
outstanding=0x0001 idn= 0
[     461.142473000s] scsi: scsi_send: seq_no=  738275 lun=0 
cmd_id=0x028 lba=  1361056 txfer_len=   12288 tag= 1 doorbell=0x0003 
outstanding=0x0003 idn= 0
[     461.142482270s] scsi: scsi_send: seq_no=  738276 lun=0 
cmd_id=0x028 lba=  1361088 txfer_len=    4096 tag= 2 doorbell=0x0007 
outstanding=0x0007 idn= 0
[     461.142491020s] scsi: scsi_send: seq_no=  738277 lun=0 
cmd_id=0x028 lba=  1361120 txfer_len=    4096 tag= 3 doorbell=0x000F 
outstanding=0x000F idn= 0
[     461.142538208s] scsi: scsi_send: seq_no=  738278 lun=0 
cmd_id=0x028 lba=  1361144 txfer_len=    4096 tag= 4 doorbell=0x001F 
outstanding=0x001F idn= 0
[     461.142780083s] scsi: scsi_send: seq_no=  738279 lun=0 
cmd_id=0x028 lba=  1361184 txfer_len=   20480 tag= 5 doorbell=0x003F 
outstanding=0x003F idn= 0
[     461.142922791s] scsi: scsi_cmpl: seq_no=  738280 lun=0 
cmd_id=0x028 lba=  1360888 txfer_len=   77824 tag= 0 doorbell=0x003E 
outstanding=0x003F idn= 0
[     461.142982323s] scsi: scsi_cmpl: seq_no=  738281 lun=0 
cmd_id=0x028 lba=  1361056 txfer_len=   12288 tag= 1 doorbell=0x003C 
outstanding=0x003E idn= 0
[     461.142996958s] scsi: scsi_cmpl: seq_no=  738282 lun=0 
cmd_id=0x028 lba=  1361088 txfer_len=    4096 tag= 2 doorbell=0x0038 
outstanding=0x003C idn= 0
[     461.143059406s] scsi: scsi_cmpl: seq_no=  738283 lun=0 
cmd_id=0x028 lba=  1361120 txfer_len=    4096 tag= 3 doorbell=0x0020 
outstanding=0x0038 idn= 0
[     461.143064666s] scsi: scsi_cmpl: seq_no=  738284 lun=0 
cmd_id=0x028 lba=  1361144 txfer_len=    4096 tag= 4 doorbell=0x0020 
outstanding=0x0038 idn= 0

[...]

This output was extracted from ramdumps after a crash.
There're custom logs that come up as well, for e.g. scaling start and 
end etc.

In a nutshell, the high-level requirements of the logging would be -:
1. Light-weight
2. Extract from ram-dumps
3. Eliminate the need to reproduce an issue as much as possible

Ftrace ticks all the boxes except 3. There's a high chance of this data 
being overwritten by other loggers. And when this happens, the issue 
needs to be reproduced with ftrace logging enabled only for ufs.

*Motivation for this logging*:

There are several (read - all) UFS device vendors who need the sequence 
of commands leading up to the specific failure. They've asked for it 
every time. They still do.

There were 2 ways of doing this - either in device or in host.

1. In-Device-:
	The device stores a ring buffer of commands that were received.
	Upon error, host sends a command to device to read this ring 	buffer 
and dumps it out or can be extracted from ramdumps. It'd not be a 
universal solution anyway and would be very vendor-specific. Difficult 
to maintain etc. May have to be added to JEDEC to standardize it.

2. In-host-:
	Ftrace was tried out and the previously highlighted issues popped up. 
Hence the need for a UFS (client)-specific implementation.

If there's a way to have a client-specific ftrace logging, it'd be the 
ideal thing to do.

How about modifying ftrace to make it client-specific?

Any suggestions to modify the current design such that it can use ftrace 
or any other better ideas or methods, please let me know.

-asd


-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-01-20 18:46 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-23  5:29 [PATCH v1 1/1] scsi: ufs: Add command logging infrastructure Can Guo
2019-10-23  7:25 ` Winkler, Tomas
2019-10-23 10:33   ` Avri Altman
2019-10-29  2:37     ` cang
2019-10-29  5:46       ` Bjorn Andersson
2020-01-16 23:03         ` Asutosh Das (asd)
2020-01-17  3:43           ` Bart Van Assche
2020-01-20 18:46             ` Asutosh Das (asd)
2019-10-23  9:33 ` kbuild test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).