All of lore.kernel.org
 help / color / mirror / Atom feed
From: John Garry <john.garry@huawei.com>
To: <jejb@linux.vnet.ibm.com>, <martin.petersen@oracle.com>
Cc: <john.garry2@mail.dcu.ie>, <linuxarm@huawei.com>,
	<linux-scsi@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<zhangfei.gao@linaro.org>, John Garry <john.garry@huawei.com>,
	Xiaofei Tan <tanxiaofei@huawei.com>
Subject: [PATCH 4/6] scsi: hisi_sas: add v2 hw internal abort timeout workaround
Date: Mon, 10 Apr 2017 21:21:59 +0800	[thread overview]
Message-ID: <1491830521-21437-5-git-send-email-john.garry@huawei.com> (raw)
In-Reply-To: <1491830521-21437-1-git-send-email-john.garry@huawei.com>

This patch is a workaround for a SoC bug where an internal abort
command may timeout. In v2 hw, the channel should become idle in
order to finish abort process. If the target side has been sending
HOLD, host side channel failed to complete the frame to send, and
can not enter the idle state. Then internal abort command will
timeout.

As this issue is only in v2 hw, we deal with it in the hw layer.
Our workaround solution is:
If abort is not finished within a certain period of time, we will check
HOLD status. If HOLD has been sending, we will send break command.

Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  1 +
 drivers/scsi/hisi_sas/hisi_sas_main.c  |  2 +-
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 62 +++++++++++++++++++++++++++++-----
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index c80ca83..4e28f32 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -138,6 +138,7 @@ struct hisi_sas_slot {
 	struct hisi_sas_sge_page *sge_page;
 	dma_addr_t sge_page_dma;
 	struct work_struct abort_slot;
+	struct timer_list internal_abort_timer;
 };
 
 struct hisi_sas_tmf_task {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 9890dfd..a5c6d06 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1224,7 +1224,7 @@ static int hisi_sas_query_task(struct sas_task *task)
 	task->task_done = hisi_sas_task_done;
 	task->slow_task->timer.data = (unsigned long)task;
 	task->slow_task->timer.function = hisi_sas_tmf_timedout;
-	task->slow_task->timer.expires = jiffies + 20*HZ;
+	task->slow_task->timer.expires = jiffies + msecs_to_jiffies(110);
 	add_timer(&task->slow_task->timer);
 
 	/* Lock as we are alloc'ing a slot, which cannot be interrupted */
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index a1eb88d..cc5675b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -194,9 +194,9 @@
 #define SL_CONTROL_NOTIFY_EN_MSK	(0x1 << SL_CONTROL_NOTIFY_EN_OFF)
 #define SL_CONTROL_CTA_OFF		17
 #define SL_CONTROL_CTA_MSK		(0x1 << SL_CONTROL_CTA_OFF)
-#define RX_PRIMS_STATUS         (PORT_BASE + 0x98)
-#define RX_BCAST_CHG_OFF        1
-#define RX_BCAST_CHG_MSK        (0x1 << RX_BCAST_CHG_OFF)
+#define RX_PRIMS_STATUS			(PORT_BASE + 0x98)
+#define RX_BCAST_CHG_OFF		1
+#define RX_BCAST_CHG_MSK		(0x1 << RX_BCAST_CHG_OFF)
 #define TX_ID_DWORD0			(PORT_BASE + 0x9c)
 #define TX_ID_DWORD1			(PORT_BASE + 0xa0)
 #define TX_ID_DWORD2			(PORT_BASE + 0xa4)
@@ -209,8 +209,8 @@
 #define TXID_AUTO_CT3_MSK		(0x1 << TXID_AUTO_CT3_OFF)
 #define TXID_AUTO_CTB_OFF		11
 #define TXID_AUTO_CTB_MSK		(0x1 << TXID_AUTO_CTB_OFF)
-#define TX_HARDRST_OFF          2
-#define TX_HARDRST_MSK          (0x1 << TX_HARDRST_OFF)
+#define TX_HARDRST_OFF			2
+#define TX_HARDRST_MSK			(0x1 << TX_HARDRST_OFF)
 #define RX_IDAF_DWORD0			(PORT_BASE + 0xc4)
 #define RX_IDAF_DWORD1			(PORT_BASE + 0xc8)
 #define RX_IDAF_DWORD2			(PORT_BASE + 0xcc)
@@ -245,13 +245,13 @@
 #define CHL_INT1_MSK			(PORT_BASE + 0x1c4)
 #define CHL_INT2_MSK			(PORT_BASE + 0x1c8)
 #define CHL_INT_COAL_EN			(PORT_BASE + 0x1d0)
-#define DMA_TX_DFX0			(PORT_BASE + 0x200)
-#define DMA_TX_DFX1			(PORT_BASE + 0x204)
+#define DMA_TX_DFX0				(PORT_BASE + 0x200)
+#define DMA_TX_DFX1				(PORT_BASE + 0x204)
 #define DMA_TX_DFX1_IPTT_OFF		0
 #define DMA_TX_DFX1_IPTT_MSK		(0xffff << DMA_TX_DFX1_IPTT_OFF)
 #define DMA_TX_FIFO_DFX0		(PORT_BASE + 0x240)
-#define PORT_DFX0			(PORT_BASE + 0x258)
-#define LINK_DFX2			(PORT_BASE + 0X264)
+#define PORT_DFX0				(PORT_BASE + 0x258)
+#define LINK_DFX2					(PORT_BASE + 0X264)
 #define LINK_DFX2_RCVR_HOLD_STS_OFF	9
 #define LINK_DFX2_RCVR_HOLD_STS_MSK	(0x1 << LINK_DFX2_RCVR_HOLD_STS_OFF)
 #define LINK_DFX2_SEND_HOLD_STS_OFF	10
@@ -2260,15 +2260,18 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
 	case STAT_IO_COMPLETE:
 		/* internal abort command complete */
 		ts->stat = TMF_RESP_FUNC_SUCC;
+		del_timer(&slot->internal_abort_timer);
 		goto out;
 	case STAT_IO_NO_DEVICE:
 		ts->stat = TMF_RESP_FUNC_COMPLETE;
+		del_timer(&slot->internal_abort_timer);
 		goto out;
 	case STAT_IO_NOT_VALID:
 		/* abort single io, controller don't find
 		 * the io need to abort
 		 */
 		ts->stat = TMF_RESP_FUNC_FAILED;
+		del_timer(&slot->internal_abort_timer);
 		goto out;
 	default:
 		break;
@@ -2502,6 +2505,40 @@ static int prep_ata_v2_hw(struct hisi_hba *hisi_hba,
 	return 0;
 }
 
+static void hisi_sas_internal_abort_quirk_timeout(unsigned long data)
+{
+	struct hisi_sas_slot *slot = (struct hisi_sas_slot *)data;
+	struct hisi_sas_port *port = slot->port;
+	struct asd_sas_port *asd_sas_port;
+	struct asd_sas_phy *sas_phy;
+
+	if (!port)
+		return;
+
+	asd_sas_port = &port->sas_port;
+
+	/* Kick the hardware - send break command */
+	list_for_each_entry(sas_phy, &asd_sas_port->phy_list, port_phy_el) {
+		struct hisi_sas_phy *phy = sas_phy->lldd_phy;
+		struct hisi_hba *hisi_hba = phy->hisi_hba;
+		int phy_no = sas_phy->id;
+		u32 link_dfx2;
+
+		link_dfx2 = hisi_sas_phy_read32(hisi_hba, phy_no, LINK_DFX2);
+		if ((link_dfx2 == LINK_DFX2_RCVR_HOLD_STS_MSK) ||
+		    (link_dfx2 & LINK_DFX2_SEND_HOLD_STS_MSK)) {
+			u32 txid_auto;
+
+			txid_auto = hisi_sas_phy_read32(hisi_hba, phy_no,
+							TXID_AUTO);
+			txid_auto |= TXID_AUTO_CTB_MSK;
+			hisi_sas_phy_write32(hisi_hba, phy_no, TXID_AUTO,
+					     txid_auto);
+			return;
+		}
+	}
+}
+
 static int prep_abort_v2_hw(struct hisi_hba *hisi_hba,
 		struct hisi_sas_slot *slot,
 		int device_id, int abort_flag, int tag_to_abort)
@@ -2510,6 +2547,13 @@ static int prep_abort_v2_hw(struct hisi_hba *hisi_hba,
 	struct domain_device *dev = task->dev;
 	struct hisi_sas_cmd_hdr *hdr = slot->cmd_hdr;
 	struct hisi_sas_port *port = slot->port;
+	struct timer_list *timer = &slot->internal_abort_timer;
+
+	/* setup the quirk timer */
+	setup_timer(timer, hisi_sas_internal_abort_quirk_timeout,
+		    (unsigned long)slot);
+	/* Set the timeout to 10ms less than internal abort timeout */
+	mod_timer(timer, jiffies + msecs_to_jiffies(100));
 
 	/* dw0 */
 	hdr->dw0 = cpu_to_le32((5 << CMD_HDR_CMD_OFF) | /*abort*/
-- 
1.9.1

WARNING: multiple messages have this Message-ID (diff)
From: John Garry <john.garry@huawei.com>
To: jejb@linux.vnet.ibm.com, martin.petersen@oracle.com
Cc: john.garry2@mail.dcu.ie, linuxarm@huawei.com,
	linux-scsi@vger.kernel.org, linux-kernel@vger.kernel.org,
	zhangfei.gao@linaro.org, John Garry <john.garry@huawei.com>,
	Xiaofei Tan <tanxiaofei@huawei.com>
Subject: [PATCH 4/6] scsi: hisi_sas: add v2 hw internal abort timeout workaround
Date: Mon, 10 Apr 2017 21:21:59 +0800	[thread overview]
Message-ID: <1491830521-21437-5-git-send-email-john.garry@huawei.com> (raw)
In-Reply-To: <1491830521-21437-1-git-send-email-john.garry@huawei.com>

This patch is a workaround for a SoC bug where an internal abort
command may timeout. In v2 hw, the channel should become idle in
order to finish abort process. If the target side has been sending
HOLD, host side channel failed to complete the frame to send, and
can not enter the idle state. Then internal abort command will
timeout.

As this issue is only in v2 hw, we deal with it in the hw layer.
Our workaround solution is:
If abort is not finished within a certain period of time, we will check
HOLD status. If HOLD has been sending, we will send break command.

Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Xiaofei Tan <tanxiaofei@huawei.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  1 +
 drivers/scsi/hisi_sas/hisi_sas_main.c  |  2 +-
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 62 +++++++++++++++++++++++++++++-----
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index c80ca83..4e28f32 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -138,6 +138,7 @@ struct hisi_sas_slot {
 	struct hisi_sas_sge_page *sge_page;
 	dma_addr_t sge_page_dma;
 	struct work_struct abort_slot;
+	struct timer_list internal_abort_timer;
 };
 
 struct hisi_sas_tmf_task {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 9890dfd..a5c6d06 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1224,7 +1224,7 @@ static int hisi_sas_query_task(struct sas_task *task)
 	task->task_done = hisi_sas_task_done;
 	task->slow_task->timer.data = (unsigned long)task;
 	task->slow_task->timer.function = hisi_sas_tmf_timedout;
-	task->slow_task->timer.expires = jiffies + 20*HZ;
+	task->slow_task->timer.expires = jiffies + msecs_to_jiffies(110);
 	add_timer(&task->slow_task->timer);
 
 	/* Lock as we are alloc'ing a slot, which cannot be interrupted */
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index a1eb88d..cc5675b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -194,9 +194,9 @@
 #define SL_CONTROL_NOTIFY_EN_MSK	(0x1 << SL_CONTROL_NOTIFY_EN_OFF)
 #define SL_CONTROL_CTA_OFF		17
 #define SL_CONTROL_CTA_MSK		(0x1 << SL_CONTROL_CTA_OFF)
-#define RX_PRIMS_STATUS         (PORT_BASE + 0x98)
-#define RX_BCAST_CHG_OFF        1
-#define RX_BCAST_CHG_MSK        (0x1 << RX_BCAST_CHG_OFF)
+#define RX_PRIMS_STATUS			(PORT_BASE + 0x98)
+#define RX_BCAST_CHG_OFF		1
+#define RX_BCAST_CHG_MSK		(0x1 << RX_BCAST_CHG_OFF)
 #define TX_ID_DWORD0			(PORT_BASE + 0x9c)
 #define TX_ID_DWORD1			(PORT_BASE + 0xa0)
 #define TX_ID_DWORD2			(PORT_BASE + 0xa4)
@@ -209,8 +209,8 @@
 #define TXID_AUTO_CT3_MSK		(0x1 << TXID_AUTO_CT3_OFF)
 #define TXID_AUTO_CTB_OFF		11
 #define TXID_AUTO_CTB_MSK		(0x1 << TXID_AUTO_CTB_OFF)
-#define TX_HARDRST_OFF          2
-#define TX_HARDRST_MSK          (0x1 << TX_HARDRST_OFF)
+#define TX_HARDRST_OFF			2
+#define TX_HARDRST_MSK			(0x1 << TX_HARDRST_OFF)
 #define RX_IDAF_DWORD0			(PORT_BASE + 0xc4)
 #define RX_IDAF_DWORD1			(PORT_BASE + 0xc8)
 #define RX_IDAF_DWORD2			(PORT_BASE + 0xcc)
@@ -245,13 +245,13 @@
 #define CHL_INT1_MSK			(PORT_BASE + 0x1c4)
 #define CHL_INT2_MSK			(PORT_BASE + 0x1c8)
 #define CHL_INT_COAL_EN			(PORT_BASE + 0x1d0)
-#define DMA_TX_DFX0			(PORT_BASE + 0x200)
-#define DMA_TX_DFX1			(PORT_BASE + 0x204)
+#define DMA_TX_DFX0				(PORT_BASE + 0x200)
+#define DMA_TX_DFX1				(PORT_BASE + 0x204)
 #define DMA_TX_DFX1_IPTT_OFF		0
 #define DMA_TX_DFX1_IPTT_MSK		(0xffff << DMA_TX_DFX1_IPTT_OFF)
 #define DMA_TX_FIFO_DFX0		(PORT_BASE + 0x240)
-#define PORT_DFX0			(PORT_BASE + 0x258)
-#define LINK_DFX2			(PORT_BASE + 0X264)
+#define PORT_DFX0				(PORT_BASE + 0x258)
+#define LINK_DFX2					(PORT_BASE + 0X264)
 #define LINK_DFX2_RCVR_HOLD_STS_OFF	9
 #define LINK_DFX2_RCVR_HOLD_STS_MSK	(0x1 << LINK_DFX2_RCVR_HOLD_STS_OFF)
 #define LINK_DFX2_SEND_HOLD_STS_OFF	10
@@ -2260,15 +2260,18 @@ static void slot_err_v2_hw(struct hisi_hba *hisi_hba,
 	case STAT_IO_COMPLETE:
 		/* internal abort command complete */
 		ts->stat = TMF_RESP_FUNC_SUCC;
+		del_timer(&slot->internal_abort_timer);
 		goto out;
 	case STAT_IO_NO_DEVICE:
 		ts->stat = TMF_RESP_FUNC_COMPLETE;
+		del_timer(&slot->internal_abort_timer);
 		goto out;
 	case STAT_IO_NOT_VALID:
 		/* abort single io, controller don't find
 		 * the io need to abort
 		 */
 		ts->stat = TMF_RESP_FUNC_FAILED;
+		del_timer(&slot->internal_abort_timer);
 		goto out;
 	default:
 		break;
@@ -2502,6 +2505,40 @@ static int prep_ata_v2_hw(struct hisi_hba *hisi_hba,
 	return 0;
 }
 
+static void hisi_sas_internal_abort_quirk_timeout(unsigned long data)
+{
+	struct hisi_sas_slot *slot = (struct hisi_sas_slot *)data;
+	struct hisi_sas_port *port = slot->port;
+	struct asd_sas_port *asd_sas_port;
+	struct asd_sas_phy *sas_phy;
+
+	if (!port)
+		return;
+
+	asd_sas_port = &port->sas_port;
+
+	/* Kick the hardware - send break command */
+	list_for_each_entry(sas_phy, &asd_sas_port->phy_list, port_phy_el) {
+		struct hisi_sas_phy *phy = sas_phy->lldd_phy;
+		struct hisi_hba *hisi_hba = phy->hisi_hba;
+		int phy_no = sas_phy->id;
+		u32 link_dfx2;
+
+		link_dfx2 = hisi_sas_phy_read32(hisi_hba, phy_no, LINK_DFX2);
+		if ((link_dfx2 == LINK_DFX2_RCVR_HOLD_STS_MSK) ||
+		    (link_dfx2 & LINK_DFX2_SEND_HOLD_STS_MSK)) {
+			u32 txid_auto;
+
+			txid_auto = hisi_sas_phy_read32(hisi_hba, phy_no,
+							TXID_AUTO);
+			txid_auto |= TXID_AUTO_CTB_MSK;
+			hisi_sas_phy_write32(hisi_hba, phy_no, TXID_AUTO,
+					     txid_auto);
+			return;
+		}
+	}
+}
+
 static int prep_abort_v2_hw(struct hisi_hba *hisi_hba,
 		struct hisi_sas_slot *slot,
 		int device_id, int abort_flag, int tag_to_abort)
@@ -2510,6 +2547,13 @@ static int prep_abort_v2_hw(struct hisi_hba *hisi_hba,
 	struct domain_device *dev = task->dev;
 	struct hisi_sas_cmd_hdr *hdr = slot->cmd_hdr;
 	struct hisi_sas_port *port = slot->port;
+	struct timer_list *timer = &slot->internal_abort_timer;
+
+	/* setup the quirk timer */
+	setup_timer(timer, hisi_sas_internal_abort_quirk_timeout,
+		    (unsigned long)slot);
+	/* Set the timeout to 10ms less than internal abort timeout */
+	mod_timer(timer, jiffies + msecs_to_jiffies(100));
 
 	/* dw0 */
 	hdr->dw0 = cpu_to_le32((5 << CMD_HDR_CMD_OFF) | /*abort*/
-- 
1.9.1

  parent reply	other threads:[~2017-04-10 12:52 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-10 13:21 [PATCH 0/6] hisi_sas: v2 hw SoC bug workarounds John Garry
2017-04-10 13:21 ` John Garry
2017-04-10 13:21 ` [PATCH 1/6] scsi: hisi_sas: workaround STP link SoC bug John Garry
2017-04-10 13:21   ` John Garry
2017-04-10 13:16   ` Johannes Thumshirn
2017-04-10 13:21 ` [PATCH 2/6] scsi: hisi_sas: workaround a SoC SATA IO processing bug John Garry
2017-04-10 13:21   ` John Garry
2017-04-10 14:07   ` Johannes Thumshirn
2017-04-10 13:21 ` [PATCH 3/6] scsi: hisi_sas: workaround SoC about abort timeout bug John Garry
2017-04-10 13:21   ` John Garry
2017-04-10 13:21 ` John Garry [this message]
2017-04-10 13:21   ` [PATCH 4/6] scsi: hisi_sas: add v2 hw internal abort timeout workaround John Garry
2017-04-10 13:22 ` [PATCH 5/6] scsi: hisi_sas: fix NULL deference when TMF timeouts John Garry
2017-04-10 13:22   ` John Garry
2017-04-10 13:22 ` [PATCH 6/6] scsi: hisi_sas: controller reset for multi-bits ECC and AXI fatal errors John Garry
2017-04-10 13:22   ` John Garry
2017-04-12  2:02 ` [PATCH 0/6] hisi_sas: v2 hw SoC bug workarounds Martin K. Petersen
2017-04-12  2:02   ` Martin K. Petersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1491830521-21437-5-git-send-email-john.garry@huawei.com \
    --to=john.garry@huawei.com \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=john.garry2@mail.dcu.ie \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=linuxarm@huawei.com \
    --cc=martin.petersen@oracle.com \
    --cc=tanxiaofei@huawei.com \
    --cc=zhangfei.gao@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.