linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/6] NTB bug fixes and hardware workarounds
@ 2014-08-28 20:52 Dave Jiang
  2014-08-28 20:53 ` [PATCH 1/6] ntb: Add alignment check to meet hardware requirement Dave Jiang
                   ` (6 more replies)
  0 siblings, 7 replies; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:52 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

The following series contains various fixes and cleanups for NTB. It also
adds split BAR support on the Haswell platform and a hardware erratum
workaround that allows interrupts to keep functioning during bi-directional
traffic under stress.

---

Dave Jiang (6):
      ntb: Add alignment check to meet hardware requirement
      ntb: move platform detection to separate function
      ntb: conslidate reading of PPD to move platform detection earlier
      ntb: use errata flag set via DID to implement workaround
      ntb: Adding split BAR support for Haswell platforms
      ntb: workaround for high traffic hardware hang


 drivers/ntb/ntb_hw.c        |  603 +++++++++++++++++++++++++++++++++++--------
 drivers/ntb/ntb_hw.h        |   27 ++
 drivers/ntb/ntb_regs.h      |   32 ++
 drivers/ntb/ntb_transport.c |  139 ++++++----
 4 files changed, 620 insertions(+), 181 deletions(-)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/6] ntb: Add alignment check to meet hardware requirement
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
@ 2014-08-28 20:53 ` Dave Jiang
  2014-08-28 20:53 ` [PATCH 2/6] ntb: move platform detection to separate function Dave Jiang
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:53 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

The value written to the NTB translate register must be BAR size aligned.
This alignment check makes sure that the DMA memory allocated has the
proper alignment. Another requirement for NTB to function properly with a
memory window BAR size greater than or equal to 4M is to use the CMA feature
in the 3.16 kernel with the appropriate CONFIG_CMA_ALIGNMENT and
CONFIG_CMA_SIZE_MBYTES set.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/ntb/ntb_transport.c |   13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 611fef4..24f0ac1 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -576,6 +576,19 @@ static int ntb_set_mw(struct ntb_transport *nt, int num_mw, unsigned int size)
 		return -ENOMEM;
 	}
 
+	/*
+	 * we must ensure that the memory address allocated is BAR size
+	 * aligned in order for the XLAT register to take the value. This
+	 * is a requirement of the hardware. It is recommended to setup CMA
+	 * for BAR sizes equal or greater than 4MB.
+	 */
+	if (!IS_ALIGNED(mw->dma_addr, mw->size)) {
+		dev_err(&pdev->dev, "DMA memory %#Lx not aligned to BAR size\n",
+			mw->dma_addr);
+		ntb_free_mw(nt, num_mw);
+		return -ENOMEM;
+	}
+
 	/* Notify HW the memory location of the receive buffer */
 	ntb_set_mw_addr(nt->ndev, num_mw, mw->dma_addr);
 


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/6] ntb: move platform detection to separate function
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
  2014-08-28 20:53 ` [PATCH 1/6] ntb: Add alignment check to meet hardware requirement Dave Jiang
@ 2014-08-28 20:53 ` Dave Jiang
  2014-08-28 20:53 ` [PATCH 3/6] ntb: conslidate reading of PPD to move platform detection earlier Dave Jiang
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:53 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

Move the platform detection code into separate functions to allow
easier maintenance.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/ntb/ntb_hw.c |   56 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index ce6680a..64ef836 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -109,6 +109,41 @@ static const struct pci_device_id ntb_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
 
+static int is_ntb_xeon(struct ntb_device *ndev)
+{
+	switch (ndev->pdev->device) {
+	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+		return 1;
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static int is_ntb_atom(struct ntb_device *ndev)
+{
+	switch (ndev->pdev->device) {
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
+		return 1;
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
 /**
  * ntb_register_event_callback() - register event callback
  * @ndev: pointer to ntb_device instance
@@ -932,27 +967,12 @@ static int ntb_device_setup(struct ntb_device *ndev)
 {
 	int rc;
 
-	switch (ndev->pdev->device) {
-	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
-	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
-	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
-	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
-	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
-	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
-	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
-	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
-	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
-	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
-	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
-	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+	if (is_ntb_xeon(ndev))
 		rc = ntb_xeon_setup(ndev);
-		break;
-	case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
+	else if (is_ntb_atom(ndev))
 		rc = ntb_bwd_setup(ndev);
-		break;
-	default:
+	else
 		rc = -ENODEV;
-	}
 
 	if (rc)
 		return rc;


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/6] ntb: conslidate reading of PPD to move platform detection earlier
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
  2014-08-28 20:53 ` [PATCH 1/6] ntb: Add alignment check to meet hardware requirement Dave Jiang
  2014-08-28 20:53 ` [PATCH 2/6] ntb: move platform detection to separate function Dave Jiang
@ 2014-08-28 20:53 ` Dave Jiang
  2014-08-28 20:53 ` [PATCH 4/6] ntb: use errata flag set via DID to implement workaround Dave Jiang
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:53 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

To simplify some of the platform detection code, consolidate the reading of
the PPD register into a detect function that is called earlier in probe.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/ntb/ntb_hw.c |  130 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 100 insertions(+), 30 deletions(-)

diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index 64ef836..d6f4268 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -702,24 +702,8 @@ static void bwd_link_poll(struct work_struct *work)
 
 static int ntb_xeon_setup(struct ntb_device *ndev)
 {
-	int rc;
-	u8 val;
-
-	ndev->hw_type = SNB_HW;
-
-	rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &val);
-	if (rc)
-		return rc;
-
-	if (val & SNB_PPD_DEV_TYPE)
-		ndev->dev_type = NTB_DEV_USD;
-	else
-		ndev->dev_type = NTB_DEV_DSD;
-
-	switch (val & SNB_PPD_CONN_TYPE) {
+	switch (ndev->conn_type) {
 	case NTB_CONN_B2B:
-		dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
-		ndev->conn_type = NTB_CONN_B2B;
 		ndev->reg_ofs.ldb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
 		ndev->reg_ofs.ldb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
 		ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
@@ -835,9 +819,6 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		}
 		break;
 	case NTB_CONN_RP:
-		dev_info(&ndev->pdev->dev, "Conn Type = RP\n");
-		ndev->conn_type = NTB_CONN_RP;
-
 		if (xeon_errata_workaround) {
 			dev_err(&ndev->pdev->dev,
 				"NTB-RP disabled due to hardware errata.  To disregard this warning and potentially lock-up the system, add the parameter 'xeon_errata_workaround=0'.\n");
@@ -867,8 +848,6 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		ndev->limits.max_mw = SNB_MAX_MW;
 		break;
 	case NTB_CONN_TRANSPARENT:
-		dev_info(&ndev->pdev->dev, "Conn Type = TRANSPARENT\n");
-		ndev->conn_type = NTB_CONN_TRANSPARENT;
 		/* Scratch pads need to have exclusive access from the primary
 		 * or secondary side.  Halve the num spads so that each side can
 		 * have an equal amount.
@@ -890,10 +869,10 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		ndev->limits.max_mw = SNB_MAX_MW;
 		break;
 	default:
-		/* Most likely caused by the remote NTB-RP device not being
-		 * configured
+		/*
+		 * we should never hit this. the detect function should've
+		 * taken care of everything.
 		 */
-		dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", val);
 		return -EINVAL;
 	}
 
@@ -967,9 +946,9 @@ static int ntb_device_setup(struct ntb_device *ndev)
 {
 	int rc;
 
-	if (is_ntb_xeon(ndev))
+	if (ndev->hw_type == SNB_HW)
 		rc = ntb_xeon_setup(ndev);
-	else if (is_ntb_atom(ndev))
+	else if (ndev->hw_type == BWD_HW)
 		rc = ntb_bwd_setup(ndev);
 	else
 		rc = -ENODEV;
@@ -977,9 +956,6 @@ static int ntb_device_setup(struct ntb_device *ndev)
 	if (rc)
 		return rc;
 
-	dev_info(&ndev->pdev->dev, "Device Type = %s\n",
-		 ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP");
-
 	if (ndev->conn_type == NTB_CONN_B2B)
 		/* Enable Bus Master and Memory Space on the secondary side */
 		writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER,
@@ -1519,6 +1495,96 @@ static void ntb_hw_link_down(struct ntb_device *ndev)
 	writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
 }
 
+static int ntb_xeon_detect(struct ntb_device *ndev)
+{
+	int rc;
+	u8 ppd;
+
+	ndev->hw_type = SNB_HW;
+
+	rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &ppd);
+	if (rc)
+		return -EIO;
+
+	if (ppd & SNB_PPD_DEV_TYPE)
+		ndev->dev_type = NTB_DEV_USD;
+	else
+		ndev->dev_type = NTB_DEV_DSD;
+
+	switch (ppd & SNB_PPD_CONN_TYPE) {
+	case NTB_CONN_B2B:
+		dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
+		ndev->conn_type = NTB_CONN_B2B;
+		break;
+	case NTB_CONN_RP:
+		dev_info(&ndev->pdev->dev, "Conn Type = RP\n");
+		ndev->conn_type = NTB_CONN_RP;
+		break;
+	case NTB_CONN_TRANSPARENT:
+		dev_info(&ndev->pdev->dev, "Conn Type = TRANSPARENT\n");
+		ndev->conn_type = NTB_CONN_TRANSPARENT;
+		/*
+		 * This mode is default to USD/DSP. HW does not report
+		 * properly in transparent mode as it has no knowledge of
+		 * NTB. We will just force correct here.
+		 */
+		ndev->dev_type = NTB_DEV_USD;
+		break;
+	default:
+		dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", ppd);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int ntb_atom_detect(struct ntb_device *ndev)
+{
+	int rc;
+	u32 ppd;
+
+	ndev->hw_type = BWD_HW;
+
+	rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &ppd);
+	if (rc)
+		return rc;
+
+	switch ((ppd & BWD_PPD_CONN_TYPE) >> 8) {
+	case NTB_CONN_B2B:
+		dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
+		ndev->conn_type = NTB_CONN_B2B;
+		break;
+	case NTB_CONN_RP:
+	default:
+		dev_err(&ndev->pdev->dev, "Unsupported NTB configuration\n");
+		return -EINVAL;
+	}
+
+	if (ppd & BWD_PPD_DEV_TYPE)
+		ndev->dev_type = NTB_DEV_DSD;
+	else
+		ndev->dev_type = NTB_DEV_USD;
+
+	return 0;
+}
+
+/* Run platform detection for this PCI device ID; 0 or negative errno. */
+static int ntb_device_detect(struct ntb_device *ndev)
+{
+	int rc;
+
+	if (is_ntb_xeon(ndev))
+		rc = ntb_xeon_detect(ndev);
+	else if (is_ntb_atom(ndev))
+		rc = ntb_atom_detect(ndev);
+	else
+		rc = -ENODEV;
+
+	dev_info(&ndev->pdev->dev, "Device Type = %s\n",
+		 ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP");
+	return rc;
+}
+
 static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct ntb_device *ndev;
@@ -1539,6 +1605,10 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	pci_set_master(ndev->pdev);
 
+	rc = ntb_device_detect(ndev);
+	if (rc)
+		goto err;
+
 	rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, KBUILD_MODNAME);
 	if (rc)
 		goto err1;


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/6] ntb: use errata flag set via DID to implement workaround
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
                   ` (2 preceding siblings ...)
  2014-08-28 20:53 ` [PATCH 3/6] ntb: conslidate reading of PPD to move platform detection earlier Dave Jiang
@ 2014-08-28 20:53 ` Dave Jiang
  2014-08-28 20:53 ` [PATCH 5/6] ntb: Adding split BAR support for Haswell platforms Dave Jiang
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:53 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

Instead of using a module parameter, we should detect the errata via
PCI DID and then set an appropriate flag. This will be used for additional
errata later on.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/ntb/ntb_hw.c |   47 ++++++++++++++++++++++++++++++++++++++---------
 drivers/ntb/ntb_hw.h |    4 ++++
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index d6f4268..471c847 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -64,10 +64,6 @@ MODULE_VERSION(NTB_VER);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel Corporation");
 
-static bool xeon_errata_workaround = true;
-module_param(xeon_errata_workaround, bool, 0644);
-MODULE_PARM_DESC(xeon_errata_workaround, "Workaround for the Xeon Errata");
-
 enum {
 	NTB_CONN_TRANSPARENT = 0,
 	NTB_CONN_B2B,
@@ -144,6 +140,30 @@ static int is_ntb_atom(struct ntb_device *ndev)
 	return 0;
 }
 
+static void ntb_set_errata_flags(struct ntb_device *ndev)
+{
+	switch (ndev->pdev->device) {
+	/*
+	 * this workaround applies to all platform up to IvyBridge
+	 * Haswell has splitbar support and use a different workaround
+	 */
+	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+		ndev->wa_flags |= WA_SNB_ERR;
+		break;
+	}
+}
+
 /**
  * ntb_register_event_callback() - register event callback
  * @ndev: pointer to ntb_device instance
@@ -717,7 +737,7 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		 * this use the second memory window to access the interrupt and
 		 * scratch pad registers on the remote system.
 		 */
-		if (xeon_errata_workaround) {
+		if (ndev->wa_flags & WA_SNB_ERR) {
 			if (!ndev->mw[1].bar_sz)
 				return -EINVAL;
 
@@ -772,7 +792,7 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		if (ndev->dev_type == NTB_DEV_USD) {
 			writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
 			       SNB_PBAR2XLAT_OFFSET);
-			if (xeon_errata_workaround)
+			if (ndev->wa_flags & WA_SNB_ERR)
 				writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
 				       SNB_PBAR4XLAT_OFFSET);
 			else {
@@ -796,7 +816,7 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		} else {
 			writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
 			       SNB_PBAR2XLAT_OFFSET);
-			if (xeon_errata_workaround)
+			if (ndev->wa_flags & WA_SNB_ERR)
 				writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
 				       SNB_PBAR4XLAT_OFFSET);
 			else {
@@ -819,9 +839,9 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		}
 		break;
 	case NTB_CONN_RP:
-		if (xeon_errata_workaround) {
+		if (ndev->wa_flags & WA_SNB_ERR) {
 			dev_err(&ndev->pdev->dev,
-				"NTB-RP disabled due to hardware errata.  To disregard this warning and potentially lock-up the system, add the parameter 'xeon_errata_workaround=0'.\n");
+				"NTB-RP disabled due to hardware errata. To disregard this warning and potentially lock-up the system\n");
 			return -EINVAL;
 		}
 
@@ -848,6 +868,12 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		ndev->limits.max_mw = SNB_MAX_MW;
 		break;
 	case NTB_CONN_TRANSPARENT:
+		if (ndev->wa_flags & WA_SNB_ERR) {
+			dev_err(&ndev->pdev->dev,
+				"NTB-TRANSPARENT disabled due to hardware errata. To disregard this warning and potentially lock-up the system\n");
+			return -EINVAL;
+		}
+
 		/* Scratch pads need to have exclusive access from the primary
 		 * or secondary side.  Halve the num spads so that each side can
 		 * have an equal amount.
@@ -1595,6 +1621,9 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 
 	ndev->pdev = pdev;
+
+	ntb_set_errata_flags(ndev);
+
 	ndev->link_status = NTB_LINK_DOWN;
 	pci_set_drvdata(pdev, ndev);
 	ntb_setup_debugfs(ndev);
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
index ddbcbfd..5380ca1 100644
--- a/drivers/ntb/ntb_hw.h
+++ b/drivers/ntb/ntb_hw.h
@@ -109,6 +109,8 @@ struct ntb_db_cb {
 	struct tasklet_struct irq_work;
 };
 
+#define WA_SNB_ERR	0x00000001
+
 struct ntb_device {
 	struct pci_dev *pdev;
 	struct msix_entry *msix_entries;
@@ -153,6 +155,8 @@ struct ntb_device {
 
 	struct dentry *debugfs_dir;
 	struct dentry *debugfs_info;
+
+	unsigned int wa_flags;
 };
 
 /**


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/6] ntb: Adding split BAR support for Haswell platforms
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
                   ` (3 preceding siblings ...)
  2014-08-28 20:53 ` [PATCH 4/6] ntb: use errata flag set via DID to implement workaround Dave Jiang
@ 2014-08-28 20:53 ` Dave Jiang
  2014-08-28 20:53 ` [PATCH 6/6] ntb: workaround for high traffic hardware hang Dave Jiang
  2014-09-08  2:54 ` [PATCH 0/6] NTB bug fixes and hardware workarounds Jon Mason
  6 siblings, 0 replies; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:53 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

The Haswell platform has a split BAR option that allows the creation of two
32-bit BARs (4 and 5) from the 64-bit BAR 4. Add support for this
new option.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@gmail.com>
---
 drivers/ntb/ntb_hw.c   |  217 ++++++++++++++++++++++++++++++++++++++----------
 drivers/ntb/ntb_hw.h   |   14 ++-
 drivers/ntb/ntb_regs.h |   31 +++++--
 3 files changed, 202 insertions(+), 60 deletions(-)

diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index 471c847..cef9d8a 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -84,8 +84,8 @@ static struct dentry *debugfs_dir;
 
 #define BWD_LINK_RECOVERY_TIME	500
 
-/* Translate memory window 0,1 to BAR 2,4 */
-#define MW_TO_BAR(mw)	(mw * NTB_MAX_NUM_MW + 2)
+/* Translate memory window 0,1,2 to BAR 2,4,5 */
+#define MW_TO_BAR(mw)	(mw == 0 ? 2 : (mw == 1 ? 4 : 5))
 
 static const struct pci_device_id ntb_pci_tbl[] = {
 	{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
@@ -506,8 +506,14 @@ void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
 	case NTB_BAR_23:
 		writeq(addr, ndev->reg_ofs.bar2_xlat);
 		break;
-	case NTB_BAR_45:
-		writeq(addr, ndev->reg_ofs.bar4_xlat);
+	case NTB_BAR_4:
+		if (ndev->split_bar)
+			writel(addr, ndev->reg_ofs.bar4_xlat);
+		else
+			writeq(addr, ndev->reg_ofs.bar4_xlat);
+		break;
+	case NTB_BAR_5:
+		writel(addr, ndev->reg_ofs.bar5_xlat);
 		break;
 	}
 }
@@ -729,6 +735,9 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
 		ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
 		ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
+		if (ndev->split_bar)
+			ndev->reg_ofs.bar5_xlat =
+				ndev->reg_base + SNB_SBAR5XLAT_OFFSET;
 		ndev->limits.max_spads = SNB_MAX_B2B_SPADS;
 
 		/* There is a Xeon hardware errata related to writes to
@@ -738,15 +747,16 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		 * scratch pad registers on the remote system.
 		 */
 		if (ndev->wa_flags & WA_SNB_ERR) {
-			if (!ndev->mw[1].bar_sz)
+			if (!ndev->mw[ndev->limits.max_mw - 1].bar_sz)
 				return -EINVAL;
 
-			ndev->limits.max_mw = SNB_ERRATA_MAX_MW;
 			ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
-			ndev->reg_ofs.spad_write = ndev->mw[1].vbase +
-						   SNB_SPAD_OFFSET;
-			ndev->reg_ofs.rdb = ndev->mw[1].vbase +
-					    SNB_PDOORBELL_OFFSET;
+			ndev->reg_ofs.spad_write =
+				ndev->mw[ndev->limits.max_mw - 1].vbase +
+				SNB_SPAD_OFFSET;
+			ndev->reg_ofs.rdb =
+				ndev->mw[ndev->limits.max_mw - 1].vbase +
+				SNB_PDOORBELL_OFFSET;
 
 			/* Set the Limit register to 4k, the minimum size, to
 			 * prevent an illegal access
@@ -759,9 +769,9 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 			 * the driver defaults, but write the Limit registers
 			 * first just in case.
 			 */
-		} else {
-			ndev->limits.max_mw = SNB_MAX_MW;
 
+			ndev->limits.max_mw = SNB_ERRATA_MAX_MW;
+		} else {
 			/* HW Errata on bit 14 of b2bdoorbell register.  Writes
 			 * will not be mirrored to the remote system.  Shrink
 			 * the number of bits by one, since bit 14 is the last
@@ -774,7 +784,8 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 					    SNB_B2B_DOORBELL_OFFSET;
 
 			/* Disable the Limit register, just incase it is set to
-			 * something silly
+			 * something silly. A 64bit write should handle it
+			 * regardless of whether it has a split BAR or not.
 			 */
 			writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET);
 			/* HW errata on the Limit registers.  They can only be
@@ -783,6 +794,10 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 			 * the driver defaults, but write the Limit registers
 			 * first just in case.
 			 */
+			if (ndev->split_bar)
+				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
+			else
+				ndev->limits.max_mw = SNB_MAX_MW;
 		}
 
 		/* The Xeon errata workaround requires setting SBAR Base
@@ -796,8 +811,15 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 				writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
 				       SNB_PBAR4XLAT_OFFSET);
 			else {
-				writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
-				       SNB_PBAR4XLAT_OFFSET);
+				if (ndev->split_bar) {
+					writel(SNB_MBAR4_DSD_ADDR,
+					       ndev->reg_base + SNB_PBAR4XLAT_OFFSET);
+					writel(SNB_MBAR5_DSD_ADDR,
+					       ndev->reg_base + SNB_PBAR5XLAT_OFFSET);
+				} else
+					writeq(SNB_MBAR4_DSD_ADDR,
+					       ndev->reg_base + SNB_PBAR4XLAT_OFFSET);
+
 				/* B2B_XLAT_OFFSET is a 64bit register, but can
 				 * only take 32bit writes
 				 */
@@ -811,8 +833,14 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 			       SNB_SBAR0BASE_OFFSET);
 			writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
 			       SNB_SBAR2BASE_OFFSET);
-			writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
-			       SNB_SBAR4BASE_OFFSET);
+			if (ndev->split_bar) {
+				writel(SNB_MBAR4_USD_ADDR, ndev->reg_base +
+				       SNB_SBAR4BASE_OFFSET);
+				writel(SNB_MBAR5_USD_ADDR, ndev->reg_base +
+				       SNB_SBAR5BASE_OFFSET);
+			} else
+				writeq(SNB_MBAR4_USD_ADDR, ndev->reg_base +
+				       SNB_SBAR4BASE_OFFSET);
 		} else {
 			writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
 			       SNB_PBAR2XLAT_OFFSET);
@@ -820,9 +848,17 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 				writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
 				       SNB_PBAR4XLAT_OFFSET);
 			else {
-				writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
-				       SNB_PBAR4XLAT_OFFSET);
-				/* B2B_XLAT_OFFSET is a 64bit register, but can
+				if (ndev->split_bar) {
+					writel(SNB_MBAR4_USD_ADDR,
+					       ndev->reg_base + SNB_PBAR4XLAT_OFFSET);
+					writel(SNB_MBAR5_USD_ADDR,
+					       ndev->reg_base + SNB_PBAR5XLAT_OFFSET);
+				} else
+					writeq(SNB_MBAR4_USD_ADDR,
+					       ndev->reg_base + SNB_PBAR4XLAT_OFFSET);
+
+				/*
+				 * B2B_XLAT_OFFSET is a 64bit register, but can
 				 * only take 32bit writes
 				 */
 				writel(SNB_MBAR01_USD_ADDR & 0xffffffff,
@@ -834,8 +870,15 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 			       SNB_SBAR0BASE_OFFSET);
 			writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
 			       SNB_SBAR2BASE_OFFSET);
-			writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
-			       SNB_SBAR4BASE_OFFSET);
+			if (ndev->split_bar) {
+				writel(SNB_MBAR4_DSD_ADDR, ndev->reg_base +
+				       SNB_SBAR4BASE_OFFSET);
+				writel(SNB_MBAR5_DSD_ADDR, ndev->reg_base +
+				       SNB_SBAR5BASE_OFFSET);
+			} else
+				writeq(SNB_MBAR4_DSD_ADDR, ndev->reg_base +
+				       SNB_SBAR4BASE_OFFSET);
+
 		}
 		break;
 	case NTB_CONN_RP:
@@ -865,7 +908,12 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
 		ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
 		ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
-		ndev->limits.max_mw = SNB_MAX_MW;
+		if (ndev->split_bar) {
+			ndev->reg_ofs.bar5_xlat =
+				ndev->reg_base + SNB_SBAR5XLAT_OFFSET;
+			ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
+		} else
+			ndev->limits.max_mw = SNB_MAX_MW;
 		break;
 	case NTB_CONN_TRANSPARENT:
 		if (ndev->wa_flags & WA_SNB_ERR) {
@@ -892,7 +940,12 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		ndev->reg_ofs.bar2_xlat = ndev->reg_base + SNB_PBAR2XLAT_OFFSET;
 		ndev->reg_ofs.bar4_xlat = ndev->reg_base + SNB_PBAR4XLAT_OFFSET;
 
-		ndev->limits.max_mw = SNB_MAX_MW;
+		if (ndev->split_bar) {
+			ndev->reg_ofs.bar5_xlat =
+				ndev->reg_base + SNB_PBAR5XLAT_OFFSET;
+			ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
+		} else
+			ndev->limits.max_mw = SNB_MAX_MW;
 		break;
 	default:
 		/*
@@ -1499,7 +1552,10 @@ static void ntb_hw_link_up(struct ntb_device *ndev)
 		ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
 		ntb_cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK);
 		ntb_cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP;
-		ntb_cntl |= NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP;
+		ntb_cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP;
+		if (ndev->split_bar)
+			ntb_cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP;
+
 		writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
 	}
 }
@@ -1516,14 +1572,25 @@ static void ntb_hw_link_down(struct ntb_device *ndev)
 	/* Bring NTB link down */
 	ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
 	ntb_cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP);
-	ntb_cntl &= ~(NTB_CNTL_P2S_BAR45_SNOOP | NTB_CNTL_S2P_BAR45_SNOOP);
+	ntb_cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP);
+	if (ndev->split_bar)
+		ntb_cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP);
 	ntb_cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK;
 	writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
 }
 
+static void ntb_max_mw_detect(struct ntb_device *ndev)
+{
+	if (ndev->split_bar)
+		ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
+	else
+		ndev->limits.max_mw = SNB_MAX_MW;
+}
+
 static int ntb_xeon_detect(struct ntb_device *ndev)
 {
-	int rc;
+	int rc, bars_mask;
+	u32 bars;
 	u8 ppd;
 
 	ndev->hw_type = SNB_HW;
@@ -1537,6 +1604,8 @@ static int ntb_xeon_detect(struct ntb_device *ndev)
 	else
 		ndev->dev_type = NTB_DEV_DSD;
 
+	ndev->split_bar = (ppd & SNB_PPD_SPLIT_BAR) ? 1 : 0;
+
 	switch (ppd & SNB_PPD_CONN_TYPE) {
 	case NTB_CONN_B2B:
 		dev_info(&ndev->pdev->dev, "Conn Type = B2B\n");
@@ -1555,12 +1624,25 @@ static int ntb_xeon_detect(struct ntb_device *ndev)
 		 * NTB. We will just force correct here.
 		 */
 		ndev->dev_type = NTB_DEV_USD;
+
+		/*
+		 * This is a way for transparent BAR to figure out if we
+		 * are doing split BAR or not. There is no way for the hw
+		 * on the transparent side to know and set the PPD.
+		 */
+		bars_mask = pci_select_bars(ndev->pdev, IORESOURCE_MEM);
+		bars = hweight32(bars_mask);
+		if (bars == (HSX_SPLITBAR_MAX_MW + 1))
+			ndev->split_bar = 1;
+
 		break;
 	default:
 		dev_err(&ndev->pdev->dev, "Unknown PPD %x\n", ppd);
 		return -ENODEV;
 	}
 
+	ntb_max_mw_detect(ndev);
+
 	return 0;
 }
 
@@ -1638,22 +1720,50 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		goto err;
 
-	rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, KBUILD_MODNAME);
-	if (rc)
+	ndev->mw = kcalloc(sizeof(struct ntb_mw), ndev->limits.max_mw,
+			   GFP_KERNEL);
+	if (!ndev->mw) {
+		rc = -ENOMEM;
 		goto err1;
+	}
+
+	if (ndev->split_bar)
+		rc = pci_request_selected_regions(pdev, NTB_SPLITBAR_MASK,
+						  KBUILD_MODNAME);
+	else
+		rc = pci_request_selected_regions(pdev, NTB_BAR_MASK,
+						  KBUILD_MODNAME);
+
+	if (rc)
+		goto err2;
 
 	ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO);
 	if (!ndev->reg_base) {
 		dev_warn(&pdev->dev, "Cannot remap BAR 0\n");
 		rc = -EIO;
-		goto err2;
+		goto err3;
 	}
 
-	for (i = 0; i < NTB_MAX_NUM_MW; i++) {
+	for (i = 0; i < ndev->limits.max_mw; i++) {
 		ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i));
-		ndev->mw[i].vbase =
-		    ioremap_wc(pci_resource_start(pdev, MW_TO_BAR(i)),
-			       ndev->mw[i].bar_sz);
+
+		/*
+		 * with the errata we need to steal last of the memory
+		 * windows for workarounds and they point to MMIO registers.
+		 */
+		if ((ndev->wa_flags & WA_SNB_ERR) &&
+		    (i == (ndev->limits.max_mw - 1))) {
+			ndev->mw[i].vbase =
+				ioremap_nocache(pci_resource_start(pdev,
+							MW_TO_BAR(i)),
+						ndev->mw[i].bar_sz);
+		} else {
+			ndev->mw[i].vbase =
+				ioremap_wc(pci_resource_start(pdev,
+							MW_TO_BAR(i)),
+					   ndev->mw[i].bar_sz);
+		}
+
 		dev_info(&pdev->dev, "MW %d size %llu\n", i,
 			 (unsigned long long) ndev->mw[i].bar_sz);
 		if (!ndev->mw[i].vbase) {
@@ -1668,7 +1778,7 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc) {
 		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
-			goto err3;
+			goto err4;
 
 		dev_warn(&pdev->dev, "Cannot DMA highmem\n");
 	}
@@ -1677,22 +1787,22 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc) {
 		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (rc)
-			goto err3;
+			goto err4;
 
 		dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
 	}
 
 	rc = ntb_device_setup(ndev);
 	if (rc)
-		goto err3;
+		goto err4;
 
 	rc = ntb_create_callbacks(ndev);
 	if (rc)
-		goto err4;
+		goto err5;
 
 	rc = ntb_setup_interrupts(ndev);
 	if (rc)
-		goto err5;
+		goto err6;
 
 	/* The scratchpad registers keep the values between rmmod/insmod,
 	 * blast them now
@@ -1704,24 +1814,29 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	rc = ntb_transport_init(pdev);
 	if (rc)
-		goto err6;
+		goto err7;
 
 	ntb_hw_link_up(ndev);
 
 	return 0;
 
-err6:
+err7:
 	ntb_free_interrupts(ndev);
-err5:
+err6:
 	ntb_free_callbacks(ndev);
-err4:
+err5:
 	ntb_device_free(ndev);
-err3:
+err4:
 	for (i--; i >= 0; i--)
 		iounmap(ndev->mw[i].vbase);
 	iounmap(ndev->reg_base);
+err3:
+	if (ndev->split_bar)
+		pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK);
+	else
+		pci_release_selected_regions(pdev, NTB_BAR_MASK);
 err2:
-	pci_release_selected_regions(pdev, NTB_BAR_MASK);
+	kfree(ndev->mw);
 err1:
 	pci_disable_device(pdev);
 err:
@@ -1745,11 +1860,19 @@ static void ntb_pci_remove(struct pci_dev *pdev)
 	ntb_free_callbacks(ndev);
 	ntb_device_free(ndev);
 
-	for (i = 0; i < NTB_MAX_NUM_MW; i++)
+	/* need to reset max_mw limits so we can unmap properly */
+	if (ndev->hw_type == SNB_HW)
+		ntb_max_mw_detect(ndev);
+
+	for (i = 0; i < ndev->limits.max_mw; i++)
 		iounmap(ndev->mw[i].vbase);
 
+	kfree(ndev->mw);
 	iounmap(ndev->reg_base);
-	pci_release_selected_regions(pdev, NTB_BAR_MASK);
+	if (ndev->split_bar)
+		pci_release_selected_regions(pdev, NTB_SPLITBAR_MASK);
+	else
+		pci_release_selected_regions(pdev, NTB_BAR_MASK);
 	pci_disable_device(pdev);
 	ntb_free_debugfs(ndev);
 	kfree(ndev);
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
index 5380ca1..96de5fc 100644
--- a/drivers/ntb/ntb_hw.h
+++ b/drivers/ntb/ntb_hw.h
@@ -78,14 +78,16 @@ static inline void writeq(u64 val, void __iomem *addr)
 
 #define NTB_BAR_MMIO		0
 #define NTB_BAR_23		2
-#define NTB_BAR_45		4
+#define NTB_BAR_4		4
+#define NTB_BAR_5		5
+
 #define NTB_BAR_MASK		((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\
-				 (1 << NTB_BAR_45))
+				 (1 << NTB_BAR_4))
+#define NTB_SPLITBAR_MASK	((1 << NTB_BAR_MMIO) | (1 << NTB_BAR_23) |\
+				 (1 << NTB_BAR_4) | (1 << NTB_BAR_5))
 
 #define NTB_HB_TIMEOUT		msecs_to_jiffies(1000)
 
-#define NTB_MAX_NUM_MW		2
-
 enum ntb_hw_event {
 	NTB_EVENT_SW_EVENT0 = 0,
 	NTB_EVENT_SW_EVENT1,
@@ -115,7 +117,7 @@ struct ntb_device {
 	struct pci_dev *pdev;
 	struct msix_entry *msix_entries;
 	void __iomem *reg_base;
-	struct ntb_mw mw[NTB_MAX_NUM_MW];
+	struct ntb_mw *mw;
 	struct {
 		unsigned char max_mw;
 		unsigned char max_spads;
@@ -128,6 +130,7 @@ struct ntb_device {
 		void __iomem *rdb;
 		void __iomem *bar2_xlat;
 		void __iomem *bar4_xlat;
+		void __iomem *bar5_xlat;
 		void __iomem *spad_write;
 		void __iomem *spad_read;
 		void __iomem *lnk_cntl;
@@ -147,6 +150,7 @@ struct ntb_device {
 	unsigned char link_width;
 	unsigned char link_speed;
 	unsigned char link_status;
+	unsigned char split_bar;
 
 	struct delayed_work hb_timer;
 	unsigned long last_ts;
diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h
index 0787205..f028ff8 100644
--- a/drivers/ntb/ntb_regs.h
+++ b/drivers/ntb/ntb_regs.h
@@ -57,6 +57,7 @@
 #define SNB_MAX_DB_BITS		15
 #define SNB_LINK_DB		15
 #define SNB_DB_BITS_PER_VEC	5
+#define HSX_SPLITBAR_MAX_MW	3
 #define SNB_MAX_MW		2
 #define SNB_ERRATA_MAX_MW	1
 
@@ -72,15 +73,20 @@
 
 #define SNB_PBAR2LMT_OFFSET	0x0000
 #define SNB_PBAR4LMT_OFFSET	0x0008
+#define SNB_PBAR5LMT_OFFSET	0x000C
 #define SNB_PBAR2XLAT_OFFSET	0x0010
 #define SNB_PBAR4XLAT_OFFSET	0x0018
+#define SNB_PBAR5XLAT_OFFSET	0x001C
 #define SNB_SBAR2LMT_OFFSET	0x0020
 #define SNB_SBAR4LMT_OFFSET	0x0028
+#define SNB_SBAR5LMT_OFFSET	0x002C
 #define SNB_SBAR2XLAT_OFFSET	0x0030
 #define SNB_SBAR4XLAT_OFFSET	0x0038
+#define SNB_SBAR5XLAT_OFFSET	0x003C
 #define SNB_SBAR0BASE_OFFSET	0x0040
 #define SNB_SBAR2BASE_OFFSET	0x0048
 #define SNB_SBAR4BASE_OFFSET	0x0050
+#define SNB_SBAR5BASE_OFFSET	0x0054
 #define SNB_NTBCNTL_OFFSET	0x0058
 #define SNB_SBDF_OFFSET		0x005C
 #define SNB_PDOORBELL_OFFSET	0x0060
@@ -96,12 +102,18 @@
 #define SNB_B2B_XLAT_OFFSETL	0x0144
 #define SNB_B2B_XLAT_OFFSETU	0x0148
 
-#define SNB_MBAR01_USD_ADDR	0x000000210000000CULL
-#define SNB_MBAR23_USD_ADDR	0x000000410000000CULL
-#define SNB_MBAR45_USD_ADDR	0x000000810000000CULL
-#define SNB_MBAR01_DSD_ADDR	0x000000200000000CULL
-#define SNB_MBAR23_DSD_ADDR	0x000000400000000CULL
-#define SNB_MBAR45_DSD_ADDR	0x000000800000000CULL
+/*
+ * The addresses are setup so the 32bit BARs can function. Thus
+ * the addresses are all in 32bit space
+ */
+#define SNB_MBAR01_USD_ADDR	0x000000002100000CULL
+#define SNB_MBAR23_USD_ADDR	0x000000004100000CULL
+#define SNB_MBAR4_USD_ADDR	0x000000008100000CULL
+#define SNB_MBAR5_USD_ADDR	0x00000000A100000CULL
+#define SNB_MBAR01_DSD_ADDR	0x000000002000000CULL
+#define SNB_MBAR23_DSD_ADDR	0x000000004000000CULL
+#define SNB_MBAR4_DSD_ADDR	0x000000008000000CULL
+#define SNB_MBAR5_DSD_ADDR	0x00000000A000000CULL
 
 #define BWD_MSIX_CNT		34
 #define BWD_MAX_SPADS		16
@@ -150,13 +162,16 @@
 #define NTB_CNTL_LINK_DISABLE		(1 << 1)
 #define NTB_CNTL_S2P_BAR23_SNOOP	(1 << 2)
 #define NTB_CNTL_P2S_BAR23_SNOOP	(1 << 4)
-#define NTB_CNTL_S2P_BAR45_SNOOP	(1 << 6)
-#define NTB_CNTL_P2S_BAR45_SNOOP	(1 << 8)
+#define NTB_CNTL_S2P_BAR4_SNOOP	(1 << 6)
+#define NTB_CNTL_P2S_BAR4_SNOOP	(1 << 8)
+#define NTB_CNTL_S2P_BAR5_SNOOP	(1 << 12)
+#define NTB_CNTL_P2S_BAR5_SNOOP	(1 << 14)
 #define BWD_CNTL_LINK_DOWN		(1 << 16)
 
 #define NTB_PPD_OFFSET		0x00D4
 #define SNB_PPD_CONN_TYPE	0x0003
 #define SNB_PPD_DEV_TYPE	0x0010
+#define SNB_PPD_SPLIT_BAR	(1 << 6)
 #define BWD_PPD_INIT_LINK	0x0008
 #define BWD_PPD_CONN_TYPE	0x0300
 #define BWD_PPD_DEV_TYPE	0x1000


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 6/6] ntb: workaround for high traffic hardware hang
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
                   ` (4 preceding siblings ...)
  2014-08-28 20:53 ` [PATCH 5/6] ntb: Adding split BAR support for Haswell platforms Dave Jiang
@ 2014-08-28 20:53 ` Dave Jiang
  2014-09-08  3:19   ` Jon Mason
  2014-09-08  2:54 ` [PATCH 0/6] NTB bug fixes and hardware workarounds Jon Mason
  6 siblings, 1 reply; 9+ messages in thread
From: Dave Jiang @ 2014-08-28 20:53 UTC (permalink / raw)
  To: jdmason; +Cc: linux-kernel

A hardware erratum causes the NTB to hang when there is heavy bi-directional
traffic in addition to the usage of BAR0/1 (where the registers reside,
including the doorbell registers to trigger interrupts).

This workaround is only available on the Haswell platform.
The workaround is to enable split BAR in the BIOS to allow the 64bit BAR4 to
be split into two 32bit BARs, BAR4 and BAR5. BAR4 shall be pointed to the
LAPIC region of the remote host. We will bypass the doorbell mechanism and
directly trigger the MSIX interrupts. The offsets and vectors are exchanged
during transport scratch pad negotiation. The scratch pads are now overloaded
in order to allow the exchange of the information. This gets around using
the doorbell and, together with additional pcode changes in the BIOS,
prevents the lockup.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/ntb/ntb_hw.c        |  177 +++++++++++++++++++++++++++++++++++++------
 drivers/ntb/ntb_hw.h        |    9 ++
 drivers/ntb/ntb_regs.h      |    1 
 drivers/ntb/ntb_transport.c |  126 ++++++++++++++++++-------------
 4 files changed, 237 insertions(+), 76 deletions(-)

diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index cef9d8a..97e18c3 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -53,6 +53,8 @@
 #include <linux/pci.h>
 #include <linux/random.h>
 #include <linux/slab.h>
+#include <linux/msi.h>
+#include <linux/interrupt.h>
 #include "ntb_hw.h"
 #include "ntb_regs.h"
 
@@ -150,17 +152,19 @@ static void ntb_set_errata_flags(struct ntb_device *ndev)
 	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
 	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
 	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
-	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
 	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
 	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
 	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
-	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
 	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
 	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
 	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
-	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
 		ndev->wa_flags |= WA_SNB_ERR;
 		break;
+	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
+	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
+		ndev->wa_flags |= WA_HSX_ERR;
+		break;
 	}
 }
 
@@ -209,9 +213,13 @@ static void ntb_irq_work(unsigned long data)
 		struct ntb_device *ndev = db_cb->ndev;
 		unsigned long mask;
 
-		mask = readw(ndev->reg_ofs.ldb_mask);
-		clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
-		writew(mask, ndev->reg_ofs.ldb_mask);
+		if (ndev->wa_flags & WA_HSX_ERR)
+			enable_irq(db_cb->irq);
+		else {
+			mask = readw(ndev->reg_ofs.ldb_mask);
+			clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
+			writew(mask, ndev->reg_ofs.ldb_mask);
+		}
 	}
 }
 
@@ -246,9 +254,12 @@ int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
 		     (unsigned long) &ndev->db_cb[idx]);
 
 	/* unmask interrupt */
-	mask = readw(ndev->reg_ofs.ldb_mask);
-	clear_bit(idx * ndev->bits_per_vector, &mask);
-	writew(mask, ndev->reg_ofs.ldb_mask);
+	if (!(ndev->wa_flags & WA_HSX_ERR)) {
+		/* unmask interrupt */
+		mask = readw(ndev->reg_ofs.ldb_mask);
+		clear_bit(idx * ndev->bits_per_vector, &mask);
+		writew(mask, ndev->reg_ofs.ldb_mask);
+	}
 
 	return 0;
 }
@@ -268,9 +279,11 @@ void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
 	if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback)
 		return;
 
-	mask = readw(ndev->reg_ofs.ldb_mask);
-	set_bit(idx * ndev->bits_per_vector, &mask);
-	writew(mask, ndev->reg_ofs.ldb_mask);
+	if (!(ndev->wa_flags & WA_HSX_ERR)) {
+		mask = readw(ndev->reg_ofs.ldb_mask);
+		set_bit(idx * ndev->bits_per_vector, &mask);
+		writew(mask, ndev->reg_ofs.ldb_mask);
+	}
 
 	tasklet_disable(&ndev->db_cb[idx].irq_work);
 
@@ -518,6 +531,17 @@ void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
 	}
 }
 
+static void ntb_generate_rirq(struct ntb_device *ndev, int vec)
+{
+	if (vec > 2) {
+		dev_err(&ndev->pdev->dev, "%s: vec %d out of bounds\n",
+			__func__, vec);
+		return;
+	}
+
+	writel(ndev->rirq[vec].data, ndev->mw[1].vbase + ndev->rirq[vec].ofs);
+}
+
 /**
  * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
  * @ndev: pointer to ntb_device instance
@@ -532,7 +556,9 @@ void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int db)
 {
 	dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);
 
-	if (ndev->hw_type == BWD_HW)
+	if (ndev->wa_flags & WA_HSX_ERR)
+		ntb_generate_rirq(ndev, db);
+	else if (ndev->hw_type == BWD_HW)
 		writeq((u64) 1 << db, ndev->reg_ofs.rdb);
 	else
 		writew(((1 << ndev->bits_per_vector) - 1) <<
@@ -794,7 +820,26 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 			 * the driver defaults, but write the Limit registers
 			 * first just in case.
 			 */
-			if (ndev->split_bar)
+			if (ndev->wa_flags & WA_HSX_ERR) {
+				/* using BAR4, must be set to 1M */
+				if (ndev->mw[1].bar_sz != 0x100000) {
+					dev_err(&ndev->pdev->dev,
+						"BAR4 must be 1M\n");
+					return -EINVAL;
+				}
+
+				/* set limit to 1M according to spec */
+				writel(pci_resource_start(ndev->pdev, 1) + 0x100000,
+				       ndev->reg_base + SNB_PBAR4LMT_OFFSET);
+				/*
+				 * need to point SBAR4XLAT to remote
+				 * interrupt region
+				 */
+				writel(0xfee00000,
+				       ndev->reg_base + SNB_SBAR4XLAT_OFFSET);
+
+				ndev->limits.max_mw = HSX_ERRATA_MAX_MW;
+			} else if (ndev->split_bar)
 				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
 			else
 				ndev->limits.max_mw = SNB_MAX_MW;
@@ -911,7 +956,22 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		if (ndev->split_bar) {
 			ndev->reg_ofs.bar5_xlat =
 				ndev->reg_base + SNB_SBAR5XLAT_OFFSET;
-			ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
+
+			if (ndev->wa_flags & WA_HSX_ERR) {
+				/* using BAR4, must be set to 1M */
+				if (ndev->mw[1].bar_sz != 0x100000) {
+					dev_err(&ndev->pdev->dev,
+						"BAR4 must be 1M\n");
+					return -EINVAL;
+				}
+
+				/* set limit to 1M according to spec */
+				writel(pci_resource_start(ndev->pdev, 1) + 0x100000,
+				       ndev->reg_base + SNB_PBAR4LMT_OFFSET);
+				writel(0xfee00000, ndev->reg_ofs.bar4_xlat);
+				ndev->limits.max_mw = HSX_ERRATA_MAX_MW;
+			} else
+				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
 		} else
 			ndev->limits.max_mw = SNB_MAX_MW;
 		break;
@@ -943,7 +1003,22 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
 		if (ndev->split_bar) {
 			ndev->reg_ofs.bar5_xlat =
 				ndev->reg_base + SNB_PBAR5XLAT_OFFSET;
-			ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
+
+			if (ndev->wa_flags & WA_HSX_ERR) {
+				/* using BAR4, must be set to 1M */
+				if (ndev->mw[1].bar_sz != 0x100000) {
+					dev_err(&ndev->pdev->dev,
+						"BAR4 must be 1M\n");
+					return -EINVAL;
+				}
+
+				/* set limit to 1M according to spec */
+				writel(pci_resource_start(ndev->pdev, 1) + 0x100000,
+				       ndev->reg_base + SNB_PBAR4LMT_OFFSET);
+				writel(0xfee00000, ndev->reg_ofs.bar4_xlat);
+				ndev->limits.max_mw = HSX_ERRATA_MAX_MW;
+			} else
+				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
 		} else
 			ndev->limits.max_mw = SNB_MAX_MW;
 		break;
@@ -1085,9 +1160,14 @@ static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
 	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
 		db_cb->db_num);
 
-	mask = readw(ndev->reg_ofs.ldb_mask);
-	set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
-	writew(mask, ndev->reg_ofs.ldb_mask);
+	if (ndev->wa_flags & WA_HSX_ERR) {
+		disable_irq_nosync(irq);
+		db_cb->irq = irq;
+	} else {
+		mask = readw(ndev->reg_ofs.ldb_mask);
+		set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
+		writew(mask, ndev->reg_ofs.ldb_mask);
+	}
 
 	tasklet_schedule(&db_cb->irq_work);
 
@@ -1096,8 +1176,11 @@ static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
 	 * vectors, with the 4th having a single bit for link
 	 * interrupts.
 	 */
-	writew(((1 << ndev->bits_per_vector) - 1) <<
-	       (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.ldb);
+	if (!(ndev->wa_flags & WA_HSX_ERR)) {
+		writew(((1 << ndev->bits_per_vector) - 1) <<
+			(db_cb->db_num * ndev->bits_per_vector),
+			ndev->reg_ofs.ldb);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -1160,6 +1243,9 @@ static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries)
 	struct pci_dev *pdev = ndev->pdev;
 	struct msix_entry *msix;
 	int rc, i;
+	struct msi_desc *entry;
+	u32 laddr = 0;
+	u32 data = 0;
 
 	if (msix_entries < ndev->limits.msix_cnt)
 		return -ENOSPC;
@@ -1191,6 +1277,31 @@ static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries)
 	ndev->num_msix = msix_entries;
 	ndev->max_cbs = msix_entries - 1;
 
+	if (ndev->wa_flags & WA_HSX_ERR) {
+		i = 0;
+
+		/*
+		 * acquire the interrupt region in the LAPIC for the
+		 * MSIX vectors
+		 */
+		list_for_each_entry(entry, &pdev->msi_list, list) {
+			unsigned int offset = ndev->msix_entries[i].entry *
+				PCI_MSIX_ENTRY_SIZE;
+
+			laddr = readl(entry->mask_base + offset +
+					PCI_MSIX_ENTRY_LOWER_ADDR);
+			dev_dbg(&pdev->dev, "local lower MSIX addr(%d): %#x\n",
+				i, laddr);
+			ndev->lirq[i].ofs = 0x1fffff & laddr;
+			data = readl(entry->mask_base + offset +
+					PCI_MSIX_ENTRY_DATA);
+			dev_dbg(&pdev->dev, "local MSIX data(%d): %#x\n",
+				i, data);
+			ndev->lirq[i].data = data;
+			i++;
+		}
+	}
+
 	return 0;
 
 err:
@@ -1288,6 +1399,11 @@ static int ntb_setup_msi(struct ntb_device *ndev)
 	struct pci_dev *pdev = ndev->pdev;
 	int rc;
 
+	if (ndev->wa_flags & WA_HSX_ERR) {
+		dev_err(&pdev->dev, "Platform errata does not support MSI\n");
+		return -EINVAL;
+	}
+
 	rc = pci_enable_msi(pdev);
 	if (rc)
 		return rc;
@@ -1307,6 +1423,11 @@ static int ntb_setup_intx(struct ntb_device *ndev)
 	struct pci_dev *pdev = ndev->pdev;
 	int rc;
 
+	if (ndev->wa_flags & WA_HSX_ERR) {
+		dev_err(&pdev->dev, "Platform errata does not support INTX\n");
+		return -EINVAL;
+	}
+
 	pci_msi_off(pdev);
 
 	/* Verify intx is enabled */
@@ -1720,6 +1841,17 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		goto err;
 
+	if (!ndev->split_bar && (ndev->wa_flags & WA_HSX_ERR)) {
+		dev_warn(&pdev->dev,
+			 "Please config NTB split BAR for errata workaround\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * From this point on we will assume that split BAR is set when
+	 * WA_HSX_ERR is set
+	 */
+
 	ndev->mw = kcalloc(sizeof(struct ntb_mw), ndev->limits.max_mw,
 			   GFP_KERNEL);
 	if (!ndev->mw) {
@@ -1751,8 +1883,7 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		 * with the errata we need to steal last of the memory
 		 * windows for workarounds and they point to MMIO registers.
 		 */
-		if ((ndev->wa_flags & WA_SNB_ERR) &&
-		    (i == (ndev->limits.max_mw - 1))) {
+		if ((ndev->wa_flags & (WA_SNB_ERR | WA_HSX_ERR)) && (i > 0)) {
 			ndev->mw[i].vbase =
 				ioremap_nocache(pci_resource_start(pdev,
 							MW_TO_BAR(i)),
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
index 96de5fc..d59650e 100644
--- a/drivers/ntb/ntb_hw.h
+++ b/drivers/ntb/ntb_hw.h
@@ -109,9 +109,16 @@ struct ntb_db_cb {
 	void *data;
 	struct ntb_device *ndev;
 	struct tasklet_struct irq_work;
+	unsigned int irq;
+};
+
+struct msix_info {
+	u32 ofs;
+	u32 data;
 };
 
 #define WA_SNB_ERR	0x00000001
+#define WA_HSX_ERR	0x00000002
 
 struct ntb_device {
 	struct pci_dev *pdev;
@@ -161,6 +168,8 @@ struct ntb_device {
 	struct dentry *debugfs_info;
 
 	unsigned int wa_flags;
+	struct msix_info lirq[4];
+	struct msix_info rirq[4];
 };
 
 /**
diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h
index f028ff8..7eb1440 100644
--- a/drivers/ntb/ntb_regs.h
+++ b/drivers/ntb/ntb_regs.h
@@ -60,6 +60,7 @@
 #define HSX_SPLITBAR_MAX_MW	3
 #define SNB_MAX_MW		2
 #define SNB_ERRATA_MAX_MW	1
+#define HSX_ERRATA_MAX_MW	1
 
 #define SNB_DB_HW_LINK		0x8000
 
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 24f0ac1..7ab4d63 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -56,6 +56,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/sched.h>
 #include "ntb_hw.h"
 
 #define NTB_TRANSPORT_VERSION	3
@@ -188,15 +189,24 @@ struct ntb_payload_header {
 	unsigned int flags;
 };
 
+/*
+ * Using 7 scratch pads, 1 left
+ * VERSION
+ * QP_LINKS
+ * NUM_QPS NUM_MWS
+ * MW0_SZ MW1_SZ MW2_SZ MW3_SZ
+ * DATA0 MSIX_OFS0
+ * DATA1 MSIX_OFS1
+ * DATA2 MSIX_OFS2
+ */
 enum {
 	VERSION = 0,
 	QP_LINKS,
-	NUM_QPS,
 	NUM_MWS,
-	MW0_SZ_HIGH,
-	MW0_SZ_LOW,
-	MW1_SZ_HIGH,
-	MW1_SZ_LOW,
+	MW_SZ,
+	MSIX_OFS0,
+	MSIX_OFS1,
+	MSIX_OFS2,
 	MAX_SPAD,
 };
 
@@ -687,37 +697,45 @@ static void ntb_transport_link_work(struct work_struct *work)
 	int rc, i;
 
 	/* send the local info, in the opposite order of the way we read it */
-	for (i = 0; i < ntb_max_mw(ndev); i++) {
-		rc = ntb_write_remote_spad(ndev, MW0_SZ_HIGH + (i * 2),
-					   ntb_get_mw_size(ndev, i) >> 32);
-		if (rc) {
-			dev_err(&pdev->dev, "Error writing %u to remote spad %d\n",
-				(u32)(ntb_get_mw_size(ndev, i) >> 32),
-				MW0_SZ_HIGH + (i * 2));
-			goto out;
-		}
 
-		rc = ntb_write_remote_spad(ndev, MW0_SZ_LOW + (i * 2),
-					   (u32) ntb_get_mw_size(ndev, i));
+	for (i = 0; i < 3; i++) {
+		val = (ndev->lirq[i].ofs & 0x1fffff) |
+		      (ndev->lirq[i].data & 0xff) << 24;
+
+		rc = ntb_write_remote_spad(ndev, MSIX_OFS0 + i, val);
 		if (rc) {
-			dev_err(&pdev->dev, "Error writing %u to remote spad %d\n",
-				(u32) ntb_get_mw_size(ndev, i),
-				MW0_SZ_LOW + (i * 2));
+			dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
+				val, MSIX_OFS0 + i);
 			goto out;
 		}
 	}
 
-	rc = ntb_write_remote_spad(ndev, NUM_MWS, ntb_max_mw(ndev));
+	if (ntb_max_mw(ndev) > 4) {
+		dev_err(&pdev->dev,
+			"Greater than 4 memory window unsupported!\n");
+		goto out;
+	}
+
+	val = 0;
+	for (i = 0; i < ntb_max_mw(ndev); i++) {
+		u32 size;
+
+		size = ilog2(rounddown_pow_of_two(ntb_get_mw_size(ndev, i)));
+		val |= (size & 0xff) << (8 * i);
+	}
+
+	rc = ntb_write_remote_spad(ndev, MW_SZ, val);
 	if (rc) {
-		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
-			ntb_max_mw(ndev), NUM_MWS);
+		dev_err(&pdev->dev, "Error writing %#x to remote spad %d\n",
+			val, MW_SZ);
 		goto out;
 	}
 
-	rc = ntb_write_remote_spad(ndev, NUM_QPS, nt->max_qps);
+	val = (ntb_max_mw(ndev) & 0xffff) | (nt->max_qps & 0xffff) << 16;
+	rc = ntb_write_remote_spad(ndev, NUM_MWS, val);
 	if (rc) {
-		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
-			nt->max_qps, NUM_QPS);
+		dev_err(&pdev->dev, "Error writing %#x to remote spad %d\n",
+			val, NUM_MWS);
 		goto out;
 	}
 
@@ -739,46 +757,31 @@ static void ntb_transport_link_work(struct work_struct *work)
 		goto out;
 	dev_dbg(&pdev->dev, "Remote version = %d\n", val);
 
-	rc = ntb_read_remote_spad(ndev, NUM_QPS, &val);
-	if (rc) {
-		dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_QPS);
-		goto out;
-	}
-
-	if (val != nt->max_qps)
-		goto out;
-	dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val);
-
 	rc = ntb_read_remote_spad(ndev, NUM_MWS, &val);
 	if (rc) {
 		dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_MWS);
 		goto out;
 	}
 
-	if (val != ntb_max_mw(ndev))
+	if (((val >> 16) & 0xffff) != nt->max_qps)
 		goto out;
-	dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val);
 
-	for (i = 0; i < ntb_max_mw(ndev); i++) {
-		u64 val64;
+	dev_dbg(&pdev->dev, "Remote max number of qps = %d\n",
+		(val >> 16) & 0xffff);
 
-		rc = ntb_read_remote_spad(ndev, MW0_SZ_HIGH + (i * 2), &val);
-		if (rc) {
-			dev_err(&pdev->dev, "Error reading remote spad %d\n",
-				MW0_SZ_HIGH + (i * 2));
-			goto out1;
-		}
+	if ((val & 0xffff) != ntb_max_mw(ndev))
+		goto out;
 
-		val64 = (u64) val << 32;
+	dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val & 0xffff);
 
-		rc = ntb_read_remote_spad(ndev, MW0_SZ_LOW + (i * 2), &val);
-		if (rc) {
-			dev_err(&pdev->dev, "Error reading remote spad %d\n",
-				MW0_SZ_LOW + (i * 2));
-			goto out1;
-		}
+	rc = ntb_read_remote_spad(ndev, MW_SZ, &val);
+	if (rc) {
+		dev_err(&pdev->dev, "Error reading remote spad %d\n", MW_SZ);
+		goto out1;
+	}
 
-		val64 |= val;
+	for (i = 0; i < ntb_max_mw(ndev); i++) {
+		u64 val64 = 1 << ((val >> (i * 8)) & 0xff);
 
 		dev_dbg(&pdev->dev, "Remote MW%d size = %llu\n", i, val64);
 
@@ -787,6 +790,23 @@ static void ntb_transport_link_work(struct work_struct *work)
 			goto out1;
 	}
 
+	for (i = 0; i < 3; i++) {
+		rc = ntb_read_remote_spad(ndev, MSIX_OFS0 + i, &val);
+		if (rc) {
+			dev_err(&pdev->dev,
+				"Error reading remote spad %d\n",
+				MSIX_OFS0 + i);
+			goto out;
+		}
+
+		ndev->rirq[i].ofs = 0x1ffff & val;
+		ndev->rirq[i].data = (val >> 24) & 0xff;
+		dev_dbg(&pdev->dev, "received MSIX_OFS%d: %#x\n",
+			i, ndev->rirq[i].ofs);
+		dev_dbg(&pdev->dev, "received MSIX_DATA%d: %#x\n",
+			i, ndev->rirq[i].data);
+	}
+
 	nt->transport_link = NTB_LINK_UP;
 
 	for (i = 0; i < nt->max_qps; i++) {


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/6] NTB bug fixes and hardware workarounds
  2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
                   ` (5 preceding siblings ...)
  2014-08-28 20:53 ` [PATCH 6/6] ntb: workaround for high traffic hardware hang Dave Jiang
@ 2014-09-08  2:54 ` Jon Mason
  6 siblings, 0 replies; 9+ messages in thread
From: Jon Mason @ 2014-09-08  2:54 UTC (permalink / raw)
  To: Dave Jiang; +Cc: linux-kernel

On Thu, Aug 28, 2014 at 01:52:57PM -0700, Dave Jiang wrote:
> The following series contains various fixes and cleanup for NTB. It also
> adds the split BAR support on Haswell platform and a hardware errata
> workaround in order to allow interrupts to function during bi-directional
> traffic under stress. 

Accepted all but the last one.

Thanks,
Jon

> 
> ---
> 
> Dave Jiang (6):
>       ntb: Add alignment check to meet hardware requirement
>       ntb: move platform detection to separate function
>       ntb: conslidate reading of PPD to move platform detection earlier
>       ntb: use errata flag set via DID to implement workaround
>       ntb: Adding split BAR support for Haswell platforms
>       ntb: workaround for high traffic hardware hang
> 
> 
>  drivers/ntb/ntb_hw.c        |  603 +++++++++++++++++++++++++++++++++++--------
>  drivers/ntb/ntb_hw.h        |   27 ++
>  drivers/ntb/ntb_regs.h      |   32 ++
>  drivers/ntb/ntb_transport.c |  139 ++++++----
>  4 files changed, 620 insertions(+), 181 deletions(-)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 6/6] ntb: workaround for high traffic hardware hang
  2014-08-28 20:53 ` [PATCH 6/6] ntb: workaround for high traffic hardware hang Dave Jiang
@ 2014-09-08  3:19   ` Jon Mason
  0 siblings, 0 replies; 9+ messages in thread
From: Jon Mason @ 2014-09-08  3:19 UTC (permalink / raw)
  To: Dave Jiang; +Cc: linux-kernel

On Thu, Aug 28, 2014 at 01:53:29PM -0700, Dave Jiang wrote:
> A hardware erratum causes the NTB to hang when there is heavy bi-directional
> traffic in addition to the usage of BAR0/1 (where the registers reside,
> including the doorbell registers to trigger interrupts).
> 
> This workaround is only available on the Haswell platform.
> The workaround is to enable split BAR in the BIOS to allow the 64bit BAR4 to
> be split into two 32bit BARs, BAR4 and BAR5. BAR4 shall be pointed to the
> LAPIC region of the remote host. We will bypass the doorbell mechanism and
> directly trigger the MSIX interrupts. The offsets and vectors are exchanged
> during transport scratch pad negotiation. The scratch pads are now overloaded
> in order to allow the exchange of the information. This gets around using
> the doorbell and, together with additional pcode changes in the BIOS,
> prevents the lockup.

I REALLY don't like a driver mucking with the MSI-X table to work
around hardware issues.  I don't see any precedent for this kind of
behavior in other drivers.  Also, this patch is fairly invasive.  I
realize that there isn't much alternative, other than polling.  In
fact, I think I'd rather see polling.

Thanks,
Jon

> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/ntb/ntb_hw.c        |  177 +++++++++++++++++++++++++++++++++++++------
>  drivers/ntb/ntb_hw.h        |    9 ++
>  drivers/ntb/ntb_regs.h      |    1 
>  drivers/ntb/ntb_transport.c |  126 ++++++++++++++++++-------------
>  4 files changed, 237 insertions(+), 76 deletions(-)
> 
> diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
> index cef9d8a..97e18c3 100644
> --- a/drivers/ntb/ntb_hw.c
> +++ b/drivers/ntb/ntb_hw.c
> @@ -53,6 +53,8 @@
>  #include <linux/pci.h>
>  #include <linux/random.h>
>  #include <linux/slab.h>
> +#include <linux/msi.h>
> +#include <linux/interrupt.h>
>  #include "ntb_hw.h"
>  #include "ntb_regs.h"
>  
> @@ -150,17 +152,19 @@ static void ntb_set_errata_flags(struct ntb_device *ndev)
>  	case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
>  	case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
>  	case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
> -	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
>  	case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
>  	case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
>  	case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
> -	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
>  	case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
>  	case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
>  	case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
> -	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
>  		ndev->wa_flags |= WA_SNB_ERR;
>  		break;
> +	case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
> +	case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
> +	case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
> +		ndev->wa_flags |= WA_HSX_ERR;
> +		break;
>  	}
>  }
>  
> @@ -209,9 +213,13 @@ static void ntb_irq_work(unsigned long data)
>  		struct ntb_device *ndev = db_cb->ndev;
>  		unsigned long mask;
>  
> -		mask = readw(ndev->reg_ofs.ldb_mask);
> -		clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
> -		writew(mask, ndev->reg_ofs.ldb_mask);
> +		if (ndev->wa_flags & WA_HSX_ERR)
> +			enable_irq(db_cb->irq);
> +		else {
> +			mask = readw(ndev->reg_ofs.ldb_mask);
> +			clear_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
> +			writew(mask, ndev->reg_ofs.ldb_mask);
> +		}
>  	}
>  }
>  
> @@ -246,9 +254,12 @@ int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
>  		     (unsigned long) &ndev->db_cb[idx]);
>  
>  	/* unmask interrupt */
> -	mask = readw(ndev->reg_ofs.ldb_mask);
> -	clear_bit(idx * ndev->bits_per_vector, &mask);
> -	writew(mask, ndev->reg_ofs.ldb_mask);
> +	if (!(ndev->wa_flags & WA_HSX_ERR)) {
> +		/* unmask interrupt */
> +		mask = readw(ndev->reg_ofs.ldb_mask);
> +		clear_bit(idx * ndev->bits_per_vector, &mask);
> +		writew(mask, ndev->reg_ofs.ldb_mask);
> +	}
>  
>  	return 0;
>  }
> @@ -268,9 +279,11 @@ void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
>  	if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback)
>  		return;
>  
> -	mask = readw(ndev->reg_ofs.ldb_mask);
> -	set_bit(idx * ndev->bits_per_vector, &mask);
> -	writew(mask, ndev->reg_ofs.ldb_mask);
> +	if (!(ndev->wa_flags & WA_HSX_ERR)) {
> +		mask = readw(ndev->reg_ofs.ldb_mask);
> +		set_bit(idx * ndev->bits_per_vector, &mask);
> +		writew(mask, ndev->reg_ofs.ldb_mask);
> +	}
>  
>  	tasklet_disable(&ndev->db_cb[idx].irq_work);
>  
> @@ -518,6 +531,17 @@ void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
>  	}
>  }
>  
> +static void ntb_generate_rirq(struct ntb_device *ndev, int vec)
> +{
> +	if (vec > 2) {
> +		dev_err(&ndev->pdev->dev, "%s: vec %d out of bounds\n",
> +			__func__, vec);
> +		return;
> +	}
> +
> +	writel(ndev->rirq[vec].data, ndev->mw[1].vbase + ndev->rirq[vec].ofs);
> +}
> +
>  /**
>   * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
>   * @ndev: pointer to ntb_device instance
> @@ -532,7 +556,9 @@ void ntb_ring_doorbell(struct ntb_device *ndev, unsigned int db)
>  {
>  	dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);
>  
> -	if (ndev->hw_type == BWD_HW)
> +	if (ndev->wa_flags & WA_HSX_ERR)
> +		ntb_generate_rirq(ndev, db);
> +	else if (ndev->hw_type == BWD_HW)
>  		writeq((u64) 1 << db, ndev->reg_ofs.rdb);
>  	else
>  		writew(((1 << ndev->bits_per_vector) - 1) <<
> @@ -794,7 +820,26 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
>  			 * the driver defaults, but write the Limit registers
>  			 * first just in case.
>  			 */
> -			if (ndev->split_bar)
> +			if (ndev->wa_flags & WA_HSX_ERR) {
> +				/* using BAR4, must be set to 1M */
> +				if (ndev->mw[1].bar_sz != 0x100000) {
> +					dev_err(&ndev->pdev->dev,
> +						"BAR4 must be 1M\n");
> +					return -EINVAL;
> +				}
> +
> +				/* set limit to 1M according to spec */
> +				writel(pci_resource_start(ndev->pdev, 1) + 0x100000,
> +				       ndev->reg_base + SNB_PBAR4LMT_OFFSET);
> +				/*
> +				 * need to point SBAR4XLAT to remote
> +				 * interrupt region
> +				 */
> +				writel(0xfee00000,
> +				       ndev->reg_base + SNB_SBAR4XLAT_OFFSET);
> +
> +				ndev->limits.max_mw = HSX_ERRATA_MAX_MW;
> +			} else if (ndev->split_bar)
>  				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
>  			else
>  				ndev->limits.max_mw = SNB_MAX_MW;
> @@ -911,7 +956,22 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
>  		if (ndev->split_bar) {
>  			ndev->reg_ofs.bar5_xlat =
>  				ndev->reg_base + SNB_SBAR5XLAT_OFFSET;
> -			ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
> +
> +			if (ndev->wa_flags & WA_HSX_ERR) {
> +				/* using BAR4, must be set to 1M */
> +				if (ndev->mw[1].bar_sz != 0x100000) {
> +					dev_err(&ndev->pdev->dev,
> +						"BAR4 must be 1M\n");
> +					return -EINVAL;
> +				}
> +
> +				/* set limit to 1M according to spec */
> +				writel(pci_resource_start(ndev->pdev, 1) + 0x100000,
> +				       ndev->reg_base + SNB_PBAR4LMT_OFFSET);
> +				writel(0xfee00000, ndev->reg_ofs.bar4_xlat);
> +				ndev->limits.max_mw = HSX_ERRATA_MAX_MW;
> +			} else
> +				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
>  		} else
>  			ndev->limits.max_mw = SNB_MAX_MW;
>  		break;
> @@ -943,7 +1003,22 @@ static int ntb_xeon_setup(struct ntb_device *ndev)
>  		if (ndev->split_bar) {
>  			ndev->reg_ofs.bar5_xlat =
>  				ndev->reg_base + SNB_PBAR5XLAT_OFFSET;
> -			ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
> +
> +			if (ndev->wa_flags & WA_HSX_ERR) {
> +				/* using BAR4, must be set to 1M */
> +				if (ndev->mw[1].bar_sz != 0x100000) {
> +					dev_err(&ndev->pdev->dev,
> +						"BAR4 must be 1M\n");
> +					return -EINVAL;
> +				}
> +
> +				/* set limit to 1M according to spec */
> +				writel(pci_resource_start(ndev->pdev, 1) + 0x100000,
> +				       ndev->reg_base + SNB_PBAR4LMT_OFFSET);
> +				writel(0xfee00000, ndev->reg_ofs.bar4_xlat);
> +				ndev->limits.max_mw = HSX_ERRATA_MAX_MW;
> +			} else
> +				ndev->limits.max_mw = HSX_SPLITBAR_MAX_MW;
>  		} else
>  			ndev->limits.max_mw = SNB_MAX_MW;
>  		break;
> @@ -1085,9 +1160,14 @@ static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
>  	dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
>  		db_cb->db_num);
>  
> -	mask = readw(ndev->reg_ofs.ldb_mask);
> -	set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
> -	writew(mask, ndev->reg_ofs.ldb_mask);
> +	if (ndev->wa_flags & WA_HSX_ERR) {
> +		disable_irq_nosync(irq);
> +		db_cb->irq = irq;
> +	} else {
> +		mask = readw(ndev->reg_ofs.ldb_mask);
> +		set_bit(db_cb->db_num * ndev->bits_per_vector, &mask);
> +		writew(mask, ndev->reg_ofs.ldb_mask);
> +	}
>  
>  	tasklet_schedule(&db_cb->irq_work);
>  
> @@ -1096,8 +1176,11 @@ static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
>  	 * vectors, with the 4th having a single bit for link
>  	 * interrupts.
>  	 */
> -	writew(((1 << ndev->bits_per_vector) - 1) <<
> -	       (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.ldb);
> +	if (!(ndev->wa_flags & WA_HSX_ERR)) {
> +		writew(((1 << ndev->bits_per_vector) - 1) <<
> +			(db_cb->db_num * ndev->bits_per_vector),
> +			ndev->reg_ofs.ldb);
> +	}
>  
>  	return IRQ_HANDLED;
>  }
> @@ -1160,6 +1243,9 @@ static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries)
>  	struct pci_dev *pdev = ndev->pdev;
>  	struct msix_entry *msix;
>  	int rc, i;
> +	struct msi_desc *entry;
> +	u32 laddr = 0;
> +	u32 data = 0;
>  
>  	if (msix_entries < ndev->limits.msix_cnt)
>  		return -ENOSPC;
> @@ -1191,6 +1277,31 @@ static int ntb_setup_snb_msix(struct ntb_device *ndev, int msix_entries)
>  	ndev->num_msix = msix_entries;
>  	ndev->max_cbs = msix_entries - 1;
>  
> +	if (ndev->wa_flags & WA_HSX_ERR) {
> +		i = 0;
> +
> +		/*
> +		 * acquire the interrupt region in the LAPIC for the
> +		 * MSIX vectors
> +		 */
> +		list_for_each_entry(entry, &pdev->msi_list, list) {
> +			unsigned int offset = ndev->msix_entries[i].entry *
> +				PCI_MSIX_ENTRY_SIZE;
> +
> +			laddr = readl(entry->mask_base + offset +
> +					PCI_MSIX_ENTRY_LOWER_ADDR);
> +			dev_dbg(&pdev->dev, "local lower MSIX addr(%d): %#x\n",
> +				i, laddr);
> +			ndev->lirq[i].ofs = 0x1fffff & laddr;
> +			data = readl(entry->mask_base + offset +
> +					PCI_MSIX_ENTRY_DATA);
> +			dev_dbg(&pdev->dev, "local MSIX data(%d): %#x\n",
> +				i, data);
> +			ndev->lirq[i].data = data;
> +			i++;
> +		}
> +	}
> +
>  	return 0;
>  
>  err:
> @@ -1288,6 +1399,11 @@ static int ntb_setup_msi(struct ntb_device *ndev)
>  	struct pci_dev *pdev = ndev->pdev;
>  	int rc;
>  
> +	if (ndev->wa_flags & WA_HSX_ERR) {
> +		dev_err(&pdev->dev, "Platform errata does not support MSI\n");
> +		return -EINVAL;
> +	}
> +
>  	rc = pci_enable_msi(pdev);
>  	if (rc)
>  		return rc;
> @@ -1307,6 +1423,11 @@ static int ntb_setup_intx(struct ntb_device *ndev)
>  	struct pci_dev *pdev = ndev->pdev;
>  	int rc;
>  
> +	if (ndev->wa_flags & WA_HSX_ERR) {
> +		dev_err(&pdev->dev, "Platform errata does not support INTX\n");
> +		return -EINVAL;
> +	}
> +
>  	pci_msi_off(pdev);
>  
>  	/* Verify intx is enabled */
> @@ -1720,6 +1841,17 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		goto err;
>  
> +	if (!ndev->split_bar && (ndev->wa_flags & WA_HSX_ERR)) {
> +		dev_warn(&pdev->dev,
> +			 "Please config NTB split BAR for errata workaround\n");
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * From this point on we will assume that split BAR is set when
> +	 * WA_HSX_ERR is set
> +	 */
> +
>  	ndev->mw = kcalloc(sizeof(struct ntb_mw), ndev->limits.max_mw,
>  			   GFP_KERNEL);
>  	if (!ndev->mw) {
> @@ -1751,8 +1883,7 @@ static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		 * with the errata we need to steal last of the memory
>  		 * windows for workarounds and they point to MMIO registers.
>  		 */
> -		if ((ndev->wa_flags & WA_SNB_ERR) &&
> -		    (i == (ndev->limits.max_mw - 1))) {
> +		if ((ndev->wa_flags & (WA_SNB_ERR | WA_HSX_ERR)) && (i > 0)) {
>  			ndev->mw[i].vbase =
>  				ioremap_nocache(pci_resource_start(pdev,
>  							MW_TO_BAR(i)),
> diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
> index 96de5fc..d59650e 100644
> --- a/drivers/ntb/ntb_hw.h
> +++ b/drivers/ntb/ntb_hw.h
> @@ -109,9 +109,16 @@ struct ntb_db_cb {
>  	void *data;
>  	struct ntb_device *ndev;
>  	struct tasklet_struct irq_work;
> +	unsigned int irq;
> +};
> +
> +struct msix_info {
> +	u32 ofs;
> +	u32 data;
>  };
>  
>  #define WA_SNB_ERR	0x00000001
> +#define WA_HSX_ERR	0x00000002
>  
>  struct ntb_device {
>  	struct pci_dev *pdev;
> @@ -161,6 +168,8 @@ struct ntb_device {
>  	struct dentry *debugfs_info;
>  
>  	unsigned int wa_flags;
> +	struct msix_info lirq[4];
> +	struct msix_info rirq[4];
>  };
>  
>  /**
> diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h
> index f028ff8..7eb1440 100644
> --- a/drivers/ntb/ntb_regs.h
> +++ b/drivers/ntb/ntb_regs.h
> @@ -60,6 +60,7 @@
>  #define HSX_SPLITBAR_MAX_MW	3
>  #define SNB_MAX_MW		2
>  #define SNB_ERRATA_MAX_MW	1
> +#define HSX_ERRATA_MAX_MW	1
>  
>  #define SNB_DB_HW_LINK		0x8000
>  
> diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
> index 24f0ac1..7ab4d63 100644
> --- a/drivers/ntb/ntb_transport.c
> +++ b/drivers/ntb/ntb_transport.c
> @@ -56,6 +56,7 @@
>  #include <linux/pci.h>
>  #include <linux/slab.h>
>  #include <linux/types.h>
> +#include <linux/sched.h>
>  #include "ntb_hw.h"
>  
>  #define NTB_TRANSPORT_VERSION	3
> @@ -188,15 +189,24 @@ struct ntb_payload_header {
>  	unsigned int flags;
>  };
>  
> +/*
> + * Using 7 scratch pads, 1 left
> + * VERSION
> + * QP_LINKS
> + * NUM_QPS NUM_MWS
> + * MW0_SZ MW1_SZ MW2_SZ MW3_SZ
> + * DATA0 MSIX_OFS0
> + * DATA1 MSIX_OFS1
> + * DATA2 MSIX_OFS2
> + */
>  enum {
>  	VERSION = 0,
>  	QP_LINKS,
> -	NUM_QPS,
>  	NUM_MWS,
> -	MW0_SZ_HIGH,
> -	MW0_SZ_LOW,
> -	MW1_SZ_HIGH,
> -	MW1_SZ_LOW,
> +	MW_SZ,
> +	MSIX_OFS0,
> +	MSIX_OFS1,
> +	MSIX_OFS2,
>  	MAX_SPAD,
>  };
>  
> @@ -687,37 +697,45 @@ static void ntb_transport_link_work(struct work_struct *work)
>  	int rc, i;
>  
>  	/* send the local info, in the opposite order of the way we read it */
> -	for (i = 0; i < ntb_max_mw(ndev); i++) {
> -		rc = ntb_write_remote_spad(ndev, MW0_SZ_HIGH + (i * 2),
> -					   ntb_get_mw_size(ndev, i) >> 32);
> -		if (rc) {
> -			dev_err(&pdev->dev, "Error writing %u to remote spad %d\n",
> -				(u32)(ntb_get_mw_size(ndev, i) >> 32),
> -				MW0_SZ_HIGH + (i * 2));
> -			goto out;
> -		}
>  
> -		rc = ntb_write_remote_spad(ndev, MW0_SZ_LOW + (i * 2),
> -					   (u32) ntb_get_mw_size(ndev, i));
> +	for (i = 0; i < 3; i++) {
> +		val = (ndev->lirq[i].ofs & 0x1fffff) |
> +		      (ndev->lirq[i].data & 0xff) << 24;
> +
> +		rc = ntb_write_remote_spad(ndev, MSIX_OFS0 + i, val);
>  		if (rc) {
> -			dev_err(&pdev->dev, "Error writing %u to remote spad %d\n",
> -				(u32) ntb_get_mw_size(ndev, i),
> -				MW0_SZ_LOW + (i * 2));
> +			dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
> +				val, MSIX_OFS0 + i);
>  			goto out;
>  		}
>  	}
>  
> -	rc = ntb_write_remote_spad(ndev, NUM_MWS, ntb_max_mw(ndev));
> +	if (ntb_max_mw(ndev) > 4) {
> +		dev_err(&pdev->dev,
> +			"Greater than 4 memory window unsupported!\n");
> +		goto out;
> +	}
> +
> +	val = 0;
> +	for (i = 0; i < ntb_max_mw(ndev); i++) {
> +		u32 size;
> +
> +		size = ilog2(rounddown_pow_of_two(ntb_get_mw_size(ndev, i)));
> +		val |= (size & 0xff) << (8 * i);
> +	}
> +
> +	rc = ntb_write_remote_spad(ndev, MW_SZ, val);
>  	if (rc) {
> -		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
> -			ntb_max_mw(ndev), NUM_MWS);
> +		dev_err(&pdev->dev, "Error writing %#x to remote spad %d\n",
> +			val, MW_SZ);
>  		goto out;
>  	}
>  
> -	rc = ntb_write_remote_spad(ndev, NUM_QPS, nt->max_qps);
> +	val = (ntb_max_mw(ndev) & 0xffff) | (nt->max_qps & 0xffff) << 16;
> +	rc = ntb_write_remote_spad(ndev, NUM_MWS, val);
>  	if (rc) {
> -		dev_err(&pdev->dev, "Error writing %x to remote spad %d\n",
> -			nt->max_qps, NUM_QPS);
> +		dev_err(&pdev->dev, "Error writing %#x to remote spad %d\n",
> +			val, NUM_MWS);
>  		goto out;
>  	}
>  
> @@ -739,46 +757,31 @@ static void ntb_transport_link_work(struct work_struct *work)
>  		goto out;
>  	dev_dbg(&pdev->dev, "Remote version = %d\n", val);
>  
> -	rc = ntb_read_remote_spad(ndev, NUM_QPS, &val);
> -	if (rc) {
> -		dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_QPS);
> -		goto out;
> -	}
> -
> -	if (val != nt->max_qps)
> -		goto out;
> -	dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val);
> -
>  	rc = ntb_read_remote_spad(ndev, NUM_MWS, &val);
>  	if (rc) {
>  		dev_err(&pdev->dev, "Error reading remote spad %d\n", NUM_MWS);
>  		goto out;
>  	}
>  
> -	if (val != ntb_max_mw(ndev))
> +	if (((val >> 16) & 0xffff) != nt->max_qps)
>  		goto out;
> -	dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val);
>  
> -	for (i = 0; i < ntb_max_mw(ndev); i++) {
> -		u64 val64;
> +	dev_dbg(&pdev->dev, "Remote max number of qps = %d\n",
> +		(val >> 16) & 0xffff);
>  
> -		rc = ntb_read_remote_spad(ndev, MW0_SZ_HIGH + (i * 2), &val);
> -		if (rc) {
> -			dev_err(&pdev->dev, "Error reading remote spad %d\n",
> -				MW0_SZ_HIGH + (i * 2));
> -			goto out1;
> -		}
> +	if ((val & 0xffff) != ntb_max_mw(ndev))
> +		goto out;
>  
> -		val64 = (u64) val << 32;
> +	dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val & 0xffff);
>  
> -		rc = ntb_read_remote_spad(ndev, MW0_SZ_LOW + (i * 2), &val);
> -		if (rc) {
> -			dev_err(&pdev->dev, "Error reading remote spad %d\n",
> -				MW0_SZ_LOW + (i * 2));
> -			goto out1;
> -		}
> +	rc = ntb_read_remote_spad(ndev, MW_SZ, &val);
> +	if (rc) {
> +		dev_err(&pdev->dev, "Error reading remote spad %d\n", MW_SZ);
> +		goto out1;
> +	}
>  
> -		val64 |= val;
> +	for (i = 0; i < ntb_max_mw(ndev); i++) {
> +		u64 val64 = 1 << ((val >> (i * 8)) & 0xff);
>  
>  		dev_dbg(&pdev->dev, "Remote MW%d size = %llu\n", i, val64);
>  
> @@ -787,6 +790,23 @@ static void ntb_transport_link_work(struct work_struct *work)
>  			goto out1;
>  	}
>  
> +	for (i = 0; i < 3; i++) {
> +		rc = ntb_read_remote_spad(ndev, MSIX_OFS0 + i, &val);
> +		if (rc) {
> +			dev_err(&pdev->dev,
> +				"Error reading remote spad %d\n",
> +				MSIX_OFS0 + i);
> +			goto out;
> +		}
> +
> +		ndev->rirq[i].ofs = 0x1ffff & val;
> +		ndev->rirq[i].data = (val >> 24) & 0xff;
> +		dev_dbg(&pdev->dev, "received MSIX_OFS%d: %#x\n",
> +			i, ndev->rirq[i].ofs);
> +		dev_dbg(&pdev->dev, "received MSIX_DATA%d: %#x\n",
> +			i, ndev->rirq[i].data);
> +	}
> +
>  	nt->transport_link = NTB_LINK_UP;
>  
>  	for (i = 0; i < nt->max_qps; i++) {
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2014-09-08  3:23 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-28 20:52 [PATCH 0/6] NTB bug fixes and hardware workarounds Dave Jiang
2014-08-28 20:53 ` [PATCH 1/6] ntb: Add alignment check to meet hardware requirement Dave Jiang
2014-08-28 20:53 ` [PATCH 2/6] ntb: move platform detection to separate function Dave Jiang
2014-08-28 20:53 ` [PATCH 3/6] ntb: conslidate reading of PPD to move platform detection earlier Dave Jiang
2014-08-28 20:53 ` [PATCH 4/6] ntb: use errata flag set via DID to implement workaround Dave Jiang
2014-08-28 20:53 ` [PATCH 5/6] ntb: Adding split BAR support for Haswell platforms Dave Jiang
2014-08-28 20:53 ` [PATCH 6/6] ntb: workaround for high traffic hardware hang Dave Jiang
2014-09-08  3:19   ` Jon Mason
2014-09-08  2:54 ` [PATCH 0/6] NTB bug fixes and hardware workarounds Jon Mason

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).