linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions
       [not found] <CAErSpo4PK+SQLNfdkLNt9eLc7bpbxbkZCsa8T-tQpid601n0SQ@mail.gmail.com>
@ 2018-08-06 23:25 ` Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 2/9] bnx2x: Do not call pcie_print_link_status() Alexandru Gagniuc
                     ` (9 more replies)
  0 siblings, 10 replies; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

PCIe downtraining happens when both the device and PCIe port are
capable of a larger bus width or higher speed than negotiated.
Downtraining might be indicative of other problems in the system, and
identifying this from userspace is neither intuitive, nor
straightforward.

The easiest way to detect this is with pcie_print_link_status(),
since the bottleneck is usually the link that is downtrained. It's not
a perfect solution, but it works extremely well in most cases.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/pci/pci.c   | 22 ++++++++++++++++++----
 drivers/pci/probe.c | 21 +++++++++++++++++++++
 include/linux/pci.h |  1 +
 3 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 316496e99da9..414ad7b3abdb 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5302,14 +5302,15 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
 }
 
 /**
- * pcie_print_link_status - Report the PCI device's link speed and width
+ * __pcie_print_link_status - Report the PCI device's link speed and width
  * @dev: PCI device to query
+ * @verbose: Be verbose -- print info even when enough bandwidth is available.
  *
  * Report the available bandwidth at the device.  If this is less than the
  * device is capable of, report the device's maximum possible bandwidth and
  * the upstream link that limits its performance to less than that.
  */
-void pcie_print_link_status(struct pci_dev *dev)
+void __pcie_print_link_status(struct pci_dev *dev, bool verbose)
 {
 	enum pcie_link_width width, width_cap;
 	enum pci_bus_speed speed, speed_cap;
@@ -5319,11 +5320,11 @@ void pcie_print_link_status(struct pci_dev *dev)
 	bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap);
 	bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width);
 
-	if (bw_avail >= bw_cap)
+	if (bw_avail >= bw_cap && verbose)
 		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n",
 			 bw_cap / 1000, bw_cap % 1000,
 			 PCIE_SPEED2STR(speed_cap), width_cap);
-	else
+	else if (bw_avail < bw_cap)
 		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
 			 bw_avail / 1000, bw_avail % 1000,
 			 PCIE_SPEED2STR(speed), width,
@@ -5331,6 +5332,19 @@ void pcie_print_link_status(struct pci_dev *dev)
 			 bw_cap / 1000, bw_cap % 1000,
 			 PCIE_SPEED2STR(speed_cap), width_cap);
 }
+
+/**
+ * pcie_print_link_status - Report the PCI device's link speed and width
+ * @dev: PCI device to query
+ *
+ * Report the available bandwidth at the device.  If this is less than the
+ * device is capable of, report the device's maximum possible bandwidth and
+ * the upstream link that limits its performance to less than that.
+ */
+void pcie_print_link_status(struct pci_dev *dev)
+{
+	__pcie_print_link_status(dev, true);
+}
 EXPORT_SYMBOL(pcie_print_link_status);
 
 /**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 611adcd9c169..1c8c26dd2cb2 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2205,6 +2205,24 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 	return dev;
 }
 
+static void pcie_check_upstream_link(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev))
+		return;
+
+	/* Look from the device up to avoid downstream ports with no devices. */
+	if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) &&
+	    (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) &&
+	    (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM))
+		return;
+
+	/* Multi-function PCIe share the same link/status. */
+	if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn)
+		return;
+
+	__pcie_print_link_status(dev, false);
+}
+
 static void pci_init_capabilities(struct pci_dev *dev)
 {
 	/* Enhanced Allocation */
@@ -2240,6 +2258,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	/* Advanced Error Reporting */
 	pci_aer_init(dev);
 
+	/* Check link and detect downtrain errors */
+	pcie_check_upstream_link(dev);
+
 	if (pci_probe_reset_function(dev) == 0)
 		dev->reset_fn = 1;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c133ccfa002e..d212de231259 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1087,6 +1087,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps);
 u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
 			     enum pci_bus_speed *speed,
 			     enum pcie_link_width *width);
+void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
 void pcie_print_link_status(struct pci_dev *dev);
 int pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 2/9] bnx2x: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 3/9] bnxt_en: " Alexandru Gagniuc
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 57348f2b49a3..3eadd6201dff 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -14100,7 +14100,6 @@ static int bnx2x_init_one(struct pci_dev *pdev,
 	       board_info[ent->driver_data].name,
 	       (CHIP_REV(bp) >> 12) + 'A', (CHIP_METAL(bp) >> 4),
 	       dev->base_addr, bp->pdev->irq, dev->dev_addr);
-	pcie_print_link_status(bp->pdev);
 
 	bnx2x_register_phc(bp);
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 3/9] bnxt_en: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 2/9] bnx2x: Do not call pcie_print_link_status() Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 4/9] cxgb4: " Alexandru Gagniuc
                     ` (7 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4394c1162be4..4b3928c89076 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8909,7 +8909,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
 		    board_info[ent->driver_data].name,
 		    (long)pci_resource_start(pdev, 0), dev->dev_addr);
-	pcie_print_link_status(pdev);
 
 	return 0;
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 4/9] cxgb4: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 2/9] bnx2x: Do not call pcie_print_link_status() Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 3/9] bnxt_en: " Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-06 23:25   ` [PATCH v6 5/9] fm10k: " Alexandru Gagniuc
                     ` (6 subsequent siblings)
  9 siblings, 0 replies; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a8926e97935e..049958898c17 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5726,9 +5726,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			free_msix_info(adapter);
 	}
 
-	/* check for PCI Express bandwidth capabiltites */
-	pcie_print_link_status(pdev);
-
 	err = init_rss(adapter);
 	if (err)
 		goto out_free_dev;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 5/9] fm10k: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (2 preceding siblings ...)
  2018-08-06 23:25   ` [PATCH v6 4/9] cxgb4: " Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-07 17:52     ` Jeff Kirsher
  2018-08-06 23:25   ` [PATCH v6 6/9] ixgbe: " Alexandru Gagniuc
                     ` (5 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 15071e4adb98..079fd3c884ea 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -2224,9 +2224,6 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* kick off service timer now, even when interface is down */
 	mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
 
-	/* print warning for non-optimal configurations */
-	pcie_print_link_status(interface->pdev);
-
 	/* report MAC address for logging */
 	dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);
 
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 6/9] ixgbe: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (3 preceding siblings ...)
  2018-08-06 23:25   ` [PATCH v6 5/9] fm10k: " Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-07 17:51     ` Jeff Kirsher
  2018-08-06 23:25   ` [PATCH v6 7/9] net/mlx4: " Alexandru Gagniuc
                     ` (4 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 26 -------------------
 1 file changed, 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 62e57b05a0ae..7ecdc6c03a66 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -241,28 +241,6 @@ static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw)
 	}
 }
 
-static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter,
-				     int expected_gts)
-{
-	struct ixgbe_hw *hw = &adapter->hw;
-	struct pci_dev *pdev;
-
-	/* Some devices are not connected over PCIe and thus do not negotiate
-	 * speed. These devices do not have valid bus info, and thus any report
-	 * we generate may not be correct.
-	 */
-	if (hw->bus.type == ixgbe_bus_type_internal)
-		return;
-
-	/* determine whether to use the parent device */
-	if (ixgbe_pcie_from_parent(&adapter->hw))
-		pdev = adapter->pdev->bus->parent->self;
-	else
-		pdev = adapter->pdev;
-
-	pcie_print_link_status(pdev);
-}
-
 static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
 {
 	if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
@@ -10585,10 +10563,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		break;
 	}
 
-	/* don't check link if we failed to enumerate functions */
-	if (expected_gts > 0)
-		ixgbe_check_minimum_link(adapter, expected_gts);
-
 	err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
 	if (err)
 		strlcpy(part_str, "Unknown", sizeof(part_str));
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 7/9] net/mlx4: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (4 preceding siblings ...)
  2018-08-06 23:25   ` [PATCH v6 6/9] ixgbe: " Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-08  6:10     ` Leon Romanovsky
  2018-08-06 23:25   ` [PATCH v6 8/9] net/mlx5: " Alexandru Gagniuc
                     ` (3 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 872014702fc1..da4d54195853 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3398,13 +3398,6 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 	}
 
-	/* check if the device is functioning at its maximum possible speed.
-	 * No return code for this call, just warn the user in case of PCI
-	 * express device capabilities are under-satisfied by the bus.
-	 */
-	if (!mlx4_is_slave(dev))
-		pcie_print_link_status(dev->persist->pdev);
-
 	/* In master functions, the communication channel must be initialized
 	 * after obtaining its address from fw */
 	if (mlx4_is_master(dev)) {
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (5 preceding siblings ...)
  2018-08-06 23:25   ` [PATCH v6 7/9] net/mlx4: " Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-08  6:08     ` Leon Romanovsky
  2018-08-06 23:25   ` [PATCH v6 9/9] nfp: " Alexandru Gagniuc
                     ` (2 subsequent siblings)
  9 siblings, 1 reply; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 615005e63819..e10f9c2bea3b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1045,10 +1045,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
 		 fw_rev_min(dev), fw_rev_sub(dev));
 
-	/* Only PFs hold the relevant PCIe information for this query */
-	if (mlx5_core_is_pf(dev))
-		pcie_print_link_status(dev->pdev);
-
 	/* on load removing any previous indication of internal error, device is
 	 * up
 	 */
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH v6 9/9] nfp: Do not call pcie_print_link_status()
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (6 preceding siblings ...)
  2018-08-06 23:25   ` [PATCH v6 8/9] net/mlx5: " Alexandru Gagniuc
@ 2018-08-06 23:25   ` Alexandru Gagniuc
  2018-08-07 19:44   ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions David Miller
  2018-08-07 21:41   ` Bjorn Helgaas
  9 siblings, 0 replies; 21+ messages in thread
From: Alexandru Gagniuc @ 2018-08-06 23:25 UTC (permalink / raw)
  To: linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer,
	Alexandru Gagniuc, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

This is now done by the PCI core to warn of sub-optimal bandwidth.

Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
---
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
index 749655c329b2..0324f99bd1a7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c
@@ -1324,7 +1324,6 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev)
 	/*  Finished with card initialization. */
 	dev_info(&pdev->dev,
 		 "Netronome Flow Processor NFP4000/NFP6000 PCIe Card Probe\n");
-	pcie_print_link_status(pdev);
 
 	nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
 	if (!nfp) {
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 6/9] ixgbe: Do not call pcie_print_link_status()
  2018-08-06 23:25   ` [PATCH v6 6/9] ixgbe: " Alexandru Gagniuc
@ 2018-08-07 17:51     ` Jeff Kirsher
  0 siblings, 0 replies; 21+ messages in thread
From: Jeff Kirsher @ 2018-08-07 17:51 UTC (permalink / raw)
  To: Alexandru Gagniuc, linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer, Ariel Elior,
	everest-linux-l2, David S. Miller, Michael Chan, Ganesh Goudar,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

[-- Attachment #1: Type: text/plain, Size: 372 bytes --]

On Mon, 2018-08-06 at 18:25 -0500, Alexandru Gagniuc wrote:
> This is now done by the PCI core to warn of sub-optimal bandwidth.
> 
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 26 ---------------
> ----
>  1 file changed, 26 deletions(-)

Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 5/9] fm10k: Do not call pcie_print_link_status()
  2018-08-06 23:25   ` [PATCH v6 5/9] fm10k: " Alexandru Gagniuc
@ 2018-08-07 17:52     ` Jeff Kirsher
  0 siblings, 0 replies; 21+ messages in thread
From: Jeff Kirsher @ 2018-08-07 17:52 UTC (permalink / raw)
  To: Alexandru Gagniuc, linux-pci, bhelgaas, jakub.kicinski
  Cc: keith.busch, alex_gagniuc, austin_bolen, shyam_iyer, Ariel Elior,
	everest-linux-l2, David S. Miller, Michael Chan, Ganesh Goudar,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

[-- Attachment #1: Type: text/plain, Size: 349 bytes --]

On Mon, 2018-08-06 at 18:25 -0500, Alexandru Gagniuc wrote:
> This is now done by the PCI core to warn of sub-optimal bandwidth.
> 
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 3 ---
>  1 file changed, 3 deletions(-)

Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (7 preceding siblings ...)
  2018-08-06 23:25   ` [PATCH v6 9/9] nfp: " Alexandru Gagniuc
@ 2018-08-07 19:44   ` David Miller
  2018-08-07 21:41   ` Bjorn Helgaas
  9 siblings, 0 replies; 21+ messages in thread
From: David Miller @ 2018-08-07 19:44 UTC (permalink / raw)
  To: mr.nuke.me
  Cc: linux-pci, bhelgaas, jakub.kicinski, keith.busch, alex_gagniuc,
	austin_bolen, shyam_iyer, ariel.elior, everest-linux-l2,
	michael.chan, ganeshgr, jeffrey.t.kirsher, tariqt, saeedm, leon,
	dirk.vandermerwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

From: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Date: Mon,  6 Aug 2018 18:25:35 -0500

> PCIe downtraining happens when both the device and PCIe port are
> capable of a larger bus width or higher speed than negotiated.
> Downtraining might be indicative of other problems in the system, and
> identifying this from userspace is neither intuitive, nor
> straightforward.
> 
> The easiest way to detect this is with pcie_print_link_status(),
> since the bottleneck is usually the link that is downtrained. It's not
> a perfect solution, but it works extremely well in most cases.
> 
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>

Feel free to merge this entire series via the PCI tree.

For the series:

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions
  2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
                     ` (8 preceding siblings ...)
  2018-08-07 19:44   ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions David Miller
@ 2018-08-07 21:41   ` Bjorn Helgaas
  9 siblings, 0 replies; 21+ messages in thread
From: Bjorn Helgaas @ 2018-08-07 21:41 UTC (permalink / raw)
  To: Alexandru Gagniuc
  Cc: linux-pci, bhelgaas, jakub.kicinski, keith.busch, alex_gagniuc,
	austin_bolen, shyam_iyer, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Leon Romanovsky,
	Dirk van der Merwe, netdev, linux-kernel, intel-wired-lan,
	linux-rdma, oss-drivers

On Mon, Aug 06, 2018 at 06:25:35PM -0500, Alexandru Gagniuc wrote:
> PCIe downtraining happens when both the device and PCIe port are
> capable of a larger bus width or higher speed than negotiated.
> Downtraining might be indicative of other problems in the system, and
> identifying this from userspace is neither intuitive, nor
> straightforward.
> 
> The easiest way to detect this is with pcie_print_link_status(),
> since the bottleneck is usually the link that is downtrained. It's not
> a perfect solution, but it works extremely well in most cases.

After this series, there are no callers of pcie_print_link_status(),
which means we *only* print something if a device is capable of more
bandwidth than the fabric can deliver.

ISTR some desire to have this information for NICs even if the device
isn't limited, so I'm just double-checking to make sure the driver
guys are OK with this change.

There are no callers of __pcie_print_link_status() outside the PCI
core, so I would move the declaration from include/linux/pci.h to
drivers/pci/pci.h.

If we agree that we *never* need to print anything unless a device is
constrained by the fabric, I would get rid of the "verbose" flag and
keep everything in pcie_print_link_status().

> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/pci/pci.c   | 22 ++++++++++++++++++----
>  drivers/pci/probe.c | 21 +++++++++++++++++++++
>  include/linux/pci.h |  1 +
>  3 files changed, 40 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 316496e99da9..414ad7b3abdb 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -5302,14 +5302,15 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
>  }
>  
>  /**
> - * pcie_print_link_status - Report the PCI device's link speed and width
> + * __pcie_print_link_status - Report the PCI device's link speed and width
>   * @dev: PCI device to query
> + * @verbose: Be verbose -- print info even when enough bandwidth is available.
>   *
>   * Report the available bandwidth at the device.  If this is less than the
>   * device is capable of, report the device's maximum possible bandwidth and
>   * the upstream link that limits its performance to less than that.
>   */
> -void pcie_print_link_status(struct pci_dev *dev)
> +void __pcie_print_link_status(struct pci_dev *dev, bool verbose)
>  {
>  	enum pcie_link_width width, width_cap;
>  	enum pci_bus_speed speed, speed_cap;
> @@ -5319,11 +5320,11 @@ void pcie_print_link_status(struct pci_dev *dev)
>  	bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap);
>  	bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width);
>  
> -	if (bw_avail >= bw_cap)
> +	if (bw_avail >= bw_cap && verbose)
>  		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n",
>  			 bw_cap / 1000, bw_cap % 1000,
>  			 PCIE_SPEED2STR(speed_cap), width_cap);
> -	else
> +	else if (bw_avail < bw_cap)
>  		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
>  			 bw_avail / 1000, bw_avail % 1000,
>  			 PCIE_SPEED2STR(speed), width,
> @@ -5331,6 +5332,19 @@ void pcie_print_link_status(struct pci_dev *dev)
>  			 bw_cap / 1000, bw_cap % 1000,
>  			 PCIE_SPEED2STR(speed_cap), width_cap);
>  }
> +
> +/**
> + * pcie_print_link_status - Report the PCI device's link speed and width
> + * @dev: PCI device to query
> + *
> + * Report the available bandwidth at the device.  If this is less than the
> + * device is capable of, report the device's maximum possible bandwidth and
> + * the upstream link that limits its performance to less than that.
> + */
> +void pcie_print_link_status(struct pci_dev *dev)
> +{
> +	__pcie_print_link_status(dev, true);
> +}
>  EXPORT_SYMBOL(pcie_print_link_status);
>  
>  /**
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 611adcd9c169..1c8c26dd2cb2 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2205,6 +2205,24 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
>  	return dev;
>  }
>  
> +static void pcie_check_upstream_link(struct pci_dev *dev)
> +{
> +	if (!pci_is_pcie(dev))
> +		return;
> +
> +	/* Look from the device up to avoid downstream ports with no devices. */
> +	if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) &&
> +	    (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) &&
> +	    (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM))
> +		return;
> +
> +	/* Multi-function PCIe share the same link/status. */
> +	if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn)
> +		return;
> +
> +	__pcie_print_link_status(dev, false);
> +}
> +
>  static void pci_init_capabilities(struct pci_dev *dev)
>  {
>  	/* Enhanced Allocation */
> @@ -2240,6 +2258,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
>  	/* Advanced Error Reporting */
>  	pci_aer_init(dev);
>  
> +	/* Check link and detect downtrain errors */
> +	pcie_check_upstream_link(dev);
> +
>  	if (pci_probe_reset_function(dev) == 0)
>  		dev->reset_fn = 1;
>  }
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index c133ccfa002e..d212de231259 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1087,6 +1087,7 @@ int pcie_set_mps(struct pci_dev *dev, int mps);
>  u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
>  			     enum pci_bus_speed *speed,
>  			     enum pcie_link_width *width);
> +void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
>  void pcie_print_link_status(struct pci_dev *dev);
>  int pcie_flr(struct pci_dev *dev);
>  int __pci_reset_function_locked(struct pci_dev *dev);
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-06 23:25   ` [PATCH v6 8/9] net/mlx5: " Alexandru Gagniuc
@ 2018-08-08  6:08     ` Leon Romanovsky
  2018-08-08 14:23       ` Tal Gilboa
  0 siblings, 1 reply; 21+ messages in thread
From: Leon Romanovsky @ 2018-08-08  6:08 UTC (permalink / raw)
  To: Alexandru Gagniuc
  Cc: linux-pci, bhelgaas, jakub.kicinski, keith.busch, alex_gagniuc,
	austin_bolen, shyam_iyer, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Dirk van der Merwe, netdev,
	linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

[-- Attachment #1: Type: text/plain, Size: 357 bytes --]

On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
> This is now done by the PCI core to warn of sub-optimal bandwidth.
>
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
>  1 file changed, 4 deletions(-)
>

Thanks,
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 7/9] net/mlx4: Do not call pcie_print_link_status()
  2018-08-06 23:25   ` [PATCH v6 7/9] net/mlx4: " Alexandru Gagniuc
@ 2018-08-08  6:10     ` Leon Romanovsky
  0 siblings, 0 replies; 21+ messages in thread
From: Leon Romanovsky @ 2018-08-08  6:10 UTC (permalink / raw)
  To: Alexandru Gagniuc
  Cc: linux-pci, bhelgaas, jakub.kicinski, keith.busch, alex_gagniuc,
	austin_bolen, shyam_iyer, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Dirk van der Merwe, netdev,
	linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

[-- Attachment #1: Type: text/plain, Size: 355 bytes --]

On Mon, Aug 06, 2018 at 06:25:41PM -0500, Alexandru Gagniuc wrote:
> This is now done by the PCI core to warn of sub-optimal bandwidth.
>
> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> ---
>  drivers/net/ethernet/mellanox/mlx4/main.c | 7 -------
>  1 file changed, 7 deletions(-)
>

Thanks,
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-08  6:08     ` Leon Romanovsky
@ 2018-08-08 14:23       ` Tal Gilboa
  2018-08-08 15:41         ` Leon Romanovsky
  0 siblings, 1 reply; 21+ messages in thread
From: Tal Gilboa @ 2018-08-08 14:23 UTC (permalink / raw)
  To: Leon Romanovsky, Alexandru Gagniuc
  Cc: linux-pci, bhelgaas, jakub.kicinski, keith.busch, alex_gagniuc,
	austin_bolen, shyam_iyer, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Dirk van der Merwe, netdev,
	linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

On 8/8/2018 9:08 AM, Leon Romanovsky wrote:
> On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
>> This is now done by the PCI core to warn of sub-optimal bandwidth.
>>
>> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
>>   1 file changed, 4 deletions(-)
>>
> 
> Thanks,
> Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> 

Alex,
I loaded mlx5 driver with and without these series. The report in dmesg 
is now missing. From what I understood, the status should be reported at 
least once, even if everything is in order. We need this functionality 
to stay.

net-next (dmesg output for 07:00.0):
[270498.625351] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
[270498.632130] mlx5_core 0000:07:00.0: 63.008 Gb/s available PCIe 
bandwidth (8 GT/s x8 link)
[270499.169533] (0000:07:00.0): E-Switch: Total vports 9, per vport: max 
uc(1024) max mc(16384)
[270499.182358] mlx5_core 0000:07:00.0: Port module event: module 0, 
Cable plugged

net-next + patches (dmesg output for 07:00.0):
[  331.608472] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
[  332.564938] (0000:07:00.0): E-Switch: Total vports 9, per vport: max 
uc(1024) max mc(16384)
[  332.616271] mlx5_core 0000:07:00.0: Port module event: module 0, 
Cable plugged

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-08 14:23       ` Tal Gilboa
@ 2018-08-08 15:41         ` Leon Romanovsky
  2018-08-08 15:56           ` Tal Gilboa
  0 siblings, 1 reply; 21+ messages in thread
From: Leon Romanovsky @ 2018-08-08 15:41 UTC (permalink / raw)
  To: Tal Gilboa
  Cc: Alexandru Gagniuc, linux-pci, bhelgaas, jakub.kicinski,
	keith.busch, alex_gagniuc, austin_bolen, shyam_iyer, Ariel Elior,
	everest-linux-l2, David S. Miller, Michael Chan, Ganesh Goudar,
	Jeff Kirsher, Tariq Toukan, Saeed Mahameed, Dirk van der Merwe,
	netdev, linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

[-- Attachment #1: Type: text/plain, Size: 1730 bytes --]

On Wed, Aug 08, 2018 at 05:23:12PM +0300, Tal Gilboa wrote:
> On 8/8/2018 9:08 AM, Leon Romanovsky wrote:
> > On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
> > > This is now done by the PCI core to warn of sub-optimal bandwidth.
> > >
> > > Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> > > ---
> > >   drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
> > >   1 file changed, 4 deletions(-)
> > >
> >
> > Thanks,
> > Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> >
>
> Alex,
> I loaded mlx5 driver with and without these series. The report in dmesg is
> now missing. From what I understood, the status should be reported at least
> once, even if everything is in order.

That is not what this series does; it removes the print completely if the
fabric can deliver more bandwidth than the card is capable of.

> We need this functionality to stay.

I'm not sure that you need this information in the driver's dmesg output;
it should most probably be something globally visible and accessible
per PCI device.

>
> net-next (dmesg output for 07:00.0):
> [270498.625351] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
> [270498.632130] mlx5_core 0000:07:00.0: 63.008 Gb/s available PCIe bandwidth
> (8 GT/s x8 link)
> [270499.169533] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
> uc(1024) max mc(16384)
> [270499.182358] mlx5_core 0000:07:00.0: Port module event: module 0, Cable
> plugged
>
> net-next + patches (dmesg output for 07:00.0):
> [  331.608472] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
> [  332.564938] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
> uc(1024) max mc(16384)
> [  332.616271] mlx5_core 0000:07:00.0: Port module event: module 0, Cable
> plugged
>
>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-08 15:41         ` Leon Romanovsky
@ 2018-08-08 15:56           ` Tal Gilboa
  2018-08-08 16:33             ` Alex G.
  0 siblings, 1 reply; 21+ messages in thread
From: Tal Gilboa @ 2018-08-08 15:56 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Alexandru Gagniuc, linux-pci, bhelgaas, jakub.kicinski,
	keith.busch, alex_gagniuc, austin_bolen, shyam_iyer, Ariel Elior,
	everest-linux-l2, David S. Miller, Michael Chan, Ganesh Goudar,
	Jeff Kirsher, Tariq Toukan, Saeed Mahameed, Dirk van der Merwe,
	netdev, linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

On 8/8/2018 6:41 PM, Leon Romanovsky wrote:
> On Wed, Aug 08, 2018 at 05:23:12PM +0300, Tal Gilboa wrote:
>> On 8/8/2018 9:08 AM, Leon Romanovsky wrote:
>>> On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
>>>> This is now done by the PCI core to warn of sub-optimal bandwidth.
>>>>
>>>> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
>>>> ---
>>>>    drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
>>>>    1 file changed, 4 deletions(-)
>>>>
>>>
>>> Thanks,
>>> Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
>>>
>>
>> Alex,
>> I loaded mlx5 driver with and without these series. The report in dmesg is
>> now missing. From what I understood, the status should be reported at least
>> once, even if everything is in order.
> 
> It is not what this series is doing and it removes prints completely if
> fabric can deliver more than card is capable.
> 
>> We need this functionality to stay.
> 
> I'm not sure that you need this information in driver's dmesg output,
> but most probably something globally visible and accessible per-pci
> device.

Currently we have users that look for it. If we remove the dmesg print 
we need this to be reported elsewhere. Adding it to sysfs for example 
should be a valid solution for our case.

> 
>>
>> net-next (dmesg output for 07:00.0):
>> [270498.625351] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
>> [270498.632130] mlx5_core 0000:07:00.0: 63.008 Gb/s available PCIe bandwidth
>> (8 GT/s x8 link)
>> [270499.169533] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
>> uc(1024) max mc(16384)
>> [270499.182358] mlx5_core 0000:07:00.0: Port module event: module 0, Cable
>> plugged
>>
>> net-next + patches (dmesg output for 07:00.0):
>> [  331.608472] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
>> [  332.564938] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
>> uc(1024) max mc(16384)
>> [  332.616271] mlx5_core 0000:07:00.0: Port module event: module 0, Cable
>> plugged
>>
>>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-08 15:56           ` Tal Gilboa
@ 2018-08-08 16:33             ` Alex G.
  2018-08-08 17:27               ` Leon Romanovsky
  0 siblings, 1 reply; 21+ messages in thread
From: Alex G. @ 2018-08-08 16:33 UTC (permalink / raw)
  To: Tal Gilboa, Leon Romanovsky
  Cc: linux-pci, bhelgaas, jakub.kicinski, keith.busch, alex_gagniuc,
	austin_bolen, shyam_iyer, Ariel Elior, everest-linux-l2,
	David S. Miller, Michael Chan, Ganesh Goudar, Jeff Kirsher,
	Tariq Toukan, Saeed Mahameed, Dirk van der Merwe, netdev,
	linux-kernel, intel-wired-lan, linux-rdma, oss-drivers



On 08/08/2018 10:56 AM, Tal Gilboa wrote:
> On 8/8/2018 6:41 PM, Leon Romanovsky wrote:
>> On Wed, Aug 08, 2018 at 05:23:12PM +0300, Tal Gilboa wrote:
>>> On 8/8/2018 9:08 AM, Leon Romanovsky wrote:
>>>> On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
>>>>> This is now done by the PCI core to warn of sub-optimal bandwidth.
>>>>>
>>>>> Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
>>>>> ---
>>>>>    drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
>>>>>    1 file changed, 4 deletions(-)
>>>>>
>>>>
>>>> Thanks,
>>>> Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
>>>>
>>>
>>> Alex,
>>> I loaded mlx5 driver with and without these series. The report in 
>>> dmesg is
>>> now missing. From what I understood, the status should be reported at 
>>> least
>>> once, even if everything is in order.
>>
>> It is not what this series is doing and it removes prints completely if
>> fabric can deliver more than card is capable.
>>
>>> We need this functionality to stay.
>>
>> I'm not sure that you need this information in driver's dmesg output,
>> but most probably something globally visible and accessible per-pci
>> device.
> 
> Currently we have users that look for it. If we remove the dmesg print 
> we need this to be reported elsewhere. Adding it to sysfs for example 
> should be a valid solution for our case.

I think a stop-gap measure is to leave the pcie_print_link_status() call 
in drivers that really need it for whatever reason. Implementing a 
reliable reporting through sysfs might take some tinkering, and I don't 
think it's a sufficient reason to block the heart of this series -- 
being able to detect bottlenecks and link downtraining.

Alex

>>
>>>
>>> net-next (dmesg output for 07:00.0):
>>> [270498.625351] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
>>> [270498.632130] mlx5_core 0000:07:00.0: 63.008 Gb/s available PCIe 
>>> bandwidth
>>> (8 GT/s x8 link)
>>> [270499.169533] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
>>> uc(1024) max mc(16384)
>>> [270499.182358] mlx5_core 0000:07:00.0: Port module event: module 0, 
>>> Cable
>>> plugged
>>>
>>> net-next + patches (dmesg output for 07:00.0):
>>> [  331.608472] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
>>> [  332.564938] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
>>> uc(1024) max mc(16384)
>>> [  332.616271] mlx5_core 0000:07:00.0: Port module event: module 0, 
>>> Cable
>>> plugged
>>>
>>>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-08 16:33             ` Alex G.
@ 2018-08-08 17:27               ` Leon Romanovsky
  2018-08-09 14:02                 ` Bjorn Helgaas
  0 siblings, 1 reply; 21+ messages in thread
From: Leon Romanovsky @ 2018-08-08 17:27 UTC (permalink / raw)
  To: Alex G.
  Cc: Tal Gilboa, linux-pci, bhelgaas, jakub.kicinski, keith.busch,
	alex_gagniuc, austin_bolen, shyam_iyer, Ariel Elior,
	everest-linux-l2, David S. Miller, Michael Chan, Ganesh Goudar,
	Jeff Kirsher, Tariq Toukan, Saeed Mahameed, Dirk van der Merwe,
	netdev, linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

[-- Attachment #1: Type: text/plain, Size: 2996 bytes --]

On Wed, Aug 08, 2018 at 11:33:51AM -0500, Alex G. wrote:
>
>
> On 08/08/2018 10:56 AM, Tal Gilboa wrote:
> > On 8/8/2018 6:41 PM, Leon Romanovsky wrote:
> > > On Wed, Aug 08, 2018 at 05:23:12PM +0300, Tal Gilboa wrote:
> > > > On 8/8/2018 9:08 AM, Leon Romanovsky wrote:
> > > > > On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
> > > > > > This is now done by the PCI core to warn of sub-optimal bandwidth.
> > > > > >
> > > > > > Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> > > > > > ---
> > > > > >    drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
> > > > > >    1 file changed, 4 deletions(-)
> > > > > >
> > > > >
> > > > > Thanks,
> > > > > Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> > > > >
> > > >
> > > > Alex,
> > > > I loaded mlx5 driver with and without these series. The report
> > > > in dmesg is
> > > > now missing. From what I understood, the status should be
> > > > reported at least
> > > > once, even if everything is in order.
> > >
> > > It is not what this series is doing and it removes prints completely if
> > > fabric can deliver more than card is capable.
> > >
> > > > We need this functionality to stay.
> > >
> > > I'm not sure that you need this information in driver's dmesg output,
> > > but most probably something globally visible and accessible per-pci
> > > device.
> >
> > Currently we have users that look for it. If we remove the dmesg print
> > we need this to be reported elsewhere. Adding it to sysfs for example
> > should be a valid solution for our case.
>
> I think a stop-gap measure is to leave the pcie_print_link_status() call in
> drivers that really need it for whatever reason. Implementing a reliable
> reporting through sysfs might take some tinkering, and I don't think it's a
> sufficient reason to block the heart of this series -- being able to detect
> bottlenecks and link downtraining.

IMHO, you made the right change, and it is better to replace this print with
some more generic solution now, while you are at it, rather than leave leftovers.

Thanks

>
> Alex
>
> > >
> > > >
> > > > net-next (dmesg output for 07:00.0):
> > > > [270498.625351] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
> > > > [270498.632130] mlx5_core 0000:07:00.0: 63.008 Gb/s available
> > > > PCIe bandwidth
> > > > (8 GT/s x8 link)
> > > > [270499.169533] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
> > > > uc(1024) max mc(16384)
> > > > [270499.182358] mlx5_core 0000:07:00.0: Port module event:
> > > > module 0, Cable
> > > > plugged
> > > >
> > > > net-next + patches (dmesg output for 07:00.0):
> > > > [  331.608472] mlx5_core 0000:07:00.0: firmware version: 14.22.4020
> > > > [  332.564938] (0000:07:00.0): E-Switch: Total vports 9, per vport: max
> > > > uc(1024) max mc(16384)
> > > > [  332.616271] mlx5_core 0000:07:00.0: Port module event: module
> > > > 0, Cable
> > > > plugged
> > > >
> > > >

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH v6 8/9] net/mlx5: Do not call pcie_print_link_status()
  2018-08-08 17:27               ` Leon Romanovsky
@ 2018-08-09 14:02                 ` Bjorn Helgaas
  0 siblings, 0 replies; 21+ messages in thread
From: Bjorn Helgaas @ 2018-08-09 14:02 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Alex G.,
	Tal Gilboa, linux-pci, bhelgaas, jakub.kicinski, keith.busch,
	alex_gagniuc, austin_bolen, shyam_iyer, Ariel Elior,
	everest-linux-l2, David S. Miller, Michael Chan, Ganesh Goudar,
	Jeff Kirsher, Tariq Toukan, Saeed Mahameed, Dirk van der Merwe,
	netdev, linux-kernel, intel-wired-lan, linux-rdma, oss-drivers

On Wed, Aug 08, 2018 at 08:27:36PM +0300, Leon Romanovsky wrote:
> On Wed, Aug 08, 2018 at 11:33:51AM -0500, Alex G. wrote:
> >
> >
> > On 08/08/2018 10:56 AM, Tal Gilboa wrote:
> > > On 8/8/2018 6:41 PM, Leon Romanovsky wrote:
> > > > On Wed, Aug 08, 2018 at 05:23:12PM +0300, Tal Gilboa wrote:
> > > > > On 8/8/2018 9:08 AM, Leon Romanovsky wrote:
> > > > > > On Mon, Aug 06, 2018 at 06:25:42PM -0500, Alexandru Gagniuc wrote:
> > > > > > > This is now done by the PCI core to warn of sub-optimal bandwidth.
> > > > > > >
> > > > > > > Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
> > > > > > > ---
> > > > > > >    drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 ----
> > > > > > >    1 file changed, 4 deletions(-)
> > > > > > >
> > > > > >
> > > > > > Thanks,
> > > > > > Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > >
> > > > >
> > > > > Alex,
> > > > > I loaded mlx5 driver with and without these series. The report
> > > > > in dmesg is
> > > > > now missing. From what I understood, the status should be
> > > > > reported at least
> > > > > once, even if everything is in order.
> > > >
> > > > It is not what this series is doing and it removes prints completely if
> > > > fabric can deliver more than card is capable.
> > > >
> > > > > We need this functionality to stay.
> > > >
> > > > I'm not sure that you need this information in driver's dmesg output,
> > > > but most probably something globally visible and accessible per-pci
> > > > device.
> > >
> > > Currently we have users that look for it. If we remove the dmesg print
> > > we need this to be reported elsewhere. Adding it to sysfs for example
> > > should be a valid solution for our case.
> >
> > I think a stop-gap measure is to leave the pcie_print_link_status() call in
> > drivers that really need it for whatever reason. Implementing a reliable
> > reporting through sysfs might take some tinkering, and I don't think it's a
> > sufficient reason to block the heart of this series -- being able to detect
> > bottlenecks and link downtraining.
> 
> IMHO, you did right change and it is better to replace this print to some
> more generic solution now while you are doing it and don't leave leftovers.

I'd like to make forward progress on this, so I propose we merge only
the PCI core change (patch 1/9) and drop the individual driver
changes.  That would mean:

  - We'll get a message from every NIC driver that calls
    pcie_print_link_status() as before.

  - We'll get a new message from the core for every downtrained link.

  - If a link leading to the NIC is downtrained, there will be
    duplicate messages.  Maybe that's overkill but it's not terrible.

I provisionally put the patch below on my pci/enumeration branch.
Objections?


commit c870cc8cbc4d79014f3daa74d1e412f32e42bf1b
Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Date:   Mon Aug 6 18:25:35 2018 -0500

    PCI: Check for PCIe Link downtraining
    
    When both ends of a PCIe Link are capable of a higher bandwidth than is
    currently in use, the Link is said to be "downtrained".  A downtrained Link
    may indicate hardware or configuration problems in the system, but it's
    hard to identify such Links from userspace.
    
    Refactor pcie_print_link_status() so it continues to always print PCIe
    bandwidth information, as several NIC drivers desire.
    
    Add a new internal __pcie_print_link_status() to emit a message only when a
    device's bandwidth is constrained by the fabric and call it from the PCI
    core for all devices, which identifies all downtrained Links.  It also
    emits messages for a few cases that are technically not downtrained, such
    as a x4 device in an open-ended x1 slot.
    
    Signed-off-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
    [bhelgaas: changelog, move __pcie_print_link_status() declaration to
    drivers/pci/, rename pcie_check_upstream_link() to
    pcie_report_downtraining()]
    Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba712e4e..a84d341504a5 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5264,14 +5264,16 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
 }
 
 /**
- * pcie_print_link_status - Report the PCI device's link speed and width
+ * __pcie_print_link_status - Report the PCI device's link speed and width
  * @dev: PCI device to query
+ * @verbose: Print info even when enough bandwidth is available
  *
- * Report the available bandwidth at the device.  If this is less than the
- * device is capable of, report the device's maximum possible bandwidth and
- * the upstream link that limits its performance to less than that.
+ * If the available bandwidth at the device is less than the device is
+ * capable of, report the device's maximum possible bandwidth and the
+ * upstream link that limits its performance.  If @verbose, always print
+ * the available bandwidth, even if the device isn't constrained.
  */
-void pcie_print_link_status(struct pci_dev *dev)
+void __pcie_print_link_status(struct pci_dev *dev, bool verbose)
 {
 	enum pcie_link_width width, width_cap;
 	enum pci_bus_speed speed, speed_cap;
@@ -5281,11 +5283,11 @@ void pcie_print_link_status(struct pci_dev *dev)
 	bw_cap = pcie_bandwidth_capable(dev, &speed_cap, &width_cap);
 	bw_avail = pcie_bandwidth_available(dev, &limiting_dev, &speed, &width);
 
-	if (bw_avail >= bw_cap)
+	if (bw_avail >= bw_cap && verbose)
 		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth (%s x%d link)\n",
 			 bw_cap / 1000, bw_cap % 1000,
 			 PCIE_SPEED2STR(speed_cap), width_cap);
-	else
+	else if (bw_avail < bw_cap)
 		pci_info(dev, "%u.%03u Gb/s available PCIe bandwidth, limited by %s x%d link at %s (capable of %u.%03u Gb/s with %s x%d link)\n",
 			 bw_avail / 1000, bw_avail % 1000,
 			 PCIE_SPEED2STR(speed), width,
@@ -5293,6 +5295,17 @@ void pcie_print_link_status(struct pci_dev *dev)
 			 bw_cap / 1000, bw_cap % 1000,
 			 PCIE_SPEED2STR(speed_cap), width_cap);
 }
+
+/**
+ * pcie_print_link_status - Report the PCI device's link speed and width
+ * @dev: PCI device to query
+ *
+ * Report the available bandwidth at the device.
+ */
+void pcie_print_link_status(struct pci_dev *dev)
+{
+	__pcie_print_link_status(dev, true);
+}
 EXPORT_SYMBOL(pcie_print_link_status);
 
 /**
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 70808c168fb9..ce880dab5bc8 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -263,6 +263,7 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
 enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
 u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
 			   enum pcie_link_width *width);
+void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
 
 /* Single Root I/O Virtualization */
 struct pci_sriov {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index bc147c586643..387fc8ac54ec 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2231,6 +2231,25 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn)
 	return dev;
 }
 
+static void pcie_report_downtraining(struct pci_dev *dev)
+{
+	if (!pci_is_pcie(dev))
+		return;
+
+	/* Look from the device up to avoid downstream ports with no devices */
+	if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) &&
+	    (pci_pcie_type(dev) != PCI_EXP_TYPE_LEG_END) &&
+	    (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM))
+		return;
+
+	/* Multi-function PCIe devices share the same link/status */
+	if (PCI_FUNC(dev->devfn) != 0 || dev->is_virtfn)
+		return;
+
+	/* Print link status only if the device is constrained by the fabric */
+	__pcie_print_link_status(dev, false);
+}
+
 static void pci_init_capabilities(struct pci_dev *dev)
 {
 	/* Enhanced Allocation */
@@ -2266,6 +2285,8 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	/* Advanced Error Reporting */
 	pci_aer_init(dev);
 
+	pcie_report_downtraining(dev);
+
 	if (pci_probe_reset_function(dev) == 0)
 		dev->reset_fn = 1;
 }

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2018-08-09 14:02 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CAErSpo4PK+SQLNfdkLNt9eLc7bpbxbkZCsa8T-tQpid601n0SQ@mail.gmail.com>
2018-08-06 23:25 ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions Alexandru Gagniuc
2018-08-06 23:25   ` [PATCH v6 2/9] bnx2x: Do not call pcie_print_link_status() Alexandru Gagniuc
2018-08-06 23:25   ` [PATCH v6 3/9] bnxt_en: " Alexandru Gagniuc
2018-08-06 23:25   ` [PATCH v6 4/9] cxgb4: " Alexandru Gagniuc
2018-08-06 23:25   ` [PATCH v6 5/9] fm10k: " Alexandru Gagniuc
2018-08-07 17:52     ` Jeff Kirsher
2018-08-06 23:25   ` [PATCH v6 6/9] ixgbe: " Alexandru Gagniuc
2018-08-07 17:51     ` Jeff Kirsher
2018-08-06 23:25   ` [PATCH v6 7/9] net/mlx4: " Alexandru Gagniuc
2018-08-08  6:10     ` Leon Romanovsky
2018-08-06 23:25   ` [PATCH v6 8/9] net/mlx5: " Alexandru Gagniuc
2018-08-08  6:08     ` Leon Romanovsky
2018-08-08 14:23       ` Tal Gilboa
2018-08-08 15:41         ` Leon Romanovsky
2018-08-08 15:56           ` Tal Gilboa
2018-08-08 16:33             ` Alex G.
2018-08-08 17:27               ` Leon Romanovsky
2018-08-09 14:02                 ` Bjorn Helgaas
2018-08-06 23:25   ` [PATCH v6 9/9] nfp: " Alexandru Gagniuc
2018-08-07 19:44   ` [PATCH v6 1/9] PCI: Check for PCIe downtraining conditions David Miller
2018-08-07 21:41   ` Bjorn Helgaas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).