All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH next V1 0/7] Report PCI device link status
@ 2018-01-15 15:35 Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 1/7] PCI: Add a query function for PCI device's speed cap Tal Gilboa
                   ` (6 more replies)
  0 siblings, 7 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

These patches introduce the ability to report PCI link width
and speed for all PCI devices in kernel log. It also warns if
the BW capability is bellow maximum. This would reduce code
duplication between drivers and unify the approach for reporting
PCI link status.
Implemented and tested for Mellanox devices.

v1: Split to multiple patches, calculate BW limitation and remove
MACRO definition for LNKCAP shift (suggested by Bjorn Helgaas).
Multiple fixes - conventions, typos, function naming and functional
(suggested by Tariq Toukan).

Tal Gilboa (7):
  PCI: Add a query function for PCI device's speed cap
  PCI: Add a query function for PCI device's width cap
  PCI: Calculate BW limitation for PCI devices
  PCI: Print PCI device link status in kernel log
  net/mlx4_core: Report PCI properties using dedicated function
  net/mlx5: Report device PCI link status and issues
  net/mlx5e: Use generic PCI function for BW calculation

 drivers/net/ethernet/mellanox/mlx4/main.c         |  81 +----------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  17 +--
 drivers/net/ethernet/mellanox/mlx5/core/main.c    |   2 +
 drivers/pci/pci-sysfs.c                           |  28 ++--
 drivers/pci/pci.c                                 | 163 +++++++++++++++++++++-
 include/linux/pci.h                               |  17 +++
 6 files changed, 191 insertions(+), 117 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH next V1 1/7] PCI: Add a query function for PCI device's speed cap
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 2/7] PCI: Add a query function for PCI device's width cap Tal Gilboa
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

pcie_get_speed_cap() implements the logic for querying a PCI device's
maximum speed capability. Change max_link_speed_show() function to
use pcie_get_speed_cap().

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/pci/pci-sysfs.c | 22 +++++-----------------
 drivers/pci/pci.c       | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h     |  7 +++++++
 3 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index ccc0e28..14fce9d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -159,29 +159,17 @@ static ssize_t max_link_speed_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	u32 linkcap;
+	enum pci_bus_speed speed;
+	const char *speed_str;
 	int err;
-	const char *speed;
 
-	err = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP, &linkcap);
+	err = pcie_get_speed_cap(pci_dev, &speed);
 	if (err)
 		return -EINVAL;
 
-	switch (linkcap & PCI_EXP_LNKCAP_SLS) {
-	case PCI_EXP_LNKCAP_SLS_8_0GB:
-		speed = "8 GT/s";
-		break;
-	case PCI_EXP_LNKCAP_SLS_5_0GB:
-		speed = "5 GT/s";
-		break;
-	case PCI_EXP_LNKCAP_SLS_2_5GB:
-		speed = "2.5 GT/s";
-		break;
-	default:
-		speed = "Unknown speed";
-	}
+	speed_str = PCIE_SPEED2STR(speed);
 
-	return sprintf(buf, "%s\n", speed);
+	return sprintf(buf, "%s\n", speed_str);
 }
 static DEVICE_ATTR_RO(max_link_speed);
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 4a7c686..05fb356 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5050,6 +5050,48 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 EXPORT_SYMBOL(pcie_get_minimum_link);
 
 /**
+ * pcie_get_speed_cap - queries for the PCI device's link speed capability
+ * @dev: PCI device to query
+ * @speed: storage for link speed
+ *
+ * This function queries the PCI device speed capability.
+ */
+int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed)
+{
+	u32 lnkcap;
+	int err1, err2;
+
+	*speed = PCI_SPEED_UNKNOWN;
+
+	err1 = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP,
+					  &lnkcap);
+	if (!err1 && lnkcap) {
+		if (lnkcap & PCI_EXP_LNKCAP_SLS_8_0GB)
+			*speed = PCIE_SPEED_8_0GT;
+		else if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB)
+			*speed = PCIE_SPEED_5_0GT;
+		else if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB)
+			*speed = PCIE_SPEED_2_5GT;
+		return 0;
+	}
+
+	err2 = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2,
+					  &lnkcap);
+	if (!err2 && lnkcap) { /* PCIe r3.0-compliant */
+		if (lnkcap & PCI_EXP_LNKCAP2_SLS_8_0GB)
+			*speed = PCIE_SPEED_8_0GT;
+		else if (lnkcap & PCI_EXP_LNKCAP2_SLS_5_0GB)
+			*speed = PCIE_SPEED_5_0GT;
+		else if (lnkcap & PCI_EXP_LNKCAP2_SLS_2_5GB)
+			*speed = PCIE_SPEED_2_5GT;
+		return 0;
+	}
+
+	return err1 ? err1 : err2;
+}
+EXPORT_SYMBOL(pcie_get_speed_cap);
+
+/**
  * pci_select_bars - Make BAR mask from the type of resource
  * @dev: the PCI device for which BAR mask is made
  * @flags: resource type mask to be selected
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 978aad7..7ff9d46 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -259,6 +259,12 @@ enum pci_bus_speed {
 	PCI_SPEED_UNKNOWN		= 0xff,
 };
 
+#define PCIE_SPEED2STR(speed) \
+	((speed) == PCIE_SPEED_8_0GT ? "8 GT/s" : \
+	 (speed) == PCIE_SPEED_5_0GT ? "5 GT/s" : \
+	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
+	 "Unknown speed")
+
 struct pci_cap_saved_data {
 	u16		cap_nr;
 	bool		cap_extended;
@@ -1080,6 +1086,7 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 int pcie_set_mps(struct pci_dev *dev, int mps);
 int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 			  enum pcie_link_width *width);
+int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
 void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH next V1 2/7] PCI: Add a query function for PCI device's width cap
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 1/7] PCI: Add a query function for PCI device's speed cap Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices Tal Gilboa
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

pcie_get_width_cap() implements the logic for querying a PCI device's
maximum width capability. Change max_link_width_show() function to
use pcie_get_width_cap().

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/pci/pci-sysfs.c |  6 +++---
 drivers/pci/pci.c       | 23 +++++++++++++++++++++++
 include/linux/pci.h     |  1 +
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 14fce9d..16213d3 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -177,14 +177,14 @@ static ssize_t max_link_width_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	u32 linkcap;
+	enum pcie_link_width width;
 	int err;
 
-	err = pcie_capability_read_dword(pci_dev, PCI_EXP_LNKCAP, &linkcap);
+	err = pcie_get_width_cap(pci_dev, &width);
 	if (err)
 		return -EINVAL;
 
-	return sprintf(buf, "%u\n", (linkcap & PCI_EXP_LNKCAP_MLW) >> 4);
+	return sprintf(buf, "%u\n", width);
 }
 static DEVICE_ATTR_RO(max_link_width);
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 05fb356..cf0401d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5092,6 +5092,29 @@ int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed)
 EXPORT_SYMBOL(pcie_get_speed_cap);
 
 /**
+ * pcie_get_width_cap - queries for the PCI device's link width capability.
+ * @dev: PCI device to query
+ * @width: storage for link width
+ *
+ * This function queries the PCI device width capability.
+ */
+int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width)
+{
+	u32 lnkcap;
+	int err;
+
+	*width = PCIE_LNK_WIDTH_UNKNOWN;
+
+	err = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
+	if (!err && lnkcap)
+		/* Shift start of width mask by 4 to get actual speed cap */
+		*width = (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4;
+
+	return err;
+}
+EXPORT_SYMBOL(pcie_get_width_cap);
+
+/**
  * pci_select_bars - Make BAR mask from the type of resource
  * @dev: the PCI device for which BAR mask is made
  * @flags: resource type mask to be selected
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7ff9d46..27dc070 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1087,6 +1087,7 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 			  enum pcie_link_width *width);
 int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
+int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
 void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 1/7] PCI: Add a query function for PCI device's speed cap Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 2/7] PCI: Add a query function for PCI device's width cap Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-02-22 20:21   ` Bjorn Helgaas
  2018-01-15 15:35 ` [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log Tal Gilboa
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

pcie_get_link_limits() function, which is based on
pcie_get_minimum_link(), iterates over the PCI chain
and calculates maximum BW in addition to minimum speed and
width. The BW calculation at each level is speed * width, so
even if, for instance, a level has lower width, than the device max
capabilities, it still might not cause a BW limitation if it has a
higher speed. pcie_get_minimum_link() is kept for compatibility.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/pci/pci.c   | 31 ++++++++++++++++++++++++++++---
 include/linux/pci.h |  8 ++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index cf0401d..f0c60dc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5012,16 +5012,37 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
  * @speed: storage for minimum speed
  * @width: storage for minimum width
  *
- * This function will walk up the PCI device chain and determine the minimum
- * link width and speed of the device.
+ * This function use pcie_get_link_limits() for determining the minimum
+ * link width and speed of the device. Legacy code is kept for compatibility.
  */
 int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 			  enum pcie_link_width *width)
 {
+	int bw;
+
+	return pcie_get_link_limits(dev, speed, width, &bw);
+}
+EXPORT_SYMBOL(pcie_get_minimum_link);
+
+/**
+ * pcie_get_link_limits - determine minimum link settings of a PCI
+			  device and its BW limitation
+ * @dev: PCI device to query
+ * @speed: storage for minimum speed
+ * @width: storage for minimum width
+ * @bw: storage for BW limitation
+ *
+ * This function walks up the PCI device chain and determines the link
+ * limitations - minimum width, minimum speed and max possible BW of the device.
+ */
+int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
+			 enum pcie_link_width *width, int *bw)
+{
 	int ret;
 
 	*speed = PCI_SPEED_UNKNOWN;
 	*width = PCIE_LNK_WIDTH_UNKNOWN;
+	*bw = 0;
 
 	while (dev) {
 		u16 lnksta;
@@ -5042,12 +5063,16 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 		if (next_width < *width)
 			*width = next_width;
 
+		/* Check if current level in the chain limits the total BW */
+		if (!(*bw) || *bw > (next_width) * PCIE_SPEED2MBS(next_speed))
+			*bw = (next_width) * PCIE_SPEED2MBS(next_speed);
+
 		dev = dev->bus->self;
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL(pcie_get_minimum_link);
+EXPORT_SYMBOL(pcie_get_link_limits);
 
 /**
  * pcie_get_speed_cap - queries for the PCI device's link speed capability
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 27dc070..88e23eb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -265,6 +265,12 @@ enum pci_bus_speed {
 	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
 	 "Unknown speed")
 
+#define PCIE_SPEED2MBS(speed) \
+	((speed) == PCIE_SPEED_8_0GT ? 8000 : \
+	 (speed) == PCIE_SPEED_5_0GT ? 5000 : \
+	 (speed) == PCIE_SPEED_2_5GT ? 2500 : \
+	 0)
+
 struct pci_cap_saved_data {
 	u16		cap_nr;
 	bool		cap_extended;
@@ -1088,6 +1094,8 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 			  enum pcie_link_width *width);
 int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
 int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
+int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
+			 enum pcie_link_width *width, int *bw);
 void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
                   ` (2 preceding siblings ...)
  2018-01-15 15:35 ` [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-02-22 19:57   ` Bjorn Helgaas
  2018-01-15 15:35 ` [PATCH next V1 5/7] net/mlx4_core: Report PCI properties using dedicated function Tal Gilboa
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

Add pcie_print_link_status() function for querying and verifying
a PCI device link status. The PCI speed and width are reported
in kernel log. In case the PCI device BW is limited somewhere in
the PCI chain a warning would be reported in kernel log.

This provides a unified method for all PCI devices to
report status and issues, instead of each device reporting in a
different way, using different code.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/pci/pci.c   | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 68 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index f0c60dc..35873cc 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5140,6 +5140,73 @@ int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width)
 EXPORT_SYMBOL(pcie_get_width_cap);
 
 /**
+ * pcie_print_link_status - Reports the PCI device's link speed and width.
+ * @dev: PCI device to query
+ *
+ * This function checks whether the PCI device current speed and width are equal
+ * to the maximum PCI device capabilities and that available BW equals to BW capability.
+ * It issues a warning in cases there's a BW limitation in the PCI chain.
+ */
+void pcie_print_link_status(struct pci_dev *dev)
+{
+	enum pcie_link_width width, width_cap;
+	enum pci_bus_speed speed, speed_cap;
+	int bw, bw_cap;
+	int err;
+
+	err = pcie_get_speed_cap(dev, &speed_cap);
+	if (err) {
+		dev_warn(&dev->dev,
+			 "Warning: unable to determine PCIe device speed capability (err = %d)\n",
+			 err);
+		return;
+	}
+
+	err = pcie_get_width_cap(dev, &width_cap);
+	if (err) {
+		dev_warn(&dev->dev,
+			 "Warning: unable to determine PCIe device width capability (err = %d)\n",
+			 err);
+		return;
+	}
+
+	err = pcie_get_link_limits(dev, &speed, &width, &bw);
+	if (err) {
+		dev_warn(&dev->dev,
+			 "Warning: unable to determine PCIe device chain minimum BW (err = %d)\n",
+			 err);
+		return;
+	}
+
+	if (speed == PCI_SPEED_UNKNOWN)
+		dev_warn(&dev->dev,
+			 "Warning: unable to determine PCIe device chain minimum speed\n");
+
+	if (width == PCIE_LNK_WIDTH_UNKNOWN)
+		dev_warn(&dev->dev,
+			 "Warning: unable to determine PCIe device chain minimum width\n");
+
+	bw_cap = width_cap * PCIE_SPEED2MBS(speed_cap);
+	if (!bw_cap)
+		dev_warn(&dev->dev,
+			 "Warning: unable to determine max PCIe chain BW\n");
+
+	if (bw_cap && bw < bw_cap) {
+		dev_warn(&dev->dev,
+			 "Warning: PCIe BW (%dGb/s) is lower than device's capability (%dGb/s)\n",
+			 bw / 1000, bw_cap / 1000);
+		dev_info(&dev->dev,
+			 "If device status is at max capabilities, might be due to a narrow part down the chain\n");
+	}
+
+	dev_info(&dev->dev, "PCIe link speed is %s, device supports %s\n",
+		 PCIE_SPEED2STR(speed), PCIE_SPEED2STR(speed_cap));
+	dev_info(&dev->dev, "PCIe link width is x%d, device supports x%d\n",
+		 width, width_cap);
+}
+EXPORT_SYMBOL(pcie_print_link_status);
+
+/**
  * pci_select_bars - Make BAR mask from the type of resource
  * @dev: the PCI device for which BAR mask is made
  * @flags: resource type mask to be selected
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 88e23eb..321cf22 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1096,6 +1096,7 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
 int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
 int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
 			 enum pcie_link_width *width, int *bw);
+void pcie_print_link_status(struct pci_dev *dev);
 void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH next V1 5/7] net/mlx4_core: Report PCI properties using dedicated function
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
                   ` (3 preceding siblings ...)
  2018-01-15 15:35 ` [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 6/7] net/mlx5: Report device PCI link status and issues Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation Tal Gilboa
  6 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

Change mlx4 method of checking and reporting PCI status and
maximum capabilities to use the pci driver functions instead
of implementing them in the driver code.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 81 +------------------------------
 1 file changed, 1 insertion(+), 80 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 4d84cab..30cacac 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -623,85 +623,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	return 0;
 }
 
-static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
-				       enum pci_bus_speed *speed,
-				       enum pcie_link_width *width)
-{
-	u32 lnkcap1, lnkcap2;
-	int err1, err2;
-
-#define  PCIE_MLW_CAP_SHIFT 4	/* start of MLW mask in link capabilities */
-
-	*speed = PCI_SPEED_UNKNOWN;
-	*width = PCIE_LNK_WIDTH_UNKNOWN;
-
-	err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
-					  &lnkcap1);
-	err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
-					  &lnkcap2);
-	if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
-		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
-			*speed = PCIE_SPEED_8_0GT;
-		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
-			*speed = PCIE_SPEED_5_0GT;
-		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
-			*speed = PCIE_SPEED_2_5GT;
-	}
-	if (!err1) {
-		*width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
-		if (!lnkcap2) { /* pre-r3.0 */
-			if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
-				*speed = PCIE_SPEED_5_0GT;
-			else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
-				*speed = PCIE_SPEED_2_5GT;
-		}
-	}
-
-	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
-		return err1 ? err1 :
-			err2 ? err2 : -EINVAL;
-	}
-	return 0;
-}
-
-static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
-{
-	enum pcie_link_width width, width_cap;
-	enum pci_bus_speed speed, speed_cap;
-	int err;
-
-#define PCIE_SPEED_STR(speed) \
-	(speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
-	 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
-	 speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
-	 "Unknown")
-
-	err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
-	if (err) {
-		mlx4_warn(dev,
-			  "Unable to determine PCIe device BW capabilities\n");
-		return;
-	}
-
-	err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
-	if (err || speed == PCI_SPEED_UNKNOWN ||
-	    width == PCIE_LNK_WIDTH_UNKNOWN) {
-		mlx4_warn(dev,
-			  "Unable to determine PCI device chain minimum BW\n");
-		return;
-	}
-
-	if (width != width_cap || speed != speed_cap)
-		mlx4_warn(dev,
-			  "PCIe BW is different than device's capability\n");
-
-	mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
-		  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
-	mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
-		  width, width_cap);
-	return;
-}
-
 /*The function checks if there are live vf, return the num of them*/
 static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
 {
@@ -3475,7 +3396,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 	 * express device capabilities are under-satisfied by the bus.
 	 */
 	if (!mlx4_is_slave(dev))
-		mlx4_check_pcie_caps(dev);
+		pcie_print_link_status(dev->persist->pdev);
 
 	/* In master functions, the communication channel must be initialized
 	 * after obtaining its address from fw */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH next V1 6/7] net/mlx5: Report device PCI link status and issues
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
                   ` (4 preceding siblings ...)
  2018-01-15 15:35 ` [PATCH next V1 5/7] net/mlx4_core: Report PCI properties using dedicated function Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-01-15 15:35 ` [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation Tal Gilboa
  6 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

Add a kernel log print for mlx5 PCI device's link status.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5f32344..7a43735 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -977,6 +977,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
 		 fw_rev_min(dev), fw_rev_sub(dev));
 
+	pcie_print_link_status(dev->pdev);
+
 	/* on load removing any previous indication of internal error, device is
 	 * up
 	 */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation
  2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
                   ` (5 preceding siblings ...)
  2018-01-15 15:35 ` [PATCH next V1 6/7] net/mlx5: Report device PCI link status and issues Tal Gilboa
@ 2018-01-15 15:35 ` Tal Gilboa
  2018-02-22 20:09   ` Bjorn Helgaas
  6 siblings, 1 reply; 17+ messages in thread
From: Tal Gilboa @ 2018-01-15 15:35 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Linux PCI, Tariq Toukan, Tal Gilboa

Newly introduced pcie_get_link_limits() function calculates
maximum available BW through the PCI chain. We can use this
value for mlx5e_get_pci_bw() instead of calculating ourselves.

Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d2b057a..d1d7427 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3971,27 +3971,16 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
 	enum pcie_link_width width;
 	enum pci_bus_speed speed;
 	int err = 0;
+	int bw;
 
-	err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
+	err = pcie_get_link_limits(mdev->pdev, &speed, &width, &bw);
 	if (err)
 		return err;
 
 	if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
 		return -EINVAL;
 
-	switch (speed) {
-	case PCIE_SPEED_2_5GT:
-		*pci_bw = 2500 * width;
-		break;
-	case PCIE_SPEED_5_0GT:
-		*pci_bw = 5000 * width;
-		break;
-	case PCIE_SPEED_8_0GT:
-		*pci_bw = 8000 * width;
-		break;
-	default:
-		return -EINVAL;
-	}
+	*pci_bw = bw;
 
 	return 0;
 }
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log
  2018-01-15 15:35 ` [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log Tal Gilboa
@ 2018-02-22 19:57   ` Bjorn Helgaas
  2018-02-25  9:30     ` Tal Gilboa
  0 siblings, 1 reply; 17+ messages in thread
From: Bjorn Helgaas @ 2018-02-22 19:57 UTC (permalink / raw)
  To: Tal Gilboa; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On Mon, Jan 15, 2018 at 05:35:38PM +0200, Tal Gilboa wrote:
> Add pcie_print_link_status() function for querying and verifying
> a PCI device link status. The PCI speed and width are reported
> in kernel log. In case the PCI device BW is limited somewhere in
> the PCI chain a warning would be reported in kernel log.
> 
> This provides a unified method for all PCI devices to
> report status and issues, instead of each device reporting in a
> different way, using different code.
> 
> Signed-off-by: Tal Gilboa <talgi@mellanox.com>
> Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
> ---
>  drivers/pci/pci.c   | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h |  1 +
>  2 files changed, 68 insertions(+)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index f0c60dc..35873cc 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -5140,6 +5140,73 @@ int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width)
>  EXPORT_SYMBOL(pcie_get_width_cap);
>  
>  /**
> + * pcie_print_link_status - Reports the PCI device's link speed and width.
> + * @dev: PCI device to query
> + *
> + * This function checks whether the PCI device current speed and width are equal
> + * to the maximum PCI device capabilities and that available BW equals to BW capability.
> + * It issues a warning in cases there's a BW limitation in the PCI chain.

Please wrap the comment so it fits in 80 columns.  Please spell out
"bandwidth".

> + */
> +void pcie_print_link_status(struct pci_dev *dev)
> +{
> +	enum pcie_link_width width, width_cap;
> +	enum pci_bus_speed speed, speed_cap;
> +	int bw, bw_cap;
> +	int err;
> +
> +	err = pcie_get_speed_cap(dev, &speed_cap);
> +	if (err) {
> +		dev_warn(&dev->dev,

Use the new pci_warn(), pci_info(), etc.

I don't think these warnings about failure of pcie_get_speed_cap(),
etc., are necessary as long as the last dev_info() about the current
link speed/width shows "Unknown".

> +			 "Warning: unable to determine PCIe device speed capability (err = %d)\n",
> +			 err);
> +		return;
> +	}
> +
> +	err = pcie_get_width_cap(dev, &width_cap);
> +	if (err) {
> +		dev_warn(&dev->dev,
> +			 "Warning: unable to determine PCIe device width capability (err = %d)\n",
> +			 err);
> +		return;
> +	}
> +
> +	err = pcie_get_link_limits(dev, &speed, &width, &bw);
> +	if (err) {
> +		dev_warn(&dev->dev,
> +			 "Warning: unable to determine PCIe device chain minimum BW (err = %d)\n",
> +			 err);
> +		return;
> +	}
> +
> +	if (speed == PCI_SPEED_UNKNOWN)
> +		dev_warn(&dev->dev,
> +			 "Warning: unable to determine PCIe device chain minimum speed\n");
> +
> +	if (width == PCIE_LNK_WIDTH_UNKNOWN)
> +		dev_warn(&dev->dev,
> +			 "Warning: unable to determine PCIe device chain minimum width\n");
> +
> +	bw_cap = width_cap * PCIE_SPEED2MBS(speed_cap);
> +	if (!bw_cap)
> +		dev_warn(&dev->dev,
> +			 "Warning: unable to determine max PCIe chain BW\n");
> +
> +	if (bw_cap && bw < bw_cap) {
> +		dev_warn(&dev->dev,
> +			 "Warning: PCIe BW (%dGb/s) is lower than device's capability (%dGb/s)\n",
> +			 bw / 1000, bw_cap / 1000);

Suggested text:

  pci_info(dev, "available bandwidth %dGB/s (capable of %dGB/s)\n", ...

I'm not sure this merits a pci_warn().  In some cases a user might be
able to move the device to a different slot or something, but I'm sure
there are platforms where it is impossible for the user to do anything
about this, and I don't think we need to warn about that.

> +		dev_info(&dev->dev,
> +			 "If device status is at max capabilities, might be due to a narrow part down the chain\n");

I'm not sure this message is strictly necessary.  It should be fairly
trivial for a user to figure this out with lspci.

> +	}
> +
> +	dev_info(&dev->dev, "PCIe link speed is %s, device supports %s\n",
> +		 PCIE_SPEED2STR(speed), PCIE_SPEED2STR(speed_cap));
> +	dev_info(&dev->dev, "PCIe link width is x%d, device supports x%d\n",
> +		 width, width_cap);

I want to minimize the dmesg noise.  Here's a possibility:

  if (speed == speed_cap && width == width_cap)
    pci_info(dev, "%s x%d link\n", PCIE_SPEED2STR(speed), width);
  else
    pci_info(dev, "%s x%d link (capable of %s x%d)\n",
             PCIE_SPEED2STR(speed), width,
             PCIE_SPEED2STR(speed_cap), width_cap);

> +}
> +EXPORT_SYMBOL(pcie_print_link_status);
> +
> +/**
>   * pci_select_bars - Make BAR mask from the type of resource
>   * @dev: the PCI device for which BAR mask is made
>   * @flags: resource type mask to be selected
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 88e23eb..321cf22 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1096,6 +1096,7 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>  int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
>  int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
>  			 enum pcie_link_width *width, int *bw);
> +void pcie_print_link_status(struct pci_dev *dev);
>  void pcie_flr(struct pci_dev *dev);
>  int __pci_reset_function_locked(struct pci_dev *dev);
>  int pci_reset_function(struct pci_dev *dev);
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation
  2018-01-15 15:35 ` [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation Tal Gilboa
@ 2018-02-22 20:09   ` Bjorn Helgaas
  2018-02-25 11:20     ` Tal Gilboa
  0 siblings, 1 reply; 17+ messages in thread
From: Bjorn Helgaas @ 2018-02-22 20:09 UTC (permalink / raw)
  To: Tal Gilboa; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On Mon, Jan 15, 2018 at 05:35:41PM +0200, Tal Gilboa wrote:
> Newly introduced pcie_get_link_limits() function calculates
> maximum available BW through the PCI chain. We can use this
> value for mlx5e_get_pci_bw() instead of calculating ourselves.
> 
> Signed-off-by: Tal Gilboa <talgi@mellanox.com>
> Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 17 +++--------------
>  1 file changed, 3 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index d2b057a..d1d7427 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -3971,27 +3971,16 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
>  	enum pcie_link_width width;
>  	enum pci_bus_speed speed;
>  	int err = 0;
> +	int bw;
>  
> -	err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
> +	err = pcie_get_link_limits(mdev->pdev, &speed, &width, &bw);

Why don't you refactor this a little so it calls
pcie_print_link_status() like mlx5 and mlx4?

I think this is the only call of pcie_get_link_limits() outside
drivers/pci, and it seems like a little overkill here since you really
don't need "speed" here.

But I don't understand what's going on with mlx5e_get_max_linkspeed().
It doesn't look like that is actually looking at the PCIe link speed,
so maybe there's something special going on there.

>  	if (err)
>  		return err;
>  
>  	if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
>  		return -EINVAL;
>  
> -	switch (speed) {
> -	case PCIE_SPEED_2_5GT:
> -		*pci_bw = 2500 * width;
> -		break;
> -	case PCIE_SPEED_5_0GT:
> -		*pci_bw = 5000 * width;
> -		break;
> -	case PCIE_SPEED_8_0GT:
> -		*pci_bw = 8000 * width;
> -		break;
> -	default:
> -		return -EINVAL;
> -	}
> +	*pci_bw = bw;
>  
>  	return 0;
>  }
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices
  2018-01-15 15:35 ` [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices Tal Gilboa
@ 2018-02-22 20:21   ` Bjorn Helgaas
  2018-02-27 12:17     ` Tal Gilboa
  0 siblings, 1 reply; 17+ messages in thread
From: Bjorn Helgaas @ 2018-02-22 20:21 UTC (permalink / raw)
  To: Tal Gilboa; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On Mon, Jan 15, 2018 at 05:35:37PM +0200, Tal Gilboa wrote:
> pcie_get_link_limits() function, which is based on
> pcie_get_minimum_link(), iterates over the PCI chain
> and calculates maximum BW in addition to minimum speed and
> width. The BW calculation at each level is speed * width, so
> even if, for instance, a level has lower width, than the device max
> capabilities, it still might not cause a BW limitation if it has a
> higher speed. pcie_get_minimum_link() is kept for compatibility.
> 
> Signed-off-by: Tal Gilboa <talgi@mellanox.com>
> Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
> ---
>  drivers/pci/pci.c   | 31 ++++++++++++++++++++++++++++---
>  include/linux/pci.h |  8 ++++++++
>  2 files changed, 36 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index cf0401d..f0c60dc 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -5012,16 +5012,37 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>   * @speed: storage for minimum speed
>   * @width: storage for minimum width
>   *
> - * This function will walk up the PCI device chain and determine the minimum
> - * link width and speed of the device.
> + * This function use pcie_get_link_limits() for determining the minimum
> + * link width and speed of the device. Legacy code is kept for compatibility.
>   */
>  int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>  			  enum pcie_link_width *width)
>  {
> +	int bw;
> +
> +	return pcie_get_link_limits(dev, speed, width, &bw);
> +}
> +EXPORT_SYMBOL(pcie_get_minimum_link);
> +
> +/**
> + * pcie_get_link_limits - determine minimum link settings of a PCI
> +			  device and its BW limitation
> + * @dev: PCI device to query
> + * @speed: storage for minimum speed
> + * @width: storage for minimum width
> + * @bw: storage for BW limitation
> + *
> + * This function walks up the PCI device chain and determines the link
> + * limitations - minimum width, minimum speed and max possible BW of the device.

I don't really like this interface because it mixes two separate
things: (1) the speed/width capabilities of the current device, and
(2) the minimum speed/width of the path leading to the device (and
these minimums might even be at different points in the path).

I think what we want is a way to get the max bandwidth a device can
use, i.e., pcie_get_speed_cap() * pcie_get_width_cap().

If we also had a way to find the minimum bandwidth point leading to a
device, e.g., a pcie_get_minimum_bandwidth() that returned a pci_dev
and its bandwidth (and maybe the link speed and width corresponding to
that device), then you could print a meaningful message like "this
device is capable of X, but upstream device Y limits us to Z".

I think the current pcie_print_link_status() is a little misleading
because it prints the minimum link speed and width from the path, but
those might be from different devices, which doesn't really make
sense.

> + */
> +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
> +			 enum pcie_link_width *width, int *bw)
> +{
>  	int ret;
>  
>  	*speed = PCI_SPEED_UNKNOWN;
>  	*width = PCIE_LNK_WIDTH_UNKNOWN;
> +	*bw = 0;
>  
>  	while (dev) {
>  		u16 lnksta;
> @@ -5042,12 +5063,16 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>  		if (next_width < *width)
>  			*width = next_width;
>  
> +		/* Check if current level in the chain limits the total BW */
> +		if (!(*bw) || *bw > (next_width) * PCIE_SPEED2MBS(next_speed))
> +			*bw = (next_width) * PCIE_SPEED2MBS(next_speed);
> +
>  		dev = dev->bus->self;
>  	}
>  
>  	return 0;
>  }
> -EXPORT_SYMBOL(pcie_get_minimum_link);
> +EXPORT_SYMBOL(pcie_get_link_limits);
>  
>  /**
>   * pcie_get_speed_cap - queries for the PCI device's link speed capability
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 27dc070..88e23eb 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -265,6 +265,12 @@ enum pci_bus_speed {
>  	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
>  	 "Unknown speed")
>  
> +#define PCIE_SPEED2MBS(speed) \
> +	((speed) == PCIE_SPEED_8_0GT ? 8000 : \
> +	 (speed) == PCIE_SPEED_5_0GT ? 5000 : \
> +	 (speed) == PCIE_SPEED_2_5GT ? 2500 : \
> +	 0)
> +
>  struct pci_cap_saved_data {
>  	u16		cap_nr;
>  	bool		cap_extended;
> @@ -1088,6 +1094,8 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>  			  enum pcie_link_width *width);
>  int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
>  int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
> +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
> +			 enum pcie_link_width *width, int *bw);
>  void pcie_flr(struct pci_dev *dev);
>  int __pci_reset_function_locked(struct pci_dev *dev);
>  int pci_reset_function(struct pci_dev *dev);
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log
  2018-02-22 19:57   ` Bjorn Helgaas
@ 2018-02-25  9:30     ` Tal Gilboa
  2018-02-27 12:18       ` Tal Gilboa
  0 siblings, 1 reply; 17+ messages in thread
From: Tal Gilboa @ 2018-02-25  9:30 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On 2/22/2018 9:57 PM, Bjorn Helgaas wrote:>
> Suggested text:
> 
>    pci_info(dev, "available bandwidth %dGB/s (capable of %dGB/s)\n", ...
> 
> I'm not sure this merits a pci_warn().  In some cases a user might be
> able to move the device to a different slot or something, but I'm sure
> there are platforms where it is impossible for the user to do anything
> about this, and I don't think we need to warn about that.

We face a lot of customer "bugs" due to lower than expected PCIe 
bandwidth. I think an info print would be too light in this case. Even 
if the user has nothing to do it should be aware of the problem.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation
  2018-02-22 20:09   ` Bjorn Helgaas
@ 2018-02-25 11:20     ` Tal Gilboa
  0 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-02-25 11:20 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On 2/22/2018 10:09 PM, Bjorn Helgaas wrote:
> On Mon, Jan 15, 2018 at 05:35:41PM +0200, Tal Gilboa wrote:
>> Newly introduced pcie_get_link_limits() function calculates
>> maximum available BW through the PCI chain. We can use this
>> value for mlx5e_get_pci_bw() instead of calculating ourselves.
>>
>> Signed-off-by: Tal Gilboa <talgi@mellanox.com>
>> Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
>> ---
>>   drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 17 +++--------------
>>   1 file changed, 3 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> index d2b057a..d1d7427 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>> @@ -3971,27 +3971,16 @@ static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
>>   	enum pcie_link_width width;
>>   	enum pci_bus_speed speed;
>>   	int err = 0;
>> +	int bw;
>>   
>> -	err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
>> +	err = pcie_get_link_limits(mdev->pdev, &speed, &width, &bw);
> 
> Why don't you refactor this a little so it calls
> pcie_print_link_status() like mlx5 and mlx4?
> 
> I think this is the only call of pcie_get_link_limits() outside
> drivers/pci, and it seems like a little overkill here since you really
> don't need "speed" here.

I wouldn't like to call pcie_print_link_status() if I don't really want 
to print the status. In this case mlx5e driver needs the PCI bandwidth 
limitation in order to make a decision. I think it is good that we 
expose a method to do precisely that.

> 
> But I don't understand what's going on with mlx5e_get_max_linkspeed().
> It doesn't look like that is actually looking at the PCIe link speed,
> so maybe there's something special going on there.

mlx5e_get_max_linkspeed() queries for the port wire speed. mlx5e driver 
might change some settings (CQE zipping, LRO) in case PCI bandwidth 
can't support wire speed.

> 
>>   	if (err)
>>   		return err;
>>   
>>   	if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
>>   		return -EINVAL;
>>   
>> -	switch (speed) {
>> -	case PCIE_SPEED_2_5GT:
>> -		*pci_bw = 2500 * width;
>> -		break;
>> -	case PCIE_SPEED_5_0GT:
>> -		*pci_bw = 5000 * width;
>> -		break;
>> -	case PCIE_SPEED_8_0GT:
>> -		*pci_bw = 8000 * width;
>> -		break;
>> -	default:
>> -		return -EINVAL;
>> -	}
>> +	*pci_bw = bw;
>>   
>>   	return 0;
>>   }
>> -- 
>> 1.8.3.1
>>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices
  2018-02-22 20:21   ` Bjorn Helgaas
@ 2018-02-27 12:17     ` Tal Gilboa
  2018-02-27 15:19       ` Bjorn Helgaas
  0 siblings, 1 reply; 17+ messages in thread
From: Tal Gilboa @ 2018-02-27 12:17 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On 2/22/2018 10:21 PM, Bjorn Helgaas wrote:
> On Mon, Jan 15, 2018 at 05:35:37PM +0200, Tal Gilboa wrote:
>> pcie_get_link_limits() function, which is based on
>> pcie_get_minimum_link(), iterates over the PCI chain
>> and calculates maximum BW in addition to minimum speed and
>> width. The BW calculation at each level is speed * width, so
>> even if, for instance, a level has lower width, than the device max
>> capabilities, it still might not cause a BW limitation if it has a
>> higher speed. pcie_get_minimum_link() is kept for compatibility.
>>
>> Signed-off-by: Tal Gilboa <talgi@mellanox.com>
>> Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
>> ---
>>   drivers/pci/pci.c   | 31 ++++++++++++++++++++++++++++---
>>   include/linux/pci.h |  8 ++++++++
>>   2 files changed, 36 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index cf0401d..f0c60dc 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -5012,16 +5012,37 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>    * @speed: storage for minimum speed
>>    * @width: storage for minimum width
>>    *
>> - * This function will walk up the PCI device chain and determine the minimum
>> - * link width and speed of the device.
>> + * This function use pcie_get_link_limits() for determining the minimum
>> + * link width and speed of the device. Legacy code is kept for compatibility.
>>    */
>>   int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>>   			  enum pcie_link_width *width)
>>   {
>> +	int bw;
>> +
>> +	return pcie_get_link_limits(dev, speed, width, &bw);
>> +}
>> +EXPORT_SYMBOL(pcie_get_minimum_link);
>> +
>> +/**
>> + * pcie_get_link_limits - determine minimum link settings of a PCI
>> +			  device and its BW limitation
>> + * @dev: PCI device to query
>> + * @speed: storage for minimum speed
>> + * @width: storage for minimum width
>> + * @bw: storage for BW limitation
>> + *
>> + * This function walks up the PCI device chain and determines the link
>> + * limitations - minimum width, minimum speed and max possible BW of the device.
> 
> I don't really like this interface because it mixes two separate
> things: (1) the speed/width capabilities of the current device, and
> (2) the minimum speed/width of the path leading to the device (and
> these minimums might even be at different points in the path).
> 
> I think what we want is a way to get the max bandwidth a device can
> use, i.e., pcie_get_speed_cap() * pcie_get_width_cap().
> 
> If we also had a way to find the minimum bandwidth point leading to a
> device, e.g., a pcie_get_minimum_bandwidth() that returned a pci_dev
> and its bandwidth (and maybe the link speed and width corresponding to
> that device), then you could print a meaningful message like "this
> device is capable of X, but upstream device Y limits us to Z".
> 
> I think the current pcie_print_link_status() is a little misleading
> because it prints the minimum link speed and width from the path, but
> those might be from different devices, which doesn't really make
> sense.

I see. We started out with simply checking speed and width only for the 
device. If the chain bandwidth check is misleading I'll revert back to 
the original behavior. No need to calculate bandwidth IMO. Checking 
current speed and width and comparing to max capabilities should be enough.

> 
>> + */
>> +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
>> +			 enum pcie_link_width *width, int *bw)
>> +{
>>   	int ret;
>>   
>>   	*speed = PCI_SPEED_UNKNOWN;
>>   	*width = PCIE_LNK_WIDTH_UNKNOWN;
>> +	*bw = 0;
>>   
>>   	while (dev) {
>>   		u16 lnksta;
>> @@ -5042,12 +5063,16 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>>   		if (next_width < *width)
>>   			*width = next_width;
>>   
>> +		/* Check if current level in the chain limits the total BW */
>> +		if (!(*bw) || *bw > (next_width) * PCIE_SPEED2MBS(next_speed))
>> +			*bw = (next_width) * PCIE_SPEED2MBS(next_speed);
>> +
>>   		dev = dev->bus->self;
>>   	}
>>   
>>   	return 0;
>>   }
>> -EXPORT_SYMBOL(pcie_get_minimum_link);
>> +EXPORT_SYMBOL(pcie_get_link_limits);
>>   
>>   /**
>>    * pcie_get_speed_cap - queries for the PCI device's link speed capability
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index 27dc070..88e23eb 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -265,6 +265,12 @@ enum pci_bus_speed {
>>   	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
>>   	 "Unknown speed")
>>   
>> +#define PCIE_SPEED2MBS(speed) \
>> +	((speed) == PCIE_SPEED_8_0GT ? 8000 : \
>> +	 (speed) == PCIE_SPEED_5_0GT ? 5000 : \
>> +	 (speed) == PCIE_SPEED_2_5GT ? 2500 : \
>> +	 0)
>> +
>>   struct pci_cap_saved_data {
>>   	u16		cap_nr;
>>   	bool		cap_extended;
>> @@ -1088,6 +1094,8 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>>   			  enum pcie_link_width *width);
>>   int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
>>   int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
>> +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
>> +			 enum pcie_link_width *width, int *bw);
>>   void pcie_flr(struct pci_dev *dev);
>>   int __pci_reset_function_locked(struct pci_dev *dev);
>>   int pci_reset_function(struct pci_dev *dev);
>> -- 
>> 1.8.3.1
>>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log
  2018-02-25  9:30     ` Tal Gilboa
@ 2018-02-27 12:18       ` Tal Gilboa
  0 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-02-27 12:18 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On 2/25/2018 11:30 AM, Tal Gilboa wrote:
> On 2/22/2018 9:57 PM, Bjorn Helgaas wrote:>
>> Suggested text:
>>
>>    pci_info(dev, "available bandwidth %dGB/s (capable of %dGB/s)\n", ...
>>
>> I'm not sure this merits a pci_warn().  In some cases a user might be
>> able to move the device to a different slot or something, but I'm sure
>> there are platforms where it is impossible for the user to do anything
>> about this, and I don't think we need to warn about that.
> 
> We face a lot of customer "bugs" due to lower than expected PCIe 
> bandwidth. I think an info print would be too light in this case. Even 
> if the user has nothing to do it should be aware of the problem.

We actually got some feedback saying the warnings are a bit too much. 
I'll remove most of them and change the remaining to pci_info().

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices
  2018-02-27 12:17     ` Tal Gilboa
@ 2018-02-27 15:19       ` Bjorn Helgaas
  2018-02-27 15:34         ` Tal Gilboa
  0 siblings, 1 reply; 17+ messages in thread
From: Bjorn Helgaas @ 2018-02-27 15:19 UTC (permalink / raw)
  To: Tal Gilboa; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On Tue, Feb 27, 2018 at 02:17:28PM +0200, Tal Gilboa wrote:
> On 2/22/2018 10:21 PM, Bjorn Helgaas wrote:
> > On Mon, Jan 15, 2018 at 05:35:37PM +0200, Tal Gilboa wrote:
> > > pcie_get_link_limits() function, which is based on
> > > pcie_get_minimum_link(), iterates over the PCI chain
> > > and calculates maximum BW in addition to minimum speed and
> > > width. The BW calculation at each level is speed * width, so
> > > even if, for instance, a level has lower width, than the device max
> > > capabilities, it still might not cause a BW limitation if it has a
> > > higher speed. pcie_get_minimum_link() is kept for compatibility.
> > > 
> > > Signed-off-by: Tal Gilboa <talgi@mellanox.com>
> > > Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
> > > ---
> > >   drivers/pci/pci.c   | 31 ++++++++++++++++++++++++++++---
> > >   include/linux/pci.h |  8 ++++++++
> > >   2 files changed, 36 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> > > index cf0401d..f0c60dc 100644
> > > --- a/drivers/pci/pci.c
> > > +++ b/drivers/pci/pci.c
> > > @@ -5012,16 +5012,37 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
> > >    * @speed: storage for minimum speed
> > >    * @width: storage for minimum width
> > >    *
> > > - * This function will walk up the PCI device chain and determine the minimum
> > > - * link width and speed of the device.
> > > + * This function use pcie_get_link_limits() for determining the minimum
> > > + * link width and speed of the device. Legacy code is kept for compatibility.
> > >    */
> > >   int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
> > >   			  enum pcie_link_width *width)
> > >   {
> > > +	int bw;
> > > +
> > > +	return pcie_get_link_limits(dev, speed, width, &bw);
> > > +}
> > > +EXPORT_SYMBOL(pcie_get_minimum_link);
> > > +
> > > +/**
> > > + * pcie_get_link_limits - determine minimum link settings of a PCI
> > > +			  device and its BW limitation
> > > + * @dev: PCI device to query
> > > + * @speed: storage for minimum speed
> > > + * @width: storage for minimum width
> > > + * @bw: storage for BW limitation
> > > + *
> > > + * This function walks up the PCI device chain and determines the link
> > > + * limitations - minimum width, minimum speed and max possible BW of the device.
> > 
> > I don't really like this interface because it mixes two separate
> > things: (1) the speed/width capabilities of the current device, and
> > (2) the minimum speed/width of the path leading to the device (and
> > these minimums might even be at different points in the path).
> > 
> > I think what we want is a way to get the max bandwidth a device can
> > use, i.e., pcie_get_speed_cap() * pcie_get_width_cap().
> > 
> > If we also had a way to find the minimum bandwidth point leading to a
> > device, e.g., a pcie_get_minimum_bandwidth() that returned a pci_dev
> > and its bandwidth (and maybe the link speed and width corresponding to
> > that device), then you could print a meaningful message like "this
> > device is capable of X, but upstream device Y limits us to Z".
> > 
> > I think the current pcie_print_link_status() is a little misleading
> > because it prints the minimum link speed and width from the path, but
> > those might be from different devices, which doesn't really make
> > sense.
> 
> I see. We started out with simply checking speed and width only for the
> device. If the chain bandwidth check is misleading I'll revert back to the
> original behavior. No need to calculate bandwidth IMO. Checking current
> speed and width and comparing to max capabilities should be enough.

Let's think about where we want to end up.  I think you want to emit a
message if either the current link width or speed is lower than the
device is capable of.  You don't need the bandwidth for that.

Do you also want to emit a message if there's a bottleneck upstream
from the device?  E.g., the link to the device may be x8, 5GT/s, which
is capable of a bandwidth of 40Gb/s, but if there's an upstream link
that is x4, 5GT/s, the device will be limited to a bandwidth of
20Gb/s.  If you want to check for that, I think we do need to figure
the bandwidth.  If we want a message in situations like this, but we
didn't figure the bandwidth, we would erroneously complain if the
upstream link were x16, 2.5GT/s, where that link has a lower speed but
is wider so it's still capable of 40Gb/s.

If you want a message about upstream bottlenecks, do you want to
identify the specific device that is the bottleneck?

IIRC, you're adding these:

  int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width)
  int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed)
  int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
			   enum pcie_link_width *width, int *bw);

It seems like we might not need all three of these, since the last one
looks like it could subsume the first two.

> > > +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
> > > +			 enum pcie_link_width *width, int *bw)
> > > +{
> > >   	int ret;
> > >   	*speed = PCI_SPEED_UNKNOWN;
> > >   	*width = PCIE_LNK_WIDTH_UNKNOWN;
> > > +	*bw = 0;
> > >   	while (dev) {
> > >   		u16 lnksta;
> > > @@ -5042,12 +5063,16 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
> > >   		if (next_width < *width)
> > >   			*width = next_width;
> > > +		/* Check if current level in the chain limits the total BW */
> > > +		if (!(*bw) || *bw > (next_width) * PCIE_SPEED2MBS(next_speed))
> > > +			*bw = (next_width) * PCIE_SPEED2MBS(next_speed);
> > > +
> > >   		dev = dev->bus->self;
> > >   	}
> > >   	return 0;
> > >   }
> > > -EXPORT_SYMBOL(pcie_get_minimum_link);
> > > +EXPORT_SYMBOL(pcie_get_link_limits);
> > >   /**
> > >    * pcie_get_speed_cap - queries for the PCI device's link speed capability
> > > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > > index 27dc070..88e23eb 100644
> > > --- a/include/linux/pci.h
> > > +++ b/include/linux/pci.h
> > > @@ -265,6 +265,12 @@ enum pci_bus_speed {
> > >   	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
> > >   	 "Unknown speed")
> > > +#define PCIE_SPEED2MBS(speed) \
> > > +	((speed) == PCIE_SPEED_8_0GT ? 8000 : \
> > > +	 (speed) == PCIE_SPEED_5_0GT ? 5000 : \
> > > +	 (speed) == PCIE_SPEED_2_5GT ? 2500 : \
> > > +	 0)
> > > +
> > >   struct pci_cap_saved_data {
> > >   	u16		cap_nr;
> > >   	bool		cap_extended;
> > > @@ -1088,6 +1094,8 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
> > >   			  enum pcie_link_width *width);
> > >   int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
> > >   int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
> > > +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
> > > +			 enum pcie_link_width *width, int *bw);
> > >   void pcie_flr(struct pci_dev *dev);
> > >   int __pci_reset_function_locked(struct pci_dev *dev);
> > >   int pci_reset_function(struct pci_dev *dev);
> > > -- 
> > > 1.8.3.1
> > > 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices
  2018-02-27 15:19       ` Bjorn Helgaas
@ 2018-02-27 15:34         ` Tal Gilboa
  0 siblings, 0 replies; 17+ messages in thread
From: Tal Gilboa @ 2018-02-27 15:34 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: Bjorn Helgaas, Linux PCI, Tariq Toukan

On 2/27/2018 5:19 PM, Bjorn Helgaas wrote:
> On Tue, Feb 27, 2018 at 02:17:28PM +0200, Tal Gilboa wrote:
>> On 2/22/2018 10:21 PM, Bjorn Helgaas wrote:
>>> On Mon, Jan 15, 2018 at 05:35:37PM +0200, Tal Gilboa wrote:
>>>> pcie_get_link_limits() function, which is based on
>>>> pcie_get_minimum_link(), iterates over the PCI chain
>>>> and calculates maximum BW in addition to minimum speed and
>>>> width. The BW calculation at each level is speed * width, so
>>>> even if, for instance, a level has lower width, than the device max
>>>> capabilities, it still might not cause a BW limitation if it has a
>>>> higher speed. pcie_get_minimum_link() is kept for compatibility.
>>>>
>>>> Signed-off-by: Tal Gilboa <talgi@mellanox.com>
>>>> Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
>>>> ---
>>>>    drivers/pci/pci.c   | 31 ++++++++++++++++++++++++++++---
>>>>    include/linux/pci.h |  8 ++++++++
>>>>    2 files changed, 36 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>>>> index cf0401d..f0c60dc 100644
>>>> --- a/drivers/pci/pci.c
>>>> +++ b/drivers/pci/pci.c
>>>> @@ -5012,16 +5012,37 @@ int pcie_set_mps(struct pci_dev *dev, int mps)
>>>>     * @speed: storage for minimum speed
>>>>     * @width: storage for minimum width
>>>>     *
>>>> - * This function will walk up the PCI device chain and determine the minimum
>>>> - * link width and speed of the device.
>>>> + * This function use pcie_get_link_limits() for determining the minimum
>>>> + * link width and speed of the device. Legacy code is kept for compatibility.
>>>>     */
>>>>    int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>>>>    			  enum pcie_link_width *width)
>>>>    {
>>>> +	int bw;
>>>> +
>>>> +	return pcie_get_link_limits(dev, speed, width, &bw);
>>>> +}
>>>> +EXPORT_SYMBOL(pcie_get_minimum_link);
>>>> +
>>>> +/**
>>>> + * pcie_get_link_limits - determine minimum link settings of a PCI
>>>> +			  device and its BW limitation
>>>> + * @dev: PCI device to query
>>>> + * @speed: storage for minimum speed
>>>> + * @width: storage for minimum width
>>>> + * @bw: storage for BW limitation
>>>> + *
>>>> + * This function walks up the PCI device chain and determines the link
>>>> + * limitations - minimum width, minimum speed and max possible BW of the device.
>>>
>>> I don't really like this interface because it mixes two separate
>>> things: (1) the speed/width capabilities of the current device, and
>>> (2) the minimum speed/width of the path leading to the device (and
>>> these minimums might even be at different points in the path).
>>>
>>> I think what we want is a way to get the max bandwidth a device can
>>> use, i.e., pcie_get_speed_cap() * pcie_get_width_cap().
>>>
>>> If we also had a way to find the minimum bandwidth point leading to a
>>> device, e.g., a pcie_get_minimum_bandwidth() that returned a pci_dev
>>> and its bandwidth (and maybe the link speed and width corresponding to
>>> that device), then you could print a meaningful message like "this
>>> device is capable of X, but upstream device Y limits us to Z".
>>>
>>> I think the current pcie_print_link_status() is a little misleading
>>> because it prints the minimum link speed and width from the path, but
>>> those might be from different devices, which doesn't really make
>>> sense.
>>
>> I see. We started out with simply checking speed and width only for the
>> device. If the chain bandwidth check is misleading I'll revert back to the
>> original behavior. No need to calculate bandwidth IMO. Checking current
>> speed and width and comparing to max capabilities should be enough.
> 
> Let's think about where we want to end up.  I think you want to emit a
> message if either the current link width or speed is lower than the
> device is capable of.  You don't need the bandwidth for that.
> 
> Do you also want to emit a message if there's a bottleneck upstream
> from the device?  E.g., the link to the device may be x8, 5GT/s, which
> is capable of a bandwidth of 40Gb/s, but if there's an upstream link
> that is x4, 5GT/s, the device will be limited to a bandwidth of
> 20Gb/s.  If you want to check for that, I think we do need to figure
> the bandwidth.  If we want a message in situations like this, but we
> didn't figure the bandwidth, we would erroneously complain if the
> upstream link were x16, 2.5GT/s, where that link has a lower speed but
> is wider so it's still capable of 40Gb/s.

I think this would have been nice but would result in more questions 
than answers. I think we can live without it and simply check the device 
itself.

> 
> If you want a message about upstream bottlenecks, do you want to
> identify the specific device that is the bottleneck?
> 
> IIRC, you're adding these:
> 
>    int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width)
>    int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed)
>    int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
> 			   enum pcie_link_width *width, int *bw);
> 
> It seems like we might not need all three of these, since the last one
> looks like it could subsume the first two.

This patch becomes obsolete is we can just use pcie_get_minimum_link() 
as it was. pcie_get_width/speed_cap() are still needed as they calculate 
the max capability. pcie_get_link_limits and pcie_get_minimum_link() 
calculate the link status.

> 
>>>> +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
>>>> +			 enum pcie_link_width *width, int *bw)
>>>> +{
>>>>    	int ret;
>>>>    	*speed = PCI_SPEED_UNKNOWN;
>>>>    	*width = PCIE_LNK_WIDTH_UNKNOWN;
>>>> +	*bw = 0;
>>>>    	while (dev) {
>>>>    		u16 lnksta;
>>>> @@ -5042,12 +5063,16 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>>>>    		if (next_width < *width)
>>>>    			*width = next_width;
>>>> +		/* Check if current level in the chain limits the total BW */
>>>> +		if (!(*bw) || *bw > (next_width) * PCIE_SPEED2MBS(next_speed))
>>>> +			*bw = (next_width) * PCIE_SPEED2MBS(next_speed);
>>>> +
>>>>    		dev = dev->bus->self;
>>>>    	}
>>>>    	return 0;
>>>>    }
>>>> -EXPORT_SYMBOL(pcie_get_minimum_link);
>>>> +EXPORT_SYMBOL(pcie_get_link_limits);
>>>>    /**
>>>>     * pcie_get_speed_cap - queries for the PCI device's link speed capability
>>>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>>>> index 27dc070..88e23eb 100644
>>>> --- a/include/linux/pci.h
>>>> +++ b/include/linux/pci.h
>>>> @@ -265,6 +265,12 @@ enum pci_bus_speed {
>>>>    	 (speed) == PCIE_SPEED_2_5GT ? "2.5 GT/s" : \
>>>>    	 "Unknown speed")
>>>> +#define PCIE_SPEED2MBS(speed) \
>>>> +	((speed) == PCIE_SPEED_8_0GT ? 8000 : \
>>>> +	 (speed) == PCIE_SPEED_5_0GT ? 5000 : \
>>>> +	 (speed) == PCIE_SPEED_2_5GT ? 2500 : \
>>>> +	 0)
>>>> +
>>>>    struct pci_cap_saved_data {
>>>>    	u16		cap_nr;
>>>>    	bool		cap_extended;
>>>> @@ -1088,6 +1094,8 @@ int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
>>>>    			  enum pcie_link_width *width);
>>>>    int pcie_get_speed_cap(struct pci_dev *dev, enum pci_bus_speed *speed);
>>>>    int pcie_get_width_cap(struct pci_dev *dev, enum pcie_link_width *width);
>>>> +int pcie_get_link_limits(struct pci_dev *dev, enum pci_bus_speed *speed,
>>>> +			 enum pcie_link_width *width, int *bw);
>>>>    void pcie_flr(struct pci_dev *dev);
>>>>    int __pci_reset_function_locked(struct pci_dev *dev);
>>>>    int pci_reset_function(struct pci_dev *dev);
>>>> -- 
>>>> 1.8.3.1
>>>>

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2018-02-27 15:34 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-15 15:35 [PATCH next V1 0/7] Report PCI device link status Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 1/7] PCI: Add a query function for PCI device's speed cap Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 2/7] PCI: Add a query function for PCI device's width cap Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 3/7] PCI: Calculate BW limitation for PCI devices Tal Gilboa
2018-02-22 20:21   ` Bjorn Helgaas
2018-02-27 12:17     ` Tal Gilboa
2018-02-27 15:19       ` Bjorn Helgaas
2018-02-27 15:34         ` Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 4/7] PCI: Print PCI device link status in kernel log Tal Gilboa
2018-02-22 19:57   ` Bjorn Helgaas
2018-02-25  9:30     ` Tal Gilboa
2018-02-27 12:18       ` Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 5/7] net/mlx4_core: Report PCI properties using dedicated function Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 6/7] net/mlx5: Report device PCI link status and issues Tal Gilboa
2018-01-15 15:35 ` [PATCH next V1 7/7] net/mlx5e: Use generic PCI function for BW calculation Tal Gilboa
2018-02-22 20:09   ` Bjorn Helgaas
2018-02-25 11:20     ` Tal Gilboa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.