linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net] net: thunderx: workaround BGX TX Underflow issue
@ 2020-01-29 22:36 Robert Jones
  2020-01-30 17:10 ` Jakub Kicinski
  2020-01-30 21:45 ` Heiner Kallweit
  0 siblings, 2 replies; 5+ messages in thread
From: Robert Jones @ 2020-01-29 22:36 UTC (permalink / raw)
  To: Sunil Goutham, Robert Richter, David Miller
  Cc: linux-arm-kernel, netdev, linux-kernel, Tim Harvey

From: Tim Harvey <tharvey@gateworks.com>

While it is not yet understood why a TX underflow can easily occur
for SGMII interfaces, resulting in a TX wedge, it has been found that
disabling/re-enabling the LMAC resolves the issue.

Signed-off-by: Tim Harvey <tharvey@gateworks.com>
Reviewed-by: Robert Jones <rjones@gateworks.com>
---
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 54 +++++++++++++++++++++++
 drivers/net/ethernet/cavium/thunder/thunder_bgx.h |  9 ++++
 2 files changed, 63 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index c4f6ec0..078ecea 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -74,6 +74,7 @@ struct bgx {
 	struct pci_dev		*pdev;
 	bool                    is_dlm;
 	bool                    is_rgx;
+	char			irq_name[7];
 };

 static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
@@ -1535,6 +1536,53 @@ static int bgx_init_phy(struct bgx *bgx)
 	return bgx_init_of_phy(bgx);
 }

+static irqreturn_t bgx_intr_handler(int irq, void *data)
+{
+	struct bgx *bgx = (struct bgx *)data;
+	struct device *dev = &bgx->pdev->dev;
+	u64 status, val;
+	int lmac;
+
+	for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
+		status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT);
+		if (status & GMI_TXX_INT_UNDFLW) {
+			dev_err(dev, "BGX%d lmac%d UNDFLW\n", bgx->bgx_id,
+				lmac);
+			val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG);
+			val &= ~CMR_EN;
+			bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+			val |= CMR_EN;
+			bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
+		}
+		/* clear interrupts */
+		bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int bgx_register_intr(struct pci_dev *pdev)
+{
+	struct bgx *bgx = pci_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	int num_vec, ret;
+
+	/* Enable MSI-X */
+	num_vec = pci_msix_vec_count(pdev);
+	ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSIX);
+	if (ret < 0) {
+		dev_err(dev, "Req for #%d msix vectors failed\n", num_vec);
+		return 1;
+	}
+	sprintf(bgx->irq_name, "BGX%d", bgx->bgx_id);
+	ret = request_irq(pci_irq_vector(pdev, GMPX_GMI_TX_INT),
+		bgx_intr_handler, 0, bgx->irq_name, bgx);
+	if (ret)
+		return 1;
+
+	return 0;
+}
+
 static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int err;
@@ -1604,6 +1652,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)

 	bgx_init_hw(bgx);

+	bgx_register_intr(pdev);
+
 	/* Enable all LMACs */
 	for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
 		err = bgx_lmac_enable(bgx, lmac);
@@ -1614,6 +1664,10 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 				bgx_lmac_disable(bgx, --lmac);
 			goto err_enable;
 		}
+
+		/* enable TX FIFO Underflow interrupt */
+		bgx_reg_modify(bgx, lmac, BGX_GMP_GMI_TXX_INT_ENA_W1S,
+			       GMI_TXX_INT_UNDFLW);
 	}

 	return 0;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 2588870..cdea493 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -180,6 +180,15 @@
 #define BGX_GMP_GMI_TXX_BURST		0x38228
 #define BGX_GMP_GMI_TXX_MIN_PKT		0x38240
 #define BGX_GMP_GMI_TXX_SGMII_CTL	0x38300
+#define BGX_GMP_GMI_TXX_INT		0x38500
+#define BGX_GMP_GMI_TXX_INT_W1S		0x38508
+#define BGX_GMP_GMI_TXX_INT_ENA_W1C	0x38510
+#define BGX_GMP_GMI_TXX_INT_ENA_W1S	0x38518
+#define  GMI_TXX_INT_PTP_LOST			BIT_ULL(4)
+#define  GMI_TXX_INT_LATE_COL			BIT_ULL(3)
+#define  GMI_TXX_INT_XSDEF			BIT_ULL(2)
+#define  GMI_TXX_INT_XSCOL			BIT_ULL(1)
+#define  GMI_TXX_INT_UNDFLW			BIT_ULL(0)

 #define BGX_MSIX_VEC_0_29_ADDR		0x400000 /* +(0..29) << 4 */
 #define BGX_MSIX_VEC_0_29_CTL		0x400008
--
2.9.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: thunderx: workaround BGX TX Underflow issue
  2020-01-30 17:10 ` Jakub Kicinski
@ 2020-01-30 12:07   ` Maciej Fijalkowski
  2020-01-30 20:26   ` Bobby Jones
  1 sibling, 0 replies; 5+ messages in thread
From: Maciej Fijalkowski @ 2020-01-30 12:07 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Robert Jones, Sunil Goutham, Robert Richter, David Miller,
	linux-arm-kernel, netdev, linux-kernel, Tim Harvey

On Thu, Jan 30, 2020 at 09:10:55AM -0800, Jakub Kicinski wrote:
> On Wed, 29 Jan 2020 14:36:09 -0800, Robert Jones wrote:
> > From: Tim Harvey <tharvey@gateworks.com>
> > 
> > While it is not yet understood why a TX underflow can easily occur
> > for SGMII interfaces, resulting in a TX wedge, it has been found that
> > disabling/re-enabling the LMAC resolves the issue.
> > 
> > Signed-off-by: Tim Harvey <tharvey@gateworks.com>
> > Reviewed-by: Robert Jones <rjones@gateworks.com>
> 
> Sunil or Robert (i.e. one of the maintainers) will have to review this
> patch (as indicated by Dave by marking it with "Needs Review / ACK" in
> patchwork).
> 
> At a quick look there are some things which jump out at me:
> 
> > +static int bgx_register_intr(struct pci_dev *pdev)
> > +{
> > +	struct bgx *bgx = pci_get_drvdata(pdev);
> > +	struct device *dev = &pdev->dev;
> > +	int num_vec, ret;
> > +
> > +	/* Enable MSI-X */
> > +	num_vec = pci_msix_vec_count(pdev);
> > +	ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSIX);
> > +	if (ret < 0) {
> > +		dev_err(dev, "Req for #%d msix vectors failed\n", num_vec);
> > +		return 1;
> 
> Please propagate real error codes, or make this function void as the
> caller never actually checks the return value.
> 
> > +	}
> > +	sprintf(bgx->irq_name, "BGX%d", bgx->bgx_id);

Another quick look: use snprintf() here so that bgx->irq_name cannot
overflow if bgx->bgx_id turns out to be an unexpectedly large number.

> > +	ret = request_irq(pci_irq_vector(pdev, GMPX_GMI_TX_INT),
> 
> There is an alloc_irq and a request_irq call added in this patch but there
> is never any freeing. Are you sure this is fine? Devices can be
> reprobed (unbound and bound to drivers via sysfs).
> 
> > +		bgx_intr_handler, 0, bgx->irq_name, bgx);
> 
> Please align the continuation line with the opening bracket (checkpatch
> --strict should help catch this).
> 
> > +	if (ret)
> > +		return 1;
> > +
> > +	return 0;
> > +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: thunderx: workaround BGX TX Underflow issue
  2020-01-29 22:36 [PATCH net] net: thunderx: workaround BGX TX Underflow issue Robert Jones
@ 2020-01-30 17:10 ` Jakub Kicinski
  2020-01-30 12:07   ` Maciej Fijalkowski
  2020-01-30 20:26   ` Bobby Jones
  2020-01-30 21:45 ` Heiner Kallweit
  1 sibling, 2 replies; 5+ messages in thread
From: Jakub Kicinski @ 2020-01-30 17:10 UTC (permalink / raw)
  To: Robert Jones
  Cc: Sunil Goutham, Robert Richter, David Miller, linux-arm-kernel,
	netdev, linux-kernel, Tim Harvey

On Wed, 29 Jan 2020 14:36:09 -0800, Robert Jones wrote:
> From: Tim Harvey <tharvey@gateworks.com>
> 
> While it is not yet understood why a TX underflow can easily occur
> for SGMII interfaces, resulting in a TX wedge, it has been found that
> disabling/re-enabling the LMAC resolves the issue.
> 
> Signed-off-by: Tim Harvey <tharvey@gateworks.com>
> Reviewed-by: Robert Jones <rjones@gateworks.com>

Sunil or Robert (i.e. one of the maintainers) will have to review this
patch (as indicated by Dave by marking it with "Needs Review / ACK" in
patchwork).

At a quick look there are some things which jump out at me:

> +static int bgx_register_intr(struct pci_dev *pdev)
> +{
> +	struct bgx *bgx = pci_get_drvdata(pdev);
> +	struct device *dev = &pdev->dev;
> +	int num_vec, ret;
> +
> +	/* Enable MSI-X */
> +	num_vec = pci_msix_vec_count(pdev);
> +	ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSIX);
> +	if (ret < 0) {
> +		dev_err(dev, "Req for #%d msix vectors failed\n", num_vec);
> +		return 1;

Please propagate real error codes, or make this function void as the
caller never actually checks the return value.

> +	}
> +	sprintf(bgx->irq_name, "BGX%d", bgx->bgx_id);
> +	ret = request_irq(pci_irq_vector(pdev, GMPX_GMI_TX_INT),

There is an alloc_irq and a request_irq call added in this patch but there
is never any freeing. Are you sure this is fine? Devices can be
reprobed (unbound and bound to drivers via sysfs).

> +		bgx_intr_handler, 0, bgx->irq_name, bgx);

Please align the continuation line with the opening bracket (checkpatch
--strict should help catch this).

> +	if (ret)
> +		return 1;
> +
> +	return 0;
> +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: thunderx: workaround BGX TX Underflow issue
  2020-01-30 17:10 ` Jakub Kicinski
  2020-01-30 12:07   ` Maciej Fijalkowski
@ 2020-01-30 20:26   ` Bobby Jones
  1 sibling, 0 replies; 5+ messages in thread
From: Bobby Jones @ 2020-01-30 20:26 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Sunil Goutham, Robert Richter, David Miller,
	moderated list:ARM/FREESCALE IMX / MXC ARM ARCHITECTURE, netdev,
	linux-kernel, Tim Harvey

On Thu, Jan 30, 2020 at 9:10 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 29 Jan 2020 14:36:09 -0800, Robert Jones wrote:
> > From: Tim Harvey <tharvey@gateworks.com>
> >
> > While it is not yet understood why a TX underflow can easily occur
> > for SGMII interfaces, resulting in a TX wedge, it has been found that
> > disabling/re-enabling the LMAC resolves the issue.
> >
> > Signed-off-by: Tim Harvey <tharvey@gateworks.com>
> > Reviewed-by: Robert Jones <rjones@gateworks.com>
>
> Sunil or Robert (i.e. one of the maintainers) will have to review this
> patch (as indicated by Dave by marking it with "Needs Review / ACK" in
> patchwork).
>
> At a quick look there are some things which jump out at me:
>
> > +static int bgx_register_intr(struct pci_dev *pdev)
> > +{
> > +     struct bgx *bgx = pci_get_drvdata(pdev);
> > +     struct device *dev = &pdev->dev;
> > +     int num_vec, ret;
> > +
> > +     /* Enable MSI-X */
> > +     num_vec = pci_msix_vec_count(pdev);
> > +     ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSIX);
> > +     if (ret < 0) {
> > +             dev_err(dev, "Req for #%d msix vectors failed\n", num_vec);
> > +             return 1;
>
> Please propagate real error codes, or make this function void as the
> caller never actually checks the return value.
>
> > +     }
> > +     sprintf(bgx->irq_name, "BGX%d", bgx->bgx_id);
> > +     ret = request_irq(pci_irq_vector(pdev, GMPX_GMI_TX_INT),
>
> There is an alloc_irq and a request_irq call added in this patch but there
> is never any freeing. Are you sure this is fine? Devices can be
> reprobed (unbound and bound to drivers via sysfs).

I agree there needs to be accompanying free calls. I'm referencing
drivers/net/ethernet/cavium/thunder/nic_main.c and see instances of
both pci_free_irq_vectors() and free_irq(). My initial thought was
that I should use pci_free_irq_vectors() in the error check
conditional of the above request irq and also in the bgx_remove()
function. Would that be appropriate in this case?

I'd also plan on using a conditional like this for the free calls:

if (bgx->irq_name)
    pci_free_irq_vectors(pdev);

I'm new to kernel development so suggestions are welcome.

>
> > +             bgx_intr_handler, 0, bgx->irq_name, bgx);
>
> Please align the continuation line with the opening bracket (checkpatch
> --strict should help catch this).
>
> > +     if (ret)
> > +             return 1;
> > +
> > +     return 0;
> > +}

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH net] net: thunderx: workaround BGX TX Underflow issue
  2020-01-29 22:36 [PATCH net] net: thunderx: workaround BGX TX Underflow issue Robert Jones
  2020-01-30 17:10 ` Jakub Kicinski
@ 2020-01-30 21:45 ` Heiner Kallweit
  1 sibling, 0 replies; 5+ messages in thread
From: Heiner Kallweit @ 2020-01-30 21:45 UTC (permalink / raw)
  To: Robert Jones, Sunil Goutham, Robert Richter, David Miller
  Cc: linux-arm-kernel, netdev, linux-kernel, Tim Harvey

On 29.01.2020 23:36, Robert Jones wrote:
> From: Tim Harvey <tharvey@gateworks.com>
> 
> While it is not yet understood why a TX underflow can easily occur
> for SGMII interfaces, resulting in a TX wedge, it has been found that
> disabling/re-enabling the LMAC resolves the issue.
> 
> Signed-off-by: Tim Harvey <tharvey@gateworks.com>
> Reviewed-by: Robert Jones <rjones@gateworks.com>
> ---
>  drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 54 +++++++++++++++++++++++
>  drivers/net/ethernet/cavium/thunder/thunder_bgx.h |  9 ++++
>  2 files changed, 63 insertions(+)
> 
> diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
> index c4f6ec0..078ecea 100644
> --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
> +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
> @@ -74,6 +74,7 @@ struct bgx {
>  	struct pci_dev		*pdev;
>  	bool                    is_dlm;
>  	bool                    is_rgx;
> +	char			irq_name[7];

Why do you store the name? It's used in probe() only.

>  };
> 
>  static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
> @@ -1535,6 +1536,53 @@ static int bgx_init_phy(struct bgx *bgx)
>  	return bgx_init_of_phy(bgx);
>  }
> 
> +static irqreturn_t bgx_intr_handler(int irq, void *data)
> +{
> +	struct bgx *bgx = (struct bgx *)data;
> +	struct device *dev = &bgx->pdev->dev;
> +	u64 status, val;
> +	int lmac;
> +
> +	for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
> +		status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT);
> +		if (status & GMI_TXX_INT_UNDFLW) {
> +			dev_err(dev, "BGX%d lmac%d UNDFLW\n", bgx->bgx_id,

Using pci_err() would make your life a little easier.

> +				lmac);
> +			val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG);
> +			val &= ~CMR_EN;
> +			bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
> +			val |= CMR_EN;
> +			bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val);
> +		}
> +		/* clear interrupts */
> +		bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status);
> +	}
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static int bgx_register_intr(struct pci_dev *pdev)
> +{
> +	struct bgx *bgx = pci_get_drvdata(pdev);
> +	struct device *dev = &pdev->dev;
> +	int num_vec, ret;
> +
> +	/* Enable MSI-X */
> +	num_vec = pci_msix_vec_count(pdev);
> +	ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSIX);

Why do you want to enforce using MSI-X? Any interrupt type should be
fine for you, so let the system decide and use PCI_IRQ_ALL_TYPES.
And why do you need more than one vector if all you're interested in
is tx underflow events?

> +	if (ret < 0) {
> +		dev_err(dev, "Req for #%d msix vectors failed\n", num_vec);
> +		return 1;
> +	}
> +	sprintf(bgx->irq_name, "BGX%d", bgx->bgx_id);
> +	ret = request_irq(pci_irq_vector(pdev, GMPX_GMI_TX_INT),
> +		bgx_intr_handler, 0, bgx->irq_name, bgx);

Here using pci_request_irq() would make your life easier.
This function also allows you to create the irq name dynamically.

> +	if (ret)
> +		return 1;
> +
> +	return 0;
> +}
> +
>  static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  {
>  	int err;
> @@ -1604,6 +1652,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> 
>  	bgx_init_hw(bgx);
> 
> +	bgx_register_intr(pdev);
> +
>  	/* Enable all LMACs */
>  	for (lmac = 0; lmac < bgx->lmac_count; lmac++) {
>  		err = bgx_lmac_enable(bgx, lmac);
> @@ -1614,6 +1664,10 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  				bgx_lmac_disable(bgx, --lmac);
>  			goto err_enable;
>  		}
> +
> +		/* enable TX FIFO Underflow interrupt */
> +		bgx_reg_modify(bgx, lmac, BGX_GMP_GMI_TXX_INT_ENA_W1S,
> +			       GMI_TXX_INT_UNDFLW);

If allocating an interrupt fails then you most likely don't want to do this.
And do you need this interrupt if the interface is down? If not then you
could think about moving this to the ndo_open() callback.
And the chip interrupt should be masked if not needed any longer.
Else you risk spurious interrupts e.g. after driver unload.

>  	}
> 
>  	return 0;
> diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
> index 2588870..cdea493 100644
> --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
> +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
> @@ -180,6 +180,15 @@
>  #define BGX_GMP_GMI_TXX_BURST		0x38228
>  #define BGX_GMP_GMI_TXX_MIN_PKT		0x38240
>  #define BGX_GMP_GMI_TXX_SGMII_CTL	0x38300
> +#define BGX_GMP_GMI_TXX_INT		0x38500
> +#define BGX_GMP_GMI_TXX_INT_W1S		0x38508
> +#define BGX_GMP_GMI_TXX_INT_ENA_W1C	0x38510
> +#define BGX_GMP_GMI_TXX_INT_ENA_W1S	0x38518
> +#define  GMI_TXX_INT_PTP_LOST			BIT_ULL(4)
> +#define  GMI_TXX_INT_LATE_COL			BIT_ULL(3)
> +#define  GMI_TXX_INT_XSDEF			BIT_ULL(2)
> +#define  GMI_TXX_INT_XSCOL			BIT_ULL(1)
> +#define  GMI_TXX_INT_UNDFLW			BIT_ULL(0)
> 
>  #define BGX_MSIX_VEC_0_29_ADDR		0x400000 /* +(0..29) << 4 */
>  #define BGX_MSIX_VEC_0_29_CTL		0x400008
> --
> 2.9.2
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-01-30 21:45 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-29 22:36 [PATCH net] net: thunderx: workaround BGX TX Underflow issue Robert Jones
2020-01-30 17:10 ` Jakub Kicinski
2020-01-30 12:07   ` Maciej Fijalkowski
2020-01-30 20:26   ` Bobby Jones
2020-01-30 21:45 ` Heiner Kallweit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).