netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 net] net: ethernet: fec: prevent tx starvation under high rx load
@ 2020-07-03 14:10 Tobias Waldekranz
  2020-07-06  7:23 ` [EXT] " Andy Duan
  2020-07-07 22:25 ` David Miller
  0 siblings, 2 replies; 3+ messages in thread
From: Tobias Waldekranz @ 2020-07-03 14:10 UTC (permalink / raw)
  To: davem; +Cc: netdev, fugang.duan

In the ISR, we poll the event register for the queues in need of
service and then enter polled mode. After this point, the event
register will never be read again until we exit polled mode.

In a scenario where a UDP flow is routed back out through the same
interface, i.e. "router-on-a-stick" we'll typically only see an rx
queue event initially. Once we start to process the incoming flow
we'll be locked polled mode, but we'll never clean the tx rings since
that event is never caught.

Eventually the netdev watchdog will trip, causing all buffers to be
dropped and then the process starts over again.

Rework the NAPI poll to keep trying to consome the entire budget as
long as new events are coming in, making sure to service all rx/tx
queues, in priority order, on each pass.

Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---

v2 -> v3:
* Actually iterate over number of tx queues in the tx path, not number
  of rx queues.

v1 -> v2:
* Always do a full pass over all rx/tx queues as soon as any event is
  received, as suggested by David.
* Keep dequeuing packets as long as events keep coming in and we're
  under budget.

 drivers/net/ethernet/freescale/fec.h      |  5 --
 drivers/net/ethernet/freescale/fec_main.c | 94 ++++++++---------------
 2 files changed, 31 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index a6cdd5b61921..d8d76da51c5e 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -525,11 +525,6 @@ struct fec_enet_private {
 	unsigned int total_tx_ring_size;
 	unsigned int total_rx_ring_size;
 
-	unsigned long work_tx;
-	unsigned long work_rx;
-	unsigned long work_ts;
-	unsigned long work_mdio;
-
 	struct	platform_device *pdev;
 
 	int	dev_id;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 2d0d313ee7c5..3982285ed020 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct net_device *ndev);
 
 #define DRIVER_NAME	"fec"
 
-#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 : 0))
-
 /* Pause frame feild and FIFO threshold */
 #define FEC_ENET_FCE	(1 << 5)
 #define FEC_ENET_RSEM_V	0x84
@@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 
 	fep = netdev_priv(ndev);
 
-	queue_id = FEC_ENET_GET_QUQUE(queue_id);
-
 	txq = fep->tx_queue[queue_id];
 	/* get next bdp of dirty_tx */
 	nq = netdev_get_tx_queue(ndev, queue_id);
@@ -1340,17 +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 		writel(0, txq->bd.reg_desc_active);
 }
 
-static void
-fec_enet_tx(struct net_device *ndev)
+static void fec_enet_tx(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	u16 queue_id;
-	/* First process class A queue, then Class B and Best Effort queue */
-	for_each_set_bit(queue_id, &fep->work_tx, FEC_ENET_MAX_TX_QS) {
-		clear_bit(queue_id, &fep->work_tx);
-		fec_enet_tx_queue(ndev, queue_id);
-	}
-	return;
+	int i;
+
+	/* Make sure that AVB queues are processed first. */
+	for (i = fep->num_tx_queues - 1; i >= 0; i--)
+		fec_enet_tx_queue(ndev, i);
 }
 
 static int
@@ -1426,7 +1419,6 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 #ifdef CONFIG_M532x
 	flush_cache_all();
 #endif
-	queue_id = FEC_ENET_GET_QUQUE(queue_id);
 	rxq = fep->rx_queue[queue_id];
 
 	/* First, grab all of the stats for the incoming packet.
@@ -1550,6 +1542,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 					       htons(ETH_P_8021Q),
 					       vlan_tag);
 
+		skb_record_rx_queue(skb, queue_id);
 		napi_gro_receive(&fep->napi, skb);
 
 		if (is_copybreak) {
@@ -1595,48 +1588,30 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 	return pkt_received;
 }
 
-static int
-fec_enet_rx(struct net_device *ndev, int budget)
+static int fec_enet_rx(struct net_device *ndev, int budget)
 {
-	int     pkt_received = 0;
-	u16	queue_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	int i, done = 0;
 
-	for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) {
-		int ret;
-
-		ret = fec_enet_rx_queue(ndev,
-					budget - pkt_received, queue_id);
+	/* Make sure that AVB queues are processed first. */
+	for (i = fep->num_rx_queues - 1; i >= 0; i--)
+		done += fec_enet_rx_queue(ndev, budget - done, i);
 
-		if (ret < budget - pkt_received)
-			clear_bit(queue_id, &fep->work_rx);
-
-		pkt_received += ret;
-	}
-	return pkt_received;
+	return done;
 }
 
-static bool
-fec_enet_collect_events(struct fec_enet_private *fep, uint int_events)
+static bool fec_enet_collect_events(struct fec_enet_private *fep)
 {
-	if (int_events == 0)
-		return false;
+	uint int_events;
+
+	int_events = readl(fep->hwp + FEC_IEVENT);
 
-	if (int_events & FEC_ENET_RXF_0)
-		fep->work_rx |= (1 << 2);
-	if (int_events & FEC_ENET_RXF_1)
-		fep->work_rx |= (1 << 0);
-	if (int_events & FEC_ENET_RXF_2)
-		fep->work_rx |= (1 << 1);
+	/* Don't clear MDIO events, we poll for those */
+	int_events &= ~FEC_ENET_MII;
 
-	if (int_events & FEC_ENET_TXF_0)
-		fep->work_tx |= (1 << 2);
-	if (int_events & FEC_ENET_TXF_1)
-		fep->work_tx |= (1 << 0);
-	if (int_events & FEC_ENET_TXF_2)
-		fep->work_tx |= (1 << 1);
+	writel(int_events, fep->hwp + FEC_IEVENT);
 
-	return true;
+	return int_events != 0;
 }
 
 static irqreturn_t
@@ -1644,18 +1619,9 @@ fec_enet_interrupt(int irq, void *dev_id)
 {
 	struct net_device *ndev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	uint int_events;
 	irqreturn_t ret = IRQ_NONE;
 
-	int_events = readl(fep->hwp + FEC_IEVENT);
-
-	/* Don't clear MDIO events, we poll for those */
-	int_events &= ~FEC_ENET_MII;
-
-	writel(int_events, fep->hwp + FEC_IEVENT);
-	fec_enet_collect_events(fep, int_events);
-
-	if ((fep->work_tx || fep->work_rx) && fep->link) {
+	if (fec_enet_collect_events(fep) && fep->link) {
 		ret = IRQ_HANDLED;
 
 		if (napi_schedule_prep(&fep->napi)) {
@@ -1672,17 +1638,19 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 {
 	struct net_device *ndev = napi->dev;
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	int pkts;
+	int done = 0;
 
-	pkts = fec_enet_rx(ndev, budget);
-
-	fec_enet_tx(ndev);
+	do {
+		done += fec_enet_rx(ndev, budget - done);
+		fec_enet_tx(ndev);
+	} while ((done < budget) && fec_enet_collect_events(fep));
 
-	if (pkts < budget) {
-		napi_complete_done(napi, pkts);
+	if (done < budget) {
+		napi_complete_done(napi, done);
 		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	}
-	return pkts;
+
+	return done;
 }
 
 /* ------------------------------------------------------------------------- */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [EXT] [PATCH v3 net] net: ethernet: fec: prevent tx starvation under high rx load
  2020-07-03 14:10 [PATCH v3 net] net: ethernet: fec: prevent tx starvation under high rx load Tobias Waldekranz
@ 2020-07-06  7:23 ` Andy Duan
  2020-07-07 22:25 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: Andy Duan @ 2020-07-06  7:23 UTC (permalink / raw)
  To: Tobias Waldekranz, davem; +Cc: netdev

From: Tobias Waldekranz <tobias@waldekranz.com> Sent: Friday, July 3, 2020 10:11 PM
> In the ISR, we poll the event register for the queues in need of service and
> then enter polled mode. After this point, the event register will never be read
> again until we exit polled mode.
> 
> In a scenario where a UDP flow is routed back out through the same interface,
> i.e. "router-on-a-stick" we'll typically only see an rx queue event initially.
> Once we start to process the incoming flow we'll be locked polled mode, but
> we'll never clean the tx rings since that event is never caught.
> 
> Eventually the netdev watchdog will trip, causing all buffers to be dropped and
> then the process starts over again.
> 
> Rework the NAPI poll to keep trying to consome the entire budget as long as
> new events are coming in, making sure to service all rx/tx queues, in priority
> order, on each pass.
> 
> Fixes: 4d494cdc92b3 ("net: fec: change data structure to support
> multiqueue")
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>

Tested on imx6qp and imx8mp platforms.

Tested-by: Fugang Duan <fugang.duan@nxp.com>
Reviewed-by: Fugang Duan <fugang.duan@nxp.com>
> ---
> 
> v2 -> v3:
> * Actually iterate over number of tx queues in the tx path, not number
>   of rx queues.
> 
> v1 -> v2:
> * Always do a full pass over all rx/tx queues as soon as any event is
>   received, as suggested by David.
> * Keep dequeuing packets as long as events keep coming in and we're
>   under budget.
> 
>  drivers/net/ethernet/freescale/fec.h      |  5 --
>  drivers/net/ethernet/freescale/fec_main.c | 94 ++++++++---------------
>  2 files changed, 31 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h
> b/drivers/net/ethernet/freescale/fec.h
> index a6cdd5b61921..d8d76da51c5e 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -525,11 +525,6 @@ struct fec_enet_private {
>         unsigned int total_tx_ring_size;
>         unsigned int total_rx_ring_size;
> 
> -       unsigned long work_tx;
> -       unsigned long work_rx;
> -       unsigned long work_ts;
> -       unsigned long work_mdio;
> -
>         struct  platform_device *pdev;
> 
>         int     dev_id;
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index 2d0d313ee7c5..3982285ed020 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct net_device
> *ndev);
> 
>  #define DRIVER_NAME    "fec"
> 
> -#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 : 0))
> -
>  /* Pause frame feild and FIFO threshold */
>  #define FEC_ENET_FCE   (1 << 5)
>  #define FEC_ENET_RSEM_V        0x84
> @@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev, u16
> queue_id)
> 
>         fep = netdev_priv(ndev);
> 
> -       queue_id = FEC_ENET_GET_QUQUE(queue_id);
> -
>         txq = fep->tx_queue[queue_id];
>         /* get next bdp of dirty_tx */
>         nq = netdev_get_tx_queue(ndev, queue_id); @@ -1340,17
> +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>                 writel(0, txq->bd.reg_desc_active);  }
> 
> -static void
> -fec_enet_tx(struct net_device *ndev)
> +static void fec_enet_tx(struct net_device *ndev)
>  {
>         struct fec_enet_private *fep = netdev_priv(ndev);
> -       u16 queue_id;
> -       /* First process class A queue, then Class B and Best Effort queue */
> -       for_each_set_bit(queue_id, &fep->work_tx,
> FEC_ENET_MAX_TX_QS) {
> -               clear_bit(queue_id, &fep->work_tx);
> -               fec_enet_tx_queue(ndev, queue_id);
> -       }
> -       return;
> +       int i;
> +
> +       /* Make sure that AVB queues are processed first. */
> +       for (i = fep->num_tx_queues - 1; i >= 0; i--)
> +               fec_enet_tx_queue(ndev, i);
>  }
> 
>  static int
> @@ -1426,7 +1419,6 @@ fec_enet_rx_queue(struct net_device *ndev, int
> budget, u16 queue_id)  #ifdef CONFIG_M532x
>         flush_cache_all();
>  #endif
> -       queue_id = FEC_ENET_GET_QUQUE(queue_id);
>         rxq = fep->rx_queue[queue_id];
> 
>         /* First, grab all of the stats for the incoming packet.
> @@ -1550,6 +1542,7 @@ fec_enet_rx_queue(struct net_device *ndev, int
> budget, u16 queue_id)
> 
> htons(ETH_P_8021Q),
>                                                vlan_tag);
> 
> +               skb_record_rx_queue(skb, queue_id);
>                 napi_gro_receive(&fep->napi, skb);
> 
>                 if (is_copybreak) {
> @@ -1595,48 +1588,30 @@ fec_enet_rx_queue(struct net_device *ndev, int
> budget, u16 queue_id)
>         return pkt_received;
>  }
> 
> -static int
> -fec_enet_rx(struct net_device *ndev, int budget)
> +static int fec_enet_rx(struct net_device *ndev, int budget)
>  {
> -       int     pkt_received = 0;
> -       u16     queue_id;
>         struct fec_enet_private *fep = netdev_priv(ndev);
> +       int i, done = 0;
> 
> -       for_each_set_bit(queue_id, &fep->work_rx,
> FEC_ENET_MAX_RX_QS) {
> -               int ret;
> -
> -               ret = fec_enet_rx_queue(ndev,
> -                                       budget - pkt_received,
> queue_id);
> +       /* Make sure that AVB queues are processed first. */
> +       for (i = fep->num_rx_queues - 1; i >= 0; i--)
> +               done += fec_enet_rx_queue(ndev, budget - done, i);
> 
> -               if (ret < budget - pkt_received)
> -                       clear_bit(queue_id, &fep->work_rx);
> -
> -               pkt_received += ret;
> -       }
> -       return pkt_received;
> +       return done;
>  }
> 
> -static bool
> -fec_enet_collect_events(struct fec_enet_private *fep, uint int_events)
> +static bool fec_enet_collect_events(struct fec_enet_private *fep)
>  {
> -       if (int_events == 0)
> -               return false;
> +       uint int_events;
> +
> +       int_events = readl(fep->hwp + FEC_IEVENT);
> 
> -       if (int_events & FEC_ENET_RXF_0)
> -               fep->work_rx |= (1 << 2);
> -       if (int_events & FEC_ENET_RXF_1)
> -               fep->work_rx |= (1 << 0);
> -       if (int_events & FEC_ENET_RXF_2)
> -               fep->work_rx |= (1 << 1);
> +       /* Don't clear MDIO events, we poll for those */
> +       int_events &= ~FEC_ENET_MII;
> 
> -       if (int_events & FEC_ENET_TXF_0)
> -               fep->work_tx |= (1 << 2);
> -       if (int_events & FEC_ENET_TXF_1)
> -               fep->work_tx |= (1 << 0);
> -       if (int_events & FEC_ENET_TXF_2)
> -               fep->work_tx |= (1 << 1);
> +       writel(int_events, fep->hwp + FEC_IEVENT);
> 
> -       return true;
> +       return int_events != 0;
>  }
> 
>  static irqreturn_t
> @@ -1644,18 +1619,9 @@ fec_enet_interrupt(int irq, void *dev_id)  {
>         struct net_device *ndev = dev_id;
>         struct fec_enet_private *fep = netdev_priv(ndev);
> -       uint int_events;
>         irqreturn_t ret = IRQ_NONE;
> 
> -       int_events = readl(fep->hwp + FEC_IEVENT);
> -
> -       /* Don't clear MDIO events, we poll for those */
> -       int_events &= ~FEC_ENET_MII;
> -
> -       writel(int_events, fep->hwp + FEC_IEVENT);
> -       fec_enet_collect_events(fep, int_events);
> -
> -       if ((fep->work_tx || fep->work_rx) && fep->link) {
> +       if (fec_enet_collect_events(fep) && fep->link) {
>                 ret = IRQ_HANDLED;
> 
>                 if (napi_schedule_prep(&fep->napi)) { @@ -1672,17
> +1638,19 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
> {
>         struct net_device *ndev = napi->dev;
>         struct fec_enet_private *fep = netdev_priv(ndev);
> -       int pkts;
> +       int done = 0;
> 
> -       pkts = fec_enet_rx(ndev, budget);
> -
> -       fec_enet_tx(ndev);
> +       do {
> +               done += fec_enet_rx(ndev, budget - done);
> +               fec_enet_tx(ndev);
> +       } while ((done < budget) && fec_enet_collect_events(fep));
> 
> -       if (pkts < budget) {
> -               napi_complete_done(napi, pkts);
> +       if (done < budget) {
> +               napi_complete_done(napi, done);
>                 writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
>         }
> -       return pkts;
> +
> +       return done;
>  }
> 
>  /* ------------------------------------------------------------------------- */
> --
> 2.17.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3 net] net: ethernet: fec: prevent tx starvation under high rx load
  2020-07-03 14:10 [PATCH v3 net] net: ethernet: fec: prevent tx starvation under high rx load Tobias Waldekranz
  2020-07-06  7:23 ` [EXT] " Andy Duan
@ 2020-07-07 22:25 ` David Miller
  1 sibling, 0 replies; 3+ messages in thread
From: David Miller @ 2020-07-07 22:25 UTC (permalink / raw)
  To: tobias; +Cc: netdev, fugang.duan

From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Fri,  3 Jul 2020 16:10:58 +0200

> In the ISR, we poll the event register for the queues in need of
> service and then enter polled mode. After this point, the event
> register will never be read again until we exit polled mode.
> 
> In a scenario where a UDP flow is routed back out through the same
> interface, i.e. "router-on-a-stick" we'll typically only see an rx
> queue event initially. Once we start to process the incoming flow
> we'll be locked polled mode, but we'll never clean the tx rings since
> that event is never caught.
> 
> Eventually the netdev watchdog will trip, causing all buffers to be
> dropped and then the process starts over again.
> 
> Rework the NAPI poll to keep trying to consome the entire budget as
> long as new events are coming in, making sure to service all rx/tx
> queues, in priority order, on each pass.
> 
> Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue")
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>

Applied and queued up for -stable, thank you.


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-07-07 22:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-03 14:10 [PATCH v3 net] net: ethernet: fec: prevent tx starvation under high rx load Tobias Waldekranz
2020-07-06  7:23 ` [EXT] " Andy Duan
2020-07-07 22:25 ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).