All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load
@ 2020-07-02 20:57 Tobias Waldekranz
  2020-07-03  2:45 ` [EXT] " Andy Duan
  0 siblings, 1 reply; 5+ messages in thread
From: Tobias Waldekranz @ 2020-07-02 20:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, fugang.duan

In the ISR, we poll the event register for the queues in need of
service and then enter polled mode. After this point, the event
register will never be read again until we exit polled mode.

In a scenario where a UDP flow is routed back out through the same
interface, i.e. "router-on-a-stick" we'll typically only see an rx
queue event initially. Once we start to process the incoming flow
we'll be locked in polled mode, but we'll never clean the tx rings since
that event is never caught.

Eventually the netdev watchdog will trip, causing all buffers to be
dropped and then the process starts over again.

Rework the NAPI poll to keep trying to consume the entire budget as
long as new events are coming in, making sure to service all rx/tx
queues, in priority order, on each pass.

Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
---

v1 -> v2:
* Always do a full pass over all rx/tx queues as soon as any event is
  received, as suggested by David.
* Keep dequeuing packets as long as events keep coming in and we're
  under budget.

 drivers/net/ethernet/freescale/fec.h      |  5 --
 drivers/net/ethernet/freescale/fec_main.c | 94 ++++++++---------------
 2 files changed, 31 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index a6cdd5b61921..d8d76da51c5e 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -525,11 +525,6 @@ struct fec_enet_private {
 	unsigned int total_tx_ring_size;
 	unsigned int total_rx_ring_size;
 
-	unsigned long work_tx;
-	unsigned long work_rx;
-	unsigned long work_ts;
-	unsigned long work_mdio;
-
 	struct	platform_device *pdev;
 
 	int	dev_id;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 2d0d313ee7c5..84589d464850 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct net_device *ndev);
 
 #define DRIVER_NAME	"fec"
 
-#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 : 0))
-
 /* Pause frame feild and FIFO threshold */
 #define FEC_ENET_FCE	(1 << 5)
 #define FEC_ENET_RSEM_V	0x84
@@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 
 	fep = netdev_priv(ndev);
 
-	queue_id = FEC_ENET_GET_QUQUE(queue_id);
-
 	txq = fep->tx_queue[queue_id];
 	/* get next bdp of dirty_tx */
 	nq = netdev_get_tx_queue(ndev, queue_id);
@@ -1340,17 +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 		writel(0, txq->bd.reg_desc_active);
 }
 
-static void
-fec_enet_tx(struct net_device *ndev)
+static void fec_enet_tx(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	u16 queue_id;
-	/* First process class A queue, then Class B and Best Effort queue */
-	for_each_set_bit(queue_id, &fep->work_tx, FEC_ENET_MAX_TX_QS) {
-		clear_bit(queue_id, &fep->work_tx);
-		fec_enet_tx_queue(ndev, queue_id);
-	}
-	return;
+	int i;
+
+	/* Make sure that AVB queues are processed first. */
+	for (i = fep->num_rx_queues - 1; i >= 0; i--)
+		fec_enet_tx_queue(ndev, i);
 }
 
 static int
@@ -1426,7 +1419,6 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 #ifdef CONFIG_M532x
 	flush_cache_all();
 #endif
-	queue_id = FEC_ENET_GET_QUQUE(queue_id);
 	rxq = fep->rx_queue[queue_id];
 
 	/* First, grab all of the stats for the incoming packet.
@@ -1550,6 +1542,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 					       htons(ETH_P_8021Q),
 					       vlan_tag);
 
+		skb_record_rx_queue(skb, queue_id);
 		napi_gro_receive(&fep->napi, skb);
 
 		if (is_copybreak) {
@@ -1595,48 +1588,30 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 	return pkt_received;
 }
 
-static int
-fec_enet_rx(struct net_device *ndev, int budget)
+static int fec_enet_rx(struct net_device *ndev, int budget)
 {
-	int     pkt_received = 0;
-	u16	queue_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	int i, done = 0;
 
-	for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) {
-		int ret;
-
-		ret = fec_enet_rx_queue(ndev,
-					budget - pkt_received, queue_id);
+	/* Make sure that AVB queues are processed first. */
+	for (i = fep->num_rx_queues - 1; i >= 0; i--)
+		done += fec_enet_rx_queue(ndev, budget - done, i);
 
-		if (ret < budget - pkt_received)
-			clear_bit(queue_id, &fep->work_rx);
-
-		pkt_received += ret;
-	}
-	return pkt_received;
+	return done;
 }
 
-static bool
-fec_enet_collect_events(struct fec_enet_private *fep, uint int_events)
+static bool fec_enet_collect_events(struct fec_enet_private *fep)
 {
-	if (int_events == 0)
-		return false;
+	uint int_events;
+
+	int_events = readl(fep->hwp + FEC_IEVENT);
 
-	if (int_events & FEC_ENET_RXF_0)
-		fep->work_rx |= (1 << 2);
-	if (int_events & FEC_ENET_RXF_1)
-		fep->work_rx |= (1 << 0);
-	if (int_events & FEC_ENET_RXF_2)
-		fep->work_rx |= (1 << 1);
+	/* Don't clear MDIO events, we poll for those */
+	int_events &= ~FEC_ENET_MII;
 
-	if (int_events & FEC_ENET_TXF_0)
-		fep->work_tx |= (1 << 2);
-	if (int_events & FEC_ENET_TXF_1)
-		fep->work_tx |= (1 << 0);
-	if (int_events & FEC_ENET_TXF_2)
-		fep->work_tx |= (1 << 1);
+	writel(int_events, fep->hwp + FEC_IEVENT);
 
-	return true;
+	return int_events != 0;
 }
 
 static irqreturn_t
@@ -1644,18 +1619,9 @@ fec_enet_interrupt(int irq, void *dev_id)
 {
 	struct net_device *ndev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	uint int_events;
 	irqreturn_t ret = IRQ_NONE;
 
-	int_events = readl(fep->hwp + FEC_IEVENT);
-
-	/* Don't clear MDIO events, we poll for those */
-	int_events &= ~FEC_ENET_MII;
-
-	writel(int_events, fep->hwp + FEC_IEVENT);
-	fec_enet_collect_events(fep, int_events);
-
-	if ((fep->work_tx || fep->work_rx) && fep->link) {
+	if (fec_enet_collect_events(fep) && fep->link) {
 		ret = IRQ_HANDLED;
 
 		if (napi_schedule_prep(&fep->napi)) {
@@ -1672,17 +1638,19 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 {
 	struct net_device *ndev = napi->dev;
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	int pkts;
+	int done = 0;
 
-	pkts = fec_enet_rx(ndev, budget);
-
-	fec_enet_tx(ndev);
+	do {
+		done += fec_enet_rx(ndev, budget - done);
+		fec_enet_tx(ndev);
+	} while ((done < budget) && fec_enet_collect_events(fep));
 
-	if (pkts < budget) {
-		napi_complete_done(napi, pkts);
+	if (done < budget) {
+		napi_complete_done(napi, done);
 		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	}
-	return pkts;
+
+	return done;
 }
 
 /* ------------------------------------------------------------------------- */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [EXT] [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load
  2020-07-02 20:57 [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load Tobias Waldekranz
@ 2020-07-03  2:45 ` Andy Duan
  2020-07-03  7:55   ` Tobias Waldekranz
  0 siblings, 1 reply; 5+ messages in thread
From: Andy Duan @ 2020-07-03  2:45 UTC (permalink / raw)
  To: Tobias Waldekranz, davem; +Cc: netdev

From: Tobias Waldekranz <tobias@waldekranz.com> Sent: Friday, July 3, 2020 4:58 AM
> In the ISR, we poll the event register for the queues in need of service and
> then enter polled mode. After this point, the event register will never be read
> again until we exit polled mode.
> 
> In a scenario where a UDP flow is routed back out through the same interface,
> i.e. "router-on-a-stick" we'll typically only see an rx queue event initially.
> Once we start to process the incoming flow we'll be locked polled mode, but
> we'll never clean the tx rings since that event is never caught.
> 
> Eventually the netdev watchdog will trip, causing all buffers to be dropped and
> then the process starts over again.
> 
> Rework the NAPI poll to keep trying to consome the entire budget as long as
> new events are coming in, making sure to service all rx/tx queues, in priority
> order, on each pass.
> 
> Fixes: 4d494cdc92b3 ("net: fec: change data structure to support
> multiqueue")
> Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> ---
> 
> v1 -> v2:
> * Always do a full pass over all rx/tx queues as soon as any event is
>   received, as suggested by David.
> * Keep dequeuing packets as long as events keep coming in and we're
>   under budget.
> 
>  drivers/net/ethernet/freescale/fec.h      |  5 --
>  drivers/net/ethernet/freescale/fec_main.c | 94 ++++++++---------------
>  2 files changed, 31 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h
> b/drivers/net/ethernet/freescale/fec.h
> index a6cdd5b61921..d8d76da51c5e 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -525,11 +525,6 @@ struct fec_enet_private {
>         unsigned int total_tx_ring_size;
>         unsigned int total_rx_ring_size;
> 
> -       unsigned long work_tx;
> -       unsigned long work_rx;
> -       unsigned long work_ts;
> -       unsigned long work_mdio;
> -
>         struct  platform_device *pdev;
> 
>         int     dev_id;
> diff --git a/drivers/net/ethernet/freescale/fec_main.c
> b/drivers/net/ethernet/freescale/fec_main.c
> index 2d0d313ee7c5..84589d464850 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct net_device
> *ndev);
> 
>  #define DRIVER_NAME    "fec"
> 
> -#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 : 0))
> -
>  /* Pause frame feild and FIFO threshold */
>  #define FEC_ENET_FCE   (1 << 5)
>  #define FEC_ENET_RSEM_V        0x84
> @@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev, u16
> queue_id)
> 
>         fep = netdev_priv(ndev);
> 
> -       queue_id = FEC_ENET_GET_QUQUE(queue_id);
> -
>         txq = fep->tx_queue[queue_id];
>         /* get next bdp of dirty_tx */
>         nq = netdev_get_tx_queue(ndev, queue_id); @@ -1340,17
> +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
>                 writel(0, txq->bd.reg_desc_active);  }
> 
> -static void
> -fec_enet_tx(struct net_device *ndev)
> +static void fec_enet_tx(struct net_device *ndev)
>  {
>         struct fec_enet_private *fep = netdev_priv(ndev);
> -       u16 queue_id;
> -       /* First process class A queue, then Class B and Best Effort queue */
> -       for_each_set_bit(queue_id, &fep->work_tx,
> FEC_ENET_MAX_TX_QS) {
> -               clear_bit(queue_id, &fep->work_tx);
> -               fec_enet_tx_queue(ndev, queue_id);
> -       }
> -       return;
> +       int i;
> +
> +       /* Make sure that AVB queues are processed first. */
> +       for (i = fep->num_rx_queues - 1; i >= 0; i--)

In fact, you have already changed the queue priority compared to before.
Before: queue1 (Audio) > queue2 (video) > queue0 (best effort)
Now: queue2 (video) > queue1 (Audio) > queue0 (best effort)

Other logic seems fine, but you should run stress tests to avoid any
blocking issues, since the driver covers more than 20 i.MX platforms.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [EXT] [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load
  2020-07-03  2:45 ` [EXT] " Andy Duan
@ 2020-07-03  7:55   ` Tobias Waldekranz
  2020-07-03  9:58     ` Andy Duan
  0 siblings, 1 reply; 5+ messages in thread
From: Tobias Waldekranz @ 2020-07-03  7:55 UTC (permalink / raw)
  To: Andy Duan, davem; +Cc: netdev

On Fri Jul 3, 2020 at 4:45 AM CEST, Andy Duan wrote:
> From: Tobias Waldekranz <tobias@waldekranz.com> Sent: Friday, July 3,
> 2020 4:58 AM
> > In the ISR, we poll the event register for the queues in need of service and
> > then enter polled mode. After this point, the event register will never be read
> > again until we exit polled mode.
> > 
> > In a scenario where a UDP flow is routed back out through the same interface,
> > i.e. "router-on-a-stick" we'll typically only see an rx queue event initially.
> > Once we start to process the incoming flow we'll be locked polled mode, but
> > we'll never clean the tx rings since that event is never caught.
> > 
> > Eventually the netdev watchdog will trip, causing all buffers to be dropped and
> > then the process starts over again.
> > 
> > Rework the NAPI poll to keep trying to consome the entire budget as long as
> > new events are coming in, making sure to service all rx/tx queues, in priority
> > order, on each pass.
> > 
> > Fixes: 4d494cdc92b3 ("net: fec: change data structure to support
> > multiqueue")
> > Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> > ---
> > 
> > v1 -> v2:
> > * Always do a full pass over all rx/tx queues as soon as any event is
> >   received, as suggested by David.
> > * Keep dequeuing packets as long as events keep coming in and we're
> >   under budget.
> > 
> >  drivers/net/ethernet/freescale/fec.h      |  5 --
> >  drivers/net/ethernet/freescale/fec_main.c | 94 ++++++++---------------
> >  2 files changed, 31 insertions(+), 68 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/freescale/fec.h
> > b/drivers/net/ethernet/freescale/fec.h
> > index a6cdd5b61921..d8d76da51c5e 100644
> > --- a/drivers/net/ethernet/freescale/fec.h
> > +++ b/drivers/net/ethernet/freescale/fec.h
> > @@ -525,11 +525,6 @@ struct fec_enet_private {
> >         unsigned int total_tx_ring_size;
> >         unsigned int total_rx_ring_size;
> > 
> > -       unsigned long work_tx;
> > -       unsigned long work_rx;
> > -       unsigned long work_ts;
> > -       unsigned long work_mdio;
> > -
> >         struct  platform_device *pdev;
> > 
> >         int     dev_id;
> > diff --git a/drivers/net/ethernet/freescale/fec_main.c
> > b/drivers/net/ethernet/freescale/fec_main.c
> > index 2d0d313ee7c5..84589d464850 100644
> > --- a/drivers/net/ethernet/freescale/fec_main.c
> > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > @@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct net_device
> > *ndev);
> > 
> >  #define DRIVER_NAME    "fec"
> > 
> > -#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 : 0))
> > -
> >  /* Pause frame feild and FIFO threshold */
> >  #define FEC_ENET_FCE   (1 << 5)
> >  #define FEC_ENET_RSEM_V        0x84
> > @@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev, u16
> > queue_id)
> > 
> >         fep = netdev_priv(ndev);
> > 
> > -       queue_id = FEC_ENET_GET_QUQUE(queue_id);
> > -
> >         txq = fep->tx_queue[queue_id];
> >         /* get next bdp of dirty_tx */
> >         nq = netdev_get_tx_queue(ndev, queue_id); @@ -1340,17
> > +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
> >                 writel(0, txq->bd.reg_desc_active);  }
> > 
> > -static void
> > -fec_enet_tx(struct net_device *ndev)
> > +static void fec_enet_tx(struct net_device *ndev)
> >  {
> >         struct fec_enet_private *fep = netdev_priv(ndev);
> > -       u16 queue_id;
> > -       /* First process class A queue, then Class B and Best Effort queue */
> > -       for_each_set_bit(queue_id, &fep->work_tx,
> > FEC_ENET_MAX_TX_QS) {
> > -               clear_bit(queue_id, &fep->work_tx);
> > -               fec_enet_tx_queue(ndev, queue_id);
> > -       }
> > -       return;
> > +       int i;
> > +
> > +       /* Make sure that AVB queues are processed first. */
> > +       for (i = fep->num_rx_queues - 1; i >= 0; i--)
>
> In fact, you already change the queue priority comparing before.
> Before: queue1 (Audio) > queue2 (video) > queue0 (best effort)
> Now: queue2 (video) > queue1 (Audio) > queue0 (best effort)

Yes, thank you, I meant to ask about that. I was looking at these
definitions in fec.h:

#define RCMR_CMP_1		(RCMR_CMP_CFG(0, 0) | RCMR_CMP_CFG(1, 1) | \
				RCMR_CMP_CFG(2, 2) | RCMR_CMP_CFG(3, 3))
#define RCMR_CMP_2		(RCMR_CMP_CFG(4, 0) | RCMR_CMP_CFG(5, 1) | \
				RCMR_CMP_CFG(6, 2) | RCMR_CMP_CFG(7, 3))

I read that as PCP 0-3 being mapped to queue 1 and 4-7 to queue
2. That led me to believe that the order should be 2, 1, 0. Is the
driver supposed to prioritize PCP 0-3 over 4-7, or have I
misunderstood completely?

> Other logic seems fine, but you should run stress test to avoid any
> block issue since the driver cover more than 20 imx platforms.

I have run stress tests and I observe that we're dequeuing about as
many packets from each queue when the incoming line is filled with 1/3
each of untagged/tagged-pcp0/tagged-pcp7 traffic:

root@envoy:~# ply -c "sleep 2" '
t:net/napi_gro_receive_entry {
    @[data->napi_id, data->queue_mapping] = count();
}'
ply: active
ply: deactivating

@:
{ 66, 3 }: 165811
{ 66, 2 }: 167733
{ 66, 1 }: 169470

It seems like this is due to "Receive flushing" not being enabled in
the FEC. If I manually enable it for queue 0, processing is restricted
to only queue 1 and 2:

root@envoy:~# devmem 0x30be01f0 32 $((1 << 3))
root@envoy:~# ply -c "sleep 2" '
t:net/napi_gro_receive_entry {
    @[data->napi_id, data->queue_mapping] = count();
}'
ply: active
ply: deactivating

@:
{ 66, 2 }: 275055
{ 66, 3 }: 275870

Enabling flushing on queue 1, focuses all processing on queue 2:

root@envoy:~# devmem 0x30be01f0 32 $((3 << 3))
root@envoy:~# ply -c "sleep 2" '
t:net/napi_gro_receive_entry {
    @[data->napi_id, data->queue_mapping] = count();
}'
ply: active
ply: deactivating

@:
{ 66, 3 }: 545442

Changing the default QoS settings feels like a separate change, but I
can submit a v3 as a series if you want?

I do not have access to a single-queue iMX device, would it be
possible for you to test this change on such a device?


^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [EXT] [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load
  2020-07-03  7:55   ` Tobias Waldekranz
@ 2020-07-03  9:58     ` Andy Duan
  2020-07-03 10:02       ` Tobias Waldekranz
  0 siblings, 1 reply; 5+ messages in thread
From: Andy Duan @ 2020-07-03  9:58 UTC (permalink / raw)
  To: Tobias Waldekranz, davem; +Cc: netdev

From: Tobias Waldekranz <tobias@waldekranz.com> Sent: Friday, July 3, 2020 3:55 PM
> On Fri Jul 3, 2020 at 4:45 AM CEST, Andy Duan wrote:
> > From: Tobias Waldekranz <tobias@waldekranz.com> Sent: Friday, July 3,
> > 2020 4:58 AM
> > > In the ISR, we poll the event register for the queues in need of
> > > service and then enter polled mode. After this point, the event
> > > register will never be read again until we exit polled mode.
> > >
> > > In a scenario where a UDP flow is routed back out through the same
> > > interface, i.e. "router-on-a-stick" we'll typically only see an rx queue event
> initially.
> > > Once we start to process the incoming flow we'll be locked polled
> > > mode, but we'll never clean the tx rings since that event is never caught.
> > >
> > > Eventually the netdev watchdog will trip, causing all buffers to be
> > > dropped and then the process starts over again.
> > >
> > > Rework the NAPI poll to keep trying to consome the entire budget as
> > > long as new events are coming in, making sure to service all rx/tx
> > > queues, in priority order, on each pass.
> > >
> > > Fixes: 4d494cdc92b3 ("net: fec: change data structure to support
> > > multiqueue")
> > > Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
> > > ---
> > >
> > > v1 -> v2:
> > > * Always do a full pass over all rx/tx queues as soon as any event is
> > >   received, as suggested by David.
> > > * Keep dequeuing packets as long as events keep coming in and we're
> > >   under budget.
> > >
> > >  drivers/net/ethernet/freescale/fec.h      |  5 --
> > >  drivers/net/ethernet/freescale/fec_main.c | 94
> > > ++++++++---------------
> > >  2 files changed, 31 insertions(+), 68 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/freescale/fec.h
> > > b/drivers/net/ethernet/freescale/fec.h
> > > index a6cdd5b61921..d8d76da51c5e 100644
> > > --- a/drivers/net/ethernet/freescale/fec.h
> > > +++ b/drivers/net/ethernet/freescale/fec.h
> > > @@ -525,11 +525,6 @@ struct fec_enet_private {
> > >         unsigned int total_tx_ring_size;
> > >         unsigned int total_rx_ring_size;
> > >
> > > -       unsigned long work_tx;
> > > -       unsigned long work_rx;
> > > -       unsigned long work_ts;
> > > -       unsigned long work_mdio;
> > > -
> > >         struct  platform_device *pdev;
> > >
> > >         int     dev_id;
> > > diff --git a/drivers/net/ethernet/freescale/fec_main.c
> > > b/drivers/net/ethernet/freescale/fec_main.c
> > > index 2d0d313ee7c5..84589d464850 100644
> > > --- a/drivers/net/ethernet/freescale/fec_main.c
> > > +++ b/drivers/net/ethernet/freescale/fec_main.c
> > > @@ -75,8 +75,6 @@ static void fec_enet_itr_coal_init(struct
> > > net_device *ndev);
> > >
> > >  #define DRIVER_NAME    "fec"
> > >
> > > -#define FEC_ENET_GET_QUQUE(_x) ((_x == 0) ? 1 : ((_x == 1) ? 2 :
> > > 0))
> > > -
> > >  /* Pause frame feild and FIFO threshold */
> > >  #define FEC_ENET_FCE   (1 << 5)
> > >  #define FEC_ENET_RSEM_V        0x84
> > > @@ -1248,8 +1246,6 @@ fec_enet_tx_queue(struct net_device *ndev,
> u16
> > > queue_id)
> > >
> > >         fep = netdev_priv(ndev);
> > >
> > > -       queue_id = FEC_ENET_GET_QUQUE(queue_id);
> > > -
> > >         txq = fep->tx_queue[queue_id];
> > >         /* get next bdp of dirty_tx */
> > >         nq = netdev_get_tx_queue(ndev, queue_id); @@ -1340,17
> > > +1336,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16
> queue_id)
> > >                 writel(0, txq->bd.reg_desc_active);  }
> > >
> > > -static void
> > > -fec_enet_tx(struct net_device *ndev)
> > > +static void fec_enet_tx(struct net_device *ndev)
> > >  {
> > >         struct fec_enet_private *fep = netdev_priv(ndev);
> > > -       u16 queue_id;
> > > -       /* First process class A queue, then Class B and Best Effort
> queue */
> > > -       for_each_set_bit(queue_id, &fep->work_tx,
> > > FEC_ENET_MAX_TX_QS) {
> > > -               clear_bit(queue_id, &fep->work_tx);
> > > -               fec_enet_tx_queue(ndev, queue_id);
> > > -       }
> > > -       return;
> > > +       int i;
> > > +
> > > +       /* Make sure that AVB queues are processed first. */
> > > +       for (i = fep->num_rx_queues - 1; i >= 0; i--)
> >
> > In fact, you already change the queue priority comparing before.
> > Before: queue1 (Audio) > queue2 (video) > queue0 (best effort)
> > Now: queue2 (video) > queue1 (Audio) > queue0 (best effort)
> 
> Yes, thank you, I meant to ask about that. I was looking at these definitions in
> fec.h:
> 
> #define RCMR_CMP_1              (RCMR_CMP_CFG(0, 0) |
> RCMR_CMP_CFG(1, 1) | \
>                                 RCMR_CMP_CFG(2, 2) |
> RCMR_CMP_CFG(3, 3))
> #define RCMR_CMP_2              (RCMR_CMP_CFG(4, 0) |
> RCMR_CMP_CFG(5, 1) | \
>                                 RCMR_CMP_CFG(6, 2) |
> RCMR_CMP_CFG(7, 3))
> 
> I read that as PCP 0-3 being mapped to queue 1 and 4-7 to queue 2. That led
> me to believe that the order should be 2, 1, 0. Is the driver supposed to
> prioritize PCP 0-3 over 4-7, or have I misunderstood completely?

The configuration is for RX queues.
If consider PCP 0 is high priority, that does make sense: 2 > 1 > 0.
> 
> > Other logic seems fine, but you should run stress test to avoid any
> > block issue since the driver cover more than 20 imx platforms.
> 
> I have run stress tests and I observe that we're dequeuing about as many
> packets from each queue when the incoming line is filled with 1/3 each of
> untagged/tagged-pcp0/tagged-pcp7 traffic:
> 
> root@envoy:~# ply -c "sleep 2" '
> t:net/napi_gro_receive_entry {
>     @[data->napi_id, data->queue_mapping] = count(); }'
> ply: active
> ply: deactivating
> 
> @:
> { 66, 3 }: 165811
> { 66, 2 }: 167733
> { 66, 1 }: 169470
> 
> It seems like this is due to "Receive flushing" not being enabled in the FEC. If I
> manually enable it for queue 0, processing is restricted to only queue 1 and 2:
> 
> root@envoy:~# devmem 0x30be01f0 32 $((1 << 3)) root@envoy:~# ply -c
> "sleep 2" '
> t:net/napi_gro_receive_entry {
>     @[data->napi_id, data->queue_mapping] = count(); }'
> ply: active
> ply: deactivating
> 
> @:
> { 66, 2 }: 275055
> { 66, 3 }: 275870
> 
> Enabling flushing on queue 1, focuses all processing on queue 2:

Please don't enable flush; there is an IC issue.
NXP's latest errata doc should include the issue, but the flush issue was
fixed on the i.MX8DXL platform.
> 
> root@envoy:~# devmem 0x30be01f0 32 $((3 << 3)) root@envoy:~# ply -c
> "sleep 2" '
> t:net/napi_gro_receive_entry {
>     @[data->napi_id, data->queue_mapping] = count(); }'
> ply: active
> ply: deactivating
> 
> @:
> { 66, 3 }: 545442
> 
> Changing the default QoS settings feels like a separate change, but I can
> submit a v3 as a series if you want?

I think the version is fine.  No need to submit separate change.
> 
> I do not have access to a single-queue iMX device, would it be possible for you
> to test this change on such a device?

Yes, I will do stress test on imx8 and legacy platform like imx6 with single-queue,
try to avoid any block issue.
Thank you !


^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [EXT] [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load
  2020-07-03  9:58     ` Andy Duan
@ 2020-07-03 10:02       ` Tobias Waldekranz
  0 siblings, 0 replies; 5+ messages in thread
From: Tobias Waldekranz @ 2020-07-03 10:02 UTC (permalink / raw)
  To: Andy Duan, davem; +Cc: netdev

On Fri Jul 3, 2020 at 11:58 AM CEST, Andy Duan wrote:
> From: Tobias Waldekranz <tobias@waldekranz.com> Sent: Friday, July 3,
> 2020 3:55 PM
> > On Fri Jul 3, 2020 at 4:45 AM CEST, Andy Duan wrote:
> > > In fact, you already change the queue priority comparing before.
> > > Before: queue1 (Audio) > queue2 (video) > queue0 (best effort)
> > > Now: queue2 (video) > queue1 (Audio) > queue0 (best effort)
> > 
> > Yes, thank you, I meant to ask about that. I was looking at these definitions in
> > fec.h:
> > 
> > #define RCMR_CMP_1              (RCMR_CMP_CFG(0, 0) |
> > RCMR_CMP_CFG(1, 1) | \
> >                                 RCMR_CMP_CFG(2, 2) |
> > RCMR_CMP_CFG(3, 3))
> > #define RCMR_CMP_2              (RCMR_CMP_CFG(4, 0) |
> > RCMR_CMP_CFG(5, 1) | \
> >                                 RCMR_CMP_CFG(6, 2) |
> > RCMR_CMP_CFG(7, 3))
> > 
> > I read that as PCP 0-3 being mapped to queue 1 and 4-7 to queue 2. That led
> > me to believe that the order should be 2, 1, 0. Is the driver supposed to
> > prioritize PCP 0-3 over 4-7, or have I misunderstood completely?
>
> The configuration is for RX queues.

The order in which we clean the Tx queues should not matter as there
is no budget limit in that case. I.e. even in the worst case where all
three queues are filled with transmitted buffers we will always
collect all of them in a single NAPI poll, no? I just put them in the
same order to be consistent.


> If consider PCP 0 is high priority, that does make sense: 2 > 1 > 0.

Sorry, now I'm confused. The PCP->Queue mapping is:

0->1, 1->1, 2->1, 3->1
4->2, 5->2, 6->2, 7->2

A higher PCP value means higher priority, at least in 802.1Q/p. So the
order should be 2 > 1 > 0 _not_ because PCP 0 is the highest prio, but
because PCP 7 is, right?

> > 
> > > Other logic seems fine, but you should run stress test to avoid any
> > > block issue since the driver cover more than 20 imx platforms.
> > 
> > I have run stress tests and I observe that we're dequeuing about as many
> > packets from each queue when the incoming line is filled with 1/3 each of
> > untagged/tagged-pcp0/tagged-pcp7 traffic:
> > 
> > root@envoy:~# ply -c "sleep 2" '
> > t:net/napi_gro_receive_entry {
> >     @[data->napi_id, data->queue_mapping] = count(); }'
> > ply: active
> > ply: deactivating
> > 
> > @:
> > { 66, 3 }: 165811
> > { 66, 2 }: 167733
> > { 66, 1 }: 169470
> > 
> > It seems like this is due to "Receive flushing" not being enabled in the FEC. If I
> > manually enable it for queue 0, processing is restricted to only queue 1 and 2:
> > 
> > root@envoy:~# devmem 0x30be01f0 32 $((1 << 3)) root@envoy:~# ply -c
> > "sleep 2" '
> > t:net/napi_gro_receive_entry {
> >     @[data->napi_id, data->queue_mapping] = count(); }'
> > ply: active
> > ply: deactivating
> > 
> > @:
> > { 66, 2 }: 275055
> > { 66, 3 }: 275870
> > 
> > Enabling flushing on queue 1, focuses all processing on queue 2:
>
> Please don't enable flush, there have one IC issue.
> NXP latest errata doc should includes the issue, but the flush issue was
> fixed at imx8dxl platform.
> > 
> > root@envoy:~# devmem 0x30be01f0 32 $((3 << 3)) root@envoy:~# ply -c
> > "sleep 2" '
> > t:net/napi_gro_receive_entry {
> >     @[data->napi_id, data->queue_mapping] = count(); }'
> > ply: active
> > ply: deactivating
> > 
> > @:
> > { 66, 3 }: 545442
> > 
> > Changing the default QoS settings feels like a separate change, but I can
> > submit a v3 as a series if you want?
>
> I think the version is fine. No need to submit separate change.
> > 
> > I do not have access to a single-queue iMX device, would it be possible for you
> > to test this change on such a device?
>
> Yes, I will do stress test on imx8 and legacy platform like imx6 with
> single-queue,
> try to avoid any block issue.
> Thank you !

Excellent, thank you!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-07-03 10:26 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-02 20:57 [PATCH v2 net] net: ethernet: fec: prevent tx starvation under high rx load Tobias Waldekranz
2020-07-03  2:45 ` [EXT] " Andy Duan
2020-07-03  7:55   ` Tobias Waldekranz
2020-07-03  9:58     ` Andy Duan
2020-07-03 10:02       ` Tobias Waldekranz

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.