* [PATCH net-next 0/4] dpaa2-eth: add support for xdp bulk enqueue
@ 2020-04-21 15:21 Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 1/4] dpaa2-eth: return num_enqueued frames from enqueue callback Ioana Ciornei
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-21 15:21 UTC (permalink / raw)
  To: davem, netdev; +Cc: brouer, Ioana Ciornei

The first 3 patches set the scene for using the bulk enqueue feature.
First, the prototype of the enqueue callback is changed so that it
returns the number of enqueued frames.  Second, the driver switches to
the bulk enqueue interface without any functional change, still
enqueuing one frame at a time.  Third, the .ndo_xdp_xmit callback is
split into two stages: create the FDs for all received xdp_frames, then
enqueue them.

The last patch of the series builds on top of the others: instead of
issuing an enqueue operation for each FD, it issues a bulk enqueue call
for as many frames as possible, repeated until all frames are enqueued
or the maximum number of retries is hit.  We do not use the
XDP_XMIT_FLUSH flag since the architecture is not capable of storing
all the frames dequeued in a NAPI cycle; instead, we send out all
frames received in a .ndo_xdp_xmit call right away.
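
In outline, the resulting .ndo_xdp_xmit flow looks roughly like the
sketch below (heavily simplified: statistics, cleanup of frames that
could not be enqueued and the exact FQ selection are omitted; patch 4
has the real code):

	/* stage 1: build one FD per xdp_frame received */
	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
	for (i = 0; i < n; i++)
		if (dpaa2_eth_xdp_create_fd(net_dev, frames[i], &fds[i]))
			break;
	num_fds = i;

	/* stage 2: bulk enqueue, retrying on -EBUSY up to a maximum */
	while (total_enqueued < num_fds && retries < max_retries) {
		err = priv->enqueue(priv, fq, &fds[total_enqueued], 0,
				    num_fds - total_enqueued, &enqueued);
		if (err == -EBUSY) {
			retries++;
			continue;
		}
		total_enqueued += enqueued;
	}
	kfree(fds);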

Ioana Ciornei (4):
  dpaa2-eth: return num_enqueued frames from enqueue callback
  dpaa2-eth: use the bulk ring mode enqueue interface
  dpaa2-eth: split the .ndo_xdp_xmit callback into two stages
  dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit

 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 141 +++++++++++-------
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.h  |   6 +-
 2 files changed, 88 insertions(+), 59 deletions(-)

-- 
2.17.1



* [PATCH net-next 1/4] dpaa2-eth: return num_enqueued frames from enqueue callback
  2020-04-21 15:21 [PATCH net-next 0/4] dpaa2-eth: add support for xdp bulk enqueue Ioana Ciornei
@ 2020-04-21 15:21 ` Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 2/4] dpaa2-eth: use the bulk ring mode enqueue interface Ioana Ciornei
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-21 15:21 UTC (permalink / raw)
  To: davem, netdev; +Cc: brouer, Ioana Ciornei

The dpaa2-eth enqueue callback now returns the number of successfully
enqueued frames.  This is a preliminary patch needed for adding support
for bulk ring mode enqueue.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 34 +++++++++++++------
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.h  |  5 +--
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index b6c46639aa4c..7b41ece8f160 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016-2019 NXP
+ * Copyright 2016-2020 NXP
  */
 #include <linux/init.h>
 #include <linux/module.h>
@@ -268,7 +268,7 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 
 	fq = &priv->fq[queue_id];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, fd, 0);
+		err = priv->enqueue(priv, fq, fd, 0, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -847,7 +847,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
 	 * the Tx confirmation callback for this frame
 	 */
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, prio);
+		err = priv->enqueue(priv, fq, &fd, prio, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -1937,7 +1937,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 
 	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, 0);
+		err = priv->enqueue(priv, fq, &fd, 0, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -2523,19 +2523,31 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
 
 static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
 				       struct dpaa2_eth_fq *fq,
-				       struct dpaa2_fd *fd, u8 prio)
+				       struct dpaa2_fd *fd, u8 prio,
+				       int *frames_enqueued)
 {
-	return dpaa2_io_service_enqueue_qd(fq->channel->dpio,
-					   priv->tx_qdid, prio,
-					   fq->tx_qdbin, fd);
+	int err;
+
+	err = dpaa2_io_service_enqueue_qd(fq->channel->dpio,
+					  priv->tx_qdid, prio,
+					  fq->tx_qdbin, fd);
+	if (!err && frames_enqueued)
+		*frames_enqueued = 1;
+	return err;
 }
 
 static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv,
 				       struct dpaa2_eth_fq *fq,
-				       struct dpaa2_fd *fd, u8 prio)
+				       struct dpaa2_fd *fd, u8 prio,
+				       int *frames_enqueued)
 {
-	return dpaa2_io_service_enqueue_fq(fq->channel->dpio,
-					   fq->tx_fqid[prio], fd);
+	int err;
+
+	err = dpaa2_io_service_enqueue_fq(fq->channel->dpio,
+					  fq->tx_fqid[prio], fd);
+	if (!err && frames_enqueued)
+		*frames_enqueued = 1;
+	return err;
 }
 
 static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 7635db3ef903..085ff750e4b5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
 /* Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
+ * Copyright 2016-2020 NXP
  */
 
 #ifndef __DPAA2_ETH_H
@@ -371,7 +371,8 @@ struct dpaa2_eth_priv {
 	struct dpaa2_eth_fq fq[DPAA2_ETH_MAX_QUEUES];
 	int (*enqueue)(struct dpaa2_eth_priv *priv,
 		       struct dpaa2_eth_fq *fq,
-		       struct dpaa2_fd *fd, u8 prio);
+		       struct dpaa2_fd *fd, u8 prio,
+		       int *frames_enqueued);
 
 	u8 num_channels;
 	struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS];
-- 
2.17.1



* [PATCH net-next 2/4] dpaa2-eth: use the bulk ring mode enqueue interface
  2020-04-21 15:21 [PATCH net-next 0/4] dpaa2-eth: add support for xdp bulk enqueue Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 1/4] dpaa2-eth: return num_enqueued frames from enqueue callback Ioana Ciornei
@ 2020-04-21 15:21 ` Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 3/4] dpaa2-eth: split the .ndo_xdp_xmit callback into two stages Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit Ioana Ciornei
  3 siblings, 0 replies; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-21 15:21 UTC (permalink / raw)
  To: davem, netdev; +Cc: brouer, Ioana Ciornei

Update the dpaa2-eth driver to use the bulk enqueue function introduced
with the change to QBMAN ring mode.  No functional changes are made at
this point; the driver merely transitions to the new interface while
still enqueuing one frame at a time.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 35 +++++++++++--------
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.h  |  1 +
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 7b41ece8f160..26c2868435d5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -268,7 +268,7 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 
 	fq = &priv->fq[queue_id];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, fd, 0, NULL);
+		err = priv->enqueue(priv, fq, fd, 0, 1, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -847,7 +847,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
 	 * the Tx confirmation callback for this frame
 	 */
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, prio, NULL);
+		err = priv->enqueue(priv, fq, &fd, prio, 1, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -1937,7 +1937,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 
 	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, 0, NULL);
+		err = priv->enqueue(priv, fq, &fd, 0, 1, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -2524,6 +2524,7 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
 static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
 				       struct dpaa2_eth_fq *fq,
 				       struct dpaa2_fd *fd, u8 prio,
+				       u32 num_frames __always_unused,
 				       int *frames_enqueued)
 {
 	int err;
@@ -2536,18 +2537,24 @@ static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
 	return err;
 }
 
-static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv,
-				       struct dpaa2_eth_fq *fq,
-				       struct dpaa2_fd *fd, u8 prio,
-				       int *frames_enqueued)
+static inline int dpaa2_eth_enqueue_fq_multiple(struct dpaa2_eth_priv *priv,
+						struct dpaa2_eth_fq *fq,
+						struct dpaa2_fd *fd,
+						u8 prio, u32 num_frames,
+						int *frames_enqueued)
 {
 	int err;
 
-	err = dpaa2_io_service_enqueue_fq(fq->channel->dpio,
-					  fq->tx_fqid[prio], fd);
-	if (!err && frames_enqueued)
-		*frames_enqueued = 1;
-	return err;
+	err = dpaa2_io_service_enqueue_multiple_fq(fq->channel->dpio,
+						   fq->tx_fqid[prio],
+						   fd, num_frames);
+
+	if (err == 0)
+		return -EBUSY;
+
+	if (frames_enqueued)
+		*frames_enqueued = err;
+	return 0;
 }
 
 static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
@@ -2556,7 +2563,7 @@ static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
 				   DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
 		priv->enqueue = dpaa2_eth_enqueue_qd;
 	else
-		priv->enqueue = dpaa2_eth_enqueue_fq;
+		priv->enqueue = dpaa2_eth_enqueue_fq_multiple;
 }
 
 static int set_pause(struct dpaa2_eth_priv *priv)
@@ -2617,7 +2624,7 @@ static void update_tx_fqids(struct dpaa2_eth_priv *priv)
 		}
 	}
 
-	priv->enqueue = dpaa2_eth_enqueue_fq;
+	priv->enqueue = dpaa2_eth_enqueue_fq_multiple;
 
 	return;
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 085ff750e4b5..2440ba6b21ef 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -372,6 +372,7 @@ struct dpaa2_eth_priv {
 	int (*enqueue)(struct dpaa2_eth_priv *priv,
 		       struct dpaa2_eth_fq *fq,
 		       struct dpaa2_fd *fd, u8 prio,
+		       u32 num_frames,
 		       int *frames_enqueued);
 
 	u8 num_channels;
-- 
2.17.1



* [PATCH net-next 3/4] dpaa2-eth: split the .ndo_xdp_xmit callback into two stages
  2020-04-21 15:21 [PATCH net-next 0/4] dpaa2-eth: add support for xdp bulk enqueue Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 1/4] dpaa2-eth: return num_enqueued frames from enqueue callback Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 2/4] dpaa2-eth: use the bulk ring mode enqueue interface Ioana Ciornei
@ 2020-04-21 15:21 ` Ioana Ciornei
  2020-04-21 15:21 ` [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit Ioana Ciornei
  3 siblings, 0 replies; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-21 15:21 UTC (permalink / raw)
  To: davem, netdev; +Cc: brouer, Ioana Ciornei

Instead of having a function that both creates a frame descriptor from
an xdp_frame and enqueues it, split this into two stages.
Add dpaa2_eth_xdp_create_fd(), which only transforms an xdp_frame into
an FD, while the actual enqueue callback is called directly from the
ndo for each frame.
This is particularly useful in conjunction with bulk enqueue.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 76 ++++++++++---------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 26c2868435d5..9a0432cd893c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1880,20 +1880,16 @@ static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	return 0;
 }
 
-static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
-				    struct xdp_frame *xdpf)
+static int dpaa2_eth_xdp_create_fd(struct net_device *net_dev,
+				   struct xdp_frame *xdpf,
+				   struct dpaa2_fd *fd)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	struct device *dev = net_dev->dev.parent;
-	struct rtnl_link_stats64 *percpu_stats;
-	struct dpaa2_eth_drv_stats *percpu_extras;
 	unsigned int needed_headroom;
 	struct dpaa2_eth_swa *swa;
-	struct dpaa2_eth_fq *fq;
-	struct dpaa2_fd fd;
 	void *buffer_start, *aligned_start;
 	dma_addr_t addr;
-	int err, i;
 
 	/* We require a minimum headroom to be able to transmit the frame.
 	 * Otherwise return an error and let the original net_device handle it
@@ -1902,11 +1898,8 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 	if (xdpf->headroom < needed_headroom)
 		return -EINVAL;
 
-	percpu_stats = this_cpu_ptr(priv->percpu_stats);
-	percpu_extras = this_cpu_ptr(priv->percpu_extras);
-
 	/* Setup the FD fields */
-	memset(&fd, 0, sizeof(fd));
+	memset(fd, 0, sizeof(*fd));
 
 	/* Align FD address, if possible */
 	buffer_start = xdpf->data - needed_headroom;
@@ -1924,32 +1917,14 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 	addr = dma_map_single(dev, buffer_start,
 			      swa->xdp.dma_size,
 			      DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(dev, addr))) {
-		percpu_stats->tx_dropped++;
+	if (unlikely(dma_mapping_error(dev, addr)))
 		return -ENOMEM;
-	}
-
-	dpaa2_fd_set_addr(&fd, addr);
-	dpaa2_fd_set_offset(&fd, xdpf->data - buffer_start);
-	dpaa2_fd_set_len(&fd, xdpf->len);
-	dpaa2_fd_set_format(&fd, dpaa2_fd_single);
-	dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA);
-
-	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
-	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, 0, 1, NULL);
-		if (err != -EBUSY)
-			break;
-	}
-	percpu_extras->tx_portal_busy += i;
-	if (unlikely(err < 0)) {
-		percpu_stats->tx_errors++;
-		/* let the Rx device handle the cleanup */
-		return err;
-	}
 
-	percpu_stats->tx_packets++;
-	percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_offset(fd, xdpf->data - buffer_start);
+	dpaa2_fd_set_len(fd, xdpf->len);
+	dpaa2_fd_set_format(fd, dpaa2_fd_single);
+	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
 
 	return 0;
 }
@@ -1957,6 +1932,11 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 			      struct xdp_frame **frames, u32 flags)
 {
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_eth_fq *fq;
+	struct dpaa2_fd fd;
 	int drops = 0;
 	int i, err;
 
@@ -1966,14 +1946,38 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 	if (!netif_running(net_dev))
 		return -ENETDOWN;
 
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 
-		err = dpaa2_eth_xdp_xmit_frame(net_dev, xdpf);
+		/* create the FD from the xdp_frame */
+		err = dpaa2_eth_xdp_create_fd(net_dev, xdpf, &fd);
 		if (err) {
+			percpu_stats->tx_dropped++;
 			xdp_return_frame_rx_napi(xdpf);
 			drops++;
+			continue;
+		}
+
+		/* enqueue the newly created FD */
+		fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
+		for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
+			err = priv->enqueue(priv, fq, &fd, 0, 1);
+			if (err != -EBUSY)
+				break;
 		}
+
+		percpu_extras->tx_portal_busy += i;
+		if (unlikely(err < 0)) {
+			percpu_stats->tx_errors++;
+			xdp_return_frame_rx_napi(xdpf);
+			continue;
+		}
+
+		percpu_stats->tx_packets++;
+		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
 	}
 
 	return n - drops;
-- 
2.17.1



* [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit
  2020-04-21 15:21 [PATCH net-next 0/4] dpaa2-eth: add support for xdp bulk enqueue Ioana Ciornei
                   ` (2 preceding siblings ...)
  2020-04-21 15:21 ` [PATCH net-next 3/4] dpaa2-eth: split the .ndo_xdp_xmit callback into two stages Ioana Ciornei
@ 2020-04-21 15:21 ` Ioana Ciornei
  2020-04-22  7:12   ` Jesper Dangaard Brouer
  3 siblings, 1 reply; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-21 15:21 UTC (permalink / raw)
  To: davem, netdev; +Cc: brouer, Ioana Ciornei

Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
We cannot use the XDP_XMIT_FLUSH flag since the architecture is not
capable of storing all the frames dequeued in a NAPI cycle, so instead
we enqueue all the frames received in an .ndo_xdp_xmit call right away.

After setting up all FDs for the xdp_frames received, enqueue multiple
frames at a time until all are sent or the maximum number of retries is
hit.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60 ++++++++++---------
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 9a0432cd893c..08b4efad46fd 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 			      struct xdp_frame **frames, u32 flags)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int total_enqueued = 0, retries = 0, enqueued;
 	struct dpaa2_eth_drv_stats *percpu_extras;
 	struct rtnl_link_stats64 *percpu_stats;
+	int num_fds, i, err, max_retries;
 	struct dpaa2_eth_fq *fq;
-	struct dpaa2_fd fd;
-	int drops = 0;
-	int i, err;
+	struct dpaa2_fd *fds;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 	if (!netif_running(net_dev))
 		return -ENETDOWN;
 
+	/* create the array of frame descriptors */
+	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
+	if (!fds)
+		return -ENOMEM;
+
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
 	percpu_extras = this_cpu_ptr(priv->percpu_extras);
 
+	/* create a FD for each xdp_frame in the list received */
 	for (i = 0; i < n; i++) {
-		struct xdp_frame *xdpf = frames[i];
+		err = dpaa2_eth_xdp_create_fd(net_dev, frames[i], &fds[i]);
+		if (err)
+			break;
+	}
+	num_fds = i;
 
-		/* create the FD from the xdp_frame */
-		err = dpaa2_eth_xdp_create_fd(net_dev, xdpf, &fd);
-		if (err) {
-			percpu_stats->tx_dropped++;
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
+	/* try to enqueue all the FDs until the max number of retries is hit */
+	fq = &priv->fq[smp_processor_id()];
+	max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
+	while (total_enqueued < num_fds && retries < max_retries) {
+		err = priv->enqueue(priv, fq, &fds[total_enqueued],
+				    0, num_fds - total_enqueued, &enqueued);
+		if (err == -EBUSY) {
+			percpu_extras->tx_portal_busy += ++retries;
 			continue;
 		}
+		total_enqueued += enqueued;
+	}
 
-		/* enqueue the newly created FD */
-		fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
-		for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-			err = priv->enqueue(priv, fq, &fd, 0, 1);
-			if (err != -EBUSY)
-				break;
-		}
+	/* update statistics */
+	percpu_stats->tx_packets += total_enqueued;
+	for (i = 0; i < total_enqueued; i++)
+		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
+	for (i = total_enqueued; i < n; i++)
+		xdp_return_frame_rx_napi(frames[i]);
 
-		percpu_extras->tx_portal_busy += i;
-		if (unlikely(err < 0)) {
-			percpu_stats->tx_errors++;
-			xdp_return_frame_rx_napi(xdpf);
-			continue;
-		}
-
-		percpu_stats->tx_packets++;
-		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
-	}
+	kfree(fds);
 
-	return n - drops;
+	return total_enqueued;
 }
 
 static int update_xps(struct dpaa2_eth_priv *priv)
-- 
2.17.1



* Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit
  2020-04-21 15:21 ` [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit Ioana Ciornei
@ 2020-04-22  7:12   ` Jesper Dangaard Brouer
  2020-04-22  7:51     ` Ioana Ciornei
  0 siblings, 1 reply; 9+ messages in thread
From: Jesper Dangaard Brouer @ 2020-04-22  7:12 UTC (permalink / raw)
  To: Ioana Ciornei; +Cc: davem, netdev, brouer

On Tue, 21 Apr 2020 18:21:54 +0300
Ioana Ciornei <ioana.ciornei@nxp.com> wrote:

> Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
> to store all the frames dequeued in a NAPI cycle so we instead are
> enqueueing all the frames received in a ndo_xdp_xmit call right away.
> 
> After setting up all FDs for the xdp_frames received, enqueue multiple
> frames at a time until all are sent or the maximum number of retries is
> hit.
> 
> Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> ---
>  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60 ++++++++++---------
>  1 file changed, 32 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> index 9a0432cd893c..08b4efad46fd 100644
> --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
>  			      struct xdp_frame **frames, u32 flags)
>  {
>  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> +	int total_enqueued = 0, retries = 0, enqueued;
>  	struct dpaa2_eth_drv_stats *percpu_extras;
>  	struct rtnl_link_stats64 *percpu_stats;
> +	int num_fds, i, err, max_retries;
>  	struct dpaa2_eth_fq *fq;
> -	struct dpaa2_fd fd;
> -	int drops = 0;
> -	int i, err;
> +	struct dpaa2_fd *fds;
>  
>  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
>  		return -EINVAL;
> @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
>  	if (!netif_running(net_dev))
>  		return -ENETDOWN;
>  
> +	/* create the array of frame descriptors */
> +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);

I don't like that you have an allocation on the transmit fast-path.

There are a number of ways you can avoid this.

Option (1): Given we know that (currently) devmap will bulk at most 16
xdp_frames, we can have a call-stack local array of struct dpaa2_fd
with 16 elements; sizeof(struct dpaa2_fd) == 32 bytes times 16 is
512 bytes, so it might be acceptable.  (And add code to alloc if n > 16,
to stay compatible with someone increasing the max bulk in devmap.)

Option (2): extend struct dpaa2_eth_priv with an array of 16 struct
dpaa2_fd's that can be used as FD storage.
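
For option (1), a minimal sketch could be (the __dpaa2_eth_xdp_xmit()
helper wrapping the existing create-FDs-then-enqueue body is
hypothetical, and the 16-element threshold just mirrors today's devmap
bulk size):

#define DPAA2_ETH_XDP_XMIT_STACK_FDS	16

static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
			      struct xdp_frame **frames, u32 flags)
{
	struct dpaa2_fd onstack_fds[DPAA2_ETH_XDP_XMIT_STACK_FDS];
	struct dpaa2_fd *fds = onstack_fds;
	int ret;

	/* on-stack FDs cover the common bulk size, heap is the fallback */
	if (n > DPAA2_ETH_XDP_XMIT_STACK_FDS) {
		fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
		if (!fds)
			return -ENOMEM;
	}

	/* hypothetical helper: the existing create-FDs + bulk-enqueue
	 * body, taking the FD array as a parameter
	 */
	ret = __dpaa2_eth_xdp_xmit(net_dev, n, frames, flags, fds);

	if (fds != onstack_fds)
		kfree(fds);

	return ret;
}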

> +	if (!fds)
> +		return -ENOMEM;
> +

[...]
> +	kfree(fds);


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit
  2020-04-22  7:12   ` Jesper Dangaard Brouer
@ 2020-04-22  7:51     ` Ioana Ciornei
  2020-04-22  8:37       ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-22  7:51 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: davem, netdev

> Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> .ndo_xdp_xmit
> 
> On Tue, 21 Apr 2020 18:21:54 +0300
> Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> 
> > Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> > We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
> > to store all the frames dequeued in a NAPI cycle so we instead are
> > enqueueing all the frames received in a ndo_xdp_xmit call right away.
> >
> > After setting up all FDs for the xdp_frames received, enqueue multiple
> > frames at a time until all are sent or the maximum number of retries
> > is hit.
> >
> > Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> > ---
> >  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60
> > ++++++++++---------
> >  1 file changed, 32 insertions(+), 28 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > index 9a0432cd893c..08b4efad46fd 100644
> > --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device
> *net_dev, int n,
> >  			      struct xdp_frame **frames, u32 flags)  {
> >  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> > +	int total_enqueued = 0, retries = 0, enqueued;
> >  	struct dpaa2_eth_drv_stats *percpu_extras;
> >  	struct rtnl_link_stats64 *percpu_stats;
> > +	int num_fds, i, err, max_retries;
> >  	struct dpaa2_eth_fq *fq;
> > -	struct dpaa2_fd fd;
> > -	int drops = 0;
> > -	int i, err;
> > +	struct dpaa2_fd *fds;
> >
> >  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> >  		return -EINVAL;
> > @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device
> *net_dev, int n,
> >  	if (!netif_running(net_dev))
> >  		return -ENETDOWN;
> >
> > +	/* create the array of frame descriptors */
> > +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
> 
> I don't like that you have an allocation on the transmit fast-path.
> 
> There are a number of ways you can avoid this.
> 
> Option (1) Given we know that (currently) devmap will max bulk 16 xdp_frames,
> we can have a call-stack local array with struct dpaa2_fd, that contains 16
> elements, sizeof(struct dpaa2_fd)==32 bytes times 16 is
> 512 bytes, so it might be acceptable.  (And add code to alloc if n > 16, to be
> compatible with someone increasing max bulk in devmap).
> 

I didn't know about the max of 16 xdp_frames. Thanks.

> Option (2) extend struct dpaa2_eth_priv with an array of 16 struct dpaa2_fd's,
> that can be used as fds storage.

I have more of a noob question here before proceeding with one of the two options.

The ndo_xdp_xmit() callback can be called concurrently from multiple softirq contexts, right?

If the above is true, then I think dpaa2_eth_ch_xdp is the right struct
in which to place the array of 16 FDs.

Also, is there any caveat to just using DEV_MAP_BULK_SIZE when declaring
the array?

Ioana

> 
> > +	if (!fds)
> > +		return -ENOMEM;
> > +
> 
> [...]
> > +	kfree(fds);
> 
> 



* Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit
  2020-04-22  7:51     ` Ioana Ciornei
@ 2020-04-22  8:37       ` Jesper Dangaard Brouer
  2020-04-22  9:06         ` Ioana Ciornei
  0 siblings, 1 reply; 9+ messages in thread
From: Jesper Dangaard Brouer @ 2020-04-22  8:37 UTC (permalink / raw)
  To: Ioana Ciornei; +Cc: davem, netdev, brouer

On Wed, 22 Apr 2020 07:51:46 +0000
Ioana Ciornei <ioana.ciornei@nxp.com> wrote:

> > Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> > .ndo_xdp_xmit
> > 
> > On Tue, 21 Apr 2020 18:21:54 +0300
> > Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> >   
> > > Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> > > We cannot use the XDP_XMIT_FLUSH since the architecture is not capable
> > > to store all the frames dequeued in a NAPI cycle so we instead are
> > > enqueueing all the frames received in a ndo_xdp_xmit call right away.
> > >
> > > After setting up all FDs for the xdp_frames received, enqueue multiple
> > > frames at a time until all are sent or the maximum number of retries
> > > is hit.
> > >
> > > Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> > > ---
> > >  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60
> > > ++++++++++---------
> > >  1 file changed, 32 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > index 9a0432cd893c..08b4efad46fd 100644
> > > --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device  
> > *net_dev, int n,  
> > >  			      struct xdp_frame **frames, u32 flags)  {
> > >  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> > > +	int total_enqueued = 0, retries = 0, enqueued;
> > >  	struct dpaa2_eth_drv_stats *percpu_extras;
> > >  	struct rtnl_link_stats64 *percpu_stats;
> > > +	int num_fds, i, err, max_retries;
> > >  	struct dpaa2_eth_fq *fq;
> > > -	struct dpaa2_fd fd;
> > > -	int drops = 0;
> > > -	int i, err;
> > > +	struct dpaa2_fd *fds;
> > >
> > >  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> > >  		return -EINVAL;
> > > @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct net_device  
> > *net_dev, int n,  
> > >  	if (!netif_running(net_dev))
> > >  		return -ENETDOWN;
> > >
> > > +	/* create the array of frame descriptors */
> > > +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);  
> > 
> > I don't like that you have an allocation on the transmit fast-path.
> > 
> > There are a number of ways you can avoid this.
> > 
> > Option (1) Given we know that (currently) devmap will max bulk 16 xdp_frames,
> > we can have a call-stack local array with struct dpaa2_fd, that contains 16
> > elements, sizeof(struct dpaa2_fd)==32 bytes times 16 is
> > 512 bytes, so it might be acceptable.  (And add code to alloc if n > 16, to be
> > compatible with someone increasing max bulk in devmap).
> >   
> 
> I didn't know about the 16 max xdp_frames . Thanks.
> 
> > Option (2) extend struct dpaa2_eth_priv with an array of 16 struct dpaa2_fd's,
> > that can be used as fds storage.  
> 
> I have more of a noob question here before proceeding with one of the
> two options.
> 
> The ndo_xdp_xmit() callback can be called concurrently from multiple
> softirq contexts, right?

Yes.
 
> If the above is true, then I think the dpaa2_eth_ch_xdp is the right
> struct to place the array of 16 FDs.

Good point, and it sounds correct to use dpaa2_eth_ch_xdp.

> Also, is there any caveat to just use DEV_MAP_BULK_SIZE when
> declaring the array?

Using DEV_MAP_BULK_SIZE sounds like a good idea.  Even if someone
decides to increase that to 64, this is only 2048 bytes (32*64).
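
A rough sketch of that per-channel variant (the xdp_fds field name and
its placement in struct dpaa2_eth_ch_xdp are illustrative, and
DEV_MAP_BULK_SIZE would still need to be made visible to the driver):

struct dpaa2_eth_ch_xdp {
	/* ... existing fields left as-is ... */
	/* per-channel scratch FDs for .ndo_xdp_xmit; fine as long as a
	 * channel is only used from the CPU it is affine to
	 */
	struct dpaa2_fd xdp_fds[DEV_MAP_BULK_SIZE];	/* 16 * 32 B today */
};

/* in dpaa2_eth_xdp_xmit(), instead of kcalloc()/kfree(): */
fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
fds = fq->channel->xdp.xdp_fds;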


> >   
> > > +	if (!fds)
> > > +		return -ENOMEM;
> > > +  
> > 
> > [...]  
> > > +	kfree(fds);  
> > 
> >   
> 



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer



* RE: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit
  2020-04-22  8:37       ` Jesper Dangaard Brouer
@ 2020-04-22  9:06         ` Ioana Ciornei
  0 siblings, 0 replies; 9+ messages in thread
From: Ioana Ciornei @ 2020-04-22  9:06 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: davem, netdev

> Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> .ndo_xdp_xmit
> 
> On Wed, 22 Apr 2020 07:51:46 +0000
> Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> 
> > > Subject: Re: [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in
> > > .ndo_xdp_xmit
> > >
> > > On Tue, 21 Apr 2020 18:21:54 +0300
> > > Ioana Ciornei <ioana.ciornei@nxp.com> wrote:
> > >
> > > > Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
> > > > We cannot use the XDP_XMIT_FLUSH since the architecture is not
> > > > capable to store all the frames dequeued in a NAPI cycle so we
> > > > instead are enqueueing all the frames received in a ndo_xdp_xmit call right
> away.
> > > >
> > > > After setting up all FDs for the xdp_frames received, enqueue
> > > > multiple frames at a time until all are sent or the maximum number
> > > > of retries is hit.
> > > >
> > > > Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
> > > > ---
> > > >  .../net/ethernet/freescale/dpaa2/dpaa2-eth.c  | 60
> > > > ++++++++++---------
> > > >  1 file changed, 32 insertions(+), 28 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > index 9a0432cd893c..08b4efad46fd 100644
> > > > --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
> > > > @@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct
> > > > net_device
> > > *net_dev, int n,
> > > >  			      struct xdp_frame **frames, u32 flags)  {
> > > >  	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
> > > > +	int total_enqueued = 0, retries = 0, enqueued;
> > > >  	struct dpaa2_eth_drv_stats *percpu_extras;
> > > >  	struct rtnl_link_stats64 *percpu_stats;
> > > > +	int num_fds, i, err, max_retries;
> > > >  	struct dpaa2_eth_fq *fq;
> > > > -	struct dpaa2_fd fd;
> > > > -	int drops = 0;
> > > > -	int i, err;
> > > > +	struct dpaa2_fd *fds;
> > > >
> > > >  	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> > > >  		return -EINVAL;
> > > > @@ -1946,41 +1946,45 @@ static int dpaa2_eth_xdp_xmit(struct
> > > > net_device
> > > *net_dev, int n,
> > > >  	if (!netif_running(net_dev))
> > > >  		return -ENETDOWN;
> > > >
> > > > +	/* create the array of frame descriptors */
> > > > +	fds = kcalloc(n, sizeof(*fds), GFP_ATOMIC);
> > >
> > > I don't like that you have an allocation on the transmit fast-path.
> > >
> > > There are a number of ways you can avoid this.
> > >
> > > Option (1) Given we know that (currently) devmap will max bulk 16
> > > xdp_frames, we can have a call-stack local array with struct
> > > dpaa2_fd, that contains 16 elements, sizeof(struct dpaa2_fd)==32
> > > bytes times 16 is
> > > 512 bytes, so it might be acceptable.  (And add code to alloc if n >
> > > 16, to be compatible with someone increasing max bulk in devmap).
> > >
> >
> > I didn't know about the 16 max xdp_frames . Thanks.
> >
> > > Option (2) extend struct dpaa2_eth_priv with an array of 16 struct
> > > dpaa2_fd's, that can be used as fds storage.
> >
> > I have more of a noob question here before proceeding with one of the
> > two options.
> >
> > The ndo_xdp_xmit() callback can be called concurrently from multiple
> > softirq contexts, right?
> 
> Yes.
> 
> > If the above is true, then I think the dpaa2_eth_ch_xdp is the right
> > struct to place the array of 16 FDs.
> 
> Good point, and it sounds correct to use dpaa2_eth_ch_xdp.
> 
> > Also, is there any caveat to just use DEV_MAP_BULK_SIZE when declaring
> > the array?
> 
> Using DEV_MAP_BULK_SIZE sounds like a good idea.  Even if someone decide to
> increase that to 64, then this is only 2048 bytes(32*64).
> 

Currently the macro is private to devmap.c. Maybe move it to the xdp.h header
file for access from drivers? I'll include this in v2.
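
For illustration only (exact header and placement to be settled in v2),
the move could be as small as:

/* include/net/xdp.h (sketch): expose the devmap bulking limit */
#define DEV_MAP_BULK_SIZE 16

/* kernel/bpf/devmap.c would then drop its private copy, and dpaa2-eth
 * could size its per-channel FD array with the same macro.
 */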

Ioana

> 
> > >
> > > > +	if (!fds)
> > > > +		return -ENOMEM;
> > > > +
> > >
> > > [...]
> > > > +	kfree(fds);
> > >
> > >
> >
> 
> 
> 



Thread overview: 9 messages
2020-04-21 15:21 [PATCH net-next 0/4] dpaa2-eth: add support for xdp bulk enqueue Ioana Ciornei
2020-04-21 15:21 ` [PATCH net-next 1/4] dpaa2-eth: return num_enqueued frames from enqueue callback Ioana Ciornei
2020-04-21 15:21 ` [PATCH net-next 2/4] dpaa2-eth: use the bulk ring mode enqueue interface Ioana Ciornei
2020-04-21 15:21 ` [PATCH net-next 3/4] dpaa2-eth: split the .ndo_xdp_xmit callback into two stages Ioana Ciornei
2020-04-21 15:21 ` [PATCH net-next 4/4] dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit Ioana Ciornei
2020-04-22  7:12   ` Jesper Dangaard Brouer
2020-04-22  7:51     ` Ioana Ciornei
2020-04-22  8:37       ` Jesper Dangaard Brouer
2020-04-22  9:06         ` Ioana Ciornei
