netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets
@ 2019-06-28 22:15 Jonathan Lemon
  2019-06-28 22:15 ` [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function Jonathan Lemon
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Jonathan Lemon @ 2019-06-28 22:15 UTC (permalink / raw)
  To: netdev, bjorn.topel, magnus.karlsson, jakub.kicinski, jeffrey.t.kirsher
  Cc: kernel-team

NOTE: This patch depends on my previous "xsk: reuse cleanup" patch,
sent to netdev earlier.

The motivation is to have packets which were received on a zero-copy
AF_XDP socket, and which returned a TX verdict from the bpf program,
queued directly on the TX ring (if they're in the same napi context).

When these TX packets are completed, they are placed back onto the
reuse queue, as there isn't really any other place to handle them.

Space in the reuse queue is preallocated at init time for both the
RX and TX rings.  Another option would have a smaller TX queue size
and count in-flight TX packets, dropping any which exceed the reuseq
size - this approach is omitted for simplicity.


Jonathan Lemon (3):
  net: add convert_to_xdp_frame_keep_zc function
  i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  ixgbe: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.

 drivers/net/ethernet/intel/i40e/i40e_txrx.h  |  1 +
 drivers/net/ethernet/intel/i40e/i40e_xsk.c   | 54 ++++++++++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe.h     |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 74 +++++++++++++++++---
 include/net/xdp.h                            | 20 ++++--
 5 files changed, 134 insertions(+), 16 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function
  2019-06-28 22:15 [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Jonathan Lemon
@ 2019-06-28 22:15 ` Jonathan Lemon
  2019-07-01 11:02   ` Magnus Karlsson
  2019-06-28 22:15 ` [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets Jonathan Lemon
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: Jonathan Lemon @ 2019-06-28 22:15 UTC (permalink / raw)
  To: netdev, bjorn.topel, magnus.karlsson, jakub.kicinski, jeffrey.t.kirsher
  Cc: kernel-team

Add a function which converts a ZC xdp_buff to a an xdp_frame, while
keeping the zc backing storage.  This will be used to support TX of
received AF_XDP frames.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/net/xdp.h | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 40c6d3398458..abe5f47ff0a5 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -82,6 +82,7 @@ struct xdp_frame {
 	 */
 	struct xdp_mem_info mem;
 	struct net_device *dev_rx; /* used by cpumap */
+	unsigned long handle;
 };
 
 /* Clear kernel pointers in xdp_frame */
@@ -95,15 +96,12 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 
 /* Convert xdp_buff to xdp_frame */
 static inline
-struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+struct xdp_frame *__convert_to_xdp_frame(struct xdp_buff *xdp)
 {
 	struct xdp_frame *xdp_frame;
 	int metasize;
 	int headroom;
 
-	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
-		return xdp_convert_zc_to_xdp_frame(xdp);
-
 	/* Assure headroom is available for storing info */
 	headroom = xdp->data - xdp->data_hard_start;
 	metasize = xdp->data - xdp->data_meta;
@@ -125,6 +123,20 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	return xdp_frame;
 }
 
+static inline
+struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+{
+	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
+		return xdp_convert_zc_to_xdp_frame(xdp);
+	return __convert_to_xdp_frame(xdp);
+}
+
+static inline
+struct xdp_frame *convert_to_xdp_frame_keep_zc(struct xdp_buff *xdp)
+{
+	return __convert_to_xdp_frame(xdp);
+}
+
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  2019-06-28 22:15 [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Jonathan Lemon
  2019-06-28 22:15 ` [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function Jonathan Lemon
@ 2019-06-28 22:15 ` Jonathan Lemon
  2019-07-01 11:04   ` Magnus Karlsson
  2019-07-02  7:07   ` Maxim Mikityanskiy
  2019-06-28 22:15 ` [PATCH 3/3 bpf-next] ixgbe: " Jonathan Lemon
  2019-07-01 11:01 ` [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Magnus Karlsson
  3 siblings, 2 replies; 10+ messages in thread
From: Jonathan Lemon @ 2019-06-28 22:15 UTC (permalink / raw)
  To: netdev, bjorn.topel, magnus.karlsson, jakub.kicinski, jeffrey.t.kirsher
  Cc: kernel-team

When the XDP program attached to a zero-copy AF_XDP socket returns XDP_TX,
queue the umem frame on the XDP TX ring.  Space on the recycle stack is
pre-allocated when the xsk is created.  (taken from tx_ring, since the
xdp ring is not initialized yet)

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |  1 +
 drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 54 +++++++++++++++++++--
 2 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 100e92d2982f..3e7954277737 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -274,6 +274,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
 #define I40E_TX_FLAGS_TSYN		BIT(8)
 #define I40E_TX_FLAGS_FD_SB		BIT(9)
 #define I40E_TX_FLAGS_UDP_TUNNEL	BIT(10)
+#define I40E_TX_FLAGS_ZC_FRAME		BIT(11)
 #define I40E_TX_FLAGS_VLAN_MASK		0xffff0000
 #define I40E_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT	29
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index ce8650d06962..020f9859215d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -91,7 +91,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
 	    qid >= netdev->real_num_tx_queues)
 		return -EINVAL;
 
-	if (!xsk_umem_recycle_alloc(umem, vsi->rx_rings[0]->count))
+	if (!xsk_umem_recycle_alloc(umem, vsi->rx_rings[0]->count +
+					  vsi->tx_rings[0]->count))
 		return -ENOMEM;
 
 	err = i40e_xsk_umem_dma_map(vsi, umem);
@@ -175,6 +176,48 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
 		i40e_xsk_umem_disable(vsi, qid);
 }
 
+static int i40e_xmit_rcvd_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
+{
+	struct i40e_ring *xdp_ring;
+	struct i40e_tx_desc *tx_desc;
+	struct i40e_tx_buffer *tx_bi;
+	struct xdp_frame *xdpf;
+	dma_addr_t dma;
+
+	xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+
+	if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
+		xdp_ring->tx_stats.tx_busy++;
+		return I40E_XDP_CONSUMED;
+	}
+	xdpf = convert_to_xdp_frame_keep_zc(xdp);
+	if (unlikely(!xdpf))
+		return I40E_XDP_CONSUMED;
+	xdpf->handle = xdp->handle;
+
+	dma = xdp_umem_get_dma(rx_ring->xsk_umem, xdp->handle);
+	tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
+	tx_bi->bytecount = xdpf->len;
+	tx_bi->gso_segs = 1;
+	tx_bi->xdpf = xdpf;
+	tx_bi->tx_flags = I40E_TX_FLAGS_ZC_FRAME;
+
+	tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+	tx_desc->buffer_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC |
+						  I40E_TX_DESC_CMD_EOP,
+				                  0, xdpf->len, 0);
+	smp_wmb();
+
+	xdp_ring->next_to_use++;
+	if (xdp_ring->next_to_use == xdp_ring->count)
+		xdp_ring->next_to_use = 0;
+
+	tx_bi->next_to_watch = tx_desc;
+
+	return I40E_XDP_TX;
+}
+
 /**
  * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
  * @rx_ring: Rx ring
@@ -187,7 +230,6 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
 static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 {
 	int err, result = I40E_XDP_PASS;
-	struct i40e_ring *xdp_ring;
 	struct bpf_prog *xdp_prog;
 	u32 act;
 
@@ -202,8 +244,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
-		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+		result = i40e_xmit_rcvd_zc(rx_ring, xdp);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
@@ -628,6 +669,11 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
 static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
 				     struct i40e_tx_buffer *tx_bi)
 {
+	if (tx_bi->tx_flags & I40E_TX_FLAGS_ZC_FRAME) {
+		xsk_umem_recycle_addr(tx_ring->xsk_umem, tx_bi->xdpf->handle);
+		tx_bi->tx_flags = 0;
+		return;
+	}
 	xdp_return_frame(tx_bi->xdpf);
 	dma_unmap_single(tx_ring->dev,
 			 dma_unmap_addr(tx_bi, dma),
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/3 bpf-next] ixgbe: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  2019-06-28 22:15 [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Jonathan Lemon
  2019-06-28 22:15 ` [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function Jonathan Lemon
  2019-06-28 22:15 ` [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets Jonathan Lemon
@ 2019-06-28 22:15 ` Jonathan Lemon
  2019-07-01 11:01 ` [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Magnus Karlsson
  3 siblings, 0 replies; 10+ messages in thread
From: Jonathan Lemon @ 2019-06-28 22:15 UTC (permalink / raw)
  To: netdev, bjorn.topel, magnus.karlsson, jakub.kicinski, jeffrey.t.kirsher
  Cc: kernel-team

When the XDP program attached to a zero-copy AF_XDP socket returns XDP_TX,
queue the umem frame on the XDP TX ring.  Space on the recycle stack is
pre-allocated when the xsk is created.  (taken from tx_ring, since the 
xdp ring is not initialized yet)

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h     |  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 74 +++++++++++++++++---
 2 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 39e73ad60352..aca33e4773f5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -150,6 +150,7 @@ enum ixgbe_tx_flags {
 	/* software defined flags */
 	IXGBE_TX_FLAGS_SW_VLAN	= 0x80,
 	IXGBE_TX_FLAGS_FCOE	= 0x100,
+	IXGBE_TX_FLAGS_ZC_FRAME	= 0x200,
 };
 
 /* VLAN info */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 65feb16200ea..c7a661736ab8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -77,7 +77,8 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
 	    qid >= netdev->real_num_tx_queues)
 		return -EINVAL;
 
-	if (!xsk_umem_recycle_alloc(umem, adapter->rx_ring[0]->count))
+	if (!xsk_umem_recycle_alloc(umem, adapter->rx_ring[0]->count +
+					  adapter->tx_ring[0]->count))
 		return -ENOMEM;
 
 	err = ixgbe_xsk_umem_dma_map(adapter, umem);
@@ -135,13 +136,70 @@ int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
 		ixgbe_xsk_umem_disable(adapter, qid);
 }
 
+static int ixgbe_xmit_rcvd_zc(struct ixgbe_adapter *adapter,
+			      struct ixgbe_ring *rx_ring,
+			      struct xdp_buff *xdp)
+{
+	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
+	struct ixgbe_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	struct xdp_frame *xdpf;
+	u32 len, cmd_type;
+	dma_addr_t dma;
+	u16 i;
+
+	if (unlikely(!ixgbe_desc_unused(ring)))
+		return IXGBE_XDP_CONSUMED;
+	xdpf = convert_to_xdp_frame_keep_zc(xdp);
+	if (unlikely(!xdpf))
+		return IXGBE_XDP_CONSUMED;
+	xdpf->handle = xdp->handle;
+	len = xdpf->len;
+
+	dma = xdp_umem_get_dma(rx_ring->xsk_umem, xdp->handle);
+
+	/* record the location of the first descriptor for this packet */
+	tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
+	tx_buffer->bytecount = len;
+	tx_buffer->gso_segs = 1;
+	tx_buffer->protocol = 0;
+	tx_buffer->xdpf = xdpf;
+	tx_buffer->tx_flags = IXGBE_TX_FLAGS_ZC_FRAME;
+
+	i = ring->next_to_use;
+	tx_desc = IXGBE_TX_DESC(ring, i);
+
+	tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+	/* put descriptor type bits */
+	cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+		   IXGBE_ADVTXD_DCMD_DEXT |
+		   IXGBE_ADVTXD_DCMD_IFCS;
+	cmd_type |= len | IXGBE_TXD_CMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	tx_desc->read.olinfo_status =
+		cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+	/* Avoid any potential race with xdp_xmit and cleanup */
+	smp_wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	i++;
+	if (i == ring->count)
+		i = 0;
+
+	tx_buffer->next_to_watch = tx_desc;
+	ring->next_to_use = i;
+
+	return IXGBE_XDP_TX;
+}
+
 static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 			    struct ixgbe_ring *rx_ring,
 			    struct xdp_buff *xdp)
 {
 	int err, result = IXGBE_XDP_PASS;
 	struct bpf_prog *xdp_prog;
-	struct xdp_frame *xdpf;
 	u32 act;
 
 	rcu_read_lock();
@@ -152,12 +210,7 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		xdpf = convert_to_xdp_frame(xdp);
-		if (unlikely(!xdpf)) {
-			result = IXGBE_XDP_CONSUMED;
-			break;
-		}
-		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+		result = ixgbe_xmit_rcvd_zc(adapter, rx_ring, xdp);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
@@ -576,6 +629,11 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
 static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
 				      struct ixgbe_tx_buffer *tx_bi)
 {
+	if (tx_bi->tx_flags & IXGBE_TX_FLAGS_ZC_FRAME) {
+		xsk_umem_recycle_addr(tx_ring->xsk_umem, tx_bi->xdpf->handle);
+		tx_bi->tx_flags = 0;
+		return;
+	}
 	xdp_return_frame(tx_bi->xdpf);
 	dma_unmap_single(tx_ring->dev,
 			 dma_unmap_addr(tx_bi, dma),
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets
  2019-06-28 22:15 [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Jonathan Lemon
                   ` (2 preceding siblings ...)
  2019-06-28 22:15 ` [PATCH 3/3 bpf-next] ixgbe: " Jonathan Lemon
@ 2019-07-01 11:01 ` Magnus Karlsson
  3 siblings, 0 replies; 10+ messages in thread
From: Magnus Karlsson @ 2019-07-01 11:01 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: Network Development, Björn Töpel, Karlsson, Magnus,
	Jakub Kicinski, jeffrey.t.kirsher, kernel-team

On Sat, Jun 29, 2019 at 12:18 AM Jonathan Lemon
<jonathan.lemon@gmail.com> wrote:
>
> NOTE: This patch depends on my previous "xsk: reuse cleanup" patch,
> sent to netdev earlier.
>
> The motivation is to have packets which were received on a zero-copy
> AF_XDP socket, and which returned a TX verdict from the bpf program,
> queued directly on the TX ring (if they're in the same napi context).
>
> When these TX packets are completed, they are placed back onto the
> reuse queue, as there isn't really any other place to handle them.
>
> Space in the reuse queue is preallocated at init time for both the
> RX and TX rings.  Another option would have a smaller TX queue size
> and count in-flight TX packets, dropping any which exceed the reuseq
> size - this approach is omitted for simplicity.

This should speed up XDP_TX under ZC substantially, which of course is
a good thing. Would be great if you could add some performance
numbers.

As other people have pointed out, it would have been great if we had a
page pool we could return the buffers to. But we do not so there are
only two options: keep it in the kernel on the reuse queue in this
case, or return the buffer to user space with a length of zero
indicating that there is no packet data. Just a transfer of ownership.
But let us go with the former one as you have done in this patch set,
as we have so far have always tried to reuse the buffers inside the
kernel. But the latter option might be good to have in store as a
solution for other problems.

/Magnus

>
> Jonathan Lemon (3):
>   net: add convert_to_xdp_frame_keep_zc function
>   i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
>   ixgbe: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
>
>  drivers/net/ethernet/intel/i40e/i40e_txrx.h  |  1 +
>  drivers/net/ethernet/intel/i40e/i40e_xsk.c   | 54 ++++++++++++--
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h     |  1 +
>  drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 74 +++++++++++++++++---
>  include/net/xdp.h                            | 20 ++++--
>  5 files changed, 134 insertions(+), 16 deletions(-)
>
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function
  2019-06-28 22:15 ` [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function Jonathan Lemon
@ 2019-07-01 11:02   ` Magnus Karlsson
  0 siblings, 0 replies; 10+ messages in thread
From: Magnus Karlsson @ 2019-07-01 11:02 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: Network Development, Björn Töpel, Karlsson, Magnus,
	Jakub Kicinski, jeffrey.t.kirsher, kernel-team

On Sat, Jun 29, 2019 at 12:19 AM Jonathan Lemon
<jonathan.lemon@gmail.com> wrote:
>
> Add a function which converts a ZC xdp_buff to a an xdp_frame, while

nit: "a an" -> "an"

> keeping the zc backing storage.  This will be used to support TX of
> received AF_XDP frames.
>
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  include/net/xdp.h | 20 ++++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 40c6d3398458..abe5f47ff0a5 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -82,6 +82,7 @@ struct xdp_frame {
>          */
>         struct xdp_mem_info mem;
>         struct net_device *dev_rx; /* used by cpumap */
> +       unsigned long handle;
>  };
>
>  /* Clear kernel pointers in xdp_frame */
> @@ -95,15 +96,12 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
>
>  /* Convert xdp_buff to xdp_frame */
>  static inline
> -struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
> +struct xdp_frame *__convert_to_xdp_frame(struct xdp_buff *xdp)
>  {
>         struct xdp_frame *xdp_frame;
>         int metasize;
>         int headroom;
>
> -       if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
> -               return xdp_convert_zc_to_xdp_frame(xdp);
> -
>         /* Assure headroom is available for storing info */
>         headroom = xdp->data - xdp->data_hard_start;
>         metasize = xdp->data - xdp->data_meta;
> @@ -125,6 +123,20 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
>         return xdp_frame;
>  }
>
> +static inline
> +struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
> +{
> +       if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
> +               return xdp_convert_zc_to_xdp_frame(xdp);
> +       return __convert_to_xdp_frame(xdp);
> +}
> +
> +static inline
> +struct xdp_frame *convert_to_xdp_frame_keep_zc(struct xdp_buff *xdp)
> +{
> +       return __convert_to_xdp_frame(xdp);
> +}
> +
>  void xdp_return_frame(struct xdp_frame *xdpf);
>  void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
>  void xdp_return_buff(struct xdp_buff *xdp);
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  2019-06-28 22:15 ` [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets Jonathan Lemon
@ 2019-07-01 11:04   ` Magnus Karlsson
  2019-07-02 16:44     ` Jonathan Lemon
  2019-07-02  7:07   ` Maxim Mikityanskiy
  1 sibling, 1 reply; 10+ messages in thread
From: Magnus Karlsson @ 2019-07-01 11:04 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: Network Development, Björn Töpel, Karlsson, Magnus,
	Jakub Kicinski, jeffrey.t.kirsher, kernel-team

On Sat, Jun 29, 2019 at 12:18 AM Jonathan Lemon
<jonathan.lemon@gmail.com> wrote:
>
> When the XDP program attached to a zero-copy AF_XDP socket returns XDP_TX,
> queue the umem frame on the XDP TX ring.  Space on the recycle stack is
> pre-allocated when the xsk is created.  (taken from tx_ring, since the
> xdp ring is not initialized yet)
>
> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_txrx.h |  1 +
>  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 54 +++++++++++++++++++--
>  2 files changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> index 100e92d2982f..3e7954277737 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
> @@ -274,6 +274,7 @@ static inline unsigned int i40e_txd_use_count(unsigned int size)
>  #define I40E_TX_FLAGS_TSYN             BIT(8)
>  #define I40E_TX_FLAGS_FD_SB            BIT(9)
>  #define I40E_TX_FLAGS_UDP_TUNNEL       BIT(10)
> +#define I40E_TX_FLAGS_ZC_FRAME         BIT(11)
>  #define I40E_TX_FLAGS_VLAN_MASK                0xffff0000
>  #define I40E_TX_FLAGS_VLAN_PRIO_MASK   0xe0000000
>  #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT  29
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index ce8650d06962..020f9859215d 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -91,7 +91,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
>             qid >= netdev->real_num_tx_queues)
>                 return -EINVAL;
>
> -       if (!xsk_umem_recycle_alloc(umem, vsi->rx_rings[0]->count))
> +       if (!xsk_umem_recycle_alloc(umem, vsi->rx_rings[0]->count +
> +                                         vsi->tx_rings[0]->count))
>                 return -ENOMEM;
>
>         err = i40e_xsk_umem_dma_map(vsi, umem);
> @@ -175,6 +176,48 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
>                 i40e_xsk_umem_disable(vsi, qid);
>  }
>
> +static int i40e_xmit_rcvd_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)

This function looks very much like i40e_xmit_xdp_ring(). How can we
refactor them to make them share more code and not lose performance at
the same time? This comment is also valid for the ixgbe driver patch
that follows.

Thanks: Magnus

> +{
> +       struct i40e_ring *xdp_ring;
> +       struct i40e_tx_desc *tx_desc;
> +       struct i40e_tx_buffer *tx_bi;
> +       struct xdp_frame *xdpf;
> +       dma_addr_t dma;
> +
> +       xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
> +
> +       if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
> +               xdp_ring->tx_stats.tx_busy++;
> +               return I40E_XDP_CONSUMED;
> +       }
> +       xdpf = convert_to_xdp_frame_keep_zc(xdp);
> +       if (unlikely(!xdpf))
> +               return I40E_XDP_CONSUMED;
> +       xdpf->handle = xdp->handle;
> +
> +       dma = xdp_umem_get_dma(rx_ring->xsk_umem, xdp->handle);
> +       tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
> +       tx_bi->bytecount = xdpf->len;
> +       tx_bi->gso_segs = 1;
> +       tx_bi->xdpf = xdpf;
> +       tx_bi->tx_flags = I40E_TX_FLAGS_ZC_FRAME;
> +
> +       tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use);
> +       tx_desc->buffer_addr = cpu_to_le64(dma);
> +       tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC |
> +                                                 I40E_TX_DESC_CMD_EOP,
> +                                                 0, xdpf->len, 0);
> +       smp_wmb();
> +
> +       xdp_ring->next_to_use++;
> +       if (xdp_ring->next_to_use == xdp_ring->count)
> +               xdp_ring->next_to_use = 0;
> +
> +       tx_bi->next_to_watch = tx_desc;
> +
> +       return I40E_XDP_TX;
> +}
> +
>  /**
>   * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
>   * @rx_ring: Rx ring
> @@ -187,7 +230,6 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
>  static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
>  {
>         int err, result = I40E_XDP_PASS;
> -       struct i40e_ring *xdp_ring;
>         struct bpf_prog *xdp_prog;
>         u32 act;
>
> @@ -202,8 +244,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
>         case XDP_PASS:
>                 break;
>         case XDP_TX:
> -               xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
> -               result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
> +               result = i40e_xmit_rcvd_zc(rx_ring, xdp);
>                 break;
>         case XDP_REDIRECT:
>                 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
> @@ -628,6 +669,11 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>  static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
>                                      struct i40e_tx_buffer *tx_bi)
>  {
> +       if (tx_bi->tx_flags & I40E_TX_FLAGS_ZC_FRAME) {
> +               xsk_umem_recycle_addr(tx_ring->xsk_umem, tx_bi->xdpf->handle);
> +               tx_bi->tx_flags = 0;
> +               return;
> +       }
>         xdp_return_frame(tx_bi->xdpf);
>         dma_unmap_single(tx_ring->dev,
>                          dma_unmap_addr(tx_bi, dma),
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  2019-06-28 22:15 ` [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets Jonathan Lemon
  2019-07-01 11:04   ` Magnus Karlsson
@ 2019-07-02  7:07   ` Maxim Mikityanskiy
  2019-07-02 16:31     ` Jonathan Lemon
  1 sibling, 1 reply; 10+ messages in thread
From: Maxim Mikityanskiy @ 2019-07-02  7:07 UTC (permalink / raw)
  To: Jonathan Lemon
  Cc: netdev, bjorn.topel, magnus.karlsson, jakub.kicinski,
	jeffrey.t.kirsher, kernel-team

On 2019-06-29 01:15, Jonathan Lemon wrote:
> +	xdpf = convert_to_xdp_frame_keep_zc(xdp);
> +	if (unlikely(!xdpf))
> +		return I40E_XDP_CONSUMED;
> +	xdpf->handle = xdp->handle;

Shouldn't this line belong to convert_to_xdp_frame_keep_zc (and the 
previous patch)? It looks like it's code common for all drivers, and 
also patch 1 adds the handle field, but doesn't use it, which looks weird.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  2019-07-02  7:07   ` Maxim Mikityanskiy
@ 2019-07-02 16:31     ` Jonathan Lemon
  0 siblings, 0 replies; 10+ messages in thread
From: Jonathan Lemon @ 2019-07-02 16:31 UTC (permalink / raw)
  To: Maxim Mikityanskiy
  Cc: netdev, bjorn.topel, magnus.karlsson, jakub.kicinski,
	jeffrey.t.kirsher, kernel-team



On 2 Jul 2019, at 0:07, Maxim Mikityanskiy wrote:

> On 2019-06-29 01:15, Jonathan Lemon wrote:
>> +	xdpf = convert_to_xdp_frame_keep_zc(xdp);
>> +	if (unlikely(!xdpf))
>> +		return I40E_XDP_CONSUMED;
>> +	xdpf->handle = xdp->handle;
>
> Shouldn't this line belong to convert_to_xdp_frame_keep_zc (and the
> previous patch)? It looks like it's code common for all drivers, and
> also patch 1 adds the handle field, but doesn't use it, which looks weird.

Good point.  I'll move it into the function.
-- 
Jonathan

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets.
  2019-07-01 11:04   ` Magnus Karlsson
@ 2019-07-02 16:44     ` Jonathan Lemon
  0 siblings, 0 replies; 10+ messages in thread
From: Jonathan Lemon @ 2019-07-02 16:44 UTC (permalink / raw)
  To: Magnus Karlsson
  Cc: Network Development, Björn Töpel, Karlsson, Magnus,
	Jakub Kicinski, jeffrey.t.kirsher, kernel-team

On 1 Jul 2019, at 4:04, Magnus Karlsson wrote:

> On Sat, Jun 29, 2019 at 12:18 AM Jonathan Lemon
> <jonathan.lemon@gmail.com> wrote:
>>
>> When the XDP program attached to a zero-copy AF_XDP socket returns 
>> XDP_TX,
>> queue the umem frame on the XDP TX ring.  Space on the recycle stack 
>> is
>> pre-allocated when the xsk is created.  (taken from tx_ring, since 
>> the
>> xdp ring is not initialized yet)
>>
>> Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
>> ---
>>  drivers/net/ethernet/intel/i40e/i40e_txrx.h |  1 +
>>  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 54 
>> +++++++++++++++++++--
>>  2 files changed, 51 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h 
>> b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> index 100e92d2982f..3e7954277737 100644
>> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
>> @@ -274,6 +274,7 @@ static inline unsigned int 
>> i40e_txd_use_count(unsigned int size)
>>  #define I40E_TX_FLAGS_TSYN             BIT(8)
>>  #define I40E_TX_FLAGS_FD_SB            BIT(9)
>>  #define I40E_TX_FLAGS_UDP_TUNNEL       BIT(10)
>> +#define I40E_TX_FLAGS_ZC_FRAME         BIT(11)
>>  #define I40E_TX_FLAGS_VLAN_MASK                0xffff0000
>>  #define I40E_TX_FLAGS_VLAN_PRIO_MASK   0xe0000000
>>  #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT  29
>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c 
>> b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
>> index ce8650d06962..020f9859215d 100644
>> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
>> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
>> @@ -91,7 +91,8 @@ static int i40e_xsk_umem_enable(struct i40e_vsi 
>> *vsi, struct xdp_umem *umem,
>>             qid >= netdev->real_num_tx_queues)
>>                 return -EINVAL;
>>
>> -       if (!xsk_umem_recycle_alloc(umem, vsi->rx_rings[0]->count))
>> +       if (!xsk_umem_recycle_alloc(umem, vsi->rx_rings[0]->count +
>> +                                         vsi->tx_rings[0]->count))
>>                 return -ENOMEM;
>>
>>         err = i40e_xsk_umem_dma_map(vsi, umem);
>> @@ -175,6 +176,48 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, 
>> struct xdp_umem *umem,
>>                 i40e_xsk_umem_disable(vsi, qid);
>>  }
>>
>> +static int i40e_xmit_rcvd_zc(struct i40e_ring *rx_ring, struct 
>> xdp_buff *xdp)
>
> This function looks very much like i40e_xmit_xdp_ring(). How can we
> refactor them to make them share more code and not lose performance at
> the same time? This comment is also valid for the ixgbe driver patch
> that follows.

The next patch will split these into a small preamble setup and then
call a common send function.
-- 
Jonathan



>
> Thanks: Magnus
>
>> +{
>> +       struct i40e_ring *xdp_ring;
>> +       struct i40e_tx_desc *tx_desc;
>> +       struct i40e_tx_buffer *tx_bi;
>> +       struct xdp_frame *xdpf;
>> +       dma_addr_t dma;
>> +
>> +       xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
>> +
>> +       if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
>> +               xdp_ring->tx_stats.tx_busy++;
>> +               return I40E_XDP_CONSUMED;
>> +       }
>> +       xdpf = convert_to_xdp_frame_keep_zc(xdp);
>> +       if (unlikely(!xdpf))
>> +               return I40E_XDP_CONSUMED;
>> +       xdpf->handle = xdp->handle;
>> +
>> +       dma = xdp_umem_get_dma(rx_ring->xsk_umem, xdp->handle);
>> +       tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
>> +       tx_bi->bytecount = xdpf->len;
>> +       tx_bi->gso_segs = 1;
>> +       tx_bi->xdpf = xdpf;
>> +       tx_bi->tx_flags = I40E_TX_FLAGS_ZC_FRAME;
>> +
>> +       tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use);
>> +       tx_desc->buffer_addr = cpu_to_le64(dma);
>> +       tx_desc->cmd_type_offset_bsz = 
>> build_ctob(I40E_TX_DESC_CMD_ICRC |
>> +                                                 
>> I40E_TX_DESC_CMD_EOP,
>> +                                                 0, xdpf->len, 0);
>> +       smp_wmb();
>> +
>> +       xdp_ring->next_to_use++;
>> +       if (xdp_ring->next_to_use == xdp_ring->count)
>> +               xdp_ring->next_to_use = 0;
>> +
>> +       tx_bi->next_to_watch = tx_desc;
>> +
>> +       return I40E_XDP_TX;
>> +}
>> +
>>  /**
>>   * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
>>   * @rx_ring: Rx ring
>> @@ -187,7 +230,6 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, 
>> struct xdp_umem *umem,
>>  static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct 
>> xdp_buff *xdp)
>>  {
>>         int err, result = I40E_XDP_PASS;
>> -       struct i40e_ring *xdp_ring;
>>         struct bpf_prog *xdp_prog;
>>         u32 act;
>>
>> @@ -202,8 +244,7 @@ static int i40e_run_xdp_zc(struct i40e_ring 
>> *rx_ring, struct xdp_buff *xdp)
>>         case XDP_PASS:
>>                 break;
>>         case XDP_TX:
>> -               xdp_ring = 
>> rx_ring->vsi->xdp_rings[rx_ring->queue_index];
>> -               result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
>> +               result = i40e_xmit_rcvd_zc(rx_ring, xdp);
>>                 break;
>>         case XDP_REDIRECT:
>>                 err = xdp_do_redirect(rx_ring->netdev, xdp, 
>> xdp_prog);
>> @@ -628,6 +669,11 @@ static bool i40e_xmit_zc(struct i40e_ring 
>> *xdp_ring, unsigned int budget)
>>  static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
>>                                      struct i40e_tx_buffer *tx_bi)
>>  {
>> +       if (tx_bi->tx_flags & I40E_TX_FLAGS_ZC_FRAME) {
>> +               xsk_umem_recycle_addr(tx_ring->xsk_umem, 
>> tx_bi->xdpf->handle);
>> +               tx_bi->tx_flags = 0;
>> +               return;
>> +       }
>>         xdp_return_frame(tx_bi->xdpf);
>>         dma_unmap_single(tx_ring->dev,
>>                          dma_unmap_addr(tx_bi, dma),
>> --
>> 2.17.1
>>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-07-02 16:44 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-28 22:15 [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Jonathan Lemon
2019-06-28 22:15 ` [PATCH 1/3 bpf-next] net: add convert_to_xdp_frame_keep_zc function Jonathan Lemon
2019-07-01 11:02   ` Magnus Karlsson
2019-06-28 22:15 ` [PATCH 2/3 bpf-next] i40e: Support zero-copy XDP_TX on the RX path for AF_XDP sockets Jonathan Lemon
2019-07-01 11:04   ` Magnus Karlsson
2019-07-02 16:44     ` Jonathan Lemon
2019-07-02  7:07   ` Maxim Mikityanskiy
2019-07-02 16:31     ` Jonathan Lemon
2019-06-28 22:15 ` [PATCH 3/3 bpf-next] ixgbe: " Jonathan Lemon
2019-07-01 11:01 ` [PATCH 0/3 bpf-next] intel: AF_XDP support for TX of RX packets Magnus Karlsson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).