From: Magnus Karlsson <magnus.karlsson@intel.com>
To: magnus.karlsson@intel.com, bjorn.topel@intel.com, ast@kernel.org,
	daniel@iogearbox.net, netdev@vger.kernel.org,
	jonathan.lemon@gmail.com, maximmi@mellanox.com
Cc: bpf@vger.kernel.org, jeffrey.t.kirsher@intel.com,
	anthony.l.nguyen@intel.com, maciej.fijalkowski@intel.com,
	maciejromanfijalkowski@gmail.com, cristian.dumitrescu@intel.com
Subject: [PATCH bpf-next v5 10/15] xsk: i40e: ice: ixgbe: mlx5: test for dma_need_sync earlier for better performance
Date: Fri, 28 Aug 2020 10:26:24 +0200
Message-ID: <1598603189-32145-11-git-send-email-magnus.karlsson@intel.com>
In-Reply-To: <1598603189-32145-1-git-send-email-magnus.karlsson@intel.com>

Test for dma_need_sync earlier to increase
performance. xsk_buff_dma_sync_for_cpu() takes an xdp_buff as its
parameter, and the xsk_buff_pool reference has to be dug out of
it. Perf shows that this dereference causes a lot of cache misses. But
as the buffer pool is now sent down to the driver at zero-copy
initialization time, we might as well pass this pointer directly,
instead of going via the xsk_buff, and perform the dma_need_sync test
already in xsk_buff_dma_sync_for_cpu() instead of in
xp_dma_sync_for_cpu(). This gets rid of these cache misses.
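
To make the cache-miss argument concrete, here is a simplified sketch
of the old and new fast paths. The helper names sync_for_cpu_old/new
are hypothetical, and the real inline helpers from
include/net/xdp_sock_drv.h and include/net/xsk_buff_pool.h are
flattened into one function each; this is not the exact kernel code:

	/* Before: the pool is reached through the xsk_buff on every
	 * packet, so the dma_need_sync test itself costs a cold
	 * dereference.
	 */
	static inline void sync_for_cpu_old(struct xdp_buff *xdp)
	{
		struct xdp_buff_xsk *xskb =
			container_of(xdp, struct xdp_buff_xsk, xdp);

		if (!xskb->pool->dma_need_sync) /* pointer chase, cache miss */
			return;
		xp_dma_sync_for_cpu_slow(xskb);
	}

	/* After: the driver passes the pool pointer it already keeps in
	 * its ring structure (hot in cache), so the common no-sync case
	 * never dereferences the xsk_buff's pool member. container_of()
	 * is pure pointer arithmetic and touches no memory.
	 */
	static inline void sync_for_cpu_new(struct xdp_buff *xdp,
					    struct xsk_buff_pool *pool)
	{
		struct xdp_buff_xsk *xskb =
			container_of(xdp, struct xdp_buff_xsk, xdp);

		if (!pool->dma_need_sync) /* pool comes straight from the ring */
			return;
		xp_dma_sync_for_cpu_slow(xskb);
	}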

Throughput increases by 3% for the xdpsock l2fwd sample application
on my machine.
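
For reference, a typical invocation of the in-tree sample in this mode
(interface name and queue number below are placeholders; this assumes
the sample's standard option names, -l for l2fwd and -z for zero-copy):

	./samples/bpf/xdpsock -i eth0 -q 0 -l -z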

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c          | 2 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c            | 2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 4 ++--
 include/net/xdp_sock_drv.h                          | 7 +++++--
 include/net/xsk_buff_pool.h                         | 3 ---
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 95b9a7e..2a1153d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -314,7 +314,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)

 		bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 		(*bi)->data_end = (*bi)->data + size;
-		xsk_buff_dma_sync_for_cpu(*bi);
+		xsk_buff_dma_sync_for_cpu(*bi, rx_ring->xsk_pool);

 		xdp_res = i40e_run_xdp_zc(rx_ring, *bi);
 		if (xdp_res) {
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index dffef37..7978865 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -595,7 +595,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)

 		rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
 		rx_buf->xdp->data_end = rx_buf->xdp->data + size;
-		xsk_buff_dma_sync_for_cpu(rx_buf->xdp);
+		xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool);

 		xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
 		if (xdp_res) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 6af34da..3771857 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -287,7 +287,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 		}

 		bi->xdp->data_end = bi->xdp->data + size;
-		xsk_buff_dma_sync_for_cpu(bi->xdp);
+		xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
 		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);

 		if (xdp_res) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index a33a1f7..902ce77 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -48,7 +48,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,

 	xdp->data_end = xdp->data + cqe_bcnt32;
 	xdp_set_data_meta_invalid(xdp);
-	xsk_buff_dma_sync_for_cpu(xdp);
+	xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
 	prefetch(xdp->data);

 	rcu_read_lock();
@@ -99,7 +99,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,

 	xdp->data_end = xdp->data + cqe_bcnt;
 	xdp_set_data_meta_invalid(xdp);
-	xsk_buff_dma_sync_for_cpu(xdp);
+	xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
 	prefetch(xdp->data);

 	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index a7c7d2e..5b1ee8a 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -99,10 +99,13 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 	return xp_raw_get_data(pool, addr);
 }

-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);

+	if (!pool->dma_need_sync)
+		return;
+
 	xp_dma_sync_for_cpu(xskb);
 }

@@ -222,7 +225,7 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 	return NULL;
 }

-static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
 {
 }

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 38d03a6..907537d 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -114,9 +114,6 @@ static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
 void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
 static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
 {
-	if (!xskb->pool->dma_need_sync)
-		return;
-
 	xp_dma_sync_for_cpu_slow(xskb);
 }

--
2.7.4

Thread overview: 20+ messages
2020-08-28  8:26 [PATCH bpf-next v5 00/15] xsk: support shared umems between devices and queues Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 01/15] xsk: i40e: ice: ixgbe: mlx5: pass buffer pool to driver instead of umem Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 02/15] xsk: i40e: ice: ixgbe: mlx5: rename xsk zero-copy driver interfaces Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 03/15] xsk: create and free buffer pool independently from umem Magnus Karlsson
2020-10-05  8:35   ` please revert " Christoph Hellwig
2020-10-05  8:43     ` Christoph Hellwig
2020-10-05  8:47       ` Björn Töpel
2020-08-28  8:26 ` [PATCH bpf-next v5 04/15] xsk: move fill and completion rings to buffer pool Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 05/15] xsk: move queue_id, dev and need_wakeup " Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 06/15] xsk: move xsk_tx_list and its lock " Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 07/15] xsk: move addrs from buffer pool to umem Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 08/15] xsk: enable sharing of dma mappings Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 09/15] xsk: rearrange internal structs for better performance Magnus Karlsson
2020-08-28  8:26 ` Magnus Karlsson [this message]
2020-08-28  8:26 ` [PATCH bpf-next v5 11/15] xsk: add shared umem support between queue ids Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 12/15] xsk: add shared umem support between devices Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 13/15] libbpf: support shared umems between queues and devices Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 14/15] samples/bpf: add new sample xsk_fwd.c Magnus Karlsson
2020-08-28  8:26 ` [PATCH bpf-next v5 15/15] xsk: documentation for XDP_SHARED_UMEM between queues and netdevs Magnus Karlsson
2020-08-31 20:20 ` [PATCH bpf-next v5 00/15] xsk: support shared umems between devices and queues Daniel Borkmann
