* [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
@ 2019-08-15 3:46 Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 1/5] xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap Sridhar Samudrala
` (7 more replies)
0 siblings, 8 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2019-08-15 3:46 UTC (permalink / raw)
To: magnus.karlsson, bjorn.topel, netdev, bpf, sridhar.samudrala,
intel-wired-lan, maciej.fijalkowski, tom.herbert
This patch series introduces XDP_SKIP_BPF flag that can be specified
during the bind() call of an AF_XDP socket to skip calling the BPF
program in the receive path and pass the buffer directly to the socket.
When a single AF_XDP socket is associated with a queue and a HW
filter is used to redirect the packets and the app is interested in
receiving all the packets on that queue, we don't need an additional
BPF program to do further filtering or lookup/redirect to a socket.
Here are some performance numbers collected on
- 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
- Intel 40Gb Ethernet NIC (i40e)
All tests use 2 cores and the results are in Mpps.
turbo on (default)
---------------------------------------------
no-skip-bpf skip-bpf
---------------------------------------------
rxdrop zerocopy 21.9 38.5
l2fwd zerocopy 17.0 20.5
rxdrop copy 11.1 13.3
l2fwd copy 1.9 2.0
no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
---------------------------------------------
no-skip-bpf skip-bpf
---------------------------------------------
rxdrop zerocopy 15.4 29.0
l2fwd zerocopy 11.8 18.2
rxdrop copy 8.2 10.5
l2fwd copy 1.7 1.7
---------------------------------------------
Sridhar Samudrala (5):
xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
xsk: Introduce XDP_SKIP_BPF bind option
i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
xdpsock_user: Add skip_bpf option
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++-
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +++
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++-
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 ++++++-
include/net/xdp_sock.h | 21 ++++++++-
include/uapi/linux/if_xdp.h | 1 +
include/uapi/linux/xdp_diag.h | 1 +
net/xdp/xdp_umem.c | 9 ++--
net/xdp/xsk.c | 43 ++++++++++++++++---
net/xdp/xsk_diag.c | 5 ++-
samples/bpf/xdpsock_user.c | 8 ++++
11 files changed, 135 insertions(+), 17 deletions(-)
--
2.20.1
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH bpf-next 1/5] xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
@ 2019-08-15 3:46 ` Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 2/5] xsk: Introduce XDP_SKIP_BPF bind option Sridhar Samudrala
` (6 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2019-08-15 3:46 UTC (permalink / raw)
To: magnus.karlsson, bjorn.topel, netdev, bpf, sridhar.samudrala,
intel-wired-lan, maciej.fijalkowski, tom.herbert
The bool 'zc' field in struct xdp_umem is replaced with a u32 flags
field and a bit within flags is used to indicate zerocopy. This flags
field will be used in later patches for other bit fields.
Also, removed the bool 'zc' field from struct xdp_sock as it can be
accessed via flags in xs->umem.
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
include/net/xdp_sock.h | 12 ++++++++++--
net/xdp/xdp_umem.c | 6 +++---
net/xdp/xsk.c | 12 +++++++++---
net/xdp/xsk_diag.c | 3 ++-
4 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69796d264f06..b6716dbdce1a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -27,6 +27,9 @@ struct xdp_umem_fq_reuse {
u64 handles[];
};
+/* Bits for the umem flags field. */
+#define XDP_UMEM_F_ZEROCOPY (1 << 0)
+
struct xdp_umem {
struct xsk_queue *fq;
struct xsk_queue *cq;
@@ -45,7 +48,7 @@ struct xdp_umem {
struct net_device *dev;
struct xdp_umem_fq_reuse *fq_reuse;
u16 queue_id;
- bool zc;
+ u32 flags;
spinlock_t xsk_list_lock;
struct list_head xsk_list;
};
@@ -58,7 +61,6 @@ struct xdp_sock {
struct xdp_umem *umem;
struct list_head flush_node;
u16 queue_id;
- bool zc;
enum {
XSK_READY = 0,
XSK_BOUND,
@@ -95,6 +97,7 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
struct xdp_umem_fq_reuse *newq);
void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
+bool xsk_umem_zerocopy(struct xdp_umem *umem);
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
@@ -213,6 +216,11 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
return NULL;
}
+static inline bool xsk_umem_zerocopy(struct xdp_umem *umem)
+{
+ return false;
+}
+
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return NULL;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index a0607969f8c0..411b3e3498c4 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -126,7 +126,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
if (err)
goto err_unreg_umem;
- umem->zc = true;
+ umem->flags |= XDP_UMEM_F_ZEROCOPY;
return 0;
err_unreg_umem:
@@ -147,7 +147,7 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
if (!umem->dev)
return;
- if (umem->zc) {
+ if (xsk_umem_zerocopy(umem)) {
bpf.command = XDP_SETUP_XSK_UMEM;
bpf.xsk.umem = NULL;
bpf.xsk.queue_id = umem->queue_id;
@@ -162,7 +162,7 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
dev_put(umem->dev);
umem->dev = NULL;
- umem->zc = false;
+ umem->flags &= ~XDP_UMEM_F_ZEROCOPY;
}
static void xdp_umem_unmap_pages(struct xdp_umem *umem)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59b57d708697..ca95676ef75d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -295,6 +295,12 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
return err;
}
+bool xsk_umem_zerocopy(struct xdp_umem *umem)
+{
+ return (umem && (umem->flags & XDP_UMEM_F_ZEROCOPY));
+}
+EXPORT_SYMBOL(xsk_umem_zerocopy);
+
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
@@ -310,7 +316,8 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (need_wait)
return -EOPNOTSUPP;
- return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
+ return xsk_umem_zerocopy(xs->umem) ? xsk_zc_xmit(sk) :
+ xsk_generic_xmit(sk, m, total_len);
}
static unsigned int xsk_poll(struct file *file, struct socket *sock,
@@ -503,7 +510,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
xs->dev = dev;
- xs->zc = xs->umem->zc;
xs->queue_id = qid;
xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
@@ -683,7 +689,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
mutex_lock(&xs->mutex);
- if (xs->zc)
+ if (xsk_umem_zerocopy(xs->umem))
opts.flags |= XDP_OPTIONS_ZEROCOPY;
mutex_unlock(&xs->mutex);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..8a19b7e87cfb 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -10,6 +10,7 @@
#include <net/xdp_sock.h>
#include <linux/xdp_diag.h>
#include <linux/sock_diag.h>
+#include <net/xdp_sock.h>
#include "xsk_queue.h"
#include "xsk.h"
@@ -61,7 +62,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
du.flags = 0;
- if (umem->zc)
+ if (xsk_umem_zerocopy(umem))
du.flags |= XDP_DU_F_ZEROCOPY;
du.refs = refcount_read(&umem->users);
--
2.20.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH bpf-next 2/5] xsk: Introduce XDP_SKIP_BPF bind option
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 1/5] xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap Sridhar Samudrala
@ 2019-08-15 3:46 ` Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 3/5] i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets Sridhar Samudrala
` (5 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2019-08-15 3:46 UTC (permalink / raw)
To: magnus.karlsson, bjorn.topel, netdev, bpf, sridhar.samudrala,
intel-wired-lan, maciej.fijalkowski, tom.herbert
This option enables an AF_XDP socket to specify XDP_SKIP_BPF flag
with the bind() call to skip calling the BPF program in the receive
path and pass the XDP buffer directly to the socket.
When a single AF_XDP socket is associated with a queue and a HW
filter is used to redirect the packets and the app is interested in
receiving all the packets on that queue, we don't need an additional
BPF program to do further filtering or lookup/redirect to a socket.
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
include/net/xdp_sock.h | 9 +++++++++
include/uapi/linux/if_xdp.h | 1 +
include/uapi/linux/xdp_diag.h | 1 +
net/xdp/xdp_umem.c | 5 ++++-
net/xdp/xsk.c | 31 +++++++++++++++++++++++++++++--
net/xdp/xsk_diag.c | 2 ++
6 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index b6716dbdce1a..ad132a69db7c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -29,6 +29,7 @@ struct xdp_umem_fq_reuse {
/* Bits for the umem flags field. */
#define XDP_UMEM_F_ZEROCOPY (1 << 0)
+#define XDP_UMEM_F_SKIP_BPF (1 << 1)
struct xdp_umem {
struct xsk_queue *fq;
@@ -98,6 +99,9 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
bool xsk_umem_zerocopy(struct xdp_umem *umem);
+bool xsk_umem_skip_bpf(struct xdp_umem *umem);
+void xsk_umem_flush(struct xdp_umem *umem);
+int xsk_umem_rcv(struct xdp_umem *umem, struct xdp_buff *xdp);
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
@@ -221,6 +225,11 @@ static inline bool xsk_umem_zerocopy(struct xdp_umem *umem)
return false;
}
+static inline bool xsk_umem_skip_bpf(struct xdp_umem *umem)
+{
+ return false;
+}
+
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return NULL;
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index faaa5ca2a117..881447ebf3c9 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -16,6 +16,7 @@
#define XDP_SHARED_UMEM (1 << 0)
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+#define XDP_SKIP_BPF (1 << 3) /* Skip running BPF program */
struct sockaddr_xdp {
__u16 sxdp_family;
diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
index 78b2591a7782..6caf3d9c9abe 100644
--- a/include/uapi/linux/xdp_diag.h
+++ b/include/uapi/linux/xdp_diag.h
@@ -56,6 +56,7 @@ struct xdp_diag_ring {
};
#define XDP_DU_F_ZEROCOPY (1 << 0)
+#define XDP_DU_F_SKIP_BPF (1 << 1)
struct xdp_diag_umem {
__u64 size;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 411b3e3498c4..cbc02509dc90 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -106,6 +106,9 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
umem->dev = dev;
umem->queue_id = queue_id;
+ if (flags & XDP_SKIP_BPF)
+ umem->flags |= XDP_UMEM_F_SKIP_BPF;
+
dev_hold(dev);
if (force_copy)
@@ -162,7 +165,7 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
dev_put(umem->dev);
umem->dev = NULL;
- umem->flags &= ~XDP_UMEM_F_ZEROCOPY;
+ umem->flags &= ~(XDP_UMEM_F_ZEROCOPY | XDP_UMEM_F_SKIP_BPF);
}
static void xdp_umem_unmap_pages(struct xdp_umem *umem)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ca95676ef75d..bcb6a77fae22 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -166,6 +166,27 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return err;
}
+void xsk_umem_flush(struct xdp_umem *umem)
+{
+ struct xdp_sock *xs;
+
+ if (!list_empty(&umem->xsk_list)) {
+ xs = list_first_entry(&umem->xsk_list, struct xdp_sock, list);
+ xsk_flush(xs);
+ }
+}
+EXPORT_SYMBOL(xsk_umem_flush);
+
+int xsk_umem_rcv(struct xdp_umem *umem, struct xdp_buff *xdp)
+{
+ struct xdp_sock *xs;
+
+ xs = list_first_entry(&umem->xsk_list, struct xdp_sock, list);
+ xdp->handle += xdp->data - xdp->data_hard_start;
+ return xsk_rcv(xs, xdp);
+}
+EXPORT_SYMBOL(xsk_umem_rcv);
+
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
xskq_produce_flush_addr_n(umem->cq, nb_entries);
@@ -301,6 +322,12 @@ bool xsk_umem_zerocopy(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_zerocopy);
+bool xsk_umem_skip_bpf(struct xdp_umem *umem)
+{
+ return (umem && (umem->flags & XDP_UMEM_F_SKIP_BPF));
+}
+EXPORT_SYMBOL(xsk_umem_skip_bpf);
+
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
@@ -434,7 +461,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
flags = sxdp->sxdp_flags;
- if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+ if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY | XDP_SKIP_BPF))
return -EINVAL;
rtnl_lock();
@@ -461,7 +488,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
struct xdp_sock *umem_xs;
struct socket *sock;
- if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+ if (flags & (XDP_COPY | XDP_ZEROCOPY | XDP_SKIP_BPF)) {
/* Cannot specify flags for shared sockets. */
err = -EINVAL;
goto out_unlock;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 8a19b7e87cfb..f6f4b7912a22 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -64,6 +64,8 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.flags = 0;
if (xsk_umem_zerocopy(umem))
du.flags |= XDP_DU_F_ZEROCOPY;
+ if (xsk_umem_skip_bpf(umem))
+ du.flags |= XDP_DU_F_SKIP_BPF;
du.refs = refcount_read(&umem->users);
err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
--
2.20.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH bpf-next 3/5] i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 1/5] xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 2/5] xsk: Introduce XDP_SKIP_BPF bind option Sridhar Samudrala
@ 2019-08-15 3:46 ` Sridhar Samudrala
2019-08-16 9:21 ` kbuild test robot
2019-08-15 3:46 ` [PATCH bpf-next 4/5] ixgbe: " Sridhar Samudrala
` (4 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Sridhar Samudrala @ 2019-08-15 3:46 UTC (permalink / raw)
To: magnus.karlsson, bjorn.topel, netdev, bpf, sridhar.samudrala,
intel-wired-lan, maciej.fijalkowski, tom.herbert
This patch skips calling BPF program in the receive path if
the queue is associated with UMEM that is not shared and
bound to an AF_XDP socket that has enabled skip bpf during
bind() call.
Here are some performance numbers collected on
- 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
- Intel 40Gb Ethernet NIC (i40e)
All tests use 2 cores and the results are in Mpps.
turbo on (default)
---------------------------------------------
no-skip-bpf skip-bpf
---------------------------------------------
rxdrop zerocopy 21.9 38.5
l2fwd zerocopy 17.0 20.5
rxdrop copy 11.1 13.3
l2fwd copy 1.9 2.0
no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
---------------------------------------------
no-skip-bpf skip-bpf
---------------------------------------------
rxdrop zerocopy 15.4 29.0
l2fwd zerocopy 11.8 18.2
rxdrop copy 8.2 10.5
l2fwd copy 1.7 1.7
---------------------------------------------
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++++++++++++--
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 ++++++
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e3f29dc8b290..5e63e3644e87 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2199,6 +2199,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
int err, result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
+ struct xdp_umem *umem;
u32 act;
rcu_read_lock();
@@ -2209,6 +2210,13 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
prefetchw(xdp->data_hard_start); /* xdp_frame write */
+ umem = xdp_get_umem_from_qid(rx_ring->netdev, rx_ring->queue_index);
+ if (xsk_umem_skip_bpf(umem)) {
+ err = xsk_umem_rcv(umem, xdp);
+ result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+ goto xdp_out;
+ }
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -2303,8 +2311,18 @@ void i40e_update_rx_stats(struct i40e_ring *rx_ring,
**/
void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
{
- if (xdp_res & I40E_XDP_REDIR)
- xdp_do_flush_map();
+ if (xdp_res & I40E_XDP_REDIR) {
+ struct xdp_umem *umem;
+
+ umem = rx_ring->xsk_umem;
+ if (!umem)
+ umem = xdp_get_umem_from_qid(rx_ring->netdev,
+ rx_ring->queue_index);
+ if (xsk_umem_skip_bpf(umem))
+ xsk_umem_flush(umem);
+ else
+ xdp_do_flush_map();
+ }
if (xdp_res & I40E_XDP_TX) {
struct i40e_ring *xdp_ring =
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 32bad014d76c..cc538479c95d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -195,6 +195,12 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
struct bpf_prog *xdp_prog;
u32 act;
+ if (xsk_umem_skip_bpf(rx_ring->xsk_umem)) {
+ err = xsk_umem_rcv(rx_ring->xsk_umem, xdp);
+ result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+ return result;
+ }
+
rcu_read_lock();
/* NB! xdp_prog will always be !NULL, due to the fact that
* this path is enabled by setting an XDP program.
--
2.20.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH bpf-next 4/5] ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
` (2 preceding siblings ...)
2019-08-15 3:46 ` [PATCH bpf-next 3/5] i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets Sridhar Samudrala
@ 2019-08-15 3:46 ` Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 5/5] xdpsock_user: Add skip_bpf option Sridhar Samudrala
` (3 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2019-08-15 3:46 UTC (permalink / raw)
To: magnus.karlsson, bjorn.topel, netdev, bpf, sridhar.samudrala,
intel-wired-lan, maciej.fijalkowski, tom.herbert
This patch skips calling BPF program in the receive path if
the queue is associated with UMEM that is not shared and
bound to an AF_XDP socket that has enabled skip bpf during
bind() call.
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 +++++++++++++++++--
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 +++++++++++++--
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index dc7b128c780e..594792860cdd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2197,6 +2197,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
struct xdp_frame *xdpf;
+ struct xdp_umem *umem;
u32 act;
rcu_read_lock();
@@ -2207,6 +2208,13 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
prefetchw(xdp->data_hard_start); /* xdp_frame write */
+ umem = xdp_get_umem_from_qid(rx_ring->netdev, rx_ring->queue_index);
+ if (xsk_umem_skip_bpf(umem)) {
+ err = xsk_umem_rcv(umem, xdp);
+ result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+ goto xdp_out;
+ }
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -2400,8 +2408,16 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
total_rx_packets++;
}
- if (xdp_xmit & IXGBE_XDP_REDIR)
- xdp_do_flush_map();
+ if (xdp_xmit & IXGBE_XDP_REDIR) {
+ struct xdp_umem *umem;
+
+ umem = xdp_get_umem_from_qid(rx_ring->netdev,
+ rx_ring->queue_index);
+ if (xsk_umem_skip_bpf(umem))
+ xsk_umem_flush(umem);
+ else
+ xdp_do_flush_map();
+ }
if (xdp_xmit & IXGBE_XDP_TX) {
struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 6b609553329f..9ea8a769d7a8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -148,6 +148,12 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
struct xdp_frame *xdpf;
u32 act;
+ if (xsk_umem_skip_bpf(rx_ring->xsk_umem)) {
+ err = xsk_umem_rcv(rx_ring->xsk_umem, xdp);
+ result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+ return result;
+ }
+
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -527,8 +533,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
ixgbe_rx_skb(q_vector, skb);
}
- if (xdp_xmit & IXGBE_XDP_REDIR)
- xdp_do_flush_map();
+ if (xdp_xmit & IXGBE_XDP_REDIR) {
+ struct xdp_umem *umem = rx_ring->xsk_umem;
+
+ if (xsk_umem_skip_bpf(umem))
+ xsk_umem_flush(umem);
+ else
+ xdp_do_flush_map();
+ }
if (xdp_xmit & IXGBE_XDP_TX) {
struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
--
2.20.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH bpf-next 5/5] xdpsock_user: Add skip_bpf option
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
` (3 preceding siblings ...)
2019-08-15 3:46 ` [PATCH bpf-next 4/5] ixgbe: " Sridhar Samudrala
@ 2019-08-15 3:46 ` Sridhar Samudrala
2019-08-15 11:12 ` [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Toke Høiland-Jørgensen
` (2 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Sridhar Samudrala @ 2019-08-15 3:46 UTC (permalink / raw)
To: magnus.karlsson, bjorn.topel, netdev, bpf, sridhar.samudrala,
intel-wired-lan, maciej.fijalkowski, tom.herbert
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
samples/bpf/xdpsock_user.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 93eaaf7239b2..509fc6a18af9 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -123,6 +123,9 @@ static void print_benchmark(bool running)
if (opt_poll)
printf("poll() ");
+ if (opt_xdp_bind_flags & XDP_SKIP_BPF)
+ printf("skip-bpf ");
+
if (running) {
printf("running...");
fflush(stdout);
@@ -352,6 +355,7 @@ static struct option long_options[] = {
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
{"frame-size", required_argument, 0, 'f'},
+ {"skip-bpf", no_argument, 0, 's'},
{0, 0, 0, 0}
};
@@ -372,6 +376,7 @@ static void usage(const char *prog)
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
" -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
+ " -s, --skip-bpf Skip running bpf program.\n"
"\n";
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
exit(EXIT_FAILURE);
@@ -430,6 +435,9 @@ static void parse_command_line(int argc, char **argv)
case 'f':
opt_xsk_frame_size = atoi(optarg);
break;
+ case 's':
+ opt_xdp_bind_flags |= XDP_SKIP_BPF;
+ break;
default:
usage(basename(argv[0]));
}
--
2.20.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
` (4 preceding siblings ...)
2019-08-15 3:46 ` [PATCH bpf-next 5/5] xdpsock_user: Add skip_bpf option Sridhar Samudrala
@ 2019-08-15 11:12 ` Toke Høiland-Jørgensen
2019-08-15 16:25 ` Samudrala, Sridhar
2019-08-15 12:51 ` Björn Töpel
2019-08-15 19:28 ` Jakub Kicinski
7 siblings, 1 reply; 18+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-08-15 11:12 UTC (permalink / raw)
To: Sridhar Samudrala, magnus.karlsson, bjorn.topel, netdev, bpf,
sridhar.samudrala, intel-wired-lan, maciej.fijalkowski,
tom.herbert
Sridhar Samudrala <sridhar.samudrala@intel.com> writes:
> This patch series introduces XDP_SKIP_BPF flag that can be specified
> during the bind() call of an AF_XDP socket to skip calling the BPF
> program in the receive path and pass the buffer directly to the socket.
>
> When a single AF_XDP socket is associated with a queue and a HW
> filter is used to redirect the packets and the app is interested in
> receiving all the packets on that queue, we don't need an additional
> BPF program to do further filtering or lookup/redirect to a socket.
>
> Here are some performance numbers collected on
> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
> - Intel 40Gb Ethernet NIC (i40e)
>
> All tests use 2 cores and the results are in Mpps.
>
> turbo on (default)
> ---------------------------------------------
> no-skip-bpf skip-bpf
> ---------------------------------------------
> rxdrop zerocopy 21.9 38.5
> l2fwd zerocopy 17.0 20.5
> rxdrop copy 11.1 13.3
> l2fwd copy 1.9 2.0
>
> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
> ---------------------------------------------
> no-skip-bpf skip-bpf
> ---------------------------------------------
> rxdrop zerocopy 15.4 29.0
> l2fwd zerocopy 11.8 18.2
> rxdrop copy 8.2 10.5
> l2fwd copy 1.7 1.7
> ---------------------------------------------
You're getting this performance boost by adding more code in the fast
path for every XDP program; so what's the performance impact of that for
cases where we do run an eBPF program?
Also, this is basically a special-casing of a particular deployment
scenario. Without a way to control RX queue assignment and traffic
steering, you're basically hard-coding a particular app's takeover of
the network interface; I'm not sure that is such a good idea...
-Toke
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
` (5 preceding siblings ...)
2019-08-15 11:12 ` [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Toke Høiland-Jørgensen
@ 2019-08-15 12:51 ` Björn Töpel
2019-08-15 16:46 ` Samudrala, Sridhar
2019-08-15 19:28 ` Jakub Kicinski
7 siblings, 1 reply; 18+ messages in thread
From: Björn Töpel @ 2019-08-15 12:51 UTC (permalink / raw)
To: Sridhar Samudrala, magnus.karlsson, netdev, bpf, intel-wired-lan,
maciej.fijalkowski, tom.herbert
On 2019-08-15 05:46, Sridhar Samudrala wrote:
> This patch series introduces XDP_SKIP_BPF flag that can be specified
> during the bind() call of an AF_XDP socket to skip calling the BPF
> program in the receive path and pass the buffer directly to the socket.
>
> When a single AF_XDP socket is associated with a queue and a HW
> filter is used to redirect the packets and the app is interested in
> receiving all the packets on that queue, we don't need an additional
> BPF program to do further filtering or lookup/redirect to a socket.
>
> Here are some performance numbers collected on
> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
> - Intel 40Gb Ethernet NIC (i40e)
>
> All tests use 2 cores and the results are in Mpps.
>
> turbo on (default)
> ---------------------------------------------
> no-skip-bpf skip-bpf
> ---------------------------------------------
> rxdrop zerocopy 21.9 38.5
> l2fwd zerocopy 17.0 20.5
> rxdrop copy 11.1 13.3
> l2fwd copy 1.9 2.0
>
> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
> ---------------------------------------------
> no-skip-bpf skip-bpf
> ---------------------------------------------
> rxdrop zerocopy 15.4 29.0
> l2fwd zerocopy 11.8 18.2
> rxdrop copy 8.2 10.5
> l2fwd copy 1.7 1.7
> ---------------------------------------------
>
This work is somewhat similar to the XDP_ATTACH work [1]. Avoiding the
retpoline in the XDP program call is a nice performance boost! I like
the numbers! :-) I also like the idea of adding a flag that just does
what most AF_XDP Rx users want -- just getting all packets of a
certain queue into the XDP sockets.
In addition to Toke's mail, I have some more concerns with the series:
* AFAIU the SKIP_BPF only works for zero-copy enabled sockets. IMO, it
should work for all modes (including XDP_SKB).
* In order to work, a user still needs an XDP program running. That's
clunky. I'd like the behavior that if no XDP program is attached,
and the option is set, the packets for a that queue end up in the
socket. If there's an XDP program attached, the program has
precedence.
* It requires changes in all drivers. Not nice, and scales badly. Try
making it generic (xdp_do_redirect/xdp_flush), so it Just Works for
all XDP capable drivers.
Thanks for working on this!
Björn
[1]
https://lore.kernel.org/netdev/20181207114431.18038-1-bjorn.topel@gmail.com/
> Sridhar Samudrala (5):
> xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
> xsk: Introduce XDP_SKIP_BPF bind option
> i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
> ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
> xdpsock_user: Add skip_bpf option
>
> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++-
> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +++
> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++-
> drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 ++++++-
> include/net/xdp_sock.h | 21 ++++++++-
> include/uapi/linux/if_xdp.h | 1 +
> include/uapi/linux/xdp_diag.h | 1 +
> net/xdp/xdp_umem.c | 9 ++--
> net/xdp/xsk.c | 43 ++++++++++++++++---
> net/xdp/xsk_diag.c | 5 ++-
> samples/bpf/xdpsock_user.c | 8 ++++
> 11 files changed, 135 insertions(+), 17 deletions(-)
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 11:12 ` [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Toke Høiland-Jørgensen
@ 2019-08-15 16:25 ` Samudrala, Sridhar
2019-08-15 17:11 ` Toke Høiland-Jørgensen
0 siblings, 1 reply; 18+ messages in thread
From: Samudrala, Sridhar @ 2019-08-15 16:25 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, magnus.karlsson, bjorn.topel,
netdev, bpf, intel-wired-lan, maciej.fijalkowski, tom.herbert
On 8/15/2019 4:12 AM, Toke Høiland-Jørgensen wrote:
> Sridhar Samudrala <sridhar.samudrala@intel.com> writes:
>
>> This patch series introduces XDP_SKIP_BPF flag that can be specified
>> during the bind() call of an AF_XDP socket to skip calling the BPF
>> program in the receive path and pass the buffer directly to the socket.
>>
>> When a single AF_XDP socket is associated with a queue and a HW
>> filter is used to redirect the packets and the app is interested in
>> receiving all the packets on that queue, we don't need an additional
>> BPF program to do further filtering or lookup/redirect to a socket.
>>
>> Here are some performance numbers collected on
>> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
>> - Intel 40Gb Ethernet NIC (i40e)
>>
>> All tests use 2 cores and the results are in Mpps.
>>
>> turbo on (default)
>> ---------------------------------------------
>> no-skip-bpf skip-bpf
>> ---------------------------------------------
>> rxdrop zerocopy 21.9 38.5
>> l2fwd zerocopy 17.0 20.5
>> rxdrop copy 11.1 13.3
>> l2fwd copy 1.9 2.0
>>
>> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>> ---------------------------------------------
>> no-skip-bpf skip-bpf
>> ---------------------------------------------
>> rxdrop zerocopy 15.4 29.0
>> l2fwd zerocopy 11.8 18.2
>> rxdrop copy 8.2 10.5
>> l2fwd copy 1.7 1.7
>> ---------------------------------------------
>
> You're getting this performance boost by adding more code in the fast
> path for every XDP program; so what's the performance impact of that for
> cases where we do run an eBPF program?
The no-skip-bpf results are pretty close to what I see before the
patches are applied. As umem is cached in rx_ring for zerocopy the
overhead is much smaller compared to the copy scenario where I am
currently calling xdp_get_umem_from_qid().
>
> Also, this is basically a special-casing of a particular deployment
> scenario. Without a way to control RX queue assignment and traffic
> steering, you're basically hard-coding a particular app's takeover of
> the network interface; I'm not sure that is such a good idea...
Yes. This is mainly targeted at applications that create 1 AF_XDP socket
per RX queue and can use a HW filter (via ethtool or TC flower) to
redirect the packets to a queue or a group of queues.
>
> -Toke
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 12:51 ` Björn Töpel
@ 2019-08-15 16:46 ` Samudrala, Sridhar
2019-08-16 13:32 ` [Intel-wired-lan] " Björn Töpel
0 siblings, 1 reply; 18+ messages in thread
From: Samudrala, Sridhar @ 2019-08-15 16:46 UTC (permalink / raw)
To: Björn Töpel, magnus.karlsson, netdev, bpf,
intel-wired-lan, maciej.fijalkowski, tom.herbert
On 8/15/2019 5:51 AM, Björn Töpel wrote:
> On 2019-08-15 05:46, Sridhar Samudrala wrote:
>> This patch series introduces XDP_SKIP_BPF flag that can be specified
>> during the bind() call of an AF_XDP socket to skip calling the BPF
>> program in the receive path and pass the buffer directly to the socket.
>>
>> When a single AF_XDP socket is associated with a queue and a HW
>> filter is used to redirect the packets and the app is interested in
>> receiving all the packets on that queue, we don't need an additional
>> BPF program to do further filtering or lookup/redirect to a socket.
>>
>> Here are some performance numbers collected on
>> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
>> - Intel 40Gb Ethernet NIC (i40e)
>>
>> All tests use 2 cores and the results are in Mpps.
>>
>> turbo on (default)
>> ---------------------------------------------
>> no-skip-bpf skip-bpf
>> ---------------------------------------------
>> rxdrop zerocopy 21.9 38.5
>> l2fwd zerocopy 17.0 20.5
>> rxdrop copy 11.1 13.3
>> l2fwd copy 1.9 2.0
>>
>> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>> ---------------------------------------------
>> no-skip-bpf skip-bpf
>> ---------------------------------------------
>> rxdrop zerocopy 15.4 29.0
>> l2fwd zerocopy 11.8 18.2
>> rxdrop copy 8.2 10.5
>> l2fwd copy 1.7 1.7
>> ---------------------------------------------
>>
>
> This work is somewhat similar to the XDP_ATTACH work [1]. Avoiding the
> retpoline in the XDP program call is a nice performance boost! I like
> the numbers! :-) I also like the idea of adding a flag that just does
> what most AF_XDP Rx users want -- just getting all packets of a
> certain queue into the XDP sockets.
>
> In addition to Toke's mail, I have some more concerns with the series:
>
> * AFAIU the SKIP_BPF only works for zero-copy enabled sockets. IMO, it
> should work for all modes (including XDP_SKB).
This patch enables SKIP_BPF for AF_XDP sockets where an XDP program is
attached at driver level (both zerocopy and copy modes)
I tried a quick hack to see the perf benefit with generic XDP mode, but
I didn't see any significant improvement in performance in that
scenario, so I didn't include that mode.
>
> * In order to work, a user still needs an XDP program running. That's
> clunky. I'd like the behavior that if no XDP program is attached,
and the option is set, the packets for that queue end up in the
> socket. If there's an XDP program attached, the program has
> precedence.
I think this would require more changes in the drivers to take XDP
datapath even when there is no XDP program loaded.
>
> * It requires changes in all drivers. Not nice, and scales badly. Try
> making it generic (xdp_do_redirect/xdp_flush), so it Just Works for
> all XDP capable drivers.
I tried to make this as generic as possible and make the changes to the
driver very minimal, but could not find a way to avoid any changes at
all to the driver. xdp_do_redirect() gets called after the call to
bpf_prog_run_xdp() in the drivers.
>
> Thanks for working on this!
>
>
> Björn
>
> [1]
> https://lore.kernel.org/netdev/20181207114431.18038-1-bjorn.topel@gmail.com/
>
>
>
>> Sridhar Samudrala (5):
>> xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
>> xsk: Introduce XDP_SKIP_BPF bind option
>> i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
>> ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
>> xdpsock_user: Add skip_bpf option
>>
>> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++-
>> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +++
>> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++-
>> drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 ++++++-
>> include/net/xdp_sock.h | 21 ++++++++-
>> include/uapi/linux/if_xdp.h | 1 +
>> include/uapi/linux/xdp_diag.h | 1 +
>> net/xdp/xdp_umem.c | 9 ++--
>> net/xdp/xsk.c | 43 ++++++++++++++++---
>> net/xdp/xsk_diag.c | 5 ++-
>> samples/bpf/xdpsock_user.c | 8 ++++
>> 11 files changed, 135 insertions(+), 17 deletions(-)
>>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 16:25 ` Samudrala, Sridhar
@ 2019-08-15 17:11 ` Toke Høiland-Jørgensen
2019-08-16 6:12 ` Samudrala, Sridhar
0 siblings, 1 reply; 18+ messages in thread
From: Toke Høiland-Jørgensen @ 2019-08-15 17:11 UTC (permalink / raw)
To: Samudrala, Sridhar, magnus.karlsson, bjorn.topel, netdev, bpf,
intel-wired-lan, maciej.fijalkowski, tom.herbert
"Samudrala, Sridhar" <sridhar.samudrala@intel.com> writes:
> On 8/15/2019 4:12 AM, Toke Høiland-Jørgensen wrote:
>> Sridhar Samudrala <sridhar.samudrala@intel.com> writes:
>>
>>> This patch series introduces XDP_SKIP_BPF flag that can be specified
>>> during the bind() call of an AF_XDP socket to skip calling the BPF
>>> program in the receive path and pass the buffer directly to the socket.
>>>
>>> When a single AF_XDP socket is associated with a queue and a HW
>>> filter is used to redirect the packets and the app is interested in
>>> receiving all the packets on that queue, we don't need an additional
>>> BPF program to do further filtering or lookup/redirect to a socket.
>>>
>>> Here are some performance numbers collected on
>>> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
>>> - Intel 40Gb Ethernet NIC (i40e)
>>>
>>> All tests use 2 cores and the results are in Mpps.
>>>
>>> turbo on (default)
>>> ---------------------------------------------
>>> no-skip-bpf skip-bpf
>>> ---------------------------------------------
>>> rxdrop zerocopy 21.9 38.5
>>> l2fwd zerocopy 17.0 20.5
>>> rxdrop copy 11.1 13.3
>>> l2fwd copy 1.9 2.0
>>>
>>> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>>> ---------------------------------------------
>>> no-skip-bpf skip-bpf
>>> ---------------------------------------------
>>> rxdrop zerocopy 15.4 29.0
>>> l2fwd zerocopy 11.8 18.2
>>> rxdrop copy 8.2 10.5
>>> l2fwd copy 1.7 1.7
>>> ---------------------------------------------
>>
>> You're getting this performance boost by adding more code in the fast
>> path for every XDP program; so what's the performance impact of that for
>> cases where we do run an eBPF program?
>
> The no-skip-bpf results are pretty close to what i see before the
> patches are applied. As umem is cached in rx_ring for zerocopy the
> overhead is much smaller compared to the copy scenario where i am
> currently calling xdp_get_umem_from_qid().
I meant more for other XDP programs; what is the performance impact of
XDP_DROP, for instance?
>> Also, this is basically a special-casing of a particular deployment
>> scenario. Without a way to control RX queue assignment and traffic
>> steering, you're basically hard-coding a particular app's takeover of
>> the network interface; I'm not sure that is such a good idea...
>
> Yes. This is mainly targeted at applications that create 1 AF_XDP
> socket per RX queue and can use a HW filter (via ethtool or TC flower)
> to redirect the packets to a queue or a group of queues.
Yeah, and I'd prefer it if the handling of this to be unified somehow...
-Toke
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
` (6 preceding siblings ...)
2019-08-15 12:51 ` Björn Töpel
@ 2019-08-15 19:28 ` Jakub Kicinski
2019-08-16 6:25 ` Samudrala, Sridhar
7 siblings, 1 reply; 18+ messages in thread
From: Jakub Kicinski @ 2019-08-15 19:28 UTC (permalink / raw)
To: Sridhar Samudrala
Cc: magnus.karlsson, bjorn.topel, netdev, bpf, intel-wired-lan,
maciej.fijalkowski, tom.herbert
On Wed, 14 Aug 2019 20:46:18 -0700, Sridhar Samudrala wrote:
> This patch series introduces XDP_SKIP_BPF flag that can be specified
> during the bind() call of an AF_XDP socket to skip calling the BPF
> program in the receive path and pass the buffer directly to the socket.
>
> When a single AF_XDP socket is associated with a queue and a HW
> filter is used to redirect the packets and the app is interested in
> receiving all the packets on that queue, we don't need an additional
> BPF program to do further filtering or lookup/redirect to a socket.
>
> Here are some performance numbers collected on
> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
> - Intel 40Gb Ethernet NIC (i40e)
>
> All tests use 2 cores and the results are in Mpps.
>
> turbo on (default)
> ---------------------------------------------
> no-skip-bpf skip-bpf
> ---------------------------------------------
> rxdrop zerocopy 21.9 38.5
> l2fwd zerocopy 17.0 20.5
> rxdrop copy 11.1 13.3
> l2fwd copy 1.9 2.0
>
> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
> ---------------------------------------------
> no-skip-bpf skip-bpf
> ---------------------------------------------
> rxdrop zerocopy 15.4 29.0
> l2fwd zerocopy 11.8 18.2
> rxdrop copy 8.2 10.5
> l2fwd copy 1.7 1.7
> ---------------------------------------------
Could you include a third column here - namely the in-XDP performance?
AFAIU the way to achieve better performance with AF_XDP is to move the
fast path into the kernel's XDP program..
Maciej's work on batching XDP program's execution should lower the
retpoline overhead, without leaning close to the bypass model.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 17:11 ` Toke Høiland-Jørgensen
@ 2019-08-16 6:12 ` Samudrala, Sridhar
0 siblings, 0 replies; 18+ messages in thread
From: Samudrala, Sridhar @ 2019-08-16 6:12 UTC (permalink / raw)
To: Toke Høiland-Jørgensen, magnus.karlsson, bjorn.topel,
netdev, bpf, intel-wired-lan, maciej.fijalkowski, tom.herbert
On 8/15/2019 10:11 AM, Toke Høiland-Jørgensen wrote:
> "Samudrala, Sridhar" <sridhar.samudrala@intel.com> writes:
>
>> On 8/15/2019 4:12 AM, Toke Høiland-Jørgensen wrote:
>>> Sridhar Samudrala <sridhar.samudrala@intel.com> writes:
>>>
>>>> This patch series introduces XDP_SKIP_BPF flag that can be specified
>>>> during the bind() call of an AF_XDP socket to skip calling the BPF
>>>> program in the receive path and pass the buffer directly to the socket.
>>>>
>>>> When a single AF_XDP socket is associated with a queue and a HW
>>>> filter is used to redirect the packets and the app is interested in
>>>> receiving all the packets on that queue, we don't need an additional
>>>> BPF program to do further filtering or lookup/redirect to a socket.
>>>>
>>>> Here are some performance numbers collected on
>>>> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
>>>> - Intel 40Gb Ethernet NIC (i40e)
>>>>
>>>> All tests use 2 cores and the results are in Mpps.
>>>>
>>>> turbo on (default)
>>>> ---------------------------------------------
>>>> no-skip-bpf skip-bpf
>>>> ---------------------------------------------
>>>> rxdrop zerocopy 21.9 38.5
>>>> l2fwd zerocopy 17.0 20.5
>>>> rxdrop copy 11.1 13.3
>>>> l2fwd copy 1.9 2.0
>>>>
>>>> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>>>> ---------------------------------------------
>>>> no-skip-bpf skip-bpf
>>>> ---------------------------------------------
>>>> rxdrop zerocopy 15.4 29.0
>>>> l2fwd zerocopy 11.8 18.2
>>>> rxdrop copy 8.2 10.5
>>>> l2fwd copy 1.7 1.7
>>>> ---------------------------------------------
>>>
>>> You're getting this performance boost by adding more code in the fast
>>> path for every XDP program; so what's the performance impact of that for
>>> cases where we do run an eBPF program?
>>
>> The no-skip-bpf results are pretty close to what i see before the
>> patches are applied. As umem is cached in rx_ring for zerocopy the
>> overhead is much smaller compared to the copy scenario where i am
>> currently calling xdp_get_umem_from_qid().
>
> I meant more for other XDP programs; what is the performance impact of
> XDP_DROP, for instance?
Will run xdp1 with and without the patches and include that data with
the next revision.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 19:28 ` Jakub Kicinski
@ 2019-08-16 6:25 ` Samudrala, Sridhar
0 siblings, 0 replies; 18+ messages in thread
From: Samudrala, Sridhar @ 2019-08-16 6:25 UTC (permalink / raw)
To: Jakub Kicinski
Cc: magnus.karlsson, bjorn.topel, netdev, bpf, intel-wired-lan,
maciej.fijalkowski, tom.herbert
On 8/15/2019 12:28 PM, Jakub Kicinski wrote:
> On Wed, 14 Aug 2019 20:46:18 -0700, Sridhar Samudrala wrote:
>> This patch series introduces XDP_SKIP_BPF flag that can be specified
>> during the bind() call of an AF_XDP socket to skip calling the BPF
>> program in the receive path and pass the buffer directly to the socket.
>>
>> When a single AF_XDP socket is associated with a queue and a HW
>> filter is used to redirect the packets and the app is interested in
>> receiving all the packets on that queue, we don't need an additional
>> BPF program to do further filtering or lookup/redirect to a socket.
>>
>> Here are some performance numbers collected on
>> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
>> - Intel 40Gb Ethernet NIC (i40e)
>>
>> All tests use 2 cores and the results are in Mpps.
>>
>> turbo on (default)
>> ---------------------------------------------
>> no-skip-bpf skip-bpf
>> ---------------------------------------------
>> rxdrop zerocopy 21.9 38.5
>> l2fwd zerocopy 17.0 20.5
>> rxdrop copy 11.1 13.3
>> l2fwd copy 1.9 2.0
>>
>> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>> ---------------------------------------------
>> no-skip-bpf skip-bpf
>> ---------------------------------------------
>> rxdrop zerocopy 15.4 29.0
>> l2fwd zerocopy 11.8 18.2
>> rxdrop copy 8.2 10.5
>> l2fwd copy 1.7 1.7
>> ---------------------------------------------
>
> Could you include a third column here - namely the in-XDP performance?
> AFAIU the way to achieve better performance with AF_XDP is to move the
> fast path into the kernel's XDP program..
The in-xdp drop that can be measured with xdp1 is lower than rxdrop
zerocopy with skip-bpf although in-xdp drop uses only 1 core. af-xdp
1-core performance would improve with need-wakeup or busypoll patches
and based on early experiments so far af-xdp with need-wakeup/busypoll +
skip-bpf perf is higher than in-xdp drop.
Will include in-xdp drop data too in the next revision.
>
> Maciej's work on batching XDP program's execution should lower the
> retpoline overhead, without leaning close to the bypass model.
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH bpf-next 3/5] i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
2019-08-15 3:46 ` [PATCH bpf-next 3/5] i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets Sridhar Samudrala
@ 2019-08-16 9:21 ` kbuild test robot
0 siblings, 0 replies; 18+ messages in thread
From: kbuild test robot @ 2019-08-16 9:21 UTC (permalink / raw)
To: Sridhar Samudrala
Cc: kbuild-all, magnus.karlsson, bjorn.topel, netdev, bpf,
sridhar.samudrala, intel-wired-lan, maciej.fijalkowski,
tom.herbert
[-- Attachment #1: Type: text/plain, Size: 6746 bytes --]
Hi Sridhar,
I love your patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
url: https://github.com/0day-ci/linux/commits/Sridhar-Samudrala/xsk-Convert-bool-zc-field-in-struct-xdp_umem-to-a-u32-bitmap/20190816-144642
base: https://kernel.googlesource.com/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-lkp (attached as .config)
compiler: gcc-7 (Debian 7.4.0-10) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
drivers/net/ethernet/intel/i40e/i40e_txrx.c: In function 'i40e_run_xdp':
>> drivers/net/ethernet/intel/i40e/i40e_txrx.c:2215:9: error: implicit declaration of function 'xsk_umem_rcv'; did you mean 'xsk_rcv'? [-Werror=implicit-function-declaration]
err = xsk_umem_rcv(umem, xdp);
^~~~~~~~~~~~
xsk_rcv
drivers/net/ethernet/intel/i40e/i40e_txrx.c: In function 'i40e_finalize_xdp_rx':
>> drivers/net/ethernet/intel/i40e/i40e_txrx.c:2322:4: error: implicit declaration of function 'xsk_umem_flush'; did you mean 'xsk_umem_fq_reuse'? [-Werror=implicit-function-declaration]
xsk_umem_flush(umem);
^~~~~~~~~~~~~~
xsk_umem_fq_reuse
cc1: some warnings being treated as errors
--
drivers/net/ethernet/intel/i40e/i40e_xsk.c: In function 'i40e_run_xdp_zc':
>> drivers/net/ethernet/intel/i40e/i40e_xsk.c:199:9: error: implicit declaration of function 'xsk_umem_rcv'; did you mean 'xsk_rcv'? [-Werror=implicit-function-declaration]
err = xsk_umem_rcv(rx_ring->xsk_umem, xdp);
^~~~~~~~~~~~
xsk_rcv
cc1: some warnings being treated as errors
vim +2215 drivers/net/ethernet/intel/i40e/i40e_txrx.c
2190
2191 /**
2192 * i40e_run_xdp - run an XDP program
2193 * @rx_ring: Rx ring being processed
2194 * @xdp: XDP buffer containing the frame
2195 **/
2196 static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
2197 struct xdp_buff *xdp)
2198 {
2199 int err, result = I40E_XDP_PASS;
2200 struct i40e_ring *xdp_ring;
2201 struct bpf_prog *xdp_prog;
2202 struct xdp_umem *umem;
2203 u32 act;
2204
2205 rcu_read_lock();
2206 xdp_prog = READ_ONCE(rx_ring->xdp_prog);
2207
2208 if (!xdp_prog)
2209 goto xdp_out;
2210
2211 prefetchw(xdp->data_hard_start); /* xdp_frame write */
2212
2213 umem = xdp_get_umem_from_qid(rx_ring->netdev, rx_ring->queue_index);
2214 if (xsk_umem_skip_bpf(umem)) {
> 2215 err = xsk_umem_rcv(umem, xdp);
2216 result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
2217 goto xdp_out;
2218 }
2219
2220 act = bpf_prog_run_xdp(xdp_prog, xdp);
2221 switch (act) {
2222 case XDP_PASS:
2223 break;
2224 case XDP_TX:
2225 xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
2226 result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
2227 break;
2228 case XDP_REDIRECT:
2229 err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
2230 result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
2231 break;
2232 default:
2233 bpf_warn_invalid_xdp_action(act);
2234 /* fall through */
2235 case XDP_ABORTED:
2236 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
2237 /* fall through -- handle aborts by dropping packet */
2238 case XDP_DROP:
2239 result = I40E_XDP_CONSUMED;
2240 break;
2241 }
2242 xdp_out:
2243 rcu_read_unlock();
2244 return ERR_PTR(-result);
2245 }
2246
2247 /**
2248 * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
2249 * @rx_ring: Rx ring
2250 * @rx_buffer: Rx buffer to adjust
2251 * @size: Size of adjustment
2252 **/
2253 static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
2254 struct i40e_rx_buffer *rx_buffer,
2255 unsigned int size)
2256 {
2257 #if (PAGE_SIZE < 8192)
2258 unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
2259
2260 rx_buffer->page_offset ^= truesize;
2261 #else
2262 unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
2263
2264 rx_buffer->page_offset += truesize;
2265 #endif
2266 }
2267
2268 /**
2269 * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
2270 * @xdp_ring: XDP Tx ring
2271 *
2272 * This function updates the XDP Tx ring tail register.
2273 **/
2274 void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
2275 {
2276 /* Force memory writes to complete before letting h/w
2277 * know there are new descriptors to fetch.
2278 */
2279 wmb();
2280 writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
2281 }
2282
2283 /**
2284 * i40e_update_rx_stats - Update Rx ring statistics
2285 * @rx_ring: rx descriptor ring
2286 * @total_rx_bytes: number of bytes received
2287 * @total_rx_packets: number of packets received
2288 *
2289 * This function updates the Rx ring statistics.
2290 **/
2291 void i40e_update_rx_stats(struct i40e_ring *rx_ring,
2292 unsigned int total_rx_bytes,
2293 unsigned int total_rx_packets)
2294 {
2295 u64_stats_update_begin(&rx_ring->syncp);
2296 rx_ring->stats.packets += total_rx_packets;
2297 rx_ring->stats.bytes += total_rx_bytes;
2298 u64_stats_update_end(&rx_ring->syncp);
2299 rx_ring->q_vector->rx.total_packets += total_rx_packets;
2300 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
2301 }
2302
2303 /**
2304 * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
2305 * @rx_ring: Rx ring
2306 * @xdp_res: Result of the receive batch
2307 *
2308 * This function bumps XDP Tx tail and/or flush redirect map, and
2309 * should be called when a batch of packets has been processed in the
2310 * napi loop.
2311 **/
2312 void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
2313 {
2314 if (xdp_res & I40E_XDP_REDIR) {
2315 struct xdp_umem *umem;
2316
2317 umem = rx_ring->xsk_umem;
2318 if (!umem)
2319 umem = xdp_get_umem_from_qid(rx_ring->netdev,
2320 rx_ring->queue_index);
2321 if (xsk_umem_skip_bpf(umem))
> 2322 xsk_umem_flush(umem);
2323 else
2324 xdp_do_flush_map();
2325 }
2326
2327 if (xdp_res & I40E_XDP_TX) {
2328 struct i40e_ring *xdp_ring =
2329 rx_ring->vsi->xdp_rings[rx_ring->queue_index];
2330
2331 i40e_xdp_ring_update_tail(xdp_ring);
2332 }
2333 }
2334
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 27706 bytes --]
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Intel-wired-lan] [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-15 16:46 ` Samudrala, Sridhar
@ 2019-08-16 13:32 ` Björn Töpel
2019-08-16 22:08 ` Jonathan Lemon
0 siblings, 1 reply; 18+ messages in thread
From: Björn Töpel @ 2019-08-16 13:32 UTC (permalink / raw)
To: Samudrala, Sridhar
Cc: Björn Töpel, Karlsson, Magnus, Netdev, bpf,
intel-wired-lan, maciej.fijalkowski, tom.herbert
On Thu, 15 Aug 2019 at 18:46, Samudrala, Sridhar
<sridhar.samudrala@intel.com> wrote:
>
> On 8/15/2019 5:51 AM, Björn Töpel wrote:
> > On 2019-08-15 05:46, Sridhar Samudrala wrote:
> >> This patch series introduces XDP_SKIP_BPF flag that can be specified
> >> during the bind() call of an AF_XDP socket to skip calling the BPF
> >> program in the receive path and pass the buffer directly to the socket.
> >>
> >> When a single AF_XDP socket is associated with a queue and a HW
> >> filter is used to redirect the packets and the app is interested in
> >> receiving all the packets on that queue, we don't need an additional
> >> BPF program to do further filtering or lookup/redirect to a socket.
> >>
> >> Here are some performance numbers collected on
> >> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
> >> - Intel 40Gb Ethernet NIC (i40e)
> >>
> >> All tests use 2 cores and the results are in Mpps.
> >>
> >> turbo on (default)
> >> ---------------------------------------------
> >> no-skip-bpf skip-bpf
> >> ---------------------------------------------
> >> rxdrop zerocopy 21.9 38.5
> >> l2fwd zerocopy 17.0 20.5
> >> rxdrop copy 11.1 13.3
> >> l2fwd copy 1.9 2.0
> >>
> >> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
> >> ---------------------------------------------
> >> no-skip-bpf skip-bpf
> >> ---------------------------------------------
> >> rxdrop zerocopy 15.4 29.0
> >> l2fwd zerocopy 11.8 18.2
> >> rxdrop copy 8.2 10.5
> >> l2fwd copy 1.7 1.7
> >> ---------------------------------------------
> >>
> >
> > This work is somewhat similar to the XDP_ATTACH work [1]. Avoiding the
> > retpoline in the XDP program call is a nice performance boost! I like
> > the numbers! :-) I also like the idea of adding a flag that just does
> > what most AF_XDP Rx users want -- just getting all packets of a
> > certain queue into the XDP sockets.
> >
> > In addition to Toke's mail, I have some more concerns with the series:
> >
> > * AFAIU the SKIP_BPF only works for zero-copy enabled sockets. IMO, it
> > should work for all modes (including XDP_SKB).
>
> This patch enables SKIP_BPF for AF_XDP sockets where an XDP program is
> attached at driver level (both zerocopy and copy modes)
> I tried a quick hack to see the perf benefit with generic XDP mode, but
> i didn't see any significant improvement in performance in that
> scenario. so i didn't include that mode.
>
> >
> > * In order to work, a user still needs an XDP program running. That's
> > clunky. I'd like the behavior that if no XDP program is attached,
> > and the option is set, the packets for that queue end up in the
> > socket. If there's an XDP program attached, the program has
> > precedence.
>
> I think this would require more changes in the drivers to take XDP
> datapath even when there is no XDP program loaded.
>
Today, from a driver perspective, to enable XDP you pass a struct
bpf_prog pointer via the ndo_bpf. The program get executed in
BPF_PROG_RUN (via bpf_prog_run_xdp) from include/linux/filter.h.
I think it's possible to achieve what you're doing w/o *any* driver
modification. Pass a special, invalid, pointer to the driver (say
(void *)0x1 or something more elegant), which has special handling in
BPF_PROG_RUN, e.g. setting a per-cpu state and returning XDP_REDIRECT. The
per-cpu state is picked up in xdp_do_redirect and xdp_flush.
An approach like this would be general, and apply to all modes
automatically.
Thoughts?
> >
> > * It requires changes in all drivers. Not nice, and scales badly. Try
> > making it generic (xdp_do_redirect/xdp_flush), so it Just Works for
> > all XDP capable drivers.
>
> I tried to make this as generic as possible and make the changes to the
> driver very minimal, but could not find a way to avoid any changes at
> all to the driver. xdp_do_redirect() gets called after the call to
> bpf_prog_run_xdp() in the drivers.
>
> >
> > Thanks for working on this!
> >
> >
> > Björn
> >
> > [1]
> > https://lore.kernel.org/netdev/20181207114431.18038-1-bjorn.topel@gmail.com/
> >
> >
> >
> >> Sridhar Samudrala (5):
> >> xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
> >> xsk: Introduce XDP_SKIP_BPF bind option
> >> i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
> >> ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
> >> xdpsock_user: Add skip_bpf option
> >>
> >> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++-
> >> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +++
> >> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++-
> >> drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 ++++++-
> >> include/net/xdp_sock.h | 21 ++++++++-
> >> include/uapi/linux/if_xdp.h | 1 +
> >> include/uapi/linux/xdp_diag.h | 1 +
> >> net/xdp/xdp_umem.c | 9 ++--
> >> net/xdp/xsk.c | 43 ++++++++++++++++---
> >> net/xdp/xsk_diag.c | 5 ++-
> >> samples/bpf/xdpsock_user.c | 8 ++++
> >> 11 files changed, 135 insertions(+), 17 deletions(-)
> >>
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Intel-wired-lan] [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-16 13:32 ` [Intel-wired-lan] " Björn Töpel
@ 2019-08-16 22:08 ` Jonathan Lemon
2019-08-19 7:39 ` Björn Töpel
0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Lemon @ 2019-08-16 22:08 UTC (permalink / raw)
To: Björn Töpel
Cc: Samudrala, Sridhar, Björn Töpel, Karlsson, Magnus,
Netdev, bpf, intel-wired-lan, maciej.fijalkowski, tom.herbert
On 16 Aug 2019, at 6:32, Björn Töpel wrote:
> On Thu, 15 Aug 2019 at 18:46, Samudrala, Sridhar
> <sridhar.samudrala@intel.com> wrote:
>>
>> On 8/15/2019 5:51 AM, Björn Töpel wrote:
>>> On 2019-08-15 05:46, Sridhar Samudrala wrote:
>>>> This patch series introduces XDP_SKIP_BPF flag that can be
>>>> specified
>>>> during the bind() call of an AF_XDP socket to skip calling the BPF
>>>> program in the receive path and pass the buffer directly to the
>>>> socket.
>>>>
>>>> When a single AF_XDP socket is associated with a queue and a HW
>>>> filter is used to redirect the packets and the app is interested in
>>>> receiving all the packets on that queue, we don't need an
>>>> additional
>>>> BPF program to do further filtering or lookup/redirect to a socket.
>>>>
>>>> Here are some performance numbers collected on
>>>> - 2 socket 28 core Intel(R) Xeon(R) Platinum 8180 CPU @ 2.50GHz
>>>> - Intel 40Gb Ethernet NIC (i40e)
>>>>
>>>> All tests use 2 cores and the results are in Mpps.
>>>>
>>>> turbo on (default)
>>>> ---------------------------------------------
>>>> no-skip-bpf skip-bpf
>>>> ---------------------------------------------
>>>> rxdrop zerocopy 21.9 38.5
>>>> l2fwd zerocopy 17.0 20.5
>>>> rxdrop copy 11.1 13.3
>>>> l2fwd copy 1.9 2.0
>>>>
>>>> no turbo : echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
>>>> ---------------------------------------------
>>>> no-skip-bpf skip-bpf
>>>> ---------------------------------------------
>>>> rxdrop zerocopy 15.4 29.0
>>>> l2fwd zerocopy 11.8 18.2
>>>> rxdrop copy 8.2 10.5
>>>> l2fwd copy 1.7 1.7
>>>> ---------------------------------------------
>>>>
>>>
>>> This work is somewhat similar to the XDP_ATTACH work [1]. Avoiding
>>> the
>>> retpoline in the XDP program call is a nice performance boost! I
>>> like
>>> the numbers! :-) I also like the idea of adding a flag that just
>>> does
>>> what most AF_XDP Rx users want -- just getting all packets of a
>>> certain queue into the XDP sockets.
>>>
>>> In addition to Toke's mail, I have some more concerns with the
>>> series:
>>>
>>> * AFAIU the SKIP_BPF only works for zero-copy enabled sockets. IMO,
>>> it
>>> should work for all modes (including XDP_SKB).
>>
>> This patch enables SKIP_BPF for AF_XDP sockets where an XDP program
>> is
>> attached at driver level (both zerocopy and copy modes)
>> I tried a quick hack to see the perf benefit with generic XDP mode,
>> but
>> i didn't see any significant improvement in performance in that
>> scenario. so i didn't include that mode.
>>
>>>
>>> * In order to work, a user still needs an XDP program running.
>>> That's
>>> clunky. I'd like the behavior that if no XDP program is attached,
>>> and the option is set, the packets for that queue end up in the
>>> socket. If there's an XDP program attached, the program has
>>> precedence.
>>
>> I think this would require more changes in the drivers to take XDP
>> datapath even when there is no XDP program loaded.
>>
>
> Today, from a driver perspective, to enable XDP you pass a struct
> bpf_prog pointer via the ndo_bpf. The program gets executed in
> BPF_PROG_RUN (via bpf_prog_run_xdp) from include/linux/filter.h.
>
> I think it's possible to achieve what you're doing w/o *any* driver
> modification. Pass a special, invalid, pointer to the driver (say
> (void *)0x1 or smth more elegant), which has a special handling in
> BPF_PROG_RUN, e.g. setting a per-cpu state and returning XDP_REDIRECT. The
> per-cpu state is picked up in xdp_do_redirect and xdp_flush.
>
> An approach like this would be general, and apply to all modes
> automatically.
>
> Thoughts?
All the default program does is check that the map entry contains an xsk,
and call bpf_redirect_map(). So this is pretty much the same as above,
without any special case handling.
Why would this be so expensive? Is the JIT compilation time being
counted?
--
Jonathan
>
>>>
>>> * It requires changes in all drivers. Not nice, and scales badly.
>>> Try
>>> making it generic (xdp_do_redirect/xdp_flush), so it Just Works
>>> for
>>> all XDP capable drivers.
>>
>> I tried to make this as generic as possible and make the changes to
>> the
>> driver very minimal, but could not find a way to avoid any changes at
>> all to the driver. xdp_do_redirect() gets called after the call
>> to
>> bpf_prog_run_xdp() in the drivers.
>>
>>>
>>> Thanks for working on this!
>>>
>>>
>>> Björn
>>>
>>> [1]
>>> https://lore.kernel.org/netdev/20181207114431.18038-1-bjorn.topel@gmail.com/
>>>
>>>
>>>
>>>> Sridhar Samudrala (5):
>>>> xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap
>>>> xsk: Introduce XDP_SKIP_BPF bind option
>>>> i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets
>>>> ixgbe: Enable XDP_SKIP_BPF option for AF_XDP sockets
>>>> xdpsock_user: Add skip_bpf option
>>>>
>>>> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 22 +++++++++-
>>>> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 6 +++
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++-
>>>> drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 16 ++++++-
>>>> include/net/xdp_sock.h | 21 ++++++++-
>>>> include/uapi/linux/if_xdp.h | 1 +
>>>> include/uapi/linux/xdp_diag.h | 1 +
>>>> net/xdp/xdp_umem.c | 9 ++--
>>>> net/xdp/xsk.c | 43
>>>> ++++++++++++++++---
>>>> net/xdp/xsk_diag.c | 5 ++-
>>>> samples/bpf/xdpsock_user.c | 8 ++++
>>>> 11 files changed, 135 insertions(+), 17 deletions(-)
>>>>
>> _______________________________________________
>> Intel-wired-lan mailing list
>> Intel-wired-lan@osuosl.org
>> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [Intel-wired-lan] [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets
2019-08-16 22:08 ` Jonathan Lemon
@ 2019-08-19 7:39 ` Björn Töpel
0 siblings, 0 replies; 18+ messages in thread
From: Björn Töpel @ 2019-08-19 7:39 UTC (permalink / raw)
To: Jonathan Lemon
Cc: Samudrala, Sridhar, Björn Töpel, Karlsson, Magnus,
Netdev, bpf, intel-wired-lan, maciej.fijalkowski, tom.herbert
On Sat, 17 Aug 2019 at 00:08, Jonathan Lemon <jonathan.lemon@gmail.com> wrote:
> On 16 Aug 2019, at 6:32, Björn Töpel wrote:
[...]
> >
> > Today, from a driver perspective, to enable XDP you pass a struct
> > bpf_prog pointer via the ndo_bpf. The program gets executed in
> > BPF_PROG_RUN (via bpf_prog_run_xdp) from include/linux/filter.h.
> >
> > I think it's possible to achieve what you're doing w/o *any* driver
> > modification. Pass a special, invalid, pointer to the driver (say
> > (void *)0x1 or smth more elegant), which has a special handling in
> > BPF_PROG_RUN, e.g. setting a per-cpu state and returning XDP_REDIRECT. The
> > per-cpu state is picked up in xdp_do_redirect and xdp_flush.
> >
> > An approach like this would be general, and apply to all modes
> > automatically.
> >
> > Thoughts?
>
> All the default program does is check that the map entry contains an xsk,
> and call bpf_redirect_map(). So this is pretty much the same as above,
> without any special case handling.
>
> Why would this be so expensive? Is the JIT compilation time being
> counted?
No, not the JIT compilation time, only the fast-path. The gain is from
removing the indirect call (hitting a retpoline) when calling the XDP
program, and reducing code from xdp_do_redirect/xdp_flush.
But, as Jakub pointed out, the XDP batching work by Maciej, might
reduce the retpoline impact quite a bit.
Björn
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2019-08-19 7:39 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-15 3:46 [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 1/5] xsk: Convert bool 'zc' field in struct xdp_umem to a u32 bitmap Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 2/5] xsk: Introduce XDP_SKIP_BPF bind option Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 3/5] i40e: Enable XDP_SKIP_BPF option for AF_XDP sockets Sridhar Samudrala
2019-08-16 9:21 ` kbuild test robot
2019-08-15 3:46 ` [PATCH bpf-next 4/5] ixgbe: " Sridhar Samudrala
2019-08-15 3:46 ` [PATCH bpf-next 5/5] xdpsock_user: Add skip_bpf option Sridhar Samudrala
2019-08-15 11:12 ` [PATCH bpf-next 0/5] Add support for SKIP_BPF flag for AF_XDP sockets Toke Høiland-Jørgensen
2019-08-15 16:25 ` Samudrala, Sridhar
2019-08-15 17:11 ` Toke Høiland-Jørgensen
2019-08-16 6:12 ` Samudrala, Sridhar
2019-08-15 12:51 ` Björn Töpel
2019-08-15 16:46 ` Samudrala, Sridhar
2019-08-16 13:32 ` [Intel-wired-lan] " Björn Töpel
2019-08-16 22:08 ` Jonathan Lemon
2019-08-19 7:39 ` Björn Töpel
2019-08-15 19:28 ` Jakub Kicinski
2019-08-16 6:25 ` Samudrala, Sridhar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).