From: "Björn Töpel" <bjorn.topel@gmail.com>
To: bjorn.topel@gmail.com, magnus.karlsson@intel.com,
alexander.h.duyck@intel.com, alexander.duyck@gmail.com,
john.fastabend@gmail.com, ast@fb.com, brouer@redhat.com,
willemdebruijn.kernel@gmail.com, daniel@iogearbox.net,
netdev@vger.kernel.org
Cc: "Björn Töpel" <bjorn.topel@intel.com>,
michael.lundkvist@ericsson.com, jesse.brandeburg@intel.com,
anjali.singhai@intel.com, qi.z.zhang@intel.com,
ravineet.singh@ericsson.com
Subject: [RFC PATCH v2 06/14] xsk: add Rx receive functions and poll support
Date: Tue, 27 Mar 2018 18:59:11 +0200 [thread overview]
Message-ID: <20180327165919.17933-7-bjorn.topel@gmail.com> (raw)
In-Reply-To: <20180327165919.17933-1-bjorn.topel@gmail.com>
From: Björn Töpel <bjorn.topel@intel.com>
Here the actual receive functions of AF_XDP are implemented; in a later
commit they will be called from the XDP layers.
There's one set of functions for the XDP_DRV side and another for
XDP_SKB (generic).
Support for the poll syscall is also implemented.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
net/xdp/xdp_umem.h | 18 +++++
net/xdp/xsk.c | 81 ++++++++++++++++++++-
net/xdp/xsk_queue.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 304 insertions(+), 1 deletion(-)
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index ad041b911b38..5e7105b7760b 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -36,6 +36,24 @@ struct xdp_umem {
struct user_struct *user;
};
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx)
+{
+ u64 pg, off;
+ char *data;
+
+ pg = idx >> umem->nfpplog2;
+ off = (idx - (pg << umem->nfpplog2)) << umem->frame_size_log2;
+
+ data = page_address(umem->pgs[pg]);
+ return data + off;
+}
+
+static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
+ u32 idx)
+{
+ return xdp_umem_get_data(umem, idx) + umem->frame_headroom;
+}
+
bool xdp_umem_validate_queues(struct xdp_umem *umem);
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d99a1b830f94..a60b1fcfb2b3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -35,10 +35,14 @@
#include "xsk_queue.h"
#include "xdp_umem.h"
+#define RX_BATCH_SIZE 16
+
struct xdp_sock {
/* struct sock must be the first member of struct xdp_sock */
struct sock sk;
struct xsk_queue *rx;
+ struct xskq_iter rx_it;
+ u64 rx_dropped;
struct net_device *dev;
/* Protects multiple processes in the control path */
struct mutex mutex;
@@ -52,6 +56,74 @@ static struct xdp_sock *xdp_sk(struct sock *sk)
return (struct xdp_sock *)sk;
}
+static inline int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ u32 len = xdp->data_end - xdp->data;
+ void *buffer;
+ int err = 0;
+ u32 id;
+
+ if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+ return -EINVAL;
+
+ if (!xskq_next_frame_deq(xs->umem->fq, &xs->rx_it, RX_BATCH_SIZE))
+ return -ENOSPC;
+
+ id = xdp_umem_get_id(xs->umem->fq, &xs->rx_it);
+ buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
+ memcpy(buffer, xdp->data, len);
+ err = xskq_rxtx_enq_frame(xs->rx, id, len, xs->umem->frame_headroom);
+ if (err)
+ xskq_deq_return_frame(&xs->rx_it);
+
+ return err;
+}
+
+int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ int err;
+
+ err = __xsk_rcv(xs, xdp);
+ if (!err)
+ page_frag_free(xdp->data);
+ else
+ xs->rx_dropped++;
+
+ return err;
+}
+
+void xsk_flush(struct xdp_sock *xs)
+{
+ xskq_enq_flush(xs->rx);
+ xs->sk.sk_data_ready(&xs->sk);
+}
+
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+{
+ int err;
+
+ err = __xsk_rcv(xs, xdp);
+ if (!err)
+ xsk_flush(xs);
+ else
+ xs->rx_dropped++;
+
+ return err;
+}
+
+static unsigned int xsk_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
+{
+ unsigned int mask = datagram_poll(file, sock, wait);
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ if (xs->rx && !xskq_empty(xs->rx))
+ mask |= POLLIN | POLLRDNORM;
+
+ return mask;
+}
+
static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
bool umem_queue)
{
@@ -190,6 +262,9 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
err = -EINVAL;
goto out_unlock;
+ } else {
+ /* This xsk has its own umem. */
+ xskq_set_umem(xs->umem->fq, &xs->umem->props);
}
/* Rebind? */
@@ -204,6 +279,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->ifindex = sxdp->sxdp_ifindex;
xs->queue_id = sxdp->sxdp_queue_id;
+ xskq_init_iter(&xs->rx_it);
+
+ xskq_set_umem(xs->rx, &xs->umem->props);
+
out_unlock:
if (err)
dev_put(dev);
@@ -340,7 +419,7 @@ static const struct proto_ops xsk_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = sock_no_poll,
+ .poll = xsk_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index d79b613a9e0a..af6e651f1207 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -37,6 +37,187 @@ struct xsk_queue {
u64 invalid_descs;
};
+struct xskq_iter {
+ u32 head;
+ u32 tail;
+ struct xdp_desc desc_copy;
+};
+
+/* Common functions operating on both RXTX and umem queues */
+
+static inline bool xskq_is_valid_rx_entry(struct xsk_queue *q,
+ u32 idx)
+{
+ if (unlikely(idx >= q->umem_props->nframes)) {
+ q->invalid_descs++;
+ return false;
+ }
+ return true;
+}
+
+static inline bool xskq_is_valid_tx_entry(struct xsk_queue *q,
+ struct xdp_desc *d)
+{
+ u32 buff_len;
+
+ if (unlikely(d->idx >= q->umem_props->nframes)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ buff_len = q->umem_props->frame_size;
+ if (unlikely(d->len > buff_len || d->len == 0 ||
+ d->offset > buff_len || d->offset + d->len > buff_len)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+}
+
+static inline u32 xskq_nb_free(struct xsk_queue *q, u32 head_idx, u32 dcnt)
+{
+ u32 free_entries = q->nentries - (head_idx - q->cached_tail);
+
+ if (free_entries >= dcnt)
+ return free_entries;
+
+ /* Refresh the local tail pointer */
+ q->cached_tail = READ_ONCE(q->ring->tail_idx);
+ return q->nentries - (head_idx - q->cached_tail);
+}
+
+static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
+{
+ u32 entries = q->cached_head - q->cached_tail;
+
+ if (entries == 0)
+ /* Refresh the local head pointer */
+ q->cached_head = READ_ONCE(q->ring->head_idx);
+
+ entries = q->cached_head - q->cached_tail;
+ return (entries > dcnt) ? dcnt : entries;
+}
+
+static inline bool xskq_empty(struct xsk_queue *q)
+{
+ if (xskq_nb_free(q, q->cached_head, 1) == q->nentries)
+ return true;
+ return false;
+}
+
+static inline bool xskq_full(struct xsk_queue *q)
+{
+ if (xskq_nb_avail(q, q->nentries) == q->nentries)
+ return true;
+ return false;
+}
+
+static inline void xskq_init_iter(struct xskq_iter *it)
+{
+ it->head = 0;
+ it->tail = 0;
+}
+
+static inline void xskq_set_umem(struct xsk_queue *q,
+ struct xdp_umem_props *umem_props)
+{
+ q->umem_props = umem_props;
+}
+
+static inline bool xskq_iter_end(struct xskq_iter *it)
+{
+ return it->tail == it->head;
+}
+
+static inline void xskq_iter_validate(struct xsk_queue *q,
+ struct xskq_iter *it)
+{
+ while (!xskq_iter_end(it)) {
+ unsigned int idx = it->tail & q->ring_mask;
+
+ if (q->validation == XSK_VALIDATION_TX) {
+ struct xdp_rxtx_queue *ring =
+ (struct xdp_rxtx_queue *)q->ring;
+
+ it->desc_copy.idx = ring->desc[idx].idx;
+ it->desc_copy.len = ring->desc[idx].len;
+ it->desc_copy.offset = ring->desc[idx].offset;
+
+ if (xskq_is_valid_tx_entry(q, &it->desc_copy))
+ break;
+ } else {
+ /* XSK_VALIDATION_RX */
+ struct xdp_umem_queue *ring =
+ (struct xdp_umem_queue *)q->ring;
+
+ if (xskq_is_valid_rx_entry(q, ring->desc[idx]))
+ break;
+ }
+
+ it->tail++;
+ }
+}
+
+static inline void xskq_deq_iter(struct xsk_queue *q,
+ struct xskq_iter *it, int cnt)
+{
+ it->tail = q->cached_tail;
+ it->head = q->cached_tail + xskq_nb_avail(q, cnt);
+
+ /* Order tail and data */
+ smp_rmb();
+
+ xskq_iter_validate(q, it);
+}
+
+static inline void xskq_deq_iter_next(struct xsk_queue *q,
+ struct xskq_iter *it)
+{
+ it->tail++;
+ xskq_iter_validate(q, it);
+}
+
+static inline void xskq_deq_iter_done(struct xsk_queue *q,
+ struct xskq_iter *it)
+{
+ q->cached_tail = it->tail;
+ WRITE_ONCE(q->ring->tail_idx, it->tail);
+}
+
+static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
+{
+ return q ? q->invalid_descs : 0;
+}
+
+static inline bool xskq_next_frame_deq(struct xsk_queue *q,
+ struct xskq_iter *it,
+ u32 batch_size)
+{
+ if (xskq_iter_end(it)) {
+ xskq_deq_iter_done(q, it);
+ xskq_deq_iter(q, it, batch_size);
+ return !xskq_iter_end(it);
+ }
+
+ xskq_deq_iter_next(q, it);
+ return !xskq_iter_end(it);
+}
+
+static inline void xskq_deq_return_frame(struct xskq_iter *it)
+{
+ it->tail--;
+}
+
+static inline void xskq_enq_flush(struct xsk_queue *q)
+{
+ /* Order descriptor writes before the head index update */
+ smp_wmb();
+
+ WRITE_ONCE(q->ring->head_idx, q->iter_head_idx);
+ q->cached_head = q->iter_head_idx;
+}
+
/* Functions operating on RXTX queues only */
static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
@@ -45,6 +226,23 @@ static inline u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
q->nentries * sizeof(struct xdp_desc));
}
+static inline int xskq_rxtx_enq_frame(struct xsk_queue *q,
+ u32 id, u32 len, u16 offset)
+{
+ struct xdp_rxtx_queue *ring = (struct xdp_rxtx_queue *)q->ring;
+ unsigned int idx;
+
+ if (xskq_nb_free(q, q->iter_head_idx, 1) == 0)
+ return -ENOSPC;
+
+ idx = (q->iter_head_idx++) & q->ring_mask;
+ ring->desc[idx].idx = id;
+ ring->desc[idx].len = len;
+ ring->desc[idx].offset = offset;
+
+ return 0;
+}
+
/* Functions operating on UMEM queues only */
static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
@@ -52,6 +250,14 @@ static inline u32 xskq_umem_get_ring_size(struct xsk_queue *q)
return sizeof(struct xdp_umem_queue) + q->nentries * sizeof(u32);
}
+static inline u32 xdp_umem_get_id(struct xsk_queue *q,
+ struct xskq_iter *it)
+{
+ struct xdp_umem_queue *ring = (struct xdp_umem_queue *)q->ring;
+
+ return ring->desc[it->tail & q->ring_mask];
+}
+
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);
--
2.14.1
next prev parent reply other threads:[~2018-03-27 16:59 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-27 16:59 [RFC PATCH v2 00/14] Introducing AF_XDP support Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 01/14] net: initial AF_XDP skeleton Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 02/14] xsk: add user memory registration support sockopt Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 03/14] xsk: add umem fill queue support and mmap Björn Töpel
2018-04-12 2:15 ` Michael S. Tsirkin
2018-04-12 7:38 ` Karlsson, Magnus
2018-04-12 8:54 ` Jesper Dangaard Brouer
2018-04-12 14:04 ` Michael S. Tsirkin
2018-04-12 15:19 ` Karlsson, Magnus
2018-04-23 10:26 ` Karlsson, Magnus
2018-03-27 16:59 ` [RFC PATCH v2 04/14] xsk: add Rx queue setup and mmap support Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 05/14] xsk: add support for bind for Rx Björn Töpel
2018-03-27 16:59 ` Björn Töpel [this message]
2018-03-27 16:59 ` [RFC PATCH v2 07/14] bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 08/14] xsk: wire up XDP_DRV side of AF_XDP Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 09/14] xsk: wire up XDP_SKB " Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 10/14] xsk: add umem completion queue support and mmap Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 11/14] xsk: add Tx queue setup and mmap support Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 12/14] xsk: support for Tx Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 13/14] xsk: statistics support Björn Töpel
2018-03-27 16:59 ` [RFC PATCH v2 14/14] samples/bpf: sample application for AF_XDP sockets Björn Töpel
2018-04-12 11:05 ` Jesper Dangaard Brouer
2018-04-12 11:08 ` Karlsson, Magnus
2018-03-28 21:18 ` [RFC PATCH v2 00/14] Introducing AF_XDP support Eric Leblond
2018-03-29 6:16 ` Björn Töpel
2018-03-29 15:36 ` Jesper Dangaard Brouer
2018-04-09 21:51 ` William Tu
2018-04-10 6:47 ` Björn Töpel
2018-04-10 14:14 ` William Tu
2018-04-11 12:17 ` Björn Töpel
2018-04-11 18:43 ` Alexei Starovoitov
2018-04-12 14:14 ` Björn Töpel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180327165919.17933-7-bjorn.topel@gmail.com \
--to=bjorn.topel@gmail.com \
--cc=alexander.duyck@gmail.com \
--cc=alexander.h.duyck@intel.com \
--cc=anjali.singhai@intel.com \
--cc=ast@fb.com \
--cc=bjorn.topel@intel.com \
--cc=brouer@redhat.com \
--cc=daniel@iogearbox.net \
--cc=jesse.brandeburg@intel.com \
--cc=john.fastabend@gmail.com \
--cc=magnus.karlsson@intel.com \
--cc=michael.lundkvist@ericsson.com \
--cc=netdev@vger.kernel.org \
--cc=qi.z.zhang@intel.com \
--cc=ravineet.singh@ericsson.com \
--cc=willemdebruijn.kernel@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).