From: Gerhard Engleder <gerhard@engleder-embedded.com>
To: netdev@vger.kernel.org, bpf@vger.kernel.org
Cc: davem@davemloft.net, kuba@kernel.org, edumazet@google.com,
pabeni@redhat.com, bjorn@kernel.org, magnus.karlsson@intel.com,
maciej.fijalkowski@intel.com, jonathan.lemon@gmail.com,
Gerhard Engleder <gerhard@engleder-embedded.com>
Subject: [PATCH net-next v2 6/6] tsnep: Add XDP socket zero-copy TX support
Date: Sat, 15 Apr 2023 16:42:56 +0200 [thread overview]
Message-ID: <20230415144256.27884-7-gerhard@engleder-embedded.com> (raw)
In-Reply-To: <20230415144256.27884-1-gerhard@engleder-embedded.com>
Send and complete XSK pool frames within TX NAPI context. NAPI context
is triggered by ndo_xsk_wakeup.
Test results with A53 1.2GHz:
xdpsock txonly copy mode:
pps pkts 1.00
tx 284,409 11,398,144
Two CPUs with 100% and 10% utilization.
xdpsock txonly zero-copy mode:
pps pkts 1.00
tx 511,929 5,890,368
Two CPUs with 100% and 1% utilization.
Packet rate increases and CPU utilization is reduced.
Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
---
drivers/net/ethernet/engleder/tsnep.h | 2 +
drivers/net/ethernet/engleder/tsnep_main.c | 131 +++++++++++++++++++--
2 files changed, 123 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/engleder/tsnep.h b/drivers/net/ethernet/engleder/tsnep.h
index d0bea605a1d1..11b29f56aaf9 100644
--- a/drivers/net/ethernet/engleder/tsnep.h
+++ b/drivers/net/ethernet/engleder/tsnep.h
@@ -70,6 +70,7 @@ struct tsnep_tx_entry {
union {
struct sk_buff *skb;
struct xdp_frame *xdpf;
+ bool zc;
};
size_t len;
DEFINE_DMA_UNMAP_ADDR(dma);
@@ -88,6 +89,7 @@ struct tsnep_tx {
int read;
u32 owner_counter;
int increment_owner_counter;
+ struct xsk_buff_pool *xsk_pool;
u32 packets;
u32 bytes;
diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 13e5d4438082..de51d0cc8935 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -54,6 +54,8 @@
#define TSNEP_TX_TYPE_SKB_FRAG BIT(1)
#define TSNEP_TX_TYPE_XDP_TX BIT(2)
#define TSNEP_TX_TYPE_XDP_NDO BIT(3)
+#define TSNEP_TX_TYPE_XDP (TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO)
+#define TSNEP_TX_TYPE_XSK BIT(4)
#define TSNEP_XDP_TX BIT(0)
#define TSNEP_XDP_REDIRECT BIT(1)
@@ -322,13 +324,51 @@ static void tsnep_tx_init(struct tsnep_tx *tx)
tx->increment_owner_counter = TSNEP_RING_SIZE - 1;
}
+static void tsnep_tx_enable(struct tsnep_tx *tx)
+{
+ struct netdev_queue *nq;
+
+ nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
+
+ local_bh_disable();
+ __netif_tx_lock(nq, smp_processor_id());
+ netif_tx_wake_queue(nq);
+ __netif_tx_unlock(nq);
+ local_bh_enable();
+}
+
+static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
+{
+ struct netdev_queue *nq;
+ u32 val;
+
+ nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
+
+ local_bh_disable();
+ __netif_tx_lock(nq, smp_processor_id());
+ netif_tx_stop_queue(nq);
+ __netif_tx_unlock(nq);
+ local_bh_enable();
+
+ /* wait until TX is done in hardware */
+ readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
+ ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
+ 1000000);
+
+ /* wait until TX is also done in software */
+ while (READ_ONCE(tx->read) != tx->write) {
+ napi_schedule(napi);
+ napi_synchronize(napi);
+ }
+}
+
static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
bool last)
{
struct tsnep_tx_entry *entry = &tx->entry[index];
entry->properties = 0;
- /* xdpf is union with skb */
+ /* xdpf and zc are union with skb */
if (entry->skb) {
entry->properties = length & TSNEP_DESC_LENGTH_MASK;
entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
@@ -646,10 +686,69 @@ static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
return xmit;
}
+static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
+{
+ struct tsnep_tx_entry *entry;
+ dma_addr_t dma;
+
+ entry = &tx->entry[tx->write];
+ entry->zc = true;
+
+ dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
+ xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);
+
+ entry->type = TSNEP_TX_TYPE_XSK;
+ entry->len = xdpd->len;
+
+ entry->desc->tx = __cpu_to_le64(dma);
+
+ return xdpd->len;
+}
+
+static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
+ struct tsnep_tx *tx)
+{
+ int length;
+
+ length = tsnep_xdp_tx_map_zc(xdpd, tx);
+
+ tsnep_tx_activate(tx, tx->write, length, true);
+ tx->write = (tx->write + 1) & TSNEP_RING_MASK;
+}
+
+static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
+{
+ int desc_available = tsnep_tx_desc_available(tx);
+ struct xdp_desc *descs = tx->xsk_pool->tx_descs;
+ int batch, i;
+
+ /* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
+ * will be available for normal TX path and queue is stopped there if
+ * necessary
+ */
+ if (desc_available <= (MAX_SKB_FRAGS + 1))
+ return;
+ desc_available -= MAX_SKB_FRAGS + 1;
+
+ batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
+ for (i = 0; i < batch; i++)
+ tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);
+
+ if (batch) {
+ /* descriptor properties shall be valid before hardware is
+ * notified
+ */
+ dma_wmb();
+
+ tsnep_xdp_xmit_flush(tx);
+ }
+}
+
static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
{
struct tsnep_tx_entry *entry;
struct netdev_queue *nq;
+ int xsk_frames = 0;
int budget = 128;
int length;
int count;
@@ -676,7 +775,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
if ((entry->type & TSNEP_TX_TYPE_SKB) &&
skb_shinfo(entry->skb)->nr_frags > 0)
count += skb_shinfo(entry->skb)->nr_frags;
- else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
+ else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
xdp_frame_has_frags(entry->xdpf))
count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
@@ -705,9 +804,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
if (entry->type & TSNEP_TX_TYPE_SKB)
napi_consume_skb(entry->skb, napi_budget);
- else
+ else if (entry->type & TSNEP_TX_TYPE_XDP)
xdp_return_frame_rx_napi(entry->xdpf);
- /* xdpf is union with skb */
+ else
+ xsk_frames++;
+ /* xdpf and zc are union with skb */
entry->skb = NULL;
tx->read = (tx->read + count) & TSNEP_RING_MASK;
@@ -718,6 +819,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
budget--;
} while (likely(budget));
+ if (tx->xsk_pool) {
+ if (xsk_frames)
+ xsk_tx_completed(tx->xsk_pool, xsk_frames);
+ if (xsk_uses_need_wakeup(tx->xsk_pool))
+ xsk_set_tx_need_wakeup(tx->xsk_pool);
+ tsnep_xdp_xmit_zc(tx);
+ }
+
if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
netif_tx_queue_stopped(nq)) {
netif_tx_wake_queue(nq);
@@ -765,12 +874,6 @@ static int tsnep_tx_open(struct tsnep_tx *tx)
static void tsnep_tx_close(struct tsnep_tx *tx)
{
- u32 val;
-
- readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
- ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
- 1000000);
-
tsnep_tx_ring_cleanup(tx);
}
@@ -1783,12 +1886,18 @@ static void tsnep_queue_enable(struct tsnep_queue *queue)
napi_enable(&queue->napi);
tsnep_enable_irq(queue->adapter, queue->irq_mask);
+ if (queue->tx)
+ tsnep_tx_enable(queue->tx);
+
if (queue->rx)
tsnep_rx_enable(queue->rx);
}
static void tsnep_queue_disable(struct tsnep_queue *queue)
{
+ if (queue->tx)
+ tsnep_tx_disable(queue->tx, &queue->napi);
+
napi_disable(&queue->napi);
tsnep_disable_irq(queue->adapter, queue->irq_mask);
@@ -1905,6 +2014,7 @@ int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
if (running)
tsnep_queue_disable(queue);
+ queue->tx->xsk_pool = pool;
queue->rx->xsk_pool = pool;
if (running) {
@@ -1925,6 +2035,7 @@ void tsnep_disable_xsk(struct tsnep_queue *queue)
tsnep_rx_free_zc(queue->rx);
queue->rx->xsk_pool = NULL;
+ queue->tx->xsk_pool = NULL;
if (running) {
tsnep_rx_reopen(queue->rx);
--
2.30.2
next prev parent reply other threads:[~2023-04-15 14:43 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-15 14:42 [PATCH net-next v2 0/6] tsnep: XDP socket zero-copy support Gerhard Engleder
2023-04-15 14:42 ` [PATCH net-next v2 1/6] tsnep: Replace modulo operation with mask Gerhard Engleder
2023-04-15 14:42 ` [PATCH net-next v2 2/6] tsnep: Rework TX/RX queue initialization Gerhard Engleder
2023-04-15 14:42 ` [PATCH net-next v2 3/6] tsnep: Add functions for queue enable/disable Gerhard Engleder
2023-04-15 14:42 ` [PATCH net-next v2 4/6] tsnep: Move skb receive action to separate function Gerhard Engleder
2023-04-15 14:42 ` [PATCH net-next v2 5/6] tsnep: Add XDP socket zero-copy RX support Gerhard Engleder
2023-04-18 8:22 ` Paolo Abeni
2023-04-18 18:41 ` Gerhard Engleder
2023-04-15 14:42 ` Gerhard Engleder [this message]
2023-04-18 8:27 ` [PATCH net-next v2 6/6] tsnep: Add XDP socket zero-copy TX support Paolo Abeni
2023-04-18 18:25 ` Gerhard Engleder
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230415144256.27884-7-gerhard@engleder-embedded.com \
--to=gerhard@engleder-embedded.com \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=jonathan.lemon@gmail.com \
--cc=kuba@kernel.org \
--cc=maciej.fijalkowski@intel.com \
--cc=magnus.karlsson@intel.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).