All of lore.kernel.org
 help / color / mirror / Atom feed
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Cc: Jason Wang <jasowang@redhat.com>,
	Dmitry Fleytman <dmitry.fleytman@gmail.com>,
	Thomas Huth <thuth@redhat.com>,
	Laurent Vivier <lvivier@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	qemu-devel@nongnu.org, qemu-ppc@nongnu.org,
	"Michael S . Tsirkin" <mst@redhat.com>,
	Yan Vugenfirer <yvugenfi@redhat.com>,
	Yuri Benditovich <yuri.benditovich@daynix.com>,
	Sriram Yagnaraman <sriram.yagnaraman@est.tech>,
	Alexander Bulekov <alxndr@bu.edu>,
	Akihiko Odaki <akihiko.odaki@daynix.com>
Subject: [PATCH v5 23/29] e1000e: Perform software segmentation for loopback
Date: Wed,  1 Feb 2023 12:35:33 +0900	[thread overview]
Message-ID: <20230201033539.30049-24-akihiko.odaki@daynix.com> (raw)
In-Reply-To: <20230201033539.30049-1-akihiko.odaki@daynix.com>

e1000e didn't perform software segmentation for loopback if virtio-net
header is enabled, which is wrong.

To fix the problem, introduce net_tx_pkt_send_custom(), which allows the
caller to specify whether offloading should be assumed or not.

net_tx_pkt_send_custom() also allows the caller to provide a custom
sending function. Packets with virtio-net headers and ones without
virtio-net headers will be provided at the same time so the function
can choose the preferred version. In case of e1000e loopback, it prefers
to have virtio-net headers as they allows to skip the checksum
verification if VIRTIO_NET_HDR_F_DATA_VALID is set.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 hw/net/e1000e_core.c | 27 ++++++++++++++--
 hw/net/net_rx_pkt.c  |  7 ++++
 hw/net/net_rx_pkt.h  |  8 +++++
 hw/net/net_tx_pkt.c  | 76 +++++++++++++++++++++-----------------------
 hw/net/net_tx_pkt.h  | 21 ++++++------
 5 files changed, 88 insertions(+), 51 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 95245c42f5..ff93547f88 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -61,6 +61,10 @@ union e1000_rx_desc_union {
     union e1000_rx_desc_packet_split packet_split;
 };
 
+static ssize_t
+e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
+                        bool has_vnet);
+
 static inline void
 e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val);
 
@@ -655,6 +659,15 @@ e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx)
     return true;
 }
 
+static void e1000e_tx_pkt_callback(void *core,
+                                   const struct iovec *iov,
+                                   int iovcnt,
+                                   const struct iovec *virt_iov,
+                                   int virt_iovcnt)
+{
+    e1000e_receive_internal(core, virt_iov, virt_iovcnt, true);
+}
+
 static bool
 e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index)
 {
@@ -669,7 +682,8 @@ e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index)
 
     if ((core->phy[0][MII_BMCR] & MII_BMCR_LOOPBACK) ||
         ((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) {
-        return net_tx_pkt_send_loopback(tx->tx_pkt, queue);
+        return net_tx_pkt_send_custom(tx->tx_pkt, false,
+                                      e1000e_tx_pkt_callback, core);
     } else {
         return net_tx_pkt_send(tx->tx_pkt, queue);
     }
@@ -1674,6 +1688,13 @@ e1000e_rx_fix_l4_csum(E1000ECore *core, struct NetRxPkt *pkt)
 
 ssize_t
 e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
+{
+    return e1000e_receive_internal(core, iov, iovcnt, core->has_vnet);
+}
+
+static ssize_t
+e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
+                        bool has_vnet)
 {
     static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
 
@@ -1696,9 +1717,11 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
     }
 
     /* Pull virtio header in */
-    if (core->has_vnet) {
+    if (has_vnet) {
         net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt);
         iov_ofs = sizeof(struct virtio_net_hdr);
+    } else {
+        net_rx_pkt_unset_vhdr(core->rx_pkt);
     }
 
     filter_buf = iov->iov_base + iov_ofs;
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
index b309c2f476..a53e7561c5 100644
--- a/hw/net/net_rx_pkt.c
+++ b/hw/net/net_rx_pkt.c
@@ -463,6 +463,13 @@ void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
     iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
 }
 
+void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt)
+{
+    assert(pkt);
+
+    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
+}
+
 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
 {
     assert(pkt);
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
index 7277907a22..8b69ddb2da 100644
--- a/hw/net/net_rx_pkt.h
+++ b/hw/net/net_rx_pkt.h
@@ -312,6 +312,14 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
     const struct iovec *iov, int iovcnt);
 
+/**
+ * unset vhdr data from packet context
+ *
+ * @pkt:            packet
+ *
+ */
+void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt);
+
 /**
  * save packet type in packet context
  *
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index cf46c8457f..6afd3f6693 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -53,8 +53,6 @@ struct NetTxPkt {
     uint16_t hdr_len;
     eth_pkt_types_e packet_type;
     uint8_t l4proto;
-
-    bool is_loopback;
 };
 
 void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
@@ -508,12 +506,6 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
     iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
 }
 
-enum {
-    NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0,
-    NET_TX_PKT_FRAGMENT_L3_HDR_POS,
-    NET_TX_PKT_FRAGMENT_HEADER_NUM
-};
-
 #define NET_MAX_FRAG_SG_LIST (64)
 
 static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
@@ -522,7 +514,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
     size_t fetched = 0;
     struct iovec *src = pkt->vec;
 
-    *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM;
+    *dst_idx = NET_TX_PKT_PL_START_FRAG;
 
     while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) {
 
@@ -555,18 +547,22 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
     return fetched;
 }
 
-static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt,
-    NetClientState *nc, const struct iovec *iov, int iov_cnt)
+static void net_tx_pkt_sendv(
+    void *opaque, const struct iovec *iov, int iov_cnt,
+    const struct iovec *virt_iov, int virt_iov_cnt)
 {
-    if (pkt->is_loopback) {
-        qemu_receive_packet_iov(nc, iov, iov_cnt);
+    NetClientState *nc = opaque;
+
+    if (qemu_get_using_vnet_hdr(nc->peer)) {
+        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
     } else {
         qemu_sendv_packet(nc, iov, iov_cnt);
     }
 }
 
 static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
-    NetClientState *nc)
+                                           NetTxPktCallback callback,
+                                           void *context)
 {
     struct iovec fragment[NET_MAX_FRAG_SG_LIST];
     size_t fragment_len = 0;
@@ -578,6 +574,10 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
     int src_idx =  NET_TX_PKT_PL_START_FRAG, dst_idx;
     size_t src_offset = 0;
     size_t fragment_offset = 0;
+    struct virtio_net_hdr virt_hdr = {
+        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
+                 VIRTIO_NET_HDR_F_DATA_VALID : 0
+    };
 
     l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base;
     l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len;
@@ -585,10 +585,12 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
     l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
 
     /* Copy headers */
-    fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base;
-    fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len;
-    fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base;
-    fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;
+    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
+    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
+    fragment[NET_TX_PKT_L2HDR_FRAG].iov_base = l2_iov_base;
+    fragment[NET_TX_PKT_L2HDR_FRAG].iov_len = l2_iov_len;
+    fragment[NET_TX_PKT_L3HDR_FRAG].iov_base = l3_iov_base;
+    fragment[NET_TX_PKT_L3HDR_FRAG].iov_len = l3_iov_len;
 
 
     /* Put as much data as possible and send */
@@ -603,7 +605,9 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
 
         eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
 
-        net_tx_pkt_sendv(pkt, nc, fragment, dst_idx);
+        callback(context,
+                 fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
+                 fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);
 
         fragment_offset += fragment_len;
 
@@ -614,12 +618,16 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
 
 bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
 {
-    bool using_vnet_hdr = qemu_get_using_vnet_hdr(nc->peer);
+    bool offload = qemu_get_using_vnet_hdr(nc->peer);
+    return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
+}
 
+bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
+                            NetTxPktCallback callback, void *context)
+{
     assert(pkt);
 
-    if (!using_vnet_hdr &&
-        pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+    if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
         net_tx_pkt_do_sw_csum(pkt);
     }
 
@@ -635,28 +643,16 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
         }
     }
 
-    if (using_vnet_hdr ||
-        pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
-        int index = using_vnet_hdr ?
-                    NET_TX_PKT_VHDR_FRAG : NET_TX_PKT_L2HDR_FRAG;
+    if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         net_tx_pkt_fix_ip6_payload_len(pkt);
-        net_tx_pkt_sendv(pkt, nc, pkt->vec + index,
-            pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - index);
+        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
+                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
+                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
+                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);
         return true;
     }
 
-    return net_tx_pkt_do_sw_fragmentation(pkt, nc);
-}
-
-bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc)
-{
-    bool res;
-
-    pkt->is_loopback = true;
-    res = net_tx_pkt_send(pkt, nc);
-    pkt->is_loopback = false;
-
-    return res;
+    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
 }
 
 void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index 8d3faa42fb..f57b4e034b 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -26,6 +26,8 @@
 
 struct NetTxPkt;
 
+typedef void (* NetTxPktCallback)(void *, const struct iovec *, int, const struct iovec *, int);
+
 /**
  * Init function for tx packet functionality
  *
@@ -161,15 +163,16 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt);
 bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);
 
 /**
-* Redirect packet directly to receive path (emulate loopback phy).
-* Handles sw offloads if vhdr is not supported.
-*
-* @pkt:            packet
-* @nc:             NetClientState
-* @ret:            operation result
-*
-*/
-bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc);
+ * Send packet with a custom function.
+ *
+ * @pkt:            packet
+ * @offload:        whether the callback implements offloading
+ * @callback:       a function to be called back for each transformed packet
+ * @context:        a pointer to be passed to the callback.
+ * @ret:            operation result
+ */
+bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
+                            NetTxPktCallback callback, void *context);
 
 /**
  * parse raw packet data and analyze offload requirements.
-- 
2.39.1



  parent reply	other threads:[~2023-02-01  3:39 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-01  3:35 [PATCH v5 00/29] e1000x cleanups (preliminary for IGB) Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 01/29] e1000e: Fix the code style Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 02/29] hw/net: Add more MII definitions Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 03/29] fsl_etsec: Use hw/net/mii.h Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 04/29] e1000: " Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 05/29] e1000: Mask registers when writing Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 06/29] e1000e: " Akihiko Odaki
2023-02-21  8:28   ` Philippe Mathieu-Daudé
2023-02-01  3:35 ` [PATCH v5 07/29] e1000: Use more constant definitions Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 08/29] e1000e: " Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 09/29] e1000: Use memcpy to intialize registers Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 10/29] e1000e: " Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 11/29] e1000e: Remove pending interrupt flags Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 12/29] e1000e: Improve software reset Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 13/29] e1000: Configure ResettableClass Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 14/29] e1000e: " Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 15/29] e1000e: Introduce e1000_rx_desc_union Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 16/29] e1000e: Set MII_ANER_NWAY Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 17/29] e1000e: Remove extra pointer indirection Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 18/29] net: Check L4 header size Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 19/29] e1000x: Alter the signature of e1000x_is_vlan_packet Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 20/29] net: Strip virtio-net header when dumping Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 21/29] hw/net/net_tx_pkt: Automatically determine if virtio-net header is used Akihiko Odaki
2023-02-21  3:38   ` Jason Wang
2023-02-22 22:04     ` Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 22/29] hw/net/net_rx_pkt: Remove net_rx_pkt_has_virt_hdr Akihiko Odaki
2023-02-01  3:35 ` Akihiko Odaki [this message]
2023-02-01  3:35 ` [PATCH v5 24/29] hw/net/net_tx_pkt: Implement TCP segmentation Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 25/29] hw/net/net_tx_pkt: Check the payload length Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 26/29] e1000e: Do not assert when MSI-X is disabled later Akihiko Odaki
2023-02-01  3:35 ` [PATCH v5 27/29] MAINTAINERS: Add Akihiko Odaki as a e1000e reviewer Akihiko Odaki
2023-02-21  8:17   ` Philippe Mathieu-Daudé
2023-02-01  3:35 ` [PATCH v5 28/29] MAINTAINERS: Add e1000e test files Akihiko Odaki
2023-02-21  8:17   ` Philippe Mathieu-Daudé
2023-02-01  3:35 ` [PATCH v5 29/29] e1000e: Combine rx traces Akihiko Odaki
2023-02-21  8:18   ` Philippe Mathieu-Daudé

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230201033539.30049-24-akihiko.odaki@daynix.com \
    --to=akihiko.odaki@daynix.com \
    --cc=alxndr@bu.edu \
    --cc=dmitry.fleytman@gmail.com \
    --cc=jasowang@redhat.com \
    --cc=lvivier@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=sriram.yagnaraman@est.tech \
    --cc=thuth@redhat.com \
    --cc=yuri.benditovich@daynix.com \
    --cc=yvugenfi@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.