* [Qemu-devel] [RFC PATCH v2 07/10] net_pkt: Name vmxnet3 packet abstractions more generic
2016-01-18 17:35 [Qemu-devel] [RFC PATCH v2 00/10] Introduce Intel 82574 GbE Controller Emulation (e1000e) Leonid Bloch
` (5 preceding siblings ...)
2016-01-18 17:35 ` [Qemu-devel] [RFC PATCH v2 06/10] net: Add macros for ETH address tracing Leonid Bloch
@ 2016-01-18 17:35 ` Leonid Bloch
2016-01-18 17:35 ` [Qemu-devel] [RFC PATCH v2 08/10] net_pkt: Extend packet abstraction as requied by e1000e functionality Leonid Bloch
` (3 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Leonid Bloch @ 2016-01-18 17:35 UTC (permalink / raw)
To: qemu-devel
Cc: Dmitry Fleytman, Jason Wang, Leonid Bloch, Shmulik Ladkani,
Michael S. Tsirkin
From: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
This patch drops "vmx" prefix from packet abstractions names
to emphasize the fact they are generic and not tied to any
specific network device.
These abstractions will be reused by e1000e emulation implementation
introduced by following patches so their names need generalization.
This patch (except renamed files, adjusted comments and changes in MAINTAINERS)
was produced by:
git grep -lz 'vmxnet_tx_pkt' | xargs -0 perl -i'' -pE "s/vmxnet_tx_pkt/net_tx_pkt/g"
git grep -lz 'vmxnet_rx_pkt' | xargs -0 perl -i'' -pE "s/vmxnet_rx_pkt/net_rx_pkt/g"
git grep -lz 'VmxnetTxPkt' | xargs -0 perl -i'' -pE "s/VmxnetTxPkt/NetTxPkt/g"
git grep -lz 'VMXNET_TX_PKT' | xargs -0 perl -i'' -pE "s/VMXNET_TX_PKT/NET_TX_PKT/g"
git grep -lz 'VmxnetRxPkt' | xargs -0 perl -i'' -pE "s/VmxnetRxPkt/NetRxPkt/g"
git grep -lz 'VMXNET_RX_PKT' | xargs -0 perl -i'' -pE "s/VMXNET_RX_PKT/NET_RX_PKT/g"
sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_rx_pkt.c
sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_tx_pkt.c
Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
---
MAINTAINERS | 8 +
hw/net/Makefile.objs | 2 +-
hw/net/net_rx_pkt.c | 187 ++++++++++++++++
hw/net/net_rx_pkt.h | 174 +++++++++++++++
hw/net/net_tx_pkt.c | 567 +++++++++++++++++++++++++++++++++++++++++++++++++
hw/net/net_tx_pkt.h | 148 +++++++++++++
hw/net/vmxnet3.c | 80 +++----
hw/net/vmxnet_rx_pkt.c | 187 ----------------
hw/net/vmxnet_rx_pkt.h | 174 ---------------
hw/net/vmxnet_tx_pkt.c | 567 -------------------------------------------------
hw/net/vmxnet_tx_pkt.h | 148 -------------
tests/Makefile | 4 +-
12 files changed, 1127 insertions(+), 1119 deletions(-)
create mode 100644 hw/net/net_rx_pkt.c
create mode 100644 hw/net/net_rx_pkt.h
create mode 100644 hw/net/net_tx_pkt.c
create mode 100644 hw/net/net_tx_pkt.h
delete mode 100644 hw/net/vmxnet_rx_pkt.c
delete mode 100644 hw/net/vmxnet_rx_pkt.h
delete mode 100644 hw/net/vmxnet_tx_pkt.c
delete mode 100644 hw/net/vmxnet_tx_pkt.h
diff --git a/MAINTAINERS b/MAINTAINERS
index d7e9ba2..cf93a45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -748,6 +748,14 @@ S: Maintained
F: hw/*/xilinx_*
F: include/hw/xilinx.h
+Network packet abstractions
+M: Dmitry Fleytman <dmitry@daynix.com>
+S: Maintained
+F: include/net/eth.h
+F: net/eth.c
+F: hw/net/net_rx_pkt*
+F: hw/net/net_tx_pkt*
+
Vmware
M: Dmitry Fleytman <dmitry@daynix.com>
S: Maintained
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index ea93293..34039fc 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -8,7 +8,7 @@ common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
common-obj-$(CONFIG_E1000_PCI) += e1000.o
common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
-common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet_tx_pkt.o vmxnet_rx_pkt.o
+common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o
common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o
common-obj-$(CONFIG_SMC91C111) += smc91c111.o
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
new file mode 100644
index 0000000..f4c929f
--- /dev/null
+++ b/hw/net/net_rx_pkt.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU RX packets abstractions
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Tamir Shomer <tamirs@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "net_rx_pkt.h"
+#include "net/eth.h"
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+
+/*
+ * RX packet may contain up to 2 fragments - rebuilt eth header
+ * in case of VLAN tag stripping
+ * and payload received from QEMU - in any case
+ */
+#define NET_MAX_RX_PACKET_FRAGMENTS (2)
+
+struct NetRxPkt {
+ struct virtio_net_hdr virt_hdr;
+ uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN];
+ struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS];
+ uint16_t vec_len;
+ uint32_t tot_len;
+ uint16_t tci;
+ bool vlan_stripped;
+ bool has_virt_hdr;
+ eth_pkt_types_e packet_type;
+
+ /* Analysis results */
+ bool isip4;
+ bool isip6;
+ bool isudp;
+ bool istcp;
+};
+
+void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
+{
+ struct NetRxPkt *p = g_malloc0(sizeof *p);
+ p->has_virt_hdr = has_virt_hdr;
+ *pkt = p;
+}
+
+void net_rx_pkt_uninit(struct NetRxPkt *pkt)
+{
+ g_free(pkt);
+}
+
+struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+ return &pkt->virt_hdr;
+}
+
+void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
+ size_t len, bool strip_vlan)
+{
+ uint16_t tci = 0;
+ uint16_t ploff;
+ assert(pkt);
+ pkt->vlan_stripped = false;
+
+ if (strip_vlan) {
+ pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci);
+ }
+
+ if (pkt->vlan_stripped) {
+ pkt->vec[0].iov_base = pkt->ehdr_buf;
+ pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header);
+ pkt->vec[1].iov_base = (uint8_t *) data + ploff;
+ pkt->vec[1].iov_len = len - ploff;
+ pkt->vec_len = 2;
+ pkt->tot_len = len - ploff + sizeof(struct eth_header);
+ } else {
+ pkt->vec[0].iov_base = (void *)data;
+ pkt->vec[0].iov_len = len;
+ pkt->vec_len = 1;
+ pkt->tot_len = len;
+ }
+
+ pkt->tci = tci;
+
+ eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6,
+ &pkt->isudp, &pkt->istcp);
+}
+
+void net_rx_pkt_dump(struct NetRxPkt *pkt)
+{
+#ifdef NET_RX_PKT_DEBUG
+ NetRxPkt *pkt = (NetRxPkt *)pkt;
+ assert(pkt);
+
+ printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n",
+ pkt->tot_len, pkt->vlan_stripped, pkt->tci);
+#endif
+}
+
+void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
+ eth_pkt_types_e packet_type)
+{
+ assert(pkt);
+
+ pkt->packet_type = packet_type;
+
+}
+
+eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->packet_type;
+}
+
+size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->tot_len;
+}
+
+void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
+ bool *isip4, bool *isip6,
+ bool *isudp, bool *istcp)
+{
+ assert(pkt);
+
+ *isip4 = pkt->isip4;
+ *isip6 = pkt->isip6;
+ *isudp = pkt->isudp;
+ *istcp = pkt->istcp;
+}
+
+struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->vec;
+}
+
+void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
+ struct virtio_net_hdr *vhdr)
+{
+ assert(pkt);
+
+ memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
+}
+
+bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->vlan_stripped;
+}
+
+bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->has_virt_hdr;
+}
+
+uint16_t net_rx_pkt_get_num_frags(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->vec_len;
+}
+
+uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->tci;
+}
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
new file mode 100644
index 0000000..1e4accf
--- /dev/null
+++ b/hw/net/net_rx_pkt.h
@@ -0,0 +1,174 @@
+/*
+ * QEMU RX packets abstraction
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Tamir Shomer <tamirs@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef NET_RX_PKT_H
+#define NET_RX_PKT_H
+
+#include "stdint.h"
+#include "stdbool.h"
+#include "net/eth.h"
+
+/* defines to enable packet dump functions */
+/*#define NET_RX_PKT_DEBUG*/
+
+struct NetRxPkt;
+
+/**
+ * Clean all rx packet resources
+ *
+ * @pkt: packet
+ *
+ */
+void net_rx_pkt_uninit(struct NetRxPkt *pkt);
+
+/**
+ * Init function for rx packet functionality
+ *
+ * @pkt: packet pointer
+ * @has_virt_hdr: device uses virtio header
+ *
+ */
+void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr);
+
+/**
+ * returns total length of data attached to rx context
+ *
+ * @pkt: packet
+ *
+ * Return: nothing
+ *
+ */
+size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt);
+
+/**
+ * fetches packet analysis results
+ *
+ * @pkt: packet
+ * @isip4: whether the packet given is IPv4
+ * @isip6: whether the packet given is IPv6
+ * @isudp: whether the packet given is UDP
+ * @istcp: whether the packet given is TCP
+ *
+ */
+void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
+ bool *isip4, bool *isip6,
+ bool *isudp, bool *istcp);
+
+/**
+ * returns virtio header stored in rx context
+ *
+ * @pkt: packet
+ * @ret: virtio header
+ *
+ */
+struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt);
+
+/**
+ * returns packet type
+ *
+ * @pkt: packet
+ * @ret: packet type
+ *
+ */
+eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt);
+
+/**
+ * returns vlan tag
+ *
+ * @pkt: packet
+ * @ret: VLAN tag
+ *
+ */
+uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt);
+
+/**
+ * tells whether vlan was stripped from the packet
+ *
+ * @pkt: packet
+ * @ret: VLAN stripped sign
+ *
+ */
+bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt);
+
+/**
+ * notifies caller if the packet has virtio header
+ *
+ * @pkt: packet
+ * @ret: true if packet has virtio header, false otherwize
+ *
+ */
+bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt);
+
+/**
+ * returns number of frags attached to the packet
+ *
+ * @pkt: packet
+ * @ret: number of frags
+ *
+ */
+uint16_t net_rx_pkt_get_num_frags(struct NetRxPkt *pkt);
+
+/**
+ * attach data to rx packet
+ *
+ * @pkt: packet
+ * @data: pointer to the data buffer
+ * @len: data length
+ * @strip_vlan: should the module strip vlan from data
+ *
+ */
+void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
+ size_t len, bool strip_vlan);
+
+/**
+ * returns io vector that holds the attached data
+ *
+ * @pkt: packet
+ * @ret: pointer to IOVec
+ *
+ */
+struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt);
+
+/**
+ * prints rx packet data if debug is enabled
+ *
+ * @pkt: packet
+ *
+ */
+void net_rx_pkt_dump(struct NetRxPkt *pkt);
+
+/**
+ * copy passed vhdr data to packet context
+ *
+ * @pkt: packet
+ * @vhdr: VHDR buffer
+ *
+ */
+void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
+ struct virtio_net_hdr *vhdr);
+
+/**
+ * save packet type in packet context
+ *
+ * @pkt: packet
+ * @packet_type: the packet type
+ *
+ */
+void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
+ eth_pkt_types_e packet_type);
+
+#endif
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
new file mode 100644
index 0000000..a2c1a76
--- /dev/null
+++ b/hw/net/net_tx_pkt.c
@@ -0,0 +1,567 @@
+/*
+ * QEMU TX packets abstractions
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Tamir Shomer <tamirs@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw/hw.h"
+#include "net_tx_pkt.h"
+#include "net/eth.h"
+#include "qemu-common.h"
+#include "qemu/iov.h"
+#include "net/checksum.h"
+#include "net/tap.h"
+#include "net/net.h"
+
+enum {
+ NET_TX_PKT_VHDR_FRAG = 0,
+ NET_TX_PKT_L2HDR_FRAG,
+ NET_TX_PKT_L3HDR_FRAG,
+ NET_TX_PKT_PL_START_FRAG
+};
+
+/* TX packet private context */
+struct NetTxPkt {
+ struct virtio_net_hdr virt_hdr;
+ bool has_virt_hdr;
+
+ struct iovec *raw;
+ uint32_t raw_frags;
+ uint32_t max_raw_frags;
+
+ struct iovec *vec;
+
+ uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
+
+ uint32_t payload_len;
+
+ uint32_t payload_frags;
+ uint32_t max_payload_frags;
+
+ uint16_t hdr_len;
+ eth_pkt_types_e packet_type;
+ uint8_t l4proto;
+};
+
+void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags,
+ bool has_virt_hdr)
+{
+ struct NetTxPkt *p = g_malloc0(sizeof *p);
+
+ p->vec = g_malloc((sizeof *p->vec) *
+ (max_frags + NET_TX_PKT_PL_START_FRAG));
+
+ p->raw = g_malloc((sizeof *p->raw) * max_frags);
+
+ p->max_payload_frags = max_frags;
+ p->max_raw_frags = max_frags;
+ p->has_virt_hdr = has_virt_hdr;
+ p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
+ p->vec[NET_TX_PKT_VHDR_FRAG].iov_len =
+ p->has_virt_hdr ? sizeof p->virt_hdr : 0;
+ p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
+ p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL;
+ p->vec[NET_TX_PKT_L3HDR_FRAG].iov_len = 0;
+
+ *pkt = p;
+}
+
+void net_tx_pkt_uninit(struct NetTxPkt *pkt)
+{
+ if (pkt) {
+ g_free(pkt->vec);
+ g_free(pkt->raw);
+ g_free(pkt);
+ }
+}
+
+void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
+{
+ uint16_t csum;
+ uint32_t ph_raw_csum;
+ assert(pkt);
+ uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+ struct ip_header *ip_hdr;
+
+ if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type &&
+ VIRTIO_NET_HDR_GSO_UDP != gso_type) {
+ return;
+ }
+
+ ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
+
+ if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
+ ETH_MAX_IP_DGRAM_LEN) {
+ return;
+ }
+
+ ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
+
+ /* Calculate IP header checksum */
+ ip_hdr->ip_sum = 0;
+ csum = net_raw_checksum((uint8_t *)ip_hdr,
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
+ ip_hdr->ip_sum = cpu_to_be16(csum);
+
+ /* Calculate IP pseudo header checksum */
+ ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len);
+ csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum));
+ iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
+ pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
+}
+
+static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
+{
+ pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
+}
+
+static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
+{
+ struct iovec *l2_hdr, *l3_hdr;
+ size_t bytes_read;
+ size_t full_ip6hdr_len;
+ uint16_t l3_proto;
+
+ assert(pkt);
+
+ l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
+ l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];
+
+ bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
+ ETH_MAX_L2_HDR_LEN);
+ if (bytes_read < ETH_MAX_L2_HDR_LEN) {
+ l2_hdr->iov_len = 0;
+ return false;
+ } else {
+ l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
+ }
+
+ l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len);
+
+ switch (l3_proto) {
+ case ETH_P_IP:
+ l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN);
+
+ bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
+ l3_hdr->iov_base, sizeof(struct ip_header));
+
+ if (bytes_read < sizeof(struct ip_header)) {
+ l3_hdr->iov_len = 0;
+ return false;
+ }
+
+ l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);
+ pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p;
+
+ /* copy optional IPv4 header data */
+ bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
+ l2_hdr->iov_len + sizeof(struct ip_header),
+ l3_hdr->iov_base + sizeof(struct ip_header),
+ l3_hdr->iov_len - sizeof(struct ip_header));
+ if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
+ l3_hdr->iov_len = 0;
+ return false;
+ }
+ break;
+
+ case ETH_P_IPV6:
+ if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
+ &pkt->l4proto, &full_ip6hdr_len)) {
+ l3_hdr->iov_len = 0;
+ return false;
+ }
+
+ l3_hdr->iov_base = g_malloc(full_ip6hdr_len);
+
+ bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
+ l3_hdr->iov_base, full_ip6hdr_len);
+
+ if (bytes_read < full_ip6hdr_len) {
+ l3_hdr->iov_len = 0;
+ return false;
+ } else {
+ l3_hdr->iov_len = full_ip6hdr_len;
+ }
+ break;
+
+ default:
+ l3_hdr->iov_len = 0;
+ break;
+ }
+
+ net_tx_pkt_calculate_hdr_len(pkt);
+ pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
+ return true;
+}
+
+static bool net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
+{
+ size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
+
+ pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
+ pkt->max_payload_frags,
+ pkt->raw, pkt->raw_frags,
+ pkt->hdr_len, payload_len);
+
+ if (pkt->payload_frags != (uint32_t) -1) {
+ pkt->payload_len = payload_len;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool net_tx_pkt_parse(struct NetTxPkt *pkt)
+{
+ return net_tx_pkt_parse_headers(pkt) &&
+ net_tx_pkt_rebuild_payload(pkt);
+}
+
+struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
+{
+ assert(pkt);
+ return &pkt->virt_hdr;
+}
+
+static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
+ bool tso_enable)
+{
+ uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
+ uint16_t l3_proto;
+
+ l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
+ pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);
+
+ if (!tso_enable) {
+ goto func_exit;
+ }
+
+ rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
+ pkt->l4proto);
+
+func_exit:
+ return rc;
+}
+
+void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
+ bool csum_enable, uint32_t gso_size)
+{
+ struct tcp_hdr l4hdr;
+ assert(pkt);
+
+ /* csum has to be enabled if tso is. */
+ assert(csum_enable || !tso_enable);
+
+ pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);
+
+ switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_NONE:
+ pkt->virt_hdr.hdr_len = 0;
+ pkt->virt_hdr.gso_size = 0;
+ break;
+
+ case VIRTIO_NET_HDR_GSO_UDP:
+ pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
+ pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
+ break;
+
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
+ 0, &l4hdr, sizeof(l4hdr));
+ pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
+ pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ if (csum_enable) {
+ switch (pkt->l4proto) {
+ case IP_PROTO_TCP:
+ pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ pkt->virt_hdr.csum_start = pkt->hdr_len;
+ pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
+ break;
+ case IP_PROTO_UDP:
+ pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ pkt->virt_hdr.csum_start = pkt->hdr_len;
+ pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan)
+{
+ bool is_new;
+ assert(pkt);
+
+ eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
+ vlan, &is_new);
+
+ /* update l2hdrlen */
+ if (is_new) {
+ pkt->hdr_len += sizeof(struct vlan_header);
+ pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +=
+ sizeof(struct vlan_header);
+ }
+}
+
+bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
+ size_t len)
+{
+ hwaddr mapped_len = 0;
+ struct iovec *ventry;
+ assert(pkt);
+ assert(pkt->max_raw_frags > pkt->raw_frags);
+
+ if (!len) {
+ return true;
+ }
+
+ ventry = &pkt->raw[pkt->raw_frags];
+ mapped_len = len;
+
+ ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false);
+ ventry->iov_len = mapped_len;
+ pkt->raw_frags += !!ventry->iov_base;
+
+ if ((ventry->iov_base == NULL) || (len != mapped_len)) {
+ return false;
+ }
+
+ return true;
+}
+
+eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->packet_type;
+}
+
+size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->hdr_len + pkt->payload_len;
+}
+
+void net_tx_pkt_dump(struct NetTxPkt *pkt)
+{
+#ifdef NET_TX_PKT_DEBUG
+ assert(pkt);
+
+ printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, "
+ "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
+ pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
+#endif
+}
+
+void net_tx_pkt_reset(struct NetTxPkt *pkt)
+{
+ int i;
+
+ /* no assert, as reset can be called before tx_pkt_init */
+ if (!pkt) {
+ return;
+ }
+
+ memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
+
+ g_free(pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base);
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL;
+
+ assert(pkt->vec);
+ for (i = NET_TX_PKT_L2HDR_FRAG;
+ i < pkt->payload_frags + NET_TX_PKT_PL_START_FRAG; i++) {
+ pkt->vec[i].iov_len = 0;
+ }
+ pkt->payload_len = 0;
+ pkt->payload_frags = 0;
+
+ assert(pkt->raw);
+ for (i = 0; i < pkt->raw_frags; i++) {
+ assert(pkt->raw[i].iov_base);
+ cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len,
+ false, pkt->raw[i].iov_len);
+ pkt->raw[i].iov_len = 0;
+ }
+ pkt->raw_frags = 0;
+
+ pkt->hdr_len = 0;
+ pkt->packet_type = 0;
+ pkt->l4proto = 0;
+}
+
+static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
+{
+ struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
+ uint32_t csum_cntr;
+ uint16_t csum = 0;
+ /* num of iovec without vhdr */
+ uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1;
+ uint16_t csl;
+ struct ip_header *iphdr;
+ size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
+
+ /* Put zero to checksum field */
+ iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
+
+ /* Calculate L4 TCP/UDP checksum */
+ csl = pkt->payload_len;
+
+ /* data checksum */
+ csum_cntr =
+ net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl);
+ /* add pseudo header to csum */
+ iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
+ csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl);
+
+ /* Put the checksum obtained into the packet */
+ csum = cpu_to_be16(net_checksum_finish(csum_cntr));
+ iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
+}
+
+enum {
+ NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0,
+ NET_TX_PKT_FRAGMENT_L3_HDR_POS,
+ NET_TX_PKT_FRAGMENT_HEADER_NUM
+};
+
+#define NET_MAX_FRAG_SG_LIST (64)
+
+static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
+ int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx)
+{
+ size_t fetched = 0;
+ struct iovec *src = pkt->vec;
+
+ *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM;
+
+ while (fetched < pkt->virt_hdr.gso_size) {
+
+ /* no more place in fragment iov */
+ if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
+ break;
+ }
+
+ /* no more data in iovec */
+ if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
+ break;
+ }
+
+
+ dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
+ dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
+ pkt->virt_hdr.gso_size - fetched);
+
+ *src_offset += dst[*dst_idx].iov_len;
+ fetched += dst[*dst_idx].iov_len;
+
+ if (*src_offset == src[*src_idx].iov_len) {
+ *src_offset = 0;
+ (*src_idx)++;
+ }
+
+ (*dst_idx)++;
+ }
+
+ return fetched;
+}
+
+static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
+ NetClientState *nc)
+{
+ struct iovec fragment[NET_MAX_FRAG_SG_LIST];
+ size_t fragment_len = 0;
+ bool more_frags = false;
+
+ /* some pointers for shorter code */
+ void *l2_iov_base, *l3_iov_base;
+ size_t l2_iov_len, l3_iov_len;
+ int src_idx = NET_TX_PKT_PL_START_FRAG, dst_idx;
+ size_t src_offset = 0;
+ size_t fragment_offset = 0;
+
+ l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base;
+ l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len;
+ l3_iov_base = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
+ l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
+
+ /* Copy headers */
+ fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base;
+ fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len;
+ fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base;
+ fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;
+
+
+ /* Put as much data as possible and send */
+ do {
+ fragment_len = net_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset,
+ fragment, &dst_idx);
+
+ more_frags = (fragment_offset + fragment_len < pkt->payload_len);
+
+ eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
+ l3_iov_len, fragment_len, fragment_offset, more_frags);
+
+ eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
+
+ qemu_sendv_packet(nc, fragment, dst_idx);
+
+ fragment_offset += fragment_len;
+
+ } while (more_frags);
+
+ return true;
+}
+
+bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
+{
+ assert(pkt);
+
+ if (!pkt->has_virt_hdr &&
+ pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+ net_tx_pkt_do_sw_csum(pkt);
+ }
+
+ /*
+ * Since underlying infrastructure does not support IP datagrams longer
+ * than 64K we should drop such packets and don't even try to send
+ */
+ if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
+ if (pkt->payload_len >
+ ETH_MAX_IP_DGRAM_LEN -
+ pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
+ return false;
+ }
+ }
+
+ if (pkt->has_virt_hdr ||
+ pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
+ qemu_sendv_packet(nc, pkt->vec,
+ pkt->payload_frags + NET_TX_PKT_PL_START_FRAG);
+ return true;
+ }
+
+ return net_tx_pkt_do_sw_fragmentation(pkt, nc);
+}
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
new file mode 100644
index 0000000..73a67f8
--- /dev/null
+++ b/hw/net/net_tx_pkt.h
@@ -0,0 +1,148 @@
+/*
+ * QEMU TX packets abstraction
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Tamir Shomer <tamirs@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef NET_TX_PKT_H
+#define NET_TX_PKT_H
+
+#include "stdint.h"
+#include "stdbool.h"
+#include "net/eth.h"
+#include "exec/hwaddr.h"
+
+/* define to enable packet dump functions */
+/*#define NET_TX_PKT_DEBUG*/
+
+struct NetTxPkt;
+
+/**
+ * Init function for tx packet functionality
+ *
+ * @pkt: packet pointer
+ * @max_frags: max tx ip fragments
+ * @has_virt_hdr: device uses virtio header.
+ */
+void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags,
+ bool has_virt_hdr);
+
+/**
+ * Clean all tx packet resources.
+ *
+ * @pkt: packet.
+ */
+void net_tx_pkt_uninit(struct NetTxPkt *pkt);
+
+/**
+ * get virtio header
+ *
+ * @pkt: packet
+ * @ret: virtio header
+ */
+struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt);
+
+/**
+ * build virtio header (will be stored in module context)
+ *
+ * @pkt: packet
+ * @tso_enable: TSO enabled
+ * @csum_enable: CSO enabled
+ * @gso_size: MSS size for TSO
+ *
+ */
+void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
+ bool csum_enable, uint32_t gso_size);
+
+/**
+ * updates vlan tag, and adds vlan header in case it is missing
+ *
+ * @pkt: packet
+ * @vlan: VLAN tag
+ *
+ */
+void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan);
+
+/**
+ * populate data fragment into pkt context.
+ *
+ * @pkt: packet
+ * @pa: physical address of fragment
+ * @len: length of fragment
+ *
+ */
+bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
+ size_t len);
+
+/**
+ * fix ip header fields and calculate checksums needed.
+ *
+ * @pkt: packet
+ *
+ */
+void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt);
+
+/**
+ * get length of all populated data.
+ *
+ * @pkt: packet
+ * @ret: total data length
+ *
+ */
+size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt);
+
+/**
+ * get packet type
+ *
+ * @pkt: packet
+ * @ret: packet type
+ *
+ */
+eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt);
+
+/**
+ * prints packet data if debug is enabled
+ *
+ * @pkt: packet
+ *
+ */
+void net_tx_pkt_dump(struct NetTxPkt *pkt);
+
+/**
+ * reset tx packet private context (needed to be called between packets)
+ *
+ * @pkt: packet
+ *
+ */
+void net_tx_pkt_reset(struct NetTxPkt *pkt);
+
+/**
+ * Send packet to qemu. handles sw offloads if vhdr is not supported.
+ *
+ * @pkt: packet
+ * @nc: NetClientState
+ * @ret: operation result
+ *
+ */
+bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);
+
+/**
+ * parse raw packet data and analyze offload requirements.
+ *
+ * @pkt: packet
+ *
+ */
+bool net_tx_pkt_parse(struct NetTxPkt *pkt);
+
+#endif
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index dfb328d..388ec10 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -29,8 +29,8 @@
#include "vmxnet3.h"
#include "vmxnet_debug.h"
#include "vmware_utils.h"
-#include "vmxnet_tx_pkt.h"
-#include "vmxnet_rx_pkt.h"
+#include "net_tx_pkt.h"
+#include "net_rx_pkt.h"
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000
@@ -287,13 +287,13 @@ typedef struct {
bool peer_has_vhdr;
/* TX packets to QEMU interface */
- struct VmxnetTxPkt *tx_pkt;
+ struct NetTxPkt *tx_pkt;
uint32_t offload_mode;
uint32_t cso_or_gso_size;
uint16_t tci;
bool needs_vlan;
- struct VmxnetRxPkt *rx_pkt;
+ struct NetRxPkt *rx_pkt;
bool tx_sop;
bool skip_current_tx_pkt;
@@ -516,18 +516,18 @@ vmxnet3_setup_tx_offloads(VMXNET3State *s)
{
switch (s->offload_mode) {
case VMXNET3_OM_NONE:
- vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
+ net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0);
break;
case VMXNET3_OM_CSUM:
- vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
+ net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0);
VMW_PKPRN("L4 CSO requested\n");
break;
case VMXNET3_OM_TSO:
- vmxnet_tx_pkt_build_vheader(s->tx_pkt, true, true,
+ net_tx_pkt_build_vheader(s->tx_pkt, true, true,
s->cso_or_gso_size);
- vmxnet_tx_pkt_update_ip_checksums(s->tx_pkt);
+ net_tx_pkt_update_ip_checksums(s->tx_pkt);
VMW_PKPRN("GSO offload requested.");
break;
@@ -560,12 +560,12 @@ static void
vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx,
Vmxnet3PktStatus status)
{
- size_t tot_len = vmxnet_tx_pkt_get_total_len(s->tx_pkt);
+ size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt);
struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
switch (status) {
case VMXNET3_PKT_STATUS_OK:
- switch (vmxnet_tx_pkt_get_packet_type(s->tx_pkt)) {
+ switch (net_tx_pkt_get_packet_type(s->tx_pkt)) {
case ETH_PKT_BCAST:
stats->bcastPktsTxOK++;
stats->bcastBytesTxOK += tot_len;
@@ -613,7 +613,7 @@ vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
Vmxnet3PktStatus status)
{
struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
- size_t tot_len = vmxnet_rx_pkt_get_total_len(s->rx_pkt);
+ size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt);
switch (status) {
case VMXNET3_PKT_STATUS_OUT_OF_BUF:
@@ -624,7 +624,7 @@ vmxnet3_on_rx_done_update_stats(VMXNET3State *s,
stats->pktsRxError++;
break;
case VMXNET3_PKT_STATUS_OK:
- switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) {
+ switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
case ETH_PKT_BCAST:
stats->bcastPktsRxOK++;
stats->bcastBytesRxOK += tot_len;
@@ -685,10 +685,10 @@ vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx)
}
/* debug prints */
- vmxnet3_dump_virt_hdr(vmxnet_tx_pkt_get_vhdr(s->tx_pkt));
- vmxnet_tx_pkt_dump(s->tx_pkt);
+ vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt));
+ net_tx_pkt_dump(s->tx_pkt);
- if (!vmxnet_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
+ if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) {
status = VMXNET3_PKT_STATUS_DISCARD;
goto func_exit;
}
@@ -716,7 +716,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
data_pa = le64_to_cpu(txd.addr);
- if (!vmxnet_tx_pkt_add_raw_fragment(s->tx_pkt,
+ if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
data_pa,
data_len)) {
s->skip_current_tx_pkt = true;
@@ -730,10 +730,10 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
if (txd.eop) {
if (!s->skip_current_tx_pkt) {
- vmxnet_tx_pkt_parse(s->tx_pkt);
+ net_tx_pkt_parse(s->tx_pkt);
if (s->needs_vlan) {
- vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
+ net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci);
}
vmxnet3_send_packet(s, qidx);
@@ -745,7 +745,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
vmxnet3_complete_packet(s, qidx, txd_idx);
s->tx_sop = true;
s->skip_current_tx_pkt = false;
- vmxnet_tx_pkt_reset(s->tx_pkt);
+ net_tx_pkt_reset(s->tx_pkt);
}
}
}
@@ -885,7 +885,7 @@ vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head,
}
}
-static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt,
+static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt,
struct Vmxnet3_RxCompDesc *rxcd)
{
int csum_ok, is_gso;
@@ -893,16 +893,16 @@ static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt,
struct virtio_net_hdr *vhdr;
uint8_t offload_type;
- if (vmxnet_rx_pkt_is_vlan_stripped(pkt)) {
+ if (net_rx_pkt_is_vlan_stripped(pkt)) {
rxcd->ts = 1;
- rxcd->tci = vmxnet_rx_pkt_get_vlan_tag(pkt);
+ rxcd->tci = net_rx_pkt_get_vlan_tag(pkt);
}
- if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) {
+ if (!net_rx_pkt_has_virt_hdr(pkt)) {
goto nocsum;
}
- vhdr = vmxnet_rx_pkt_get_vhdr(pkt);
+ vhdr = net_rx_pkt_get_vhdr(pkt);
/*
* Checksum is valid when lower level tell so or when lower level
* requires checksum offload telling that packet produced/bridged
@@ -919,7 +919,7 @@ static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt,
goto nocsum;
}
- vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
+ net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
if ((!istcp && !isudp) || (!isip4 && !isip6)) {
goto nocsum;
}
@@ -978,13 +978,13 @@ vmxnet3_indicate_packet(VMXNET3State *s)
uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
hwaddr new_rxcd_pa = 0;
hwaddr ready_rxcd_pa = 0;
- struct iovec *data = vmxnet_rx_pkt_get_iovec(s->rx_pkt);
+ struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt);
size_t bytes_copied = 0;
- size_t bytes_left = vmxnet_rx_pkt_get_total_len(s->rx_pkt);
+ size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt);
uint16_t num_frags = 0;
size_t chunk_size;
- vmxnet_rx_pkt_dump(s->rx_pkt);
+ net_rx_pkt_dump(s->rx_pkt);
while (bytes_left > 0) {
@@ -1145,7 +1145,7 @@ static void vmxnet3_reset(VMXNET3State *s)
vmxnet3_deactivate_device(s);
vmxnet3_reset_interrupt_states(s);
- vmxnet_tx_pkt_reset(s->tx_pkt);
+ net_tx_pkt_reset(s->tx_pkt);
s->drv_shmem = 0;
s->tx_sop = true;
s->skip_current_tx_pkt = false;
@@ -1455,8 +1455,8 @@ static void vmxnet3_activate_device(VMXNET3State *s)
/* Preallocate TX packet wrapper */
VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
- vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr);
- vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
+ net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr);
+ net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
/* Read rings memory locations for RX queues */
for (i = 0; i < s->rxq_num; i++) {
@@ -1832,7 +1832,7 @@ vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data,
return false;
}
- switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) {
+ switch (net_rx_pkt_get_packet_type(s->rx_pkt)) {
case ETH_PKT_UCAST:
if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
return false;
@@ -1888,16 +1888,16 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
}
if (s->peer_has_vhdr) {
- vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
+ net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
buf += sizeof(struct virtio_net_hdr);
size -= sizeof(struct virtio_net_hdr);
}
- vmxnet_rx_pkt_set_packet_type(s->rx_pkt,
+ net_rx_pkt_set_packet_type(s->rx_pkt,
get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
- vmxnet_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
+ net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
if (bytes_indicated < size) {
VMW_PKPRN("RX: %lu of %lu bytes indicated", bytes_indicated, size);
@@ -1949,9 +1949,9 @@ static bool vmxnet3_peer_has_vnet_hdr(VMXNET3State *s)
static void vmxnet3_net_uninit(VMXNET3State *s)
{
g_free(s->mcast_list);
- vmxnet_tx_pkt_reset(s->tx_pkt);
- vmxnet_tx_pkt_uninit(s->tx_pkt);
- vmxnet_rx_pkt_uninit(s->rx_pkt);
+ net_tx_pkt_reset(s->tx_pkt);
+ net_tx_pkt_uninit(s->tx_pkt);
+ net_rx_pkt_uninit(s->rx_pkt);
qemu_del_nic(s->nic);
}
@@ -2380,8 +2380,8 @@ static int vmxnet3_post_load(void *opaque, int version_id)
VMXNET3State *s = opaque;
PCIDevice *d = PCI_DEVICE(s);
- vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr);
- vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
+ net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr);
+ net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
if (s->msix_used) {
if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
diff --git a/hw/net/vmxnet_rx_pkt.c b/hw/net/vmxnet_rx_pkt.c
deleted file mode 100644
index a40e346..0000000
--- a/hw/net/vmxnet_rx_pkt.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstractions
- *
- * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
- *
- * Developed by Daynix Computing LTD (http://www.daynix.com)
- *
- * Authors:
- * Dmitry Fleytman <dmitry@daynix.com>
- * Tamir Shomer <tamirs@daynix.com>
- * Yan Vugenfirer <yan@daynix.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "vmxnet_rx_pkt.h"
-#include "net/eth.h"
-#include "qemu-common.h"
-#include "qemu/iov.h"
-#include "net/checksum.h"
-#include "net/tap.h"
-
-/*
- * RX packet may contain up to 2 fragments - rebuilt eth header
- * in case of VLAN tag stripping
- * and payload received from QEMU - in any case
- */
-#define VMXNET_MAX_RX_PACKET_FRAGMENTS (2)
-
-struct VmxnetRxPkt {
- struct virtio_net_hdr virt_hdr;
- uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN];
- struct iovec vec[VMXNET_MAX_RX_PACKET_FRAGMENTS];
- uint16_t vec_len;
- uint32_t tot_len;
- uint16_t tci;
- bool vlan_stripped;
- bool has_virt_hdr;
- eth_pkt_types_e packet_type;
-
- /* Analysis results */
- bool isip4;
- bool isip6;
- bool isudp;
- bool istcp;
-};
-
-void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr)
-{
- struct VmxnetRxPkt *p = g_malloc0(sizeof *p);
- p->has_virt_hdr = has_virt_hdr;
- *pkt = p;
-}
-
-void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt)
-{
- g_free(pkt);
-}
-
-struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
- return &pkt->virt_hdr;
-}
-
-void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data,
- size_t len, bool strip_vlan)
-{
- uint16_t tci = 0;
- uint16_t ploff;
- assert(pkt);
- pkt->vlan_stripped = false;
-
- if (strip_vlan) {
- pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci);
- }
-
- if (pkt->vlan_stripped) {
- pkt->vec[0].iov_base = pkt->ehdr_buf;
- pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header);
- pkt->vec[1].iov_base = (uint8_t *) data + ploff;
- pkt->vec[1].iov_len = len - ploff;
- pkt->vec_len = 2;
- pkt->tot_len = len - ploff + sizeof(struct eth_header);
- } else {
- pkt->vec[0].iov_base = (void *)data;
- pkt->vec[0].iov_len = len;
- pkt->vec_len = 1;
- pkt->tot_len = len;
- }
-
- pkt->tci = tci;
-
- eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6,
- &pkt->isudp, &pkt->istcp);
-}
-
-void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt)
-{
-#ifdef VMXNET_RX_PKT_DEBUG
- VmxnetRxPkt *pkt = (VmxnetRxPkt *)pkt;
- assert(pkt);
-
- printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n",
- pkt->tot_len, pkt->vlan_stripped, pkt->tci);
-#endif
-}
-
-void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt,
- eth_pkt_types_e packet_type)
-{
- assert(pkt);
-
- pkt->packet_type = packet_type;
-
-}
-
-eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->packet_type;
-}
-
-size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->tot_len;
-}
-
-void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt,
- bool *isip4, bool *isip6,
- bool *isudp, bool *istcp)
-{
- assert(pkt);
-
- *isip4 = pkt->isip4;
- *isip6 = pkt->isip6;
- *isudp = pkt->isudp;
- *istcp = pkt->istcp;
-}
-
-struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->vec;
-}
-
-void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt,
- struct virtio_net_hdr *vhdr)
-{
- assert(pkt);
-
- memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
-}
-
-bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->vlan_stripped;
-}
-
-bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->has_virt_hdr;
-}
-
-uint16_t vmxnet_rx_pkt_get_num_frags(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->vec_len;
-}
-
-uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->tci;
-}
diff --git a/hw/net/vmxnet_rx_pkt.h b/hw/net/vmxnet_rx_pkt.h
deleted file mode 100644
index 6b2c60e..0000000
--- a/hw/net/vmxnet_rx_pkt.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstraction
- *
- * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
- *
- * Developed by Daynix Computing LTD (http://www.daynix.com)
- *
- * Authors:
- * Dmitry Fleytman <dmitry@daynix.com>
- * Tamir Shomer <tamirs@daynix.com>
- * Yan Vugenfirer <yan@daynix.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef VMXNET_RX_PKT_H
-#define VMXNET_RX_PKT_H
-
-#include "stdint.h"
-#include "stdbool.h"
-#include "net/eth.h"
-
-/* defines to enable packet dump functions */
-/*#define VMXNET_RX_PKT_DEBUG*/
-
-struct VmxnetRxPkt;
-
-/**
- * Clean all rx packet resources
- *
- * @pkt: packet
- *
- */
-void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt);
-
-/**
- * Init function for rx packet functionality
- *
- * @pkt: packet pointer
- * @has_virt_hdr: device uses virtio header
- *
- */
-void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr);
-
-/**
- * returns total length of data attached to rx context
- *
- * @pkt: packet
- *
- * Return: nothing
- *
- */
-size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt);
-
-/**
- * fetches packet analysis results
- *
- * @pkt: packet
- * @isip4: whether the packet given is IPv4
- * @isip6: whether the packet given is IPv6
- * @isudp: whether the packet given is UDP
- * @istcp: whether the packet given is TCP
- *
- */
-void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt,
- bool *isip4, bool *isip6,
- bool *isudp, bool *istcp);
-
-/**
- * returns virtio header stored in rx context
- *
- * @pkt: packet
- * @ret: virtio header
- *
- */
-struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt);
-
-/**
- * returns packet type
- *
- * @pkt: packet
- * @ret: packet type
- *
- */
-eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt);
-
-/**
- * returns vlan tag
- *
- * @pkt: packet
- * @ret: VLAN tag
- *
- */
-uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt);
-
-/**
- * tells whether vlan was stripped from the packet
- *
- * @pkt: packet
- * @ret: VLAN stripped sign
- *
- */
-bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt);
-
-/**
- * notifies caller if the packet has virtio header
- *
- * @pkt: packet
- * @ret: true if packet has virtio header, false otherwize
- *
- */
-bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt);
-
-/**
- * returns number of frags attached to the packet
- *
- * @pkt: packet
- * @ret: number of frags
- *
- */
-uint16_t vmxnet_rx_pkt_get_num_frags(struct VmxnetRxPkt *pkt);
-
-/**
- * attach data to rx packet
- *
- * @pkt: packet
- * @data: pointer to the data buffer
- * @len: data length
- * @strip_vlan: should the module strip vlan from data
- *
- */
-void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data,
- size_t len, bool strip_vlan);
-
-/**
- * returns io vector that holds the attached data
- *
- * @pkt: packet
- * @ret: pointer to IOVec
- *
- */
-struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt);
-
-/**
- * prints rx packet data if debug is enabled
- *
- * @pkt: packet
- *
- */
-void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt);
-
-/**
- * copy passed vhdr data to packet context
- *
- * @pkt: packet
- * @vhdr: VHDR buffer
- *
- */
-void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt,
- struct virtio_net_hdr *vhdr);
-
-/**
- * save packet type in packet context
- *
- * @pkt: packet
- * @packet_type: the packet type
- *
- */
-void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt,
- eth_pkt_types_e packet_type);
-
-#endif
diff --git a/hw/net/vmxnet_tx_pkt.c b/hw/net/vmxnet_tx_pkt.c
deleted file mode 100644
index f7344c4..0000000
--- a/hw/net/vmxnet_tx_pkt.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstractions
- *
- * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
- *
- * Developed by Daynix Computing LTD (http://www.daynix.com)
- *
- * Authors:
- * Dmitry Fleytman <dmitry@daynix.com>
- * Tamir Shomer <tamirs@daynix.com>
- * Yan Vugenfirer <yan@daynix.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "hw/hw.h"
-#include "vmxnet_tx_pkt.h"
-#include "net/eth.h"
-#include "qemu-common.h"
-#include "qemu/iov.h"
-#include "net/checksum.h"
-#include "net/tap.h"
-#include "net/net.h"
-
-enum {
- VMXNET_TX_PKT_VHDR_FRAG = 0,
- VMXNET_TX_PKT_L2HDR_FRAG,
- VMXNET_TX_PKT_L3HDR_FRAG,
- VMXNET_TX_PKT_PL_START_FRAG
-};
-
-/* TX packet private context */
-struct VmxnetTxPkt {
- struct virtio_net_hdr virt_hdr;
- bool has_virt_hdr;
-
- struct iovec *raw;
- uint32_t raw_frags;
- uint32_t max_raw_frags;
-
- struct iovec *vec;
-
- uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
-
- uint32_t payload_len;
-
- uint32_t payload_frags;
- uint32_t max_payload_frags;
-
- uint16_t hdr_len;
- eth_pkt_types_e packet_type;
- uint8_t l4proto;
-};
-
-void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags,
- bool has_virt_hdr)
-{
- struct VmxnetTxPkt *p = g_malloc0(sizeof *p);
-
- p->vec = g_malloc((sizeof *p->vec) *
- (max_frags + VMXNET_TX_PKT_PL_START_FRAG));
-
- p->raw = g_malloc((sizeof *p->raw) * max_frags);
-
- p->max_payload_frags = max_frags;
- p->max_raw_frags = max_frags;
- p->has_virt_hdr = has_virt_hdr;
- p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
- p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_len =
- p->has_virt_hdr ? sizeof p->virt_hdr : 0;
- p->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
- p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL;
- p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len = 0;
-
- *pkt = p;
-}
-
-void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt)
-{
- if (pkt) {
- g_free(pkt->vec);
- g_free(pkt->raw);
- g_free(pkt);
- }
-}
-
-void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt)
-{
- uint16_t csum;
- uint32_t ph_raw_csum;
- assert(pkt);
- uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
- struct ip_header *ip_hdr;
-
- if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type &&
- VIRTIO_NET_HDR_GSO_UDP != gso_type) {
- return;
- }
-
- ip_hdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
-
- if (pkt->payload_len + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len >
- ETH_MAX_IP_DGRAM_LEN) {
- return;
- }
-
- ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
- pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len);
-
- /* Calculate IP header checksum */
- ip_hdr->ip_sum = 0;
- csum = net_raw_checksum((uint8_t *)ip_hdr,
- pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len);
- ip_hdr->ip_sum = cpu_to_be16(csum);
-
- /* Calculate IP pseudo header checksum */
- ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len);
- csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum));
- iov_from_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
- pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
-}
-
-static void vmxnet_tx_pkt_calculate_hdr_len(struct VmxnetTxPkt *pkt)
-{
- pkt->hdr_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len +
- pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len;
-}
-
-static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt)
-{
- struct iovec *l2_hdr, *l3_hdr;
- size_t bytes_read;
- size_t full_ip6hdr_len;
- uint16_t l3_proto;
-
- assert(pkt);
-
- l2_hdr = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG];
- l3_hdr = &pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG];
-
- bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
- ETH_MAX_L2_HDR_LEN);
- if (bytes_read < ETH_MAX_L2_HDR_LEN) {
- l2_hdr->iov_len = 0;
- return false;
- } else {
- l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
- }
-
- l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len);
-
- switch (l3_proto) {
- case ETH_P_IP:
- l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN);
-
- bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
- l3_hdr->iov_base, sizeof(struct ip_header));
-
- if (bytes_read < sizeof(struct ip_header)) {
- l3_hdr->iov_len = 0;
- return false;
- }
-
- l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);
- pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p;
-
- /* copy optional IPv4 header data */
- bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
- l2_hdr->iov_len + sizeof(struct ip_header),
- l3_hdr->iov_base + sizeof(struct ip_header),
- l3_hdr->iov_len - sizeof(struct ip_header));
- if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
- l3_hdr->iov_len = 0;
- return false;
- }
- break;
-
- case ETH_P_IPV6:
- if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
- &pkt->l4proto, &full_ip6hdr_len)) {
- l3_hdr->iov_len = 0;
- return false;
- }
-
- l3_hdr->iov_base = g_malloc(full_ip6hdr_len);
-
- bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
- l3_hdr->iov_base, full_ip6hdr_len);
-
- if (bytes_read < full_ip6hdr_len) {
- l3_hdr->iov_len = 0;
- return false;
- } else {
- l3_hdr->iov_len = full_ip6hdr_len;
- }
- break;
-
- default:
- l3_hdr->iov_len = 0;
- break;
- }
-
- vmxnet_tx_pkt_calculate_hdr_len(pkt);
- pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
- return true;
-}
-
-static bool vmxnet_tx_pkt_rebuild_payload(struct VmxnetTxPkt *pkt)
-{
- size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
-
- pkt->payload_frags = iov_copy(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG],
- pkt->max_payload_frags,
- pkt->raw, pkt->raw_frags,
- pkt->hdr_len, payload_len);
-
- if (pkt->payload_frags != (uint32_t) -1) {
- pkt->payload_len = payload_len;
- return true;
- } else {
- return false;
- }
-}
-
-bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt)
-{
- return vmxnet_tx_pkt_parse_headers(pkt) &&
- vmxnet_tx_pkt_rebuild_payload(pkt);
-}
-
-struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt)
-{
- assert(pkt);
- return &pkt->virt_hdr;
-}
-
-static uint8_t vmxnet_tx_pkt_get_gso_type(struct VmxnetTxPkt *pkt,
- bool tso_enable)
-{
- uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
- uint16_t l3_proto;
-
- l3_proto = eth_get_l3_proto(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base,
- pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len);
-
- if (!tso_enable) {
- goto func_exit;
- }
-
- rc = eth_get_gso_type(l3_proto, pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base,
- pkt->l4proto);
-
-func_exit:
- return rc;
-}
-
-void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable,
- bool csum_enable, uint32_t gso_size)
-{
- struct tcp_hdr l4hdr;
- assert(pkt);
-
- /* csum has to be enabled if tso is. */
- assert(csum_enable || !tso_enable);
-
- pkt->virt_hdr.gso_type = vmxnet_tx_pkt_get_gso_type(pkt, tso_enable);
-
- switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
- case VIRTIO_NET_HDR_GSO_NONE:
- pkt->virt_hdr.hdr_len = 0;
- pkt->virt_hdr.gso_size = 0;
- break;
-
- case VIRTIO_NET_HDR_GSO_UDP:
- pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
- pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
- break;
-
- case VIRTIO_NET_HDR_GSO_TCPV4:
- case VIRTIO_NET_HDR_GSO_TCPV6:
- iov_to_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
- 0, &l4hdr, sizeof(l4hdr));
- pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
- pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
- break;
-
- default:
- g_assert_not_reached();
- }
-
- if (csum_enable) {
- switch (pkt->l4proto) {
- case IP_PROTO_TCP:
- pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- pkt->virt_hdr.csum_start = pkt->hdr_len;
- pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
- break;
- case IP_PROTO_UDP:
- pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- pkt->virt_hdr.csum_start = pkt->hdr_len;
- pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
- break;
- default:
- break;
- }
- }
-}
-
-void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan)
-{
- bool is_new;
- assert(pkt);
-
- eth_setup_vlan_headers(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base,
- vlan, &is_new);
-
- /* update l2hdrlen */
- if (is_new) {
- pkt->hdr_len += sizeof(struct vlan_header);
- pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len +=
- sizeof(struct vlan_header);
- }
-}
-
-bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa,
- size_t len)
-{
- hwaddr mapped_len = 0;
- struct iovec *ventry;
- assert(pkt);
- assert(pkt->max_raw_frags > pkt->raw_frags);
-
- if (!len) {
- return true;
- }
-
- ventry = &pkt->raw[pkt->raw_frags];
- mapped_len = len;
-
- ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false);
- ventry->iov_len = mapped_len;
- pkt->raw_frags += !!ventry->iov_base;
-
- if ((ventry->iov_base == NULL) || (len != mapped_len)) {
- return false;
- }
-
- return true;
-}
-
-eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->packet_type;
-}
-
-size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt)
-{
- assert(pkt);
-
- return pkt->hdr_len + pkt->payload_len;
-}
-
-void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt)
-{
-#ifdef VMXNET_TX_PKT_DEBUG
- assert(pkt);
-
- printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, "
- "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
- pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len,
- pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
-#endif
-}
-
-void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt)
-{
- int i;
-
- /* no assert, as reset can be called before tx_pkt_init */
- if (!pkt) {
- return;
- }
-
- memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
-
- g_free(pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base);
- pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL;
-
- assert(pkt->vec);
- for (i = VMXNET_TX_PKT_L2HDR_FRAG;
- i < pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG; i++) {
- pkt->vec[i].iov_len = 0;
- }
- pkt->payload_len = 0;
- pkt->payload_frags = 0;
-
- assert(pkt->raw);
- for (i = 0; i < pkt->raw_frags; i++) {
- assert(pkt->raw[i].iov_base);
- cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len,
- false, pkt->raw[i].iov_len);
- pkt->raw[i].iov_len = 0;
- }
- pkt->raw_frags = 0;
-
- pkt->hdr_len = 0;
- pkt->packet_type = 0;
- pkt->l4proto = 0;
-}
-
-static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt)
-{
- struct iovec *iov = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG];
- uint32_t csum_cntr;
- uint16_t csum = 0;
- /* num of iovec without vhdr */
- uint32_t iov_len = pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG - 1;
- uint16_t csl;
- struct ip_header *iphdr;
- size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
-
- /* Put zero to checksum field */
- iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
-
- /* Calculate L4 TCP/UDP checksum */
- csl = pkt->payload_len;
-
- /* data checksum */
- csum_cntr =
- net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl);
- /* add pseudo header to csum */
- iphdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
- csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl);
-
- /* Put the checksum obtained into the packet */
- csum = cpu_to_be16(net_checksum_finish(csum_cntr));
- iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
-}
-
-enum {
- VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS = 0,
- VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS,
- VMXNET_TX_PKT_FRAGMENT_HEADER_NUM
-};
-
-#define VMXNET_MAX_FRAG_SG_LIST (64)
-
-static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt,
- int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx)
-{
- size_t fetched = 0;
- struct iovec *src = pkt->vec;
-
- *dst_idx = VMXNET_TX_PKT_FRAGMENT_HEADER_NUM;
-
- while (fetched < pkt->virt_hdr.gso_size) {
-
- /* no more place in fragment iov */
- if (*dst_idx == VMXNET_MAX_FRAG_SG_LIST) {
- break;
- }
-
- /* no more data in iovec */
- if (*src_idx == (pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG)) {
- break;
- }
-
-
- dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
- dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
- pkt->virt_hdr.gso_size - fetched);
-
- *src_offset += dst[*dst_idx].iov_len;
- fetched += dst[*dst_idx].iov_len;
-
- if (*src_offset == src[*src_idx].iov_len) {
- *src_offset = 0;
- (*src_idx)++;
- }
-
- (*dst_idx)++;
- }
-
- return fetched;
-}
-
-static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt,
- NetClientState *nc)
-{
- struct iovec fragment[VMXNET_MAX_FRAG_SG_LIST];
- size_t fragment_len = 0;
- bool more_frags = false;
-
- /* some pointers for shorter code */
- void *l2_iov_base, *l3_iov_base;
- size_t l2_iov_len, l3_iov_len;
- int src_idx = VMXNET_TX_PKT_PL_START_FRAG, dst_idx;
- size_t src_offset = 0;
- size_t fragment_offset = 0;
-
- l2_iov_base = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base;
- l2_iov_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len;
- l3_iov_base = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
- l3_iov_len = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len;
-
- /* Copy headers */
- fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base;
- fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len;
- fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base;
- fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;
-
-
- /* Put as much data as possible and send */
- do {
- fragment_len = vmxnet_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset,
- fragment, &dst_idx);
-
- more_frags = (fragment_offset + fragment_len < pkt->payload_len);
-
- eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
- l3_iov_len, fragment_len, fragment_offset, more_frags);
-
- eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
-
- qemu_sendv_packet(nc, fragment, dst_idx);
-
- fragment_offset += fragment_len;
-
- } while (more_frags);
-
- return true;
-}
-
-bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc)
-{
- assert(pkt);
-
- if (!pkt->has_virt_hdr &&
- pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
- vmxnet_tx_pkt_do_sw_csum(pkt);
- }
-
- /*
- * Since underlying infrastructure does not support IP datagrams longer
- * than 64K we should drop such packets and don't even try to send
- */
- if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
- if (pkt->payload_len >
- ETH_MAX_IP_DGRAM_LEN -
- pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len) {
- return false;
- }
- }
-
- if (pkt->has_virt_hdr ||
- pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
- qemu_sendv_packet(nc, pkt->vec,
- pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG);
- return true;
- }
-
- return vmxnet_tx_pkt_do_sw_fragmentation(pkt, nc);
-}
diff --git a/hw/net/vmxnet_tx_pkt.h b/hw/net/vmxnet_tx_pkt.h
deleted file mode 100644
index 57121a6..0000000
--- a/hw/net/vmxnet_tx_pkt.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstraction
- *
- * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
- *
- * Developed by Daynix Computing LTD (http://www.daynix.com)
- *
- * Authors:
- * Dmitry Fleytman <dmitry@daynix.com>
- * Tamir Shomer <tamirs@daynix.com>
- * Yan Vugenfirer <yan@daynix.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef VMXNET_TX_PKT_H
-#define VMXNET_TX_PKT_H
-
-#include "stdint.h"
-#include "stdbool.h"
-#include "net/eth.h"
-#include "exec/hwaddr.h"
-
-/* define to enable packet dump functions */
-/*#define VMXNET_TX_PKT_DEBUG*/
-
-struct VmxnetTxPkt;
-
-/**
- * Init function for tx packet functionality
- *
- * @pkt: packet pointer
- * @max_frags: max tx ip fragments
- * @has_virt_hdr: device uses virtio header.
- */
-void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags,
- bool has_virt_hdr);
-
-/**
- * Clean all tx packet resources.
- *
- * @pkt: packet.
- */
-void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt);
-
-/**
- * get virtio header
- *
- * @pkt: packet
- * @ret: virtio header
- */
-struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt);
-
-/**
- * build virtio header (will be stored in module context)
- *
- * @pkt: packet
- * @tso_enable: TSO enabled
- * @csum_enable: CSO enabled
- * @gso_size: MSS size for TSO
- *
- */
-void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable,
- bool csum_enable, uint32_t gso_size);
-
-/**
- * updates vlan tag, and adds vlan header in case it is missing
- *
- * @pkt: packet
- * @vlan: VLAN tag
- *
- */
-void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan);
-
-/**
- * populate data fragment into pkt context.
- *
- * @pkt: packet
- * @pa: physical address of fragment
- * @len: length of fragment
- *
- */
-bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa,
- size_t len);
-
-/**
- * fix ip header fields and calculate checksums needed.
- *
- * @pkt: packet
- *
- */
-void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt);
-
-/**
- * get length of all populated data.
- *
- * @pkt: packet
- * @ret: total data length
- *
- */
-size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt);
-
-/**
- * get packet type
- *
- * @pkt: packet
- * @ret: packet type
- *
- */
-eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt);
-
-/**
- * prints packet data if debug is enabled
- *
- * @pkt: packet
- *
- */
-void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt);
-
-/**
- * reset tx packet private context (needed to be called between packets)
- *
- * @pkt: packet
- *
- */
-void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt);
-
-/**
- * Send packet to qemu. handles sw offloads if vhdr is not supported.
- *
- * @pkt: packet
- * @nc: NetClientState
- * @ret: operation result
- *
- */
-bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc);
-
-/**
- * parse raw packet data and analyze offload requirements.
- *
- * @pkt: packet
- *
- */
-bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt);
-
-#endif
diff --git a/tests/Makefile b/tests/Makefile
index 55aa745..32d7c12 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -155,8 +155,8 @@ check-qtest-i386-y += $(check-qtest-pci-y)
gcov-files-i386-y += $(gcov-files-pci-y)
check-qtest-i386-y += tests/vmxnet3-test$(EXESUF)
gcov-files-i386-y += hw/net/vmxnet3.c
-gcov-files-i386-y += hw/net/vmxnet_rx_pkt.c
-gcov-files-i386-y += hw/net/vmxnet_tx_pkt.c
+gcov-files-i386-y += hw/net/net_rx_pkt.c
+gcov-files-i386-y += hw/net/net_tx_pkt.c
check-qtest-i386-y += tests/pvpanic-test$(EXESUF)
gcov-files-i386-y += i386-softmmu/hw/misc/pvpanic.c
check-qtest-i386-y += tests/i82801b11-test$(EXESUF)
--
2.4.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [Qemu-devel] [RFC PATCH v2 08/10] net_pkt: Extend packet abstraction as required by e1000e functionality
2016-01-18 17:35 [Qemu-devel] [RFC PATCH v2 00/10] Introduce Intel 82574 GbE Controller Emulation (e1000e) Leonid Bloch
` (6 preceding siblings ...)
2016-01-18 17:35 ` [Qemu-devel] [RFC PATCH v2 07/10] net_pkt: Name vmxnet3 packet abstractions more generic Leonid Bloch
@ 2016-01-18 17:35 ` Leonid Bloch
2016-01-18 17:35 ` [Qemu-devel] [RFC PATCH v2 09/10] e1000_regs: Add definitions for Intel 82574-specific bits Leonid Bloch
` (2 subsequent siblings)
10 siblings, 0 replies; 18+ messages in thread
From: Leonid Bloch @ 2016-01-18 17:35 UTC (permalink / raw)
To: qemu-devel
Cc: Dmitry Fleytman, Jason Wang, Leonid Bloch, Shmulik Ladkani,
Michael S. Tsirkin
From: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
This patch extends the TX/RX packet abstractions with features that will
be used by the e1000e device implementation.
Changes are:
1. Support iovec lists for RX buffers
2. Deeper RX packets parsing
3. Loopback option for TX packets
4. Extended VLAN headers handling
Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
---
hw/net/net_rx_pkt.c | 403 ++++++++++++++++++++++++++++++++++++++++++++----
hw/net/net_rx_pkt.h | 183 +++++++++++++++++++++-
hw/net/net_tx_pkt.c | 134 +++++++++++-----
hw/net/net_tx_pkt.h | 59 ++++++-
include/net/checksum.h | 4 +-
include/net/eth.h | 161 +++++++++++++------
net/checksum.c | 7 +-
net/eth.c | 410 ++++++++++++++++++++++++++++++++++++++++++++-----
trace-events | 25 +++
9 files changed, 1221 insertions(+), 165 deletions(-)
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
index f4c929f..68e51e6 100644
--- a/hw/net/net_rx_pkt.c
+++ b/hw/net/net_rx_pkt.c
@@ -15,6 +15,7 @@
*
*/
+#include "trace.h"
#include "net_rx_pkt.h"
#include "net/eth.h"
#include "qemu-common.h"
@@ -22,17 +23,11 @@
#include "net/checksum.h"
#include "net/tap.h"
-/*
- * RX packet may contain up to 2 fragments - rebuilt eth header
- * in case of VLAN tag stripping
- * and payload received from QEMU - in any case
- */
-#define NET_MAX_RX_PACKET_FRAGMENTS (2)
-
struct NetRxPkt {
struct virtio_net_hdr virt_hdr;
- uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN];
- struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS];
+ uint8_t ehdr_buf[sizeof(struct eth_header)];
+ struct iovec *vec;
+ uint16_t vec_len_total;
uint16_t vec_len;
uint32_t tot_len;
uint16_t tci;
@@ -45,17 +40,31 @@ struct NetRxPkt {
bool isip6;
bool isudp;
bool istcp;
+
+ size_t l3hdr_off;
+ size_t l4hdr_off;
+ size_t l5hdr_off;
+
+ eth_ip6_hdr_info ip6hdr_info;
+ eth_ip4_hdr_info ip4hdr_info;
+ eth_l4_hdr_info l4hdr_info;
};
void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr)
{
struct NetRxPkt *p = g_malloc0(sizeof *p);
p->has_virt_hdr = has_virt_hdr;
+ p->vec = NULL;
+ p->vec_len_total = 0;
*pkt = p;
}
void net_rx_pkt_uninit(struct NetRxPkt *pkt)
{
+ if (pkt->vec_len_total != 0) {
+ g_free(pkt->vec);
+ }
+
g_free(pkt);
}
@@ -65,36 +74,88 @@ struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
return &pkt->virt_hdr;
}
-void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
- size_t len, bool strip_vlan)
+static void
+net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
+ int new_iov_len)
+{
+ if (pkt->vec_len_total < new_iov_len) {
+ g_free(pkt->vec);
+ pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
+ pkt->vec_len_total = new_iov_len;
+ }
+}
+
+static void
+net_rx_pkt_pull_data(struct NetRxPkt *pkt,
+ const struct iovec *iov, int iovcnt,
+ size_t ploff)
+{
+ if (pkt->vlan_stripped) {
+ net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
+
+ pkt->vec[0].iov_base = pkt->ehdr_buf;
+ pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf);
+
+ pkt->tot_len =
+ iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header);
+
+ pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
+ iov, iovcnt, ploff, pkt->tot_len);
+ } else {
+ net_rx_pkt_iovec_realloc(pkt, iovcnt);
+
+ pkt->tot_len = iov_size(iov, iovcnt) - ploff;
+ pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
+ iov, iovcnt, ploff, pkt->tot_len);
+ }
+
+ eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6,
+ &pkt->isudp, &pkt->istcp,
+ &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
+ &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
+
+ trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp,
+ pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
+}
+
+void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
+ const struct iovec *iov, int iovcnt,
+ size_t iovoff, bool strip_vlan)
{
uint16_t tci = 0;
- uint16_t ploff;
+ uint16_t ploff = iovoff;
assert(pkt);
pkt->vlan_stripped = false;
if (strip_vlan) {
- pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci);
+ pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
+ &ploff, &tci);
}
- if (pkt->vlan_stripped) {
- pkt->vec[0].iov_base = pkt->ehdr_buf;
- pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header);
- pkt->vec[1].iov_base = (uint8_t *) data + ploff;
- pkt->vec[1].iov_len = len - ploff;
- pkt->vec_len = 2;
- pkt->tot_len = len - ploff + sizeof(struct eth_header);
- } else {
- pkt->vec[0].iov_base = (void *)data;
- pkt->vec[0].iov_len = len;
- pkt->vec_len = 1;
- pkt->tot_len = len;
+ pkt->tci = tci;
+
+ net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
+}
+
+void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
+ const struct iovec *iov, int iovcnt,
+ size_t iovoff, bool strip_vlan,
+ uint16_t vet)
+{
+ uint16_t tci = 0;
+ uint16_t ploff = iovoff;
+ assert(pkt);
+ pkt->vlan_stripped = false;
+
+ if (strip_vlan) {
+ pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
+ pkt->ehdr_buf,
+ &ploff, &tci);
}
pkt->tci = tci;
- eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6,
- &pkt->isudp, &pkt->istcp);
+ net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
}
void net_rx_pkt_dump(struct NetRxPkt *pkt)
@@ -143,6 +204,180 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
*istcp = pkt->istcp;
}
+size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+ return pkt->l3hdr_off;
+}
+
+size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+ return pkt->l4hdr_off;
+}
+
+size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+ return pkt->l5hdr_off;
+}
+
+eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt)
+{
+ return &pkt->ip6hdr_info;
+}
+
+eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
+{
+ return &pkt->ip4hdr_info;
+}
+
+eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt)
+{
+ return &pkt->l4hdr_info;
+}
+
+static inline void
+_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
+ void *ptr, size_t size)
+{
+ memcpy(&rss_input[*bytes_written], ptr, size);
+ trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written);
+ *bytes_written += size;
+}
+
+static inline void
+_net_rx_rss_prepare_ip4(uint8_t *rss_input,
+ struct NetRxPkt *pkt,
+ size_t *bytes_written)
+{
+ struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr;
+
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
+ &ip4_hdr->ip_src, sizeof(uint32_t));
+
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
+ &ip4_hdr->ip_dst, sizeof(uint32_t));
+}
+
+static inline void
+_net_rx_rss_prepare_ip6(uint8_t *rss_input,
+ struct NetRxPkt *pkt,
+ bool ipv6ex, size_t *bytes_written)
+{
+ eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info;
+
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
+ (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src
+ : &ip6info->ip6_hdr.ip6_src,
+ sizeof(struct in6_address));
+
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
+ (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst
+ : &ip6info->ip6_hdr.ip6_dst,
+ sizeof(struct in6_address));
+}
+
+static inline void
+_net_rx_rss_prepare_tcp(uint8_t *rss_input,
+ struct NetRxPkt *pkt,
+ size_t *bytes_written)
+{
+ struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp;
+
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
+ &tcphdr->th_sport, sizeof(uint16_t));
+
+ _net_rx_rss_add_chunk(rss_input, bytes_written,
+ &tcphdr->th_dport, sizeof(uint16_t));
+}
+
+uint32_t
+net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
+ NetRxPktRssType type,
+ uint8_t *key)
+{
+ uint8_t rss_input[36];
+ size_t rss_length = 0;
+ uint32_t rss_hash = 0;
+ net_toeplitz_key key_data;
+
+ switch (type) {
+ case NetPktRssIpV4:
+ assert(pkt->isip4);
+ trace_net_rx_pkt_rss_ip4();
+ _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
+ break;
+ case NetPktRssIpV4Tcp:
+ assert(pkt->isip4);
+ assert(pkt->istcp);
+ trace_net_rx_pkt_rss_ip4_tcp();
+ _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
+ _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
+ break;
+ case NetPktRssIpV6Tcp:
+ assert(pkt->isip6);
+ assert(pkt->istcp);
+ trace_net_rx_pkt_rss_ip6_tcp();
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
+ _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
+ break;
+ case NetPktRssIpV6:
+ assert(pkt->isip6);
+ trace_net_rx_pkt_rss_ip6();
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
+ break;
+ case NetPktRssIpV6Ex:
+ assert(pkt->isip6);
+ trace_net_rx_pkt_rss_ip6_ex();
+ _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ net_toeplitz_key_init(&key_data, key);
+ net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data);
+
+ trace_net_rx_pkt_rss_hash(rss_length, rss_hash);
+
+ return rss_hash;
+}
+
+uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ if (pkt->isip4) {
+ return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
+ }
+
+ return 0;
+}
+
+bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ if (pkt->istcp) {
+ return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
+ }
+
+ return false;
+}
+
+bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ if (pkt->istcp) {
+ return pkt->l4hdr_info.has_tcp_data;
+ }
+
+ return false;
+}
+
struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
{
assert(pkt);
@@ -150,6 +385,13 @@ struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
return pkt->vec;
}
+uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ return pkt->vec_len;
+}
+
void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
struct virtio_net_hdr *vhdr)
{
@@ -158,6 +400,14 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
}
+void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
+ const struct iovec *iov, int iovcnt)
+{
+ assert(pkt);
+
+ iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
+}
+
bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
{
assert(pkt);
@@ -185,3 +435,102 @@ uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
return pkt->tci;
}
+
+bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)
+{
+ uint32_t cntr;
+ uint16_t csum;
+ uint32_t csl;
+
+ trace_net_rx_pkt_l3_csum_validate_entry();
+
+ if (!pkt->isip4) {
+ trace_net_rx_pkt_l3_csum_validate_not_ip4();
+ return false;
+ }
+
+ csl = pkt->l4hdr_off - pkt->l3hdr_off;
+
+ cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len,
+ pkt->l3hdr_off,
+ csl, 0);
+
+ csum = net_checksum_finish(cntr);
+
+ *csum_valid = (csum == 0);
+
+ trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl,
+ cntr, csum, *csum_valid);
+
+ return true;
+}
+
+bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
+{
+ uint32_t cntr;
+ uint16_t csum;
+ uint16_t csl;
+ uint32_t cso;
+
+ trace_net_rx_pkt_l4_csum_validate_entry();
+
+ if (!pkt->istcp && !pkt->isudp) {
+ trace_net_rx_pkt_l4_csum_validate_not_xxp();
+ return false;
+ }
+
+ if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) {
+ trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
+ return false;
+ }
+
+ if (pkt->isip4) {
+ if (pkt->ip4hdr_info.fragment) {
+ trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
+ return false;
+ }
+
+ if (pkt->isudp) {
+ csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
+ trace_net_rx_pkt_l4_csum_validate_ip4_udp();
+ } else {
+ csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) -
+ IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr);
+ trace_net_rx_pkt_l4_csum_validate_ip4_tcp();
+ }
+
+ cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr,
+ csl, &cso);
+ trace_net_rx_pkt_l4_csum_validate_ph_csum(cntr, csl);
+ } else {
+ if (pkt->isudp) {
+ csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
+ trace_net_rx_pkt_l4_csum_validate_ip6_udp();
+ } else {
+ struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr;
+ size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off;
+ size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
+
+ csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) -
+ ip6opts_len;
+ trace_net_rx_pkt_l4_csum_validate_ip6_tcp();
+ }
+
+ cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl,
+ pkt->ip6hdr_info.l4proto, &cso);
+ trace_net_rx_pkt_l4_csum_validate_ph_csum(cntr, csl);
+ }
+
+ cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len,
+ pkt->l4hdr_off, csl, cso);
+
+ csum = net_checksum_finish(cntr);
+
+ *csum_valid = ((csum == 0) || (csum == 0xFFFF));
+
+ trace_net_rx_pkt_l4_csum_validate_csum(pkt->l4hdr_off, csl,
+ cntr, csum,
+ *csum_valid);
+
+ return true;
+}
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
index 1e4accf..8d16b58 100644
--- a/hw/net/net_rx_pkt.h
+++ b/hw/net/net_rx_pkt.h
@@ -69,6 +69,103 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
bool *isudp, bool *istcp);
/**
+* fetches L3 header offset
+*
+* @pkt: packet
+*
+*/
+size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt);
+
+/**
+* fetches L4 header offset
+*
+* @pkt: packet
+*
+*/
+size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt);
+
+/**
+* fetches L5 header offset
+*
+* @pkt: packet
+*
+*/
+size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt);
+
+/**
+ * fetches IP6 header analysis results
+ *
+ * Return: pointer to analysis results structure which is stored in internal
+ * packet area.
+ *
+ */
+eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt);
+
+/**
+ * fetches IP4 header analysis results
+ *
+ * Return: pointer to analysis results structure which is stored in internal
+ * packet area.
+ *
+ */
+eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt);
+
+/**
+ * fetches L4 header analysis results
+ *
+ * Return: pointer to analysis results structure which is stored in internal
+ * packet area.
+ *
+ */
+eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt);
+
+typedef enum {
+ NetPktRssIpV4,
+ NetPktRssIpV4Tcp,
+ NetPktRssIpV6Tcp,
+ NetPktRssIpV6,
+ NetPktRssIpV6Ex
+} NetRxPktRssType;
+
+/**
+* calculates RSS hash for packet
+*
+* @pkt: packet
+* @type: RSS hash type
+*
+* Return: Toeplitz RSS hash.
+*
+*/
+uint32_t
+net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
+ NetRxPktRssType type,
+ uint8_t *key);
+
+/**
+* fetches IP identification for the packet
+*
+* @pkt: packet
+*
+*/
+uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt);
+
+/**
+* check if given packet is a TCP ACK packet
+*
+* @pkt: packet
+*
+*/
+bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt);
+
+/**
+* check if given packet contains TCP data
+*
+* @pkt: packet
+*
+*/
+bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt);
+
+/**
* returns virtio header stored in rx context
*
* @pkt: packet
@@ -123,6 +220,37 @@ bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt);
uint16_t net_rx_pkt_get_num_frags(struct NetRxPkt *pkt);
/**
+* attach scatter-gather data to rx packet
+*
+* @pkt: packet
+* @iov: received data scatter-gather list
+* @iovcnt number of elements in iov
+* @iovoff data start offset in the iov
+* @strip_vlan: should the module strip vlan from data
+*
+*/
+void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
+ const struct iovec *iov,
+ int iovcnt, size_t iovoff,
+ bool strip_vlan);
+
+/**
+* attach scatter-gather data to rx packet
+*
+* @pkt: packet
+* @iov: received data scatter-gather list
+* @iovcnt number of elements in iov
+* @iovoff data start offset in the iov
+* @strip_vlan: should the module strip vlan from data
+* @vet: VLAN tag Ethernet type
+*
+*/
+void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
+ const struct iovec *iov, int iovcnt,
+ size_t iovoff, bool strip_vlan,
+ uint16_t vet);
+
+/**
* attach data to rx packet
*
* @pkt: packet
@@ -131,8 +259,17 @@ uint16_t net_rx_pkt_get_num_frags(struct NetRxPkt *pkt);
* @strip_vlan: should the module strip vlan from data
*
*/
-void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
- size_t len, bool strip_vlan);
+static inline void
+net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
+ size_t len, bool strip_vlan)
+{
+ const struct iovec iov = {
+ .iov_base = (void *) data,
+ .iov_len = len
+ };
+
+ net_rx_pkt_attach_iovec(pkt, &iov, 1, 0, strip_vlan);
+}
/**
* returns io vector that holds the attached data
@@ -144,6 +281,15 @@ void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data,
struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt);
/**
+* returns io vector length that holds the attached data
+*
+* @pkt: packet
+* @ret: IOVec length
+*
+*/
+uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt);
+
+/**
* prints rx packet data if debug is enabled
*
* @pkt: packet
@@ -162,6 +308,17 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
struct virtio_net_hdr *vhdr);
/**
+* copy passed vhdr data to packet context
+*
+* @pkt: packet
+* @iov: VHDR iov
+* @iovcnt: VHDR iov array size
+*
+*/
+void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
+ const struct iovec *iov, int iovcnt);
+
+/**
* save packet type in packet context
*
* @pkt: packet
@@ -171,4 +328,26 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
eth_pkt_types_e packet_type);
+/**
+* validate TCP/UDP checksum of the packet
+*
+* @pkt: packet
+* @csum_valid: checksum validation result
+* @ret: true if validation was performed, false in case packet is
+* not TCP/UDP or checksum validation is not possible
+*
+*/
+bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid);
+
+/**
+* validate IPv4 checksum of the packet
+*
+* @pkt: packet
+* @csum_valid: checksum validation result
+* @ret: true if validation was performed, false in case packet is
+* not TCP/UDP or checksum validation is not possible
+*
+*/
+bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid);
+
#endif
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index a2c1a76..606bca7 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -52,6 +52,8 @@ struct NetTxPkt {
uint16_t hdr_len;
eth_pkt_types_e packet_type;
uint8_t l4proto;
+
+ bool is_loopback;
};
void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags,
@@ -86,40 +88,55 @@ void net_tx_pkt_uninit(struct NetTxPkt *pkt)
}
}
-void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
+void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
uint16_t csum;
- uint32_t ph_raw_csum;
assert(pkt);
- uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
struct ip_header *ip_hdr;
-
- if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type &&
- VIRTIO_NET_HDR_GSO_UDP != gso_type) {
- return;
- }
-
ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
- if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
- ETH_MAX_IP_DGRAM_LEN) {
- return;
- }
-
ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
- /* Calculate IP header checksum */
ip_hdr->ip_sum = 0;
csum = net_raw_checksum((uint8_t *)ip_hdr,
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
ip_hdr->ip_sum = cpu_to_be16(csum);
+}
+
+void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
+{
+ uint16_t csum;
+ uint32_t cntr, cso;
+ assert(pkt);
+ uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+ void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
+
+ if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
+ ETH_MAX_IP_DGRAM_LEN) {
+ return;
+ }
+
+ if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
+ gso_type == VIRTIO_NET_HDR_GSO_UDP) {
+ /* Calculate IP header checksum */
+ net_tx_pkt_update_ip_hdr_checksum(pkt);
+
+ /* Calculate IP pseudo header checksum */
+ cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
+ csum = cpu_to_be16(~net_checksum_finish(cntr));
+ } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
+ /* Calculate IP pseudo header checksum */
+ cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
+ IP_PROTO_TCP, &cso);
+ csum = cpu_to_be16(~net_checksum_finish(cntr));
+ } else {
+ return;
+ }
- /* Calculate IP pseudo header checksum */
- ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len);
- csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum));
iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
+
}
static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
@@ -144,12 +161,15 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
ETH_MAX_L2_HDR_LEN);
if (bytes_read < ETH_MAX_L2_HDR_LEN) {
l2_hdr->iov_len = 0;
+ pkt->packet_type = ETH_PKT_UCAST;
return false;
} else {
- l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
+ l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
+ l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr, 1);
+ pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
}
- l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len);
+ l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);
switch (l3_proto) {
case ETH_P_IP:
@@ -164,6 +184,12 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
}
l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);
+
+ if (l3_hdr->iov_len < sizeof(struct ip_header)) {
+ l3_hdr->iov_len = 0;
+ return false;
+ }
+
pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p;
/* copy optional IPv4 header data */
@@ -178,12 +204,18 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
break;
case ETH_P_IPV6:
+ {
+ eth_ip6_hdr_info hdrinfo;
+
if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
- &pkt->l4proto, &full_ip6hdr_len)) {
+ &hdrinfo)) {
l3_hdr->iov_len = 0;
return false;
}
+ pkt->l4proto = hdrinfo.l4proto;
+ full_ip6hdr_len = hdrinfo.full_hdr_len;
+
l3_hdr->iov_base = g_malloc(full_ip6hdr_len);
bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
@@ -196,14 +228,13 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
l3_hdr->iov_len = full_ip6hdr_len;
}
break;
-
+ }
default:
l3_hdr->iov_len = 0;
break;
}
net_tx_pkt_calculate_hdr_len(pkt);
- pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
return true;
}
@@ -242,7 +273,7 @@ static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
uint16_t l3_proto;
- l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
+ l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);
if (!tso_enable) {
@@ -274,7 +305,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
break;
case VIRTIO_NET_HDR_GSO_UDP:
- pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
+ pkt->virt_hdr.gso_size = gso_size;
pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
break;
@@ -283,7 +314,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
0, &l4hdr, sizeof(l4hdr));
pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
- pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
+ pkt->virt_hdr.gso_size = gso_size;
break;
default:
@@ -308,13 +339,14 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
}
}
-void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan)
+void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
+ uint16_t vlan, uint16_t vlan_ethtype)
{
bool is_new;
assert(pkt);
- eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
- vlan, &is_new);
+ eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
+ vlan, vlan_ethtype, &is_new);
/* update l2hdrlen */
if (is_new) {
@@ -350,6 +382,11 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
return true;
}
+bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
+{
+ return pkt->raw_frags > 0;
+}
+
eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
{
assert(pkt);
@@ -417,6 +454,7 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
uint32_t csum_cntr;
uint16_t csum = 0;
+ uint32_t cso;
/* num of iovec without vhdr */
uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1;
uint16_t csl;
@@ -429,12 +467,13 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
/* Calculate L4 TCP/UDP checksum */
csl = pkt->payload_len;
- /* data checksum */
- csum_cntr =
- net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl);
/* add pseudo header to csum */
iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
- csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl);
+ csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso);
+
+ /* data checksum */
+ csum_cntr +=
+ net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);
/* Put the checksum obtained into the packet */
csum = cpu_to_be16(net_checksum_finish(csum_cntr));
@@ -457,7 +496,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
*dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM;
- while (fetched < pkt->virt_hdr.gso_size) {
+ while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) {
/* no more place in fragment iov */
if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
@@ -472,7 +511,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
- pkt->virt_hdr.gso_size - fetched);
+ IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size) - fetched);
*src_offset += dst[*dst_idx].iov_len;
fetched += dst[*dst_idx].iov_len;
@@ -488,6 +527,16 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
return fetched;
}
+static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt,
+ NetClientState *nc, const struct iovec *iov, int iov_cnt)
+{
+ if (pkt->is_loopback) {
+ nc->info->receive_iov(nc, iov, iov_cnt);
+ } else {
+ qemu_sendv_packet(nc, iov, iov_cnt);
+ }
+}
+
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
NetClientState *nc)
{
@@ -526,7 +575,7 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
- qemu_sendv_packet(nc, fragment, dst_idx);
+ net_tx_pkt_sendv(pkt, nc, fragment, dst_idx);
fragment_offset += fragment_len;
@@ -558,10 +607,21 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
if (pkt->has_virt_hdr ||
pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
- qemu_sendv_packet(nc, pkt->vec,
+ net_tx_pkt_sendv(pkt, nc, pkt->vec,
pkt->payload_frags + NET_TX_PKT_PL_START_FRAG);
return true;
}
return net_tx_pkt_do_sw_fragmentation(pkt, nc);
}
+
+bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc)
+{
+ bool res;
+
+ pkt->is_loopback = true;
+ res = net_tx_pkt_send(pkt, nc);
+ pkt->is_loopback = false;
+
+ return res;
+}
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index 73a67f8..cb5983e 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -66,13 +66,29 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
bool csum_enable, uint32_t gso_size);
/**
- * updates vlan tag, and adds vlan header in case it is missing
- *
- * @pkt: packet
- * @vlan: VLAN tag
- *
- */
-void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan);
+* updates vlan tag, and adds vlan header with custom ethernet type
+* in case it is missing.
+*
+* @pkt: packet
+* @vlan: VLAN tag
+* @vlan_ethtype: VLAN header Ethernet type
+*
+*/
+void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
+ uint16_t vlan, uint16_t vlan_ethtype);
+
+/**
+* updates vlan tag, and adds vlan header in case it is missing
+*
+* @pkt: packet
+* @vlan: VLAN tag
+*
+*/
+static inline void
+net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan)
+{
+ net_tx_pkt_setup_vlan_header_ex(pkt, vlan, ETH_P_VLAN);
+}
/**
* populate data fragment into pkt context.
@@ -86,7 +102,7 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
size_t len);
/**
- * fix ip header fields and calculate checksums needed.
+ * Fix ip header fields and calculate IP header and pseudo header checksums.
*
* @pkt: packet
*
@@ -94,6 +110,14 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt);
/**
+ * Calculate the IP header checksum.
+ *
+ * @pkt: packet
+ *
+ */
+void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt);
+
+/**
* get length of all populated data.
*
* @pkt: packet
@@ -138,6 +162,17 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt);
bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);
/**
+* Redirect packet directly to receive path (emulate loopback phy).
+* Handles sw offloads if vhdr is not supported.
+*
+* @pkt: packet
+* @nc: NetClientState
+* @ret: operation result
+*
+*/
+bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc);
+
+/**
* parse raw packet data and analyze offload requirements.
*
* @pkt: packet
@@ -145,4 +180,12 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);
*/
bool net_tx_pkt_parse(struct NetTxPkt *pkt);
+/**
+* indicates if there are data fragments held by this packet object.
+*
+* @pkt: packet
+*
+*/
+bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt);
+
#endif
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 77d226f..26ab3c0 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -47,10 +47,12 @@ net_raw_checksum(uint8_t *data, int length)
* @iov_cnt: number of array elements
* @iov_off: starting iov offset for checksumming
* @size: length of data to be checksummed
+ * @csum_offset: offset of the checksum chunk
*/
uint32_t net_checksum_add_iov(const struct iovec *iov,
const unsigned int iov_cnt,
- uint32_t iov_off, uint32_t size);
+ uint32_t iov_off, uint32_t size,
+ uint32_t csum_offset);
typedef struct toeplitz_key_st {
uint32_t leftmost_32_bits;
diff --git a/include/net/eth.h b/include/net/eth.h
index b3273b8..b7d45e0 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -68,6 +68,16 @@ typedef struct tcp_header {
uint16_t th_urp; /* urgent pointer */
} tcp_header;
+#define TCP_FLAGS_ONLY(flags) ((flags)&0x3f)
+
+#define TCP_HEADER_FLAGS(tcp) \
+ TCP_FLAGS_ONLY(be16_to_cpu((tcp)->th_offset_flags))
+
+#define TCP_FLAG_ACK 0x10
+
+#define TCP_HEADER_DATA_OFFSET(tcp) \
+ (((be16_to_cpu((tcp)->th_offset_flags) >> 12) & 0xf) << 2)
+
typedef struct udp_header {
uint16_t uh_sport; /* source port */
uint16_t uh_dport; /* destination port */
@@ -109,11 +119,34 @@ struct ip6_header {
struct in6_address ip6_dst; /* destination address */
};
+typedef struct ip6_pseudo_header {
+ struct in6_address ip6_src;
+ struct in6_address ip6_dst;
+ uint32_t len;
+ uint8_t zero[3];
+ uint8_t next_hdr;
+} ip6_pseudo_header;
+
struct ip6_ext_hdr {
uint8_t ip6r_nxt; /* next header */
uint8_t ip6r_len; /* length in units of 8 octets */
};
+struct ip6_ext_hdr_routing {
+ uint8_t nxt;
+ uint8_t len;
+ uint8_t rtype;
+ uint8_t segleft;
+ uint8_t rsvd[4];
+};
+
+struct ip6_option_hdr {
+#define IP6_OPT_PAD1 (0x00)
+#define IP6_OPT_HOME (0xC9)
+ uint8_t type;
+ uint8_t len;
+};
+
struct udp_hdr {
uint16_t uh_sport; /* source port */
uint16_t uh_dport; /* destination port */
@@ -162,18 +195,21 @@ struct tcp_hdr {
#define PKT_GET_IP_HDR(p) \
((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
#define IP_HDR_GET_LEN(p) \
- ((((struct ip_header *)p)->ip_ver_len & 0x0F) << 2)
+ ((((struct ip_header *)(p))->ip_ver_len & 0x0F) << 2)
#define PKT_GET_IP_HDR_LEN(p) \
(IP_HDR_GET_LEN(PKT_GET_IP_HDR(p)))
#define PKT_GET_IP6_HDR(p) \
((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
#define IP_HEADER_VERSION(ip) \
- ((ip->ip_ver_len >> 4)&0xf)
+ (((ip)->ip_ver_len >> 4)&0xf)
+#define IP4_IS_FRAGMENT(ip) \
+ ((be16_to_cpu((ip)->ip_off) & (IP_OFFMASK | IP_MF)) != 0)
#define ETH_P_IP (0x0800)
#define ETH_P_IPV6 (0x86dd)
#define ETH_P_VLAN (0x8100)
#define ETH_P_DVLAN (0x88a8)
+#define ETH_P_UNKNOWN (0xffff)
#define VLAN_VID_MASK 0x0fff
#define IP_HEADER_VERSION_4 (4)
#define IP_HEADER_VERSION_6 (6)
@@ -250,15 +286,25 @@ get_eth_packet_type(const struct eth_header *ehdr)
}
static inline uint32_t
-eth_get_l2_hdr_length(const void *p)
+eth_get_l2_hdr_length(const struct iovec *iov, int iovcnt)
{
- uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
- struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
+ uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)];
+ size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p));
+ uint16_t proto;
+ struct vlan_header *hvlan;
+
+ if (copied < ARRAY_SIZE(p)) {
+ return copied;
+ }
+
+ proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
+ hvlan = PKT_GET_VLAN_HDR(p);
+
switch (proto) {
case ETH_P_VLAN:
return sizeof(struct eth_header) + sizeof(struct vlan_header);
case ETH_P_DVLAN:
- if (hvlan->h_proto == ETH_P_VLAN) {
+ if (be16_to_cpu(hvlan->h_proto) == ETH_P_VLAN) {
return sizeof(struct eth_header) + 2 * sizeof(struct vlan_header);
} else {
return sizeof(struct eth_header) + sizeof(struct vlan_header);
@@ -282,51 +328,67 @@ eth_get_pkt_tci(const void *p)
}
}
-static inline bool
-eth_strip_vlan(const void *p, uint8_t *new_ehdr_buf,
- uint16_t *payload_offset, uint16_t *tci)
-{
- uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
- struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
- struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
+bool
+eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
+ uint8_t *new_ehdr_buf,
+ uint16_t *payload_offset, uint16_t *tci);
- switch (proto) {
- case ETH_P_VLAN:
- case ETH_P_DVLAN:
- memcpy(new_ehdr->h_source, PKT_GET_ETH_HDR(p)->h_source, ETH_ALEN);
- memcpy(new_ehdr->h_dest, PKT_GET_ETH_HDR(p)->h_dest, ETH_ALEN);
- new_ehdr->h_proto = hvlan->h_proto;
- *tci = be16_to_cpu(hvlan->h_tci);
- *payload_offset =
- sizeof(struct eth_header) + sizeof(struct vlan_header);
- if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
- memcpy(PKT_GET_VLAN_HDR(new_ehdr),
- PKT_GET_DVLAN_HDR(p),
- sizeof(struct vlan_header));
- *payload_offset += sizeof(struct vlan_header);
- }
- return true;
- default:
- return false;
- }
-}
+bool
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
+ uint16_t vet, uint8_t *new_ehdr_buf,
+ uint16_t *payload_offset, uint16_t *tci);
-static inline uint16_t
-eth_get_l3_proto(const void *l2hdr, size_t l2hdr_len)
+uint16_t
+eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len);
+
+void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
+ uint16_t vlan_ethtype, bool *is_new);
+
+static inline void
+eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
+ bool *is_new)
{
- uint8_t *proto_ptr = (uint8_t *) l2hdr + l2hdr_len - sizeof(uint16_t);
- return be16_to_cpup((uint16_t *)proto_ptr);
+ eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new);
}
-void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
- bool *is_new);
uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto);
-void eth_get_protocols(const uint8_t *headers,
- uint32_t hdr_length,
+typedef struct eth_ip6_hdr_info_st {
+ uint8_t l4proto;
+ size_t full_hdr_len;
+ struct ip6_header ip6_hdr;
+ bool has_ext_hdrs;
+ bool rss_ex_src_valid;
+ struct in6_address rss_ex_src;
+ bool rss_ex_dst_valid;
+ struct in6_address rss_ex_dst;
+ bool fragment;
+} eth_ip6_hdr_info;
+
+typedef struct eth_ip4_hdr_info_st {
+ struct ip_header ip4_hdr;
+ bool fragment;
+} eth_ip4_hdr_info;
+
+typedef struct eth_l4_hdr_info_st {
+ union {
+ struct tcp_header tcp;
+ struct udp_header udp;
+ } hdr;
+
+ bool has_tcp_data;
+} eth_l4_hdr_info;
+
+void eth_get_protocols(const struct iovec *iov, int iovcnt,
bool *isip4, bool *isip6,
- bool *isudp, bool *istcp);
+ bool *isudp, bool *istcp,
+ size_t *l3hdr_off,
+ size_t *l4hdr_off,
+ size_t *l5hdr_off,
+ eth_ip6_hdr_info *ip6hdr_info,
+ eth_ip4_hdr_info *ip4hdr_info,
+ eth_l4_hdr_info *l4hdr_info);
void eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
void *l3hdr, size_t l3hdr_len,
@@ -337,11 +399,18 @@ void
eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len);
uint32_t
-eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl);
+eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
+ uint16_t csl,
+ uint32_t *cso);
+
+uint32_t
+eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
+ uint16_t csl,
+ uint8_t l4_proto,
+ uint32_t *cso);
bool
-eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags,
- size_t ip6hdr_off, uint8_t *l4proto,
- size_t *full_hdr_len);
+eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
+ size_t ip6hdr_off, eth_ip6_hdr_info *info);
#endif
diff --git a/net/checksum.c b/net/checksum.c
index 14c0855..d710b6c 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -88,12 +88,11 @@ void net_checksum_calculate(uint8_t *data, int length)
uint32_t
net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
- uint32_t iov_off, uint32_t size)
+ uint32_t iov_off, uint32_t size, uint32_t csum_offset)
{
size_t iovec_off, buf_off;
unsigned int i;
uint32_t res = 0;
- uint32_t seq = 0;
iovec_off = 0;
buf_off = 0;
@@ -102,8 +101,8 @@ net_checksum_add_iov(const struct iovec *iov, const unsigned int iov_cnt,
size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size);
void *chunk_buf = iov[i].iov_base + (iov_off - iovec_off);
- res += net_checksum_add_cont(len, chunk_buf, seq);
- seq += len;
+ res += net_checksum_add_cont(len, chunk_buf, csum_offset);
+ csum_offset += len;
buf_off += len;
iov_off += len;
diff --git a/net/eth.c b/net/eth.c
index 7c61132..596d728 100644
--- a/net/eth.c
+++ b/net/eth.c
@@ -20,8 +20,8 @@
#include "qemu-common.h"
#include "net/tap.h"
-void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
- bool *is_new)
+void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
+ uint16_t vlan_ethtype, bool *is_new)
{
struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
@@ -35,7 +35,7 @@ void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
default:
/* No VLAN header, put a new one */
vhdr->h_proto = ehdr->h_proto;
- ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
+ ehdr->h_proto = cpu_to_be16(vlan_ethtype);
*is_new = true;
break;
}
@@ -78,26 +78,100 @@ eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto)
return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
}
-void eth_get_protocols(const uint8_t *headers,
- uint32_t hdr_length,
+uint16_t
+eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len)
+{
+ uint16_t proto;
+ size_t copied;
+ size_t size = iov_size(l2hdr_iov, iovcnt);
+ size_t proto_offset = l2hdr_len - sizeof(proto);
+
+ if (size < proto_offset) {
+ return ETH_P_UNKNOWN;
+ }
+
+ copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset,
+ &proto, sizeof(proto));
+
+ return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN;
+}
+
+static bool
+_eth_copy_chunk(size_t input_size,
+ const struct iovec *iov, int iovcnt,
+ size_t offset, size_t length,
+ void *buffer)
+{
+ size_t copied;
+
+ if (input_size < offset) {
+ return false;
+ }
+
+ copied = iov_to_buf(iov, iovcnt, offset, buffer, length);
+
+ if (copied < length) {
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+_eth_tcp_has_data(bool is_ip4,
+ const struct ip_header *ip4_hdr,
+ const struct ip6_header *ip6_hdr,
+ size_t full_ip6hdr_len,
+ const struct tcp_header *tcp)
+{
+ uint32_t l4len;
+
+ if (is_ip4) {
+ l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr);
+ } else {
+ size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
+ l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len;
+ }
+
+ return l4len > TCP_HEADER_DATA_OFFSET(tcp);
+}
+
+void eth_get_protocols(const struct iovec *iov, int iovcnt,
bool *isip4, bool *isip6,
- bool *isudp, bool *istcp)
+ bool *isudp, bool *istcp,
+ size_t *l3hdr_off,
+ size_t *l4hdr_off,
+ size_t *l5hdr_off,
+ eth_ip6_hdr_info *ip6hdr_info,
+ eth_ip4_hdr_info *ip4hdr_info,
+ eth_l4_hdr_info *l4hdr_info)
{
int proto;
- size_t l2hdr_len = eth_get_l2_hdr_length(headers);
- assert(hdr_length >= eth_get_l2_hdr_length(headers));
+ bool fragment = false;
+ size_t l2hdr_len = eth_get_l2_hdr_length(iov, iovcnt);
+ size_t input_size = iov_size(iov, iovcnt);
+ size_t copied;
+
*isip4 = *isip6 = *isudp = *istcp = false;
- proto = eth_get_l3_proto(headers, l2hdr_len);
+ proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
+
+ *l3hdr_off = l2hdr_len;
+
if (proto == ETH_P_IP) {
- *isip4 = true;
+ struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
- struct ip_header *iphdr;
+ if (input_size < l2hdr_len) {
+ return;
+ }
+
+ copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
- assert(hdr_length >=
- eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
+ *isip4 = true;
- iphdr = PKT_GET_IP_HDR(headers);
+ if (copied < sizeof(*iphdr)) {
+ return;
+ }
if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) {
if (iphdr->ip_p == IP_PROTO_TCP) {
@@ -106,24 +180,135 @@ void eth_get_protocols(const uint8_t *headers,
*isudp = true;
}
}
- } else if (proto == ETH_P_IPV6) {
- uint8_t l4proto;
- size_t full_ip6hdr_len;
- struct iovec hdr_vec;
- hdr_vec.iov_base = (void *) headers;
- hdr_vec.iov_len = hdr_length;
+ ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
+ *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
+
+ fragment = ip4hdr_info->fragment;
+ } else if (proto == ETH_P_IPV6) {
*isip6 = true;
- if (eth_parse_ipv6_hdr(&hdr_vec, 1, l2hdr_len,
- &l4proto, &full_ip6hdr_len)) {
- if (l4proto == IP_PROTO_TCP) {
+ if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len,
+ ip6hdr_info)) {
+ if (ip6hdr_info->l4proto == IP_PROTO_TCP) {
*istcp = true;
- } else if (l4proto == IP_PROTO_UDP) {
+ } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) {
*isudp = true;
}
+ } else {
+ return;
+ }
+
+ *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
+ fragment = ip6hdr_info->fragment;
+ }
+
+ if (!fragment) {
+ if (*istcp) {
+ *istcp = _eth_copy_chunk(input_size,
+ iov, iovcnt,
+ *l4hdr_off, sizeof(l4hdr_info->hdr.tcp),
+ &l4hdr_info->hdr.tcp);
+
+ if (*istcp) {
+ *l5hdr_off = *l4hdr_off +
+ TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp);
+
+ l4hdr_info->has_tcp_data =
+ _eth_tcp_has_data(proto == ETH_P_IP,
+ &ip4hdr_info->ip4_hdr,
+ &ip6hdr_info->ip6_hdr,
+ *l4hdr_off - *l3hdr_off,
+ &l4hdr_info->hdr.tcp);
+ }
+ } else if (*isudp) {
+ *isudp = _eth_copy_chunk(input_size,
+ iov, iovcnt,
+ *l4hdr_off, sizeof(l4hdr_info->hdr.udp),
+ &l4hdr_info->hdr.udp);
+ *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
+ }
+ }
+}
+
+bool
+eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
+ uint8_t *new_ehdr_buf,
+ uint16_t *payload_offset, uint16_t *tci)
+{
+ struct vlan_header vlan_hdr;
+ struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
+
+ size_t copied = iov_to_buf(iov, iovcnt, iovoff,
+ new_ehdr, sizeof(*new_ehdr));
+
+ if (copied < sizeof(*new_ehdr)) {
+ return false;
+ }
+
+ switch (be16_to_cpu(new_ehdr->h_proto)) {
+ case ETH_P_VLAN:
+ case ETH_P_DVLAN:
+ copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
+ &vlan_hdr, sizeof(vlan_hdr));
+
+ if (copied < sizeof(vlan_hdr)) {
+ return false;
+ }
+
+ new_ehdr->h_proto = vlan_hdr.h_proto;
+
+ *tci = be16_to_cpu(vlan_hdr.h_tci);
+ *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
+
+ if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) {
+
+ copied = iov_to_buf(iov, iovcnt, *payload_offset,
+ PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
+
+ if (copied < sizeof(vlan_hdr)) {
+ return false;
+ }
+
+ *payload_offset += sizeof(vlan_hdr);
+ }
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
+ uint16_t vet, uint8_t *new_ehdr_buf,
+ uint16_t *payload_offset, uint16_t *tci)
+{
+ struct vlan_header vlan_hdr;
+ struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
+
+ size_t copied = iov_to_buf(iov, iovcnt, iovoff,
+ new_ehdr, sizeof(*new_ehdr));
+
+ if (copied < sizeof(*new_ehdr)) {
+ return false;
+ }
+
+ if (be16_to_cpu(new_ehdr->h_proto) == vet) {
+ copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
+ &vlan_hdr, sizeof(vlan_hdr));
+
+ if (copied < sizeof(vlan_hdr)) {
+ return false;
}
+
+ new_ehdr->h_proto = vlan_hdr.h_proto;
+
+ *tci = be16_to_cpu(vlan_hdr.h_tci);
+ *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
+ return true;
}
+
+ return false;
}
void
@@ -132,7 +317,12 @@ eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len,
size_t l3payload_len,
size_t frag_offset, bool more_frags)
{
- if (eth_get_l3_proto(l2hdr, l2hdr_len) == ETH_P_IP) {
+ const struct iovec l2vec = {
+ .iov_base = (void *) l2hdr,
+ .iov_len = l2hdr_len
+ };
+
+ if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) {
uint16_t orig_flags;
struct ip_header *iphdr = (struct ip_header *) l3hdr;
uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE;
@@ -157,7 +347,9 @@ eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len)
}
uint32_t
-eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
+eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr,
+ uint16_t csl,
+ uint32_t *cso)
{
struct ip_pseudo_header ipph;
ipph.ip_src = iphdr->ip_src;
@@ -165,7 +357,26 @@ eth_calc_pseudo_hdr_csum(struct ip_header *iphdr, uint16_t csl)
ipph.ip_payload = cpu_to_be16(csl);
ipph.ip_proto = iphdr->ip_p;
ipph.zeros = 0;
- return net_checksum_add(sizeof(ipph), (uint8_t *) &ipph);
+ *cso = sizeof(ipph);
+ return net_checksum_add(*cso, (uint8_t *) &ipph);
+}
+
+uint32_t
+eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr,
+ uint16_t csl,
+ uint8_t l4_proto,
+ uint32_t *cso)
+{
+ struct ip6_pseudo_header ipph;
+ ipph.ip6_src = iphdr->ip6_src;
+ ipph.ip6_dst = iphdr->ip6_dst;
+ ipph.len = cpu_to_be16(csl);
+ ipph.zero[0] = 0;
+ ipph.zero[1] = 0;
+ ipph.zero[2] = 0;
+ ipph.next_hdr = l4_proto;
+ *cso = sizeof(ipph);
+ return net_checksum_add(*cso, (uint8_t *)&ipph);
}
static bool
@@ -185,33 +396,152 @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
}
}
-bool eth_parse_ipv6_hdr(struct iovec *pkt, int pkt_frags,
- size_t ip6hdr_off, uint8_t *l4proto,
- size_t *full_hdr_len)
+static bool
+_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
+ size_t rthdr_offset,
+ struct ip6_ext_hdr *ext_hdr,
+ struct in6_address *dst_addr)
+{
+ struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
+
+ if ((rthdr->rtype == 2) &&
+ (rthdr->len == sizeof(struct in6_address) / 8) &&
+ (rthdr->segleft == 1)) {
+
+ size_t input_size = iov_size(pkt, pkt_frags);
+ size_t bytes_read;
+
+ if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
+ return false;
+ }
+
+ bytes_read = iov_to_buf(pkt, pkt_frags,
+ rthdr_offset + sizeof(*ext_hdr),
+ dst_addr, sizeof(*dst_addr));
+
+ return bytes_read == sizeof(*dst_addr);
+ }
+
+ return false;
+}
+
+static bool
+_eth_get_rss_ex_src_addr(const struct iovec *pkt, int pkt_frags,
+ size_t dsthdr_offset,
+ struct ip6_ext_hdr *ext_hdr,
+ struct in6_address *src_addr)
+{
+ size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr);
+ struct ip6_option_hdr opthdr;
+ size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr);
+
+ while (bytes_left > sizeof(opthdr)) {
+ size_t input_size = iov_size(pkt, pkt_frags);
+ size_t bytes_read, optlen;
+
+ if (input_size < opt_offset) {
+ return false;
+ }
+
+ bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset,
+ &opthdr, sizeof(opthdr));
+
+ if (bytes_read != sizeof(opthdr)) {
+ return false;
+ }
+
+ optlen = (opthdr.type == IP6_OPT_PAD1) ? 1
+ : (opthdr.len + sizeof(opthdr));
+
+ if (optlen > bytes_left) {
+ return false;
+ }
+
+ if (opthdr.type == IP6_OPT_HOME) {
+ size_t input_size = iov_size(pkt, pkt_frags);
+
+ if (input_size < opt_offset + sizeof(opthdr)) {
+ return false;
+ }
+
+ bytes_read = iov_to_buf(pkt, pkt_frags,
+ opt_offset + sizeof(opthdr),
+ src_addr, sizeof(*src_addr));
+
+ return bytes_read == sizeof(*src_addr);
+ }
+
+ opt_offset += optlen;
+ bytes_left -= optlen;
+ }
+
+ return false;
+}
+
+bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
+ size_t ip6hdr_off, eth_ip6_hdr_info *info)
{
- struct ip6_header ip6_hdr;
struct ip6_ext_hdr ext_hdr;
size_t bytes_read;
+ uint8_t curr_ext_hdr_type;
+ size_t input_size = iov_size(pkt, pkt_frags);
+
+ info->rss_ex_dst_valid = false;
+ info->rss_ex_src_valid = false;
+ info->fragment = false;
+
+ if (input_size < ip6hdr_off) {
+ return false;
+ }
bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off,
- &ip6_hdr, sizeof(ip6_hdr));
- if (bytes_read < sizeof(ip6_hdr)) {
+ &info->ip6_hdr, sizeof(info->ip6_hdr));
+ if (bytes_read < sizeof(info->ip6_hdr)) {
return false;
}
- *full_hdr_len = sizeof(struct ip6_header);
+ info->full_hdr_len = sizeof(struct ip6_header);
+
+ curr_ext_hdr_type = info->ip6_hdr.ip6_nxt;
- if (!eth_is_ip6_extension_header_type(ip6_hdr.ip6_nxt)) {
- *l4proto = ip6_hdr.ip6_nxt;
+ if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) {
+ info->l4proto = info->ip6_hdr.ip6_nxt;
+ info->has_ext_hdrs = false;
return true;
}
+ info->has_ext_hdrs = true;
+
do {
- bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + *full_hdr_len,
+ if (input_size < ip6hdr_off + info->full_hdr_len) {
+ return false;
+ }
+
+ bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len,
&ext_hdr, sizeof(ext_hdr));
- *full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
- } while (eth_is_ip6_extension_header_type(ext_hdr.ip6r_nxt));
- *l4proto = ext_hdr.ip6r_nxt;
+ if (bytes_read < sizeof(ext_hdr)) {
+ return false;
+ }
+
+ if (curr_ext_hdr_type == IP6_ROUTING) {
+ info->rss_ex_dst_valid =
+ _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
+ ip6hdr_off + info->full_hdr_len,
+ &ext_hdr, &info->rss_ex_dst);
+ } else if (curr_ext_hdr_type == IP6_DESTINATON) {
+ info->rss_ex_src_valid =
+ _eth_get_rss_ex_src_addr(pkt, pkt_frags,
+ ip6hdr_off + info->full_hdr_len,
+ &ext_hdr, &info->rss_ex_src);
+ } else if (curr_ext_hdr_type == IP6_FRAGMENT) {
+ info->fragment = true;
+ }
+
+ info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY;
+ curr_ext_hdr_type = ext_hdr.ip6r_nxt;
+ } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type));
+
+ info->l4proto = ext_hdr.ip6r_nxt;
return true;
}
diff --git a/trace-events b/trace-events
index 30eba92..a0844a3 100644
--- a/trace-events
+++ b/trace-events
@@ -1590,3 +1590,28 @@ i8257_unregistered_dma(int nchan, int dma_pos, int dma_len) "unregistered DMA ch
cpu_set_state(int cpu_index, uint8_t state) "setting cpu %d state to %" PRIu8
cpu_halt(int cpu_index) "halting cpu %d"
cpu_unhalt(int cpu_index) "unhalting cpu %d"
+
+# hw/net/net_rx_pkt.c
+net_rx_pkt_parsed(bool ip4, bool ip6, bool udp, bool tcp, size_t l3o, size_t l4o, size_t l5o) "RX packet parsed: ip4: %d, ip6: %d, udp: %d, tcp: %d, l3 offset: %zu, l4 offset: %zu, l5 offset: %zu"
+net_rx_pkt_l4_csum_validate_entry(void) "Starting L4 checksum validation"
+net_rx_pkt_l4_csum_validate_not_xxp(void) "Not a TCP/UDP packet"
+net_rx_pkt_l4_csum_validate_udp_with_no_checksum(void) "UDP packet without checksum"
+net_rx_pkt_l4_csum_validate_ip4_fragment(void) "IP4 fragment"
+net_rx_pkt_l4_csum_validate_ip4_udp(void) "IP4/UDP packet"
+net_rx_pkt_l4_csum_validate_ip4_tcp(void) "IP4/TCP packet"
+net_rx_pkt_l4_csum_validate_ip6_udp(void) "IP6/UDP packet"
+net_rx_pkt_l4_csum_validate_ip6_tcp(void) "IP6/TCP packet"
+net_rx_pkt_l4_csum_validate_ph_csum(uint32_t cntr, uint16_t csl) "Pseudo-header: checksum counter %u, length %u"
+net_rx_pkt_l4_csum_validate_csum(size_t l4hdr_off, uint16_t csl, uint32_t cntr, uint16_t csum, bool csum_valid) "L4 Checksum: L4 header offset: %zu, length: %u, counter: %u, final checksum: %u, valid: %d"
+
+net_rx_pkt_l3_csum_validate_entry(void) "Starting L3 checksum validation"
+net_rx_pkt_l3_csum_validate_not_ip4(void) "Not an IP4 packet"
+net_rx_pkt_l3_csum_validate_csum(size_t l3hdr_off, uint32_t csl, uint32_t cntr, uint16_t csum, bool csum_valid) "L3 Checksum: L3 header offset: %zu, length: %u, counter: %u, final checksum: %u, valid: %d"
+
+net_rx_pkt_rss_ip4(void) "Calculating IPv4 RSS hash"
+net_rx_pkt_rss_ip4_tcp(void) "Calculating IPv4/TCP RSS hash"
+net_rx_pkt_rss_ip6_tcp(void) "Calculating IPv6/TCP RSS hash"
+net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash"
+net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
+net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %zu bytes: 0x%X"
+net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %zu bytes, RSS input offset %zu bytes"
--
2.4.3
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [Qemu-devel] [RFC PATCH v2 10/10] net: Introduce e1000e device emulation
2016-01-18 17:35 [Qemu-devel] [RFC PATCH v2 00/10] Introduce Intel 82574 GbE Controller Emulation (e1000e) Leonid Bloch
` (8 preceding siblings ...)
2016-01-18 17:35 ` [Qemu-devel] [RFC PATCH v2 09/10] e1000_regs: Add definitions for Intel 82574-specific bits Leonid Bloch
@ 2016-01-18 17:35 ` Leonid Bloch
2016-01-19 3:48 ` [Qemu-devel] [RFC PATCH v2 00/10] Introduce Intel 82574 GbE Controller Emulation (e1000e) Jason Wang
10 siblings, 0 replies; 18+ messages in thread
From: Leonid Bloch @ 2016-01-18 17:35 UTC (permalink / raw)
To: qemu-devel
Cc: Dmitry Fleytman, Jason Wang, Leonid Bloch, Shmulik Ladkani,
Michael S. Tsirkin
From: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
This patch introduces emulation for the Intel 82574 adapter, AKA e1000e.
This implementation is based on the e1000 emulation code, and
utilizes the TX/RX packet abstractions initially developed for the
vmxnet3 device. Although some parts of the introduced code are
common with the e1000, the differences are substantial enough so
that the only shared resources for the two devices are the
definitions in hw/net/e1000_regs.h.
Similarly to vmxnet3, the new device uses virtio headers for task
offloads (for backends that support virtio extensions). Usage of
virtio headers may be forcibly disabled via a boolean device property
"vnet" (which is enabled by default). In such a case, task offloads
will be performed in software, in the same way as on backends
that do not support virtio headers.
The device code is split into two parts:
1. hw/net/e1000e.c: QEMU-specific code for a network device;
2. hw/net/e1000e_core.[hc]: Device emulation according to the spec.
The new device name is e1000e.
Intel specification for 82574 controller is available at
http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
Signed-off-by: Dmitry Fleytman <dmitry.fleytman@ravellosystems.com>
Signed-off-by: Leonid Bloch <leonid.bloch@ravellosystems.com>
---
MAINTAINERS | 6 +
default-configs/pci.mak | 1 +
hw/net/Makefile.objs | 3 +-
hw/net/e1000e.c | 700 ++++++++++
hw/net/e1000e_core.c | 3453 +++++++++++++++++++++++++++++++++++++++++++++++
hw/net/e1000e_core.h | 230 ++++
trace-events | 170 +++
7 files changed, 4562 insertions(+), 1 deletion(-)
create mode 100644 hw/net/e1000e.c
create mode 100644 hw/net/e1000e_core.c
create mode 100644 hw/net/e1000e_core.h
diff --git a/MAINTAINERS b/MAINTAINERS
index cf93a45..c329fad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -762,6 +762,12 @@ S: Maintained
F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi*
+e1000e
+M: Dmitry Fleytman <dmitry@daynix.com>
+S: Maintained
+F: hw/net/e1000e*
+F: docs/specs/e1000e-spec.txt
+
Subsystems
----------
Audio
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 58a2c0a..5fd4fcd 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -17,6 +17,7 @@ CONFIG_VMW_PVSCSI_SCSI_PCI=y
CONFIG_MEGASAS_SCSI_PCI=y
CONFIG_RTL8139_PCI=y
CONFIG_E1000_PCI=y
+CONFIG_E1000E_PCI=y
CONFIG_VMXNET3_PCI=y
CONFIG_IDE_CORE=y
CONFIG_IDE_QDEV=y
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index 34039fc..67d8efe 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -6,7 +6,8 @@ common-obj-$(CONFIG_NE2000_PCI) += ne2000.o
common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o
common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
-common-obj-$(CONFIG_E1000_PCI) += e1000.o
+common-obj-$(CONFIG_E1000_PCI) += e1000.o
+common-obj-$(CONFIG_E1000E_PCI) += e1000e.o e1000e_core.o
common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o
common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
new file mode 100644
index 0000000..58114e7
--- /dev/null
+++ b/hw/net/e1000e.c
@@ -0,0 +1,700 @@
+/*
+* QEMU INTEL 82574 GbE NIC emulation
+*
+* Software developer's manuals:
+* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
+*
+* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
+* Developed by Daynix Computing LTD (http://www.daynix.com)
+*
+* Authors:
+* Dmitry Fleytman <dmitry@daynix.com>
+* Leonid Bloch <leonid@daynix.com>
+* Yan Vugenfirer <yan@daynix.com>
+*
+* Based on work done by:
+* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
+* Copyright (c) 2008 Qumranet
+* Based on work done by:
+* Copyright (c) 2007 Dan Aloni
+* Copyright (c) 2004 Antony T Curtis
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "net/net.h"
+#include "net/tap.h"
+#include "sysemu/sysemu.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+
+#include "hw/net/e1000_regs.h"
+
+#include "e1000e_core.h"
+
+#include "trace.h"
+
+#define TYPE_E1000E "e1000e"
+#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E)
+
+typedef struct {
+ PCIDevice parent_obj;
+ NICState *nic;
+ NICConf conf;
+
+ MemoryRegion mmio;
+ MemoryRegion flash;
+ MemoryRegion io;
+ MemoryRegion msix;
+
+ uint32_t ioaddr;
+
+ uint16_t subsys_ven;
+ uint16_t subsys;
+
+ uint16_t subsys_ven_used;
+ uint16_t subsys_used;
+
+ uint32_t intr_state;
+ bool use_vnet;
+
+ E1000ECore core;
+
+} E1000EState;
+
+#define E1000E_MMIO_IDX 0
+#define E1000E_FLASH_IDX 1
+#define E1000E_IO_IDX 2
+#define E1000E_MSIX_IDX 3
+
+#define E1000E_MMIO_SIZE (128*1024)
+#define E1000E_FLASH_SIZE (128*1024)
+#define E1000E_IO_SIZE (32)
+#define E1000E_MSIX_SIZE (16*1024)
+
+#define E1000E_MSIX_TABLE (0x0000)
+#define E1000E_MSIX_PBA (0x2000)
+
+#define E1000E_USE_MSI BIT(0)
+#define E1000E_USE_MSIX BIT(1)
+
+static uint64_t
+e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+ E1000EState *s = opaque;
+ return e1000e_core_read(&s->core, addr, size);
+}
+
+static void
+e1000e_mmio_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ E1000EState *s = opaque;
+ e1000e_core_write(&s->core, addr, val, size);
+}
+
+static uint64_t
+e1000e_flash_read(void *opaque, hwaddr addr, unsigned size)
+{
+ trace_e1000e_wrn_flash_read(addr);
+ return 0;
+}
+
+static void
+e1000e_flash_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ trace_e1000e_wrn_flash_write(addr, val);
+}
+
+static bool
+_e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx)
+{
+ if (s->ioaddr < 0x1FFFF) {
+ *idx = s->ioaddr;
+ return true;
+ }
+
+ if (s->ioaddr < 0x7FFFF) {
+ trace_e1000e_wrn_io_addr_undefined(s->ioaddr);
+ return false;
+ }
+
+ if (s->ioaddr < 0xFFFFF) {
+ trace_e1000e_wrn_io_addr_flash(s->ioaddr);
+ return false;
+ }
+
+ trace_e1000e_wrn_io_addr_unknown(s->ioaddr);
+ return false;
+}
+
+static uint64_t
+e1000e_io_read(void *opaque, hwaddr addr, unsigned size)
+{
+ E1000EState *s = opaque;
+ uint32_t idx;
+ uint64_t val;
+
+ switch (addr) {
+ case E1000_IOADDR:
+ trace_e1000e_io_read_addr(s->ioaddr);
+ return s->ioaddr;
+ case E1000_IODATA:
+ if (_e1000e_io_get_reg_index(s, &idx)) {
+ val = e1000e_core_read(&s->core, idx, sizeof(val));
+ trace_e1000e_io_read_data(idx, val);
+ return val;
+ }
+ return 0;
+ default:
+ trace_e1000e_wrn_io_read_unknown(addr);
+ return 0;
+ }
+}
+
+static void
+e1000e_io_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ E1000EState *s = opaque;
+ uint32_t idx;
+
+ switch (addr) {
+ case E1000_IOADDR:
+ trace_e1000e_io_write_addr(val);
+ s->ioaddr = (uint32_t) val;
+ return;
+ case E1000_IODATA:
+ if (_e1000e_io_get_reg_index(s, &idx)) {
+ trace_e1000e_io_write_data(idx, val);
+ e1000e_core_write(&s->core, idx, val, sizeof(val));
+ }
+ return;
+ default:
+ trace_e1000e_wrn_io_write_unknown(addr);
+ return;
+ }
+}
+
+static const MemoryRegionOps mmio_ops = {
+ .read = e1000e_mmio_read,
+ .write = e1000e_mmio_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static const MemoryRegionOps flash_ops = {
+ .read = e1000e_flash_read,
+ .write = e1000e_flash_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static const MemoryRegionOps io_ops = {
+ .read = e1000e_io_read,
+ .write = e1000e_io_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static int
+_e1000e_can_receive(NetClientState *nc)
+{
+ E1000EState *s = qemu_get_nic_opaque(nc);
+ return e1000e_can_receive(&s->core);
+}
+
+static ssize_t
+_e1000e_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
+{
+ E1000EState *s = qemu_get_nic_opaque(nc);
+ return e1000e_receive_iov(&s->core, iov, iovcnt);
+}
+
+static ssize_t
+_e1000e_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+{
+ E1000EState *s = qemu_get_nic_opaque(nc);
+ return e1000e_receive(&s->core, buf, size);
+}
+
+static void
+e1000e_set_link_status(NetClientState *nc)
+{
+ E1000EState *s = qemu_get_nic_opaque(nc);
+ e1000e_core_set_link_status(&s->core);
+}
+
+static NetClientInfo net_e1000e_info = {
+ .type = NET_CLIENT_OPTIONS_KIND_NIC,
+ .size = sizeof(NICState),
+ .can_receive = _e1000e_can_receive,
+ .receive = _e1000e_receive,
+ .receive_iov = _e1000e_receive_iov,
+ .link_status_changed = e1000e_set_link_status,
+};
+
+/*
+* EEPROM (NVM) contents documented in Table 36, section 6.1.
+*/
+static const uint16_t e1000e_eeprom_template[64] = {
+ /* Address | Compat. | ImVer | Compat. */
+ 0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff,
+ /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */
+ 0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058,
+ /* NVM words 1,2,3 |-------------------------------|PCI-EID*/
+ 0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704,
+ /* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */
+ 0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706,
+ /* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/
+ 0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff,
+ /* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */
+ 0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff,
+ /* SW Section */
+ 0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff,
+ /* SW Section |CHKSUM */
+ 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000,
+};
+
+static void _e1000e_core_reinitialize(E1000EState *s)
+{
+ s->core.owner = &s->parent_obj;
+ s->core.owner_nic = s->nic;
+}
+
+static void
+_e1000e_init_msi(E1000EState *s)
+{
+ int res;
+
+ res = msi_init(PCI_DEVICE(s),
+ 0xD0, /* MSI capability offset */
+ 1, /* MAC MSI interrupts */
+ true, /* 64-bit message addresses supported */
+ false); /* Per vector mask supported */
+
+ if (res > 0) {
+ s->intr_state |= E1000E_USE_MSI;
+ } else {
+ trace_e1000e_msi_init_fail(res);
+ }
+}
+
+static void
+_e1000e_cleanup_msi(E1000EState *s)
+{
+ if (s->intr_state & E1000E_USE_MSI) {
+ msi_uninit(PCI_DEVICE(s));
+ }
+}
+
+static void
+_e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors)
+{
+ int i;
+ for (i = 0; i < num_vectors; i++) {
+ msix_vector_unuse(PCI_DEVICE(s), i);
+ }
+}
+
+static bool
+_e1000e_use_msix_vectors(E1000EState *s, int num_vectors)
+{
+ int i;
+ for (i = 0; i < num_vectors; i++) {
+ int res = msix_vector_use(PCI_DEVICE(s), i);
+ if (res < 0) {
+ trace_e1000e_msix_use_vector_fail(i, res);
+ _e1000e_unuse_msix_vectors(s, i);
+ return false;
+ }
+ }
+ return true;
+}
+
+static void
+_e1000e_init_msix(E1000EState *s)
+{
+ PCIDevice *d = PCI_DEVICE(s);
+ int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM,
+ &s->msix,
+ E1000E_MSIX_IDX, E1000E_MSIX_TABLE,
+ &s->msix,
+ E1000E_MSIX_IDX, E1000E_MSIX_PBA,
+ 0xA0);
+
+ if (0 > res) {
+ trace_e1000e_msix_init_fail(res);
+ } else {
+ if (!_e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) {
+ msix_uninit(d, &s->msix, &s->msix);
+ } else {
+ s->intr_state |= E1000E_USE_MSIX;
+ }
+ }
+}
+
+static void
+_e1000e_cleanup_msix(E1000EState *s)
+{
+ if (s->intr_state & E1000E_USE_MSIX) {
+ _e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM);
+ msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix);
+ }
+}
+
+static void
+_e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr)
+{
+ DeviceState *dev = DEVICE(pci_dev);
+ NetClientState *nc;
+ int i;
+
+ s->nic = qemu_new_nic(&net_e1000e_info, &s->conf,
+ object_get_typename(OBJECT(s)), dev->id, s);
+
+ if (s->conf.peers.queues != E1000E_NUM_QUEUES) {
+ fprintf(stderr,
+ "WARNING: e1000e: Device requires %d network backend "
+ "queues for optimal performance. Current number of "
+ "queues is %d.\n", E1000E_NUM_QUEUES, s->conf.peers.queues);
+ }
+
+ s->core.max_queue_num = s->conf.peers.queues - 1;
+
+ trace_e1000e_mac_set_permanent(MAC_ARG(macaddr));
+ memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac));
+
+ qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr);
+
+ /* Setup virtio headers */
+ if (s->use_vnet) {
+ s->core.has_vnet = true;
+ } else {
+ s->core.has_vnet = false;
+ return;
+ }
+
+ for (i = 0; i < s->conf.peers.queues; i++) {
+ nc = qemu_get_subqueue(s->nic, i);
+ if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) {
+ s->core.has_vnet = false;
+ trace_e1000e_cfg_support_virtio(false);
+ return;
+ }
+ }
+
+ trace_e1000e_cfg_support_virtio(true);
+
+ for (i = 0; i < s->conf.peers.queues; i++) {
+ nc = qemu_get_subqueue(s->nic, i);
+ qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr));
+ qemu_using_vnet_hdr(nc->peer, true);
+ }
+}
+
+static inline uint64_t
+_e1000e_gen_dsn(uint8_t *mac)
+{
+ return (uint64_t)(mac[5]) |
+ (uint64_t)(mac[4]) << 8 |
+ (uint64_t)(mac[3]) << 16 |
+ (uint64_t)(0x00FF) << 24 |
+ (uint64_t)(0x00FF) << 32 |
+ (uint64_t)(mac[2]) << 40 |
+ (uint64_t)(mac[1]) << 48 |
+ (uint64_t)(mac[0]) << 56;
+}
+
+static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp)
+{
+ static const uint16_t E1000E_PMRB_OFFSET = 0x0C8;
+ static const uint16_t E1000E_PCIE_OFFSET = 0x0E0;
+ static const uint16_t E1000E_AER_OFFSET = 0x100;
+ static const uint16_t E1000E_DSN_OFFSET = 0x140;
+
+ E1000EState *s = E1000E(pci_dev);
+ uint8_t *macaddr;
+
+ trace_e1000e_cb_pci_realize();
+
+ pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
+ pci_dev->config[PCI_INTERRUPT_PIN] = 1;
+
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven);
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys);
+
+ s->subsys_ven_used = s->subsys_ven;
+ s->subsys_used = s->subsys;
+
+ /* Define IO/MMIO regions */
+ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s,
+ "e1000e-mmio", E1000E_MMIO_SIZE);
+ pci_register_bar(pci_dev, E1000E_MMIO_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
+
+ memory_region_init_io(&s->flash, OBJECT(s), &flash_ops, s,
+ "e1000e-flash", E1000E_FLASH_SIZE);
+ pci_register_bar(pci_dev, E1000E_FLASH_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash);
+
+ memory_region_init_io(&s->io, OBJECT(s), &io_ops, s,
+ "e1000e-io", E1000E_IO_SIZE);
+ pci_register_bar(pci_dev, E1000E_IO_IDX,
+ PCI_BASE_ADDRESS_SPACE_IO, &s->io);
+
+ memory_region_init(&s->msix, OBJECT(s), "e1000e-msix",
+ E1000E_MSIX_SIZE);
+ pci_register_bar(pci_dev, E1000E_MSIX_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix);
+
+ /* Create networking backend */
+ qemu_macaddr_default_if_unset(&s->conf.macaddr);
+ macaddr = s->conf.macaddr.a;
+
+ _e1000e_init_msix(s);
+
+ if (pcie_endpoint_cap_v1_init(pci_dev, E1000E_PCIE_OFFSET) < 0) {
+ hw_error("Failed to initialize PCIe capability");
+ }
+
+ _e1000e_init_msi(s);
+
+ if (pci_add_pm_capability(pci_dev, E1000E_PMRB_OFFSET,
+ PCI_PM_CAP_DSI) < 0) {
+ hw_error("Failed to initialize PM capability");
+ }
+
+ if (pcie_aer_init(pci_dev, E1000E_AER_OFFSET) < 0) {
+ hw_error("Failed to initialize AER capability");
+ }
+
+ pcie_dsn_init(pci_dev, E1000E_DSN_OFFSET,
+ _e1000e_gen_dsn(macaddr));
+
+ _e1000e_init_net_peer(s, pci_dev, macaddr);
+
+ /* Initialize core */
+ _e1000e_core_reinitialize(s);
+
+ e1000e_core_pci_realize(&s->core,
+ e1000e_eeprom_template,
+ sizeof(e1000e_eeprom_template),
+ macaddr);
+}
+
+static void e1000e_pci_uninit(PCIDevice *pci_dev)
+{
+ E1000EState *s = E1000E(pci_dev);
+
+ trace_e1000e_cb_pci_uninit();
+
+ e1000e_core_pci_uninit(&s->core);
+
+ pcie_aer_exit(pci_dev);
+ pcie_cap_exit(pci_dev);
+
+ qemu_del_nic(s->nic);
+
+ _e1000e_cleanup_msix(s);
+ _e1000e_cleanup_msi(s);
+}
+
+static void e1000e_qdev_reset(DeviceState *dev)
+{
+ E1000EState *s = E1000E(dev);
+
+ trace_e1000e_cb_qdev_reset();
+
+ e1000e_core_reset(&s->core);
+}
+
+static void e1000e_pre_save(void *opaque)
+{
+ E1000EState *s = opaque;
+
+ trace_e1000e_cb_pre_save();
+
+ e1000e_core_pre_save(&s->core);
+}
+
+static int e1000e_post_load(void *opaque, int version_id)
+{
+ E1000EState *s = opaque;
+
+ trace_e1000e_cb_post_load();
+
+ if ((s->subsys != s->subsys_used) ||
+ (s->subsys_ven != s->subsys_ven_used)) {
+ fprintf(stderr,
+ "ERROR: Cannot migrate while device properties "
+ "(subsys/subsys_ven) differ\n");
+ return -1;
+ }
+
+ return e1000e_core_post_load(&s->core);
+}
+
+static const VMStateDescription vmstate_e1000e = {
+ .name = "e1000e",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .pre_save = e1000e_pre_save,
+ .post_load = e1000e_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCIE_DEVICE(parent_obj, E1000EState),
+ VMSTATE_MSIX(parent_obj, E1000EState),
+
+ VMSTATE_UINT32(ioaddr, E1000EState),
+ VMSTATE_UINT32(intr_state, E1000EState),
+ VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState),
+ VMSTATE_UINT8(core.rx_desc_len, E1000EState),
+ VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState,
+ E1000_PSRCTL_BUFFS_PER_DESC),
+ VMSTATE_UINT32(core.rx_desc_buf_size, E1000EState),
+ VMSTATE_UINT16_ARRAY(core.eeprom, E1000EState, E1000E_EEPROM_SIZE),
+ VMSTATE_UINT16_2DARRAY(core.phy, E1000EState,
+ E1000E_PHY_PAGES, E1000E_PHY_PAGE_SIZE),
+ VMSTATE_UINT32_ARRAY(core.mac, E1000EState, E1000E_MAC_SIZE),
+ VMSTATE_UINT8_ARRAY(core.permanent_mac, E1000EState, ETH_ALEN),
+
+ VMSTATE_UINT32(core.delayed_causes, E1000EState),
+
+ VMSTATE_UINT16(subsys, E1000EState),
+ VMSTATE_UINT16(subsys_ven, E1000EState),
+
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.rdtr, E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.radv, E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.raid, E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.tadv, E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.tidv, E1000EState),
+
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.itr, E1000EState),
+ VMSTATE_BOOL(core.itr_intr_pending, E1000EState),
+
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.eitr[0], E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.eitr[1], E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.eitr[2], E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.eitr[3], E1000EState),
+ VMSTATE_E1000_INTR_DELAY_TIMER(core.eitr[4], E1000EState),
+ VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState,
+ E1000E_MSIX_VEC_NUM),
+
+ VMSTATE_UINT32(core.itr_guest_value, E1000EState),
+ VMSTATE_UINT32_ARRAY(core.eitr_guest_value, E1000EState,
+ E1000E_MSIX_VEC_NUM),
+
+ VMSTATE_UINT16(core.vet, E1000EState),
+
+ VMSTATE_UINT8(core.tx[0].sum_needed, E1000EState),
+ VMSTATE_UINT8(core.tx[0].ipcss, E1000EState),
+ VMSTATE_UINT8(core.tx[0].ipcso, E1000EState),
+ VMSTATE_UINT16(core.tx[0].ipcse, E1000EState),
+ VMSTATE_UINT8(core.tx[0].tucss, E1000EState),
+ VMSTATE_UINT8(core.tx[0].tucso, E1000EState),
+ VMSTATE_UINT16(core.tx[0].tucse, E1000EState),
+ VMSTATE_UINT8(core.tx[0].hdr_len, E1000EState),
+ VMSTATE_UINT16(core.tx[0].mss, E1000EState),
+ VMSTATE_UINT32(core.tx[0].paylen, E1000EState),
+ VMSTATE_INT8(core.tx[0].ip, E1000EState),
+ VMSTATE_INT8(core.tx[0].tcp, E1000EState),
+ VMSTATE_BOOL(core.tx[0].tse, E1000EState),
+ VMSTATE_BOOL(core.tx[0].cptse, E1000EState),
+ VMSTATE_BOOL(core.tx[0].skip_cp, E1000EState),
+
+ VMSTATE_UINT8(core.tx[1].sum_needed, E1000EState),
+ VMSTATE_UINT8(core.tx[1].ipcss, E1000EState),
+ VMSTATE_UINT8(core.tx[1].ipcso, E1000EState),
+ VMSTATE_UINT16(core.tx[1].ipcse, E1000EState),
+ VMSTATE_UINT8(core.tx[1].tucss, E1000EState),
+ VMSTATE_UINT8(core.tx[1].tucso, E1000EState),
+ VMSTATE_UINT16(core.tx[1].tucse, E1000EState),
+ VMSTATE_UINT8(core.tx[1].hdr_len, E1000EState),
+ VMSTATE_UINT16(core.tx[1].mss, E1000EState),
+ VMSTATE_UINT32(core.tx[1].paylen, E1000EState),
+ VMSTATE_INT8(core.tx[1].ip, E1000EState),
+ VMSTATE_INT8(core.tx[1].tcp, E1000EState),
+ VMSTATE_BOOL(core.tx[1].tse, E1000EState),
+ VMSTATE_BOOL(core.tx[1].cptse, E1000EState),
+ VMSTATE_BOOL(core.tx[1].skip_cp, E1000EState),
+
+ VMSTATE_BOOL(core.has_vnet, E1000EState),
+
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static Property e1000e_properties[] = {
+ DEFINE_NIC_PROPERTIES(E1000EState, conf),
+ DEFINE_PROP_BOOL("vnet", E1000EState, use_vnet, true),
+ DEFINE_PROP_UINT16("subsys_ven", E1000EState,
+ subsys_ven, PCI_VENDOR_ID_INTEL),
+ DEFINE_PROP_UINT16("subsys", E1000EState, subsys, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void e1000e_class_init(ObjectClass *class, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(class);
+ PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
+
+ c->realize = e1000e_pci_realize;
+ c->exit = e1000e_pci_uninit;
+ c->vendor_id = PCI_VENDOR_ID_INTEL;
+ c->device_id = E1000_DEV_ID_82574L;
+ c->revision = 0;
+ c->class_id = PCI_CLASS_NETWORK_ETHERNET;
+ c->is_express = 1;
+
+ dc->desc = "Intel 82574L GbE Controller";
+ dc->reset = e1000e_qdev_reset;
+ dc->vmsd = &vmstate_e1000e;
+ dc->props = e1000e_properties;
+
+ set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+}
+
+static void e1000e_instance_init(Object *obj)
+{
+ E1000EState *s = E1000E(obj);
+ device_add_bootindex_property(obj, &s->conf.bootindex,
+ "bootindex", "/ethernet-phy@0",
+ DEVICE(obj), NULL);
+}
+
+static const TypeInfo e1000e_info = {
+ .name = TYPE_E1000E,
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(E1000EState),
+ .class_init = e1000e_class_init,
+ .instance_init = e1000e_instance_init,
+};
+
+static void e1000e_register_types(void)
+{
+ type_register_static(&e1000e_info);
+}
+
+type_init(e1000e_register_types)
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
new file mode 100644
index 0000000..c7e9e97
--- /dev/null
+++ b/hw/net/e1000e_core.c
@@ -0,0 +1,3453 @@
+/*
+* Core code for QEMU e1000e emulation
+*
+* Software developer's manuals:
+* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
+*
+* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
+* Developed by Daynix Computing LTD (http://www.daynix.com)
+*
+* Authors:
+* Dmitry Fleytman <dmitry@daynix.com>
+* Leonid Bloch <leonid@daynix.com>
+* Yan Vugenfirer <yan@daynix.com>
+*
+* Based on work done by:
+* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
+* Copyright (c) 2008 Qumranet
+* Based on work done by:
+* Copyright (c) 2007 Dan Aloni
+* Copyright (c) 2004 Antony T Curtis
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "hw/hw.h"
+#include "hw/pci/pci.h"
+#include "net/net.h"
+#include "net/tap.h"
+#include "net/checksum.h"
+#include "sysemu/sysemu.h"
+#include "qemu/iov.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+
+#include "net_tx_pkt.h"
+#include "net_rx_pkt.h"
+
+#include "e1000_regs.h"
+#include "e1000e_core.h"
+
+#include "trace.h"
+
+#define _E1000E_MIN_XITR (500) /* No more than 7813 interrupts per
+ second according to spec 10.2.4.2 */
+
+static const uint8_t E1000E_MAX_TX_FRAGS = 64;
+
+static void
+set_interrupt_cause(E1000ECore *core, uint32_t val);
+
+static inline int
+vlan_enabled(E1000ECore *core)
+{
+ return ((core->mac[CTRL] & E1000_CTRL_VME) != 0);
+}
+
+static inline int
+is_vlan_txd(uint32_t txd_lower)
+{
+ return ((txd_lower & E1000_TXD_CMD_VLE) != 0);
+}
+
+static inline void
+inc_reg_if_not_full(E1000ECore *core, int index)
+{
+ if (core->mac[index] != 0xffffffff) {
+ core->mac[index]++;
+ }
+}
+
+static void
+grow_8reg_if_not_full(E1000ECore *core, int index, int size)
+{
+ uint64_t sum = core->mac[index] | (uint64_t)core->mac[index+1] << 32;
+
+ if (sum + size < sum) {
+ sum = ~0ULL;
+ } else {
+ sum += size;
+ }
+ core->mac[index] = sum;
+ core->mac[index+1] = sum >> 32;
+}
+
+static void
+increase_size_stats(E1000ECore *core, const int *size_regs, int size)
+{
+ if (size > 1023) {
+ inc_reg_if_not_full(core, size_regs[5]);
+ } else if (size > 511) {
+ inc_reg_if_not_full(core, size_regs[4]);
+ } else if (size > 255) {
+ inc_reg_if_not_full(core, size_regs[3]);
+ } else if (size > 127) {
+ inc_reg_if_not_full(core, size_regs[2]);
+ } else if (size > 64) {
+ inc_reg_if_not_full(core, size_regs[1]);
+ } else if (size == 64) {
+ inc_reg_if_not_full(core, size_regs[0]);
+ }
+}
+
+static inline void
+process_ts_option(E1000ECore *core, struct e1000_tx_desc *dp)
+{
+ if (le32_to_cpu(dp->upper.data) & E1000_TXD_EXTCMD_TSTAMP) {
+ trace_e1000e_wrn_no_ts_support();
+ }
+}
+
+static inline void
+process_snap_option(E1000ECore *core, uint32_t cmd_and_length)
+{
+ if (cmd_and_length & E1000_TXD_CMD_SNAP) {
+ trace_e1000e_wrn_no_snap_support();
+ }
+}
+
+static inline void
+_e1000e_raise_legacy_irq(E1000ECore *core)
+{
+ trace_e1000e_irq_legacy_notify(true);
+ inc_reg_if_not_full(core, IAC);
+ pci_set_irq(core->owner, 1);
+}
+
+static inline void
+_e1000e_lower_legacy_irq(E1000ECore *core)
+{
+ trace_e1000e_irq_legacy_notify(false);
+ pci_set_irq(core->owner, 0);
+}
+
+static inline void
+_e1000e_intrmgr_rearm_timer(E1000IntrDelayTimer *timer)
+{
+ int64_t delay_ns = (int64_t) timer->core->mac[timer->delay_reg] *
+ timer->delay_resolution_ns;
+
+ trace_e1000e_irq_rearm_timer(timer->delay_reg << 2, delay_ns);
+
+ timer_mod(timer->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns);
+
+ timer->running = true;
+}
+
+static void
+_e1000e_intmgr_timer_post_load(E1000IntrDelayTimer *timer)
+{
+ if (timer->running) {
+ _e1000e_intrmgr_rearm_timer(timer);
+ }
+}
+
+static inline void
+_e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer)
+{
+ if (timer->running) {
+ timer_del(timer->timer);
+ timer->running = false;
+ }
+}
+
+static inline void
+_e1000e_intrmgr_fire_delayed_interrupts(E1000ECore *core)
+{
+ trace_e1000e_irq_fire_delayed_interrupts();
+ set_interrupt_cause(core, 0);
+}
+
+static void
+_e1000e_intrmgr_on_timer(void *opaque)
+{
+ E1000IntrDelayTimer *timer = opaque;
+
+ trace_e1000e_irq_throttling_timer(timer->delay_reg << 2);
+
+ timer->running = false;
+ _e1000e_intrmgr_fire_delayed_interrupts(timer->core);
+}
+
+static void
+_e1000e_intrmgr_on_throttling_timer(void *opaque)
+{
+ E1000IntrDelayTimer *timer = opaque;
+
+ assert(!msix_enabled(timer->core->owner));
+
+ timer->running = false;
+
+ if (!timer->core->itr_intr_pending) {
+ trace_e1000e_irq_throttling_no_pending_interrupts();
+ return;
+ }
+
+ if (msi_enabled(timer->core->owner)) {
+ trace_e1000e_irq_msi_notify_postponed();
+ msi_notify(timer->core->owner, 0);
+ } else {
+ trace_e1000e_irq_legacy_notify_postponed();
+ _e1000e_raise_legacy_irq(timer->core);
+ }
+}
+
+static void
+_e1000e_intrmgr_on_msix_throttling_timer(void *opaque)
+{
+ E1000IntrDelayTimer *timer = opaque;
+ int idx = timer - &timer->core->eitr[0];
+
+ assert(msix_enabled(timer->core->owner));
+
+ timer->running = false;
+
+ if (!timer->core->eitr_intr_pending[idx]) {
+ trace_e1000e_irq_throttling_no_pending_vec(idx);
+ return;
+ }
+
+ trace_e1000e_irq_msix_notify_postponed_vec(idx);
+ msix_notify(timer->core->owner, idx);
+}
+
+static void
+_e1000e_intrmgr_initialize_all_timers(E1000ECore *core, bool create)
+{
+ int i;
+
+ core->radv.delay_reg = RADV;
+ core->rdtr.delay_reg = RDTR;
+ core->raid.delay_reg = RAID;
+ core->tadv.delay_reg = TADV;
+ core->tidv.delay_reg = TIDV;
+
+ core->radv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
+ core->rdtr.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
+ core->raid.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
+ core->tadv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
+ core->tidv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES;
+
+ core->radv.core = core;
+ core->rdtr.core = core;
+ core->raid.core = core;
+ core->tadv.core = core;
+ core->tidv.core = core;
+
+ core->itr.core = core;
+ core->itr.delay_reg = ITR;
+ core->itr.delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES;
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ core->eitr[i].core = core;
+ core->eitr[i].delay_reg = EITR + i;
+ core->eitr[i].delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES;
+ }
+
+ if (!create) {
+ return;
+ }
+
+ core->radv.timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, _e1000e_intrmgr_on_timer, &core->radv);
+ core->rdtr.timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, _e1000e_intrmgr_on_timer, &core->rdtr);
+ core->raid.timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, _e1000e_intrmgr_on_timer, &core->raid);
+
+ core->tadv.timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, _e1000e_intrmgr_on_timer, &core->tadv);
+ core->tidv.timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, _e1000e_intrmgr_on_timer, &core->tidv);
+
+ core->itr.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ _e1000e_intrmgr_on_throttling_timer,
+ &core->itr);
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ core->eitr[i].timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ _e1000e_intrmgr_on_msix_throttling_timer,
+ &core->eitr[i]);
+ }
+}
+
+static inline void
+_e1000e_intrmgr_stop_delay_timers(E1000ECore *core)
+{
+ _e1000e_intrmgr_stop_timer(&core->radv);
+ _e1000e_intrmgr_stop_timer(&core->rdtr);
+ _e1000e_intrmgr_stop_timer(&core->raid);
+ _e1000e_intrmgr_stop_timer(&core->tidv);
+ _e1000e_intrmgr_stop_timer(&core->tadv);
+}
+
+static bool
+_e1000e_intrmgr_delay_rx_causes(E1000ECore *core, uint32_t *causes)
+{
+ uint32_t delayable_causes;
+ uint32_t rdtr = core->mac[RDTR];
+ uint32_t radv = core->mac[RADV];
+ uint32_t raid = core->mac[RAID];
+
+ if (msix_enabled(core->owner)) {
+ return false;
+ }
+
+ delayable_causes = E1000_ICR_RXQ0 |
+ E1000_ICR_RXQ1 |
+ E1000_ICR_RXT0;
+
+ if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS)) {
+ delayable_causes |= E1000_ICR_ACK;
+ }
+
+ /* Clean up all causes that may be delayed */
+ core->delayed_causes |= *causes & delayable_causes;
+ *causes &= ~delayable_causes;
+
+ /* Check if delayed RX interrupts disabled by client
+ or if there are causes that cannot be delayed */
+ if ((rdtr == 0) || (*causes != 0)) {
+ return false;
+ }
+
+ /* Check if delayed RX ACK interrupts disabled by client
+ and there is an ACK packet received */
+ if ((raid == 0) && (core->delayed_causes & E1000_ICR_ACK)) {
+ return false;
+ }
+
+ /* All causes delayed */
+ _e1000e_intrmgr_rearm_timer(&core->rdtr);
+
+ if (!core->radv.running && (radv != 0)) {
+ _e1000e_intrmgr_rearm_timer(&core->radv);
+ }
+
+ if (!core->raid.running && (core->delayed_causes & E1000_ICR_ACK)) {
+ _e1000e_intrmgr_rearm_timer(&core->raid);
+ }
+
+ return true;
+}
+
+static bool
+_e1000e_intrmgr_delay_tx_causes(E1000ECore *core, uint32_t *causes)
+{
+ static const uint32_t delayable_causes = E1000_ICR_TXQ0 |
+ E1000_ICR_TXQ1 |
+ E1000_ICR_TXQE |
+ E1000_ICR_TXDW;
+
+ if (msix_enabled(core->owner)) {
+ return false;
+ }
+
+ /* Clean up all causes that may be delayed */
+ core->delayed_causes |= *causes & delayable_causes;
+ *causes &= ~delayable_causes;
+
+ /* If there are causes that cannot be delayed */
+ if (*causes != 0) {
+ return false;
+ }
+
+ /* All causes delayed */
+ _e1000e_intrmgr_rearm_timer(&core->tidv);
+
+ if (!core->tadv.running && (core->mac[TADV] != 0)) {
+ _e1000e_intrmgr_rearm_timer(&core->tadv);
+ }
+
+ return true;
+}
+
+static uint32_t
+_e1000e_intmgr_collect_delayed_causes(E1000ECore *core)
+{
+ uint32_t res;
+
+ if (msix_enabled(core->owner)) {
+ assert(core->delayed_causes == 0);
+ return 0;
+ }
+
+ res = core->delayed_causes;
+ core->delayed_causes = 0;
+
+ _e1000e_intrmgr_stop_delay_timers(core);
+
+ return res;
+}
+
+static void
+_e1000e_intrmgr_fire_all_timers(E1000ECore *core)
+{
+ int i;
+ uint32_t val = _e1000e_intmgr_collect_delayed_causes(core);
+
+ trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]);
+ core->mac[ICR] |= val;
+
+ if (core->itr.running) {
+ timer_del(core->itr.timer);
+ _e1000e_intrmgr_on_throttling_timer(&core->itr);
+ }
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ if (core->eitr[i].running) {
+ timer_del(core->eitr[i].timer);
+ _e1000e_intrmgr_on_msix_throttling_timer(&core->eitr[i]);
+ }
+ }
+}
+
+static void
+_e1000e_intrmgr_post_load(E1000ECore *core)
+{
+ int i;
+
+ _e1000e_intmgr_timer_post_load(&core->radv);
+ _e1000e_intmgr_timer_post_load(&core->rdtr);
+ _e1000e_intmgr_timer_post_load(&core->raid);
+ _e1000e_intmgr_timer_post_load(&core->tidv);
+ _e1000e_intmgr_timer_post_load(&core->tadv);
+
+ _e1000e_intmgr_timer_post_load(&core->itr);
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ _e1000e_intmgr_timer_post_load(&core->eitr[i]);
+ }
+}
+
+static void
+_e1000e_intrmgr_reset(E1000ECore *core)
+{
+ int i;
+
+ core->delayed_causes = 0;
+
+ _e1000e_intrmgr_stop_delay_timers(core);
+
+ _e1000e_intrmgr_stop_timer(&core->itr);
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ _e1000e_intrmgr_stop_timer(&core->eitr[i]);
+ }
+}
+
+static void
+_e1000e_intrmgr_pci_unint(E1000ECore *core)
+{
+ int i;
+
+ timer_del(core->radv.timer);
+ timer_free(core->radv.timer);
+ timer_del(core->rdtr.timer);
+ timer_free(core->rdtr.timer);
+ timer_del(core->raid.timer);
+ timer_free(core->raid.timer);
+
+ timer_del(core->tadv.timer);
+ timer_free(core->tadv.timer);
+ timer_del(core->tidv.timer);
+ timer_free(core->tidv.timer);
+
+ timer_del(core->itr.timer);
+ timer_free(core->itr.timer);
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ timer_del(core->eitr[i].timer);
+ timer_free(core->eitr[i].timer);
+ }
+}
+
+static void
+_e1000e_intrmgr_pci_realize(E1000ECore *core)
+{
+ _e1000e_intrmgr_initialize_all_timers(core, true);
+}
+
+static inline bool
+_e1000e_rx_csum_enabled(E1000ECore *core)
+{
+ return (core->mac[RXCSUM] & E1000_RXCSUM_PCSD) ? false : true;
+}
+
+static inline bool
+_e1000e_rx_use_legacy_descriptor(E1000ECore *core)
+{
+ return (core->mac[RFCTL] & E1000_RFCTL_EXTEN) ? false : true;
+}
+
+static inline bool
+_e1000e_rx_use_ps_descriptor(E1000ECore *core)
+{
+ return !_e1000e_rx_use_legacy_descriptor(core) &&
+ (core->mac[RCTL] & E1000_RCTL_DTYP_PS);
+}
+
+static inline bool
+_e1000e_rss_enabled(E1000ECore *core)
+{
+ return E1000_MRQC_ENABLED(core->mac[MRQC]) &&
+ !_e1000e_rx_csum_enabled(core) &&
+ !_e1000e_rx_use_legacy_descriptor(core);
+}
+
+typedef struct E1000E_RSSInfo_st {
+ bool enabled;
+ uint32_t hash;
+ uint32_t queue;
+ uint32_t type;
+} E1000E_RSSInfo;
+
+static uint32_t
+_e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt)
+{
+ bool isip4, isip6, isudp, istcp;
+
+ assert(_e1000e_rss_enabled(core));
+
+ net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
+
+ if (isip4) {
+ bool fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+
+ trace_e1000e_rx_rss_ip4(fragment, istcp, core->mac[MRQC],
+ E1000_MRQC_EN_TCPIPV4(core->mac[MRQC]),
+ E1000_MRQC_EN_IPV4(core->mac[MRQC]));
+
+ if (!fragment && istcp && E1000_MRQC_EN_TCPIPV4(core->mac[MRQC])) {
+ return E1000_MRQ_RSS_TYPE_IPV4TCP;
+ }
+
+ if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) {
+ return E1000_MRQ_RSS_TYPE_IPV4;
+ }
+ } else if (isip6) {
+ eth_ip6_hdr_info *ip6info = net_rx_pkt_get_ip6_info(pkt);
+
+ bool ex_dis = core->mac[RFCTL] & E1000_RFCTL_IPV6_EX_DIS;
+ bool new_ex_dis = core->mac[RFCTL] & E1000_RFCTL_NEW_IPV6_EXT_DIS;
+
+ trace_e1000e_rx_rss_ip6(core->mac[RFCTL],
+ ex_dis, new_ex_dis, istcp,
+ ip6info->has_ext_hdrs,
+ ip6info->rss_ex_dst_valid,
+ ip6info->rss_ex_src_valid,
+ core->mac[MRQC],
+ E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]),
+ E1000_MRQC_EN_IPV6EX(core->mac[MRQC]),
+ E1000_MRQC_EN_IPV6(core->mac[MRQC]));
+
+ if ((!ex_dis || !ip6info->has_ext_hdrs) &&
+ (!new_ex_dis || !(ip6info->rss_ex_dst_valid ||
+ ip6info->rss_ex_src_valid))) {
+
+ if (istcp && !ip6info->fragment &&
+ E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) {
+ return E1000_MRQ_RSS_TYPE_IPV6TCP;
+ }
+
+ if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) {
+ return E1000_MRQ_RSS_TYPE_IPV6EX;
+ }
+
+ }
+
+ if (E1000_MRQC_EN_IPV6(core->mac[MRQC])) {
+ return E1000_MRQ_RSS_TYPE_IPV6;
+ }
+
+ }
+
+ return E1000_MRQ_RSS_TYPE_NONE;
+}
+
+static uint32_t
+_e1000e_rss_calc_hash(E1000ECore *core,
+ struct NetRxPkt *pkt,
+ E1000E_RSSInfo *info)
+{
+ NetRxPktRssType type;
+
+ assert(_e1000e_rss_enabled(core));
+
+ switch (info->type) {
+ case E1000_MRQ_RSS_TYPE_IPV4:
+ type = NetPktRssIpV4;
+ break;
+ case E1000_MRQ_RSS_TYPE_IPV4TCP:
+ type = NetPktRssIpV4Tcp;
+ break;
+ case E1000_MRQ_RSS_TYPE_IPV6TCP:
+ type = NetPktRssIpV6Tcp;
+ break;
+ case E1000_MRQ_RSS_TYPE_IPV6:
+ type = NetPktRssIpV6;
+ break;
+ case E1000_MRQ_RSS_TYPE_IPV6EX:
+ type = NetPktRssIpV6Ex;
+ break;
+ default:
+ assert(false);
+ return 0;
+ }
+
+ return net_rx_pkt_calc_rss_hash(pkt, type, (uint8_t *) &core->mac[RSSRK]);
+}
+
+static void
+_e1000e_rss_parse_packet(E1000ECore *core,
+ struct NetRxPkt *pkt,
+ E1000E_RSSInfo *info)
+{
+ trace_e1000e_rx_rss_started();
+
+ if (!_e1000e_rss_enabled(core)) {
+ info->enabled = false;
+ info->hash = 0;
+ info->queue = 0;
+ info->type = 0;
+ trace_e1000e_rx_rss_disabled();
+ return;
+ }
+
+ info->enabled = true;
+
+ info->type = _e1000e_rss_get_hash_type(core, pkt);
+
+ trace_e1000e_rx_rss_type(info->type);
+
+ if (info->type == E1000_MRQ_RSS_TYPE_NONE) {
+ info->hash = 0;
+ info->queue = 0;
+ return;
+ }
+
+ info->hash = _e1000e_rss_calc_hash(core, pkt, info);
+ info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
+}
+
+static void
+_e1000e_setup_tx_offloads(E1000ECore *core, struct e1000_tx *tx)
+{
+ if (tx->tse && tx->cptse) {
+ net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss);
+ net_tx_pkt_update_ip_checksums(tx->tx_pkt);
+ inc_reg_if_not_full(core, TSCTC);
+ return;
+ }
+
+ if (tx->sum_needed & E1000_TXD_POPTS_TXSM) {
+ net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0);
+ }
+
+ if (tx->sum_needed & E1000_TXD_POPTS_IXSM) {
+ net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
+ }
+}
+
+static bool
+_e1000e_tx_pkt_send(E1000ECore *core, struct e1000_tx *tx, int queue_index)
+{
+ int target_queue = MIN(core->max_queue_num, queue_index);
+ NetClientState *queue = qemu_get_subqueue(core->owner_nic, target_queue);
+
+ _e1000e_setup_tx_offloads(core, tx);
+
+ net_tx_pkt_dump(tx->tx_pkt);
+
+ if ((core->phy[0][PHY_CTRL] & MII_CR_LOOPBACK) ||
+ ((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) {
+ return net_tx_pkt_send_loopback(tx->tx_pkt, queue);
+ } else {
+ return net_tx_pkt_send(tx->tx_pkt, queue);
+ }
+}
+
+static void
+_e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt)
+{
+ static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
+ PTC1023, PTC1522 };
+
+ size_t tot_len = net_tx_pkt_get_total_len(tx_pkt);
+
+ increase_size_stats(core, PTCregs, tot_len);
+ inc_reg_if_not_full(core, TPT);
+ grow_8reg_if_not_full(core, TOTL, tot_len);
+
+ switch (net_tx_pkt_get_packet_type(tx_pkt)) {
+ case ETH_PKT_BCAST:
+ inc_reg_if_not_full(core, BPTC);
+ break;
+ case ETH_PKT_MCAST:
+ inc_reg_if_not_full(core, MPTC);
+ break;
+ case ETH_PKT_UCAST:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ core->mac[GPTC] = core->mac[TPT];
+ core->mac[GOTCL] = core->mac[TOTL];
+ core->mac[GOTCH] = core->mac[TOTH];
+}
+
+/*
+ * Consume one TX descriptor from queue 'queue_index'.
+ *
+ * Context descriptors (DEXT set, DTYP_D clear) only latch checksum/TSO
+ * offload parameters into 'tx' and return.  Data and legacy descriptors
+ * append their buffer to the packet being assembled in tx->tx_pkt; when
+ * EOP is seen the packet is parsed, optionally VLAN-tagged, sent, and
+ * the per-queue assembly state is reset.
+ */
+static void
+process_tx_desc(E1000ECore *core,
+                struct e1000_tx *tx,
+                struct e1000_tx_desc *dp,
+                int queue_index)
+{
+    uint32_t txd_lower = le32_to_cpu(dp->lower.data);
+    uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D);
+    unsigned int split_size = txd_lower & 0xffff, op;
+    uint64_t addr;
+    struct e1000_context_desc *xp = (struct e1000_context_desc *)dp;
+    bool eop = txd_lower & E1000_TXD_CMD_EOP;
+
+    if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */
+        op = le32_to_cpu(xp->cmd_and_length);
+        tx->ipcss = xp->lower_setup.ip_fields.ipcss;
+        tx->ipcso = xp->lower_setup.ip_fields.ipcso;
+        tx->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse);
+        tx->tucss = xp->upper_setup.tcp_fields.tucss;
+        tx->tucso = xp->upper_setup.tcp_fields.tucso;
+        tx->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse);
+        tx->paylen = op & 0xfffff;
+        tx->hdr_len = xp->tcp_seg_setup.fields.hdr_len;
+        tx->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss);
+        tx->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0;
+        tx->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0;
+        tx->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0;
+        /* Guess a checksum offset when the guest supplied none */
+        if (tx->tucso == 0) { /* this is probably wrong */
+            trace_e1000e_tx_cso_zero();
+            tx->tucso = tx->tucss + (tx->tcp ? 16 : 6);
+        }
+        process_snap_option(core, op);
+        return;
+    } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
+        /* data descriptor */
+        tx->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
+        tx->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
+        process_ts_option(core, dp);
+    } else {
+        /* legacy descriptor */
+        process_ts_option(core, dp);
+        tx->cptse = 0;
+    }
+
+    addr = le64_to_cpu(dp->buffer_addr);
+
+    /* Once a fragment fails to map, drop the rest of this packet too */
+    if (!tx->skip_cp) {
+        if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) {
+            tx->skip_cp = true;
+        }
+    }
+
+    if (eop) {
+        if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
+            if (vlan_enabled(core) && is_vlan_txd(txd_lower)) {
+                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt,
+                    le16_to_cpu(dp->upper.fields.special), core->vet);
+            }
+            if (_e1000e_tx_pkt_send(core, tx, queue_index)) {
+                _e1000e_on_tx_done_update_stats(core, tx->tx_pkt);
+            }
+        }
+
+        /* Packet complete (sent or dropped): reset assembly state */
+        tx->skip_cp = false;
+        net_tx_pkt_reset(tx->tx_pkt);
+
+        tx->sum_needed = 0;
+        tx->cptse = 0;
+    }
+}
+
+/* Interrupt cause to raise for a TX descriptor writeback on 'queue_idx' */
+static inline uint32_t
+_e1000e_tx_wb_interrupt_cause(E1000ECore *core, int queue_idx)
+{
+    if (msix_enabled(core->owner)) {
+        /* With MSI-X each TX queue raises its own cause bit */
+        return (queue_idx == 0) ? E1000_ICR_TXQ0 : E1000_ICR_TXQ1;
+    }
+
+    return E1000_ICR_TXDW;
+}
+
+/* Interrupt cause to raise for an RX descriptor writeback on 'queue_idx' */
+static inline uint32_t
+_e1000e_rx_wb_interrupt_cause(E1000ECore *core,
+                              int queue_idx,
+                              bool min_threshold_hit)
+{
+    if (msix_enabled(core->owner)) {
+        /* With MSI-X each RX queue raises its own cause bit */
+        return (queue_idx == 0) ? E1000_ICR_RXQ0 : E1000_ICR_RXQ1;
+    }
+
+    return E1000_ICS_RXT0 | (min_threshold_hit ? E1000_ICS_RXDMT0 : 0);
+}
+
+/*
+ * Write back the DD status bit of a processed TX descriptor when the
+ * guest requested report status (RS) or every-writeback interrupts are
+ * enabled.  Returns the interrupt cause to accumulate (0 if none) and
+ * reports via *ide whether the descriptor asked for interrupt delay.
+ */
+static uint32_t
+txdesc_writeback(E1000ECore *core, dma_addr_t base,
+                 struct e1000_tx_desc *dp, bool *ide, int queue_idx)
+{
+    uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
+
+    if (!(txd_lower & E1000_TXD_CMD_RS) &&
+        !(core->mac[IVAR] & E1000_IVAR_TX_INT_EVERY_WB)) {
+        return 0;
+    }
+
+    *ide = (txd_lower & E1000_TXD_CMD_IDE) ? true : false;
+
+    txd_upper = le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD;
+
+    /* Only the upper dword is written back, at its offset in the ring */
+    dp->upper.data = cpu_to_le32(txd_upper);
+    pci_dma_write(core->owner, base + ((char *)&dp->upper - (char *)dp),
+                  &dp->upper, sizeof(dp->upper));
+    return _e1000e_tx_wb_interrupt_cause(core, queue_idx);
+}
+
+/* Register indices describing one descriptor ring */
+typedef struct E1000E_RingInfo_st {
+    int dbah;   /* descriptor base address high */
+    int dbal;   /* descriptor base address low */
+    int dlen;   /* ring length in bytes */
+    int dh;     /* head index */
+    int dt;     /* tail index */
+    int idx;    /* queue number */
+} E1000E_RingInfo;
+
+/* A ring is empty when its head has caught up with its tail */
+static inline bool
+_e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r)
+{
+    uint32_t head = core->mac[r->dh];
+    uint32_t tail = core->mac[r->dt];
+
+    return head == tail;
+}
+
+/* Guest-physical base of the ring; low 4 bits of DBAL are reserved */
+static inline uint64_t
+_e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
+{
+    uint64_t base = core->mac[r->dbah];
+
+    base <<= 32;
+    base |= core->mac[r->dbal] & ~0xf;
+
+    return base;
+}
+
+/* Guest-physical address of the descriptor at the ring head */
+static inline uint64_t
+_e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r)
+{
+    uint64_t head = core->mac[r->dh];
+
+    return _e1000e_ring_base(core, r) + head * E1000_RING_DESC_LEN;
+}
+
+/* Move the ring head forward by 'count' descriptors, wrapping at DLEN */
+static inline void
+_e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
+{
+    uint32_t new_head = core->mac[r->dh] + count;
+
+    if (new_head * E1000_RING_DESC_LEN >= core->mac[r->dlen]) {
+        new_head = 0;
+    }
+
+    core->mac[r->dh] = new_head;
+}
+
+/*
+ * Number of descriptors between head and tail that the device may still
+ * consume.  The two comparisons below are exhaustive, so the original
+ * trailing g_assert_not_reached()/return was dead code and is removed.
+ */
+static inline uint32_t
+_e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
+{
+    trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
+                                 core->mac[r->dh], core->mac[r->dt]);
+
+    if (core->mac[r->dh] <= core->mac[r->dt]) {
+        return core->mac[r->dt] - core->mac[r->dh];
+    }
+
+    /* head > tail: the free region wraps around the end of the ring */
+    return core->mac[r->dlen] / E1000_RING_DESC_LEN +
+           core->mac[r->dt] - core->mac[r->dh];
+}
+
+/* A ring with zero length has never been configured by the guest */
+static inline bool
+_e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r)
+{
+    uint32_t len = core->mac[r->dlen];
+
+    return len > 0;
+}
+
+/* Ring length in bytes, as programmed in the DLEN register */
+static inline uint32_t
+_e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r)
+{
+    uint32_t len = core->mac[r->dlen];
+
+    return len;
+}
+
+/* A TX ring: its register layout plus the per-queue offload state */
+typedef struct E1000E_TxRing_st {
+    const E1000E_RingInfo *i;
+    struct e1000_tx *tx;
+} E1000E_TxRing;
+
+/* Map a per-queue register index to its queue number */
+static inline int
+_e1000e_mq_queue_idx(int base_reg_idx, int reg_idx)
+{
+    /* Register blocks of adjacent queues are 0x100 bytes apart */
+    int dwords_per_queue = 0x100 / 4;
+
+    return (reg_idx - base_reg_idx) / dwords_per_queue;
+}
+
+/* Bind 'txr' to TX queue 'idx' (register set and offload context) */
+static inline void
+_e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
+{
+    static const E1000E_RingInfo ring_regs[E1000E_NUM_QUEUES] = {
+        { TDBAH,  TDBAL,  TDLEN,  TDH,  TDT,  0 },
+        { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }
+    };
+
+    assert(idx < ARRAY_SIZE(ring_regs));
+
+    txr->i = ring_regs + idx;
+    txr->tx = core->tx + idx;
+}
+
+/* An RX ring is fully described by its register layout */
+typedef struct E1000E_RxRing_st {
+    const E1000E_RingInfo *i;
+} E1000E_RxRing;
+
+/* Bind 'rxr' to RX queue 'idx' */
+static inline void
+_e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
+{
+    static const E1000E_RingInfo ring_regs[E1000E_NUM_QUEUES] = {
+        { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
+        { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }
+    };
+
+    assert(idx < ARRAY_SIZE(ring_regs));
+
+    rxr->i = ring_regs + idx;
+}
+
+/*
+ * Drain the TX ring 'txr': read and process descriptors until the ring
+ * is empty, accumulating writeback interrupt causes, then raise them
+ * (possibly delayed when a descriptor requested IDE).
+ */
+static void
+start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
+{
+    dma_addr_t base;
+    struct e1000_tx_desc desc;
+    bool ide = false;
+    const E1000E_RingInfo *txi = txr->i;
+    uint32_t cause = E1000_ICS_TXQE;
+
+    /* Transmitter must be enabled in TCTL before touching the ring */
+    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+        trace_e1000e_tx_disabled();
+        return;
+    }
+
+    while (!_e1000e_ring_empty(core, txi)) {
+        base = _e1000e_ring_head_descr(core, txi);
+
+        pci_dma_read(core->owner, base, &desc, sizeof(desc));
+
+        trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr,
+                              desc.lower.data, desc.upper.data);
+
+        process_tx_desc(core, txr->tx, &desc, txi->idx);
+        cause |= txdesc_writeback(core, base, &desc, &ide, txi->idx);
+
+        _e1000e_ring_advance(core, txi, 1);
+    }
+
+    /* Raise immediately unless IDE asked for a delayed interrupt */
+    if (!ide || !_e1000e_intrmgr_delay_tx_causes(core, &cause)) {
+        set_interrupt_cause(core, cause);
+    }
+}
+
+/*
+ * Check whether the RX ring can absorb 'total_size' bytes.  Free
+ * descriptors are scaled by descriptors-per-slot (rx_desc_len may be a
+ * multiple of the minimum descriptor length) and multiplied by the
+ * guest buffer space available per descriptor.
+ */
+static bool
+_e1000e_has_rxbufs(E1000ECore *core,
+                   const E1000E_RingInfo *r,
+                   size_t total_size)
+{
+    uint32_t bufs = _e1000e_ring_free_descr_num(core, r);
+
+    trace_e1000e_rx_has_buffers(r->idx, bufs, total_size,
+                                core->rx_desc_buf_size);
+
+    return total_size <= bufs / (core->rx_desc_len / E1000_MIN_RX_DESC_LEN) *
+                         core->rx_desc_buf_size;
+}
+
+/* Kick reception: flush packets queued on every active subqueue */
+static inline void
+start_recv(E1000ECore *core)
+{
+    int qidx;
+
+    trace_e1000e_rx_start_recv();
+
+    for (qidx = 0; qidx <= core->max_queue_num; qidx++) {
+        NetClientState *queue = qemu_get_subqueue(core->owner_nic, qidx);
+
+        qemu_flush_queued_packets(queue);
+    }
+}
+
+/*
+ * Report whether the device can accept a packet right now: link up,
+ * receiver enabled, bus mastering on, and at least one enabled RX ring
+ * with buffer space available.
+ */
+int
+e1000e_can_receive(E1000ECore *core)
+{
+    bool link_up = core->mac[STATUS] & E1000_STATUS_LU;
+    bool rx_enabled = core->mac[RCTL] & E1000_RCTL_EN;
+    bool pci_master = core->owner->config[PCI_COMMAND] & PCI_COMMAND_MASTER;
+    int i;
+
+    if (!link_up || !rx_enabled || !pci_master) {
+        trace_e1000e_rx_can_recv_disabled(link_up, rx_enabled, pci_master);
+        return false;
+    }
+
+    for (i = 0; i < E1000E_NUM_QUEUES; i++) {
+        E1000E_RxRing rxr;
+        bool ring_usable;
+
+        _e1000e_rx_ring_init(core, &rxr, i);
+
+        ring_usable = _e1000e_ring_enabled(core, rxr.i) &&
+                      _e1000e_has_rxbufs(core, rxr.i, 1);
+        if (ring_usable) {
+            trace_e1000e_rx_can_recv();
+            return true;
+        }
+    }
+
+    trace_e1000e_rx_can_recv_rings_full();
+    return false;
+}
+
+/* Flat-buffer receive entry point: wrap 'buf' in a single iovec */
+ssize_t
+e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size)
+{
+    struct iovec iov;
+
+    iov.iov_base = (uint8_t *)buf;
+    iov.iov_len = size;
+
+    return e1000e_receive_iov(core, &iov, 1);
+}
+
+/* True when VLAN filtering (RCTL.VFE) is enabled by the guest */
+static inline int
+vlan_rx_filter_enabled(E1000ECore *core)
+{
+    return (core->mac[RCTL] & E1000_RCTL_VFE) ? 1 : 0;
+}
+
+/* True when L3 (IP) RX checksum offload is enabled */
+static inline bool
+_e1000e_rx_l3_cso_enabled(E1000ECore *core)
+{
+    return (core->mac[RXCSUM] & E1000_RXCSUM_IPOFLD) != 0;
+}
+
+/* True when L4 (TCP/UDP) RX checksum offload is enabled */
+static inline bool
+_e1000e_rx_l4_cso_enabled(E1000ECore *core)
+{
+    return (core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD) != 0;
+}
+
+/*
+ * True when the frame's EtherType (bytes 12-13) matches the VLAN
+ * EtherType programmed in VET.
+ */
+static inline bool
+is_vlan_packet(E1000ECore *core, const uint8_t *buf)
+{
+    uint16_t eth_proto = be16_to_cpup((uint16_t *)(buf + 12));
+    bool res = (eth_proto == core->vet);
+
+    trace_e1000e_vlan_is_vlan_pkt(res, eth_proto, core->vet);
+
+    return res;
+}
+
+/*
+ * Decide whether the incoming frame passes the receive filters:
+ * VLAN filter (VFTA), promiscuous/broadcast/multicast modes, exact
+ * unicast match against the RA registers and the multicast table.
+ *
+ * Fix: the function returns bool but mixed 'return 0/1' with
+ * 'return true/false'; normalized to bool literals (behavior unchanged).
+ * Note: 'size' is currently unused; kept for interface compatibility.
+ */
+static bool
+receive_filter(E1000ECore *core, const uint8_t *buf, int size)
+{
+    /* MO field selects which 12 address bits feed the MTA hash */
+    static const int mta_shift[] = {4, 3, 2, 0};
+    uint32_t f, rctl = core->mac[RCTL], ra[2], *rp;
+
+    if (is_vlan_packet(core, buf) && vlan_rx_filter_enabled(core)) {
+        uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14));
+        uint32_t vfta = le32_to_cpup((uint32_t *)(core->mac + VFTA) +
+                                     ((vid >> 5) & 0x7f));
+        if ((vfta & (1 << (vid & 0x1f))) == 0) {
+            trace_e1000e_rx_flt_vlan_mismatch(vid);
+            return false;
+        } else {
+            trace_e1000e_rx_flt_vlan_match(vid);
+        }
+    }
+
+    switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
+    case ETH_PKT_UCAST:
+        if (rctl & E1000_RCTL_UPE) {
+            return true; /* promiscuous ucast */
+        }
+        break;
+
+    case ETH_PKT_BCAST:
+        if (rctl & E1000_RCTL_BAM) {
+            return true; /* broadcast enabled */
+        }
+        break;
+
+    case ETH_PKT_MCAST:
+        if (rctl & E1000_RCTL_MPE) {
+            return true; /* promiscuous mcast */
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Exact match against the Receive Address register pairs */
+    for (rp = core->mac + RA; rp < core->mac + RA + 32; rp += 2) {
+        if (!(rp[1] & E1000_RAH_AV)) {
+            continue;
+        }
+        ra[0] = cpu_to_le32(rp[0]);
+        ra[1] = cpu_to_le32(rp[1]);
+        if (!memcmp(buf, (uint8_t *)ra, 6)) {
+            trace_e1000e_rx_flt_ucast_match((int)(rp - core->mac - RA) / 2,
+                                            MAC_ARG(buf));
+            return true;
+        }
+    }
+    trace_e1000e_rx_flt_ucast_mismatch(MAC_ARG(buf));
+
+    /* Imperfect (hashed) match against the Multicast Table Array */
+    f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
+    f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
+    if (core->mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
+        inc_reg_if_not_full(core, MPRC);
+        return true;
+    }
+
+    trace_e1000e_rx_flt_inexact_mismatch(MAC_ARG(buf),
+                                         (rctl >> E1000_RCTL_MO_SHIFT) & 3,
+                                         f >> 5,
+                                         core->mac[MTA + (f >> 5)]);
+
+    return false;
+}
+
+/* FCS aka Ethernet CRC-32. We don't get it from backends and can't
+ * fill it in, just pad descriptor length by 4 bytes unless guest
+ * told us to strip it off the packet. */
+static inline int
+fcs_len(E1000ECore *core)
+{
+    if (core->mac[RCTL] & E1000_RCTL_SECRC) {
+        return 0;
+    }
+
+    return 4;
+}
+
+/* Extract the buffer address from a legacy RX descriptor */
+static void
+read_legacy_rx_descriptor(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr)
+{
+    struct e1000_rx_desc *rxd = (struct e1000_rx_desc *)desc;
+
+    *buff_addr = le64_to_cpu(rxd->buffer_addr);
+}
+
+/* Extract the buffer address from an extended RX descriptor */
+static void
+read_extended_rx_descriptor(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr)
+{
+    union e1000_rx_desc_extended *rxd =
+        (union e1000_rx_desc_extended *)desc;
+
+    *buff_addr = le64_to_cpu(rxd->read.buffer_addr);
+}
+
+/* Extract all buffer addresses from a packet-split RX descriptor */
+static void
+read_ps_rx_descriptor(E1000ECore *core, uint8_t *desc,
+                      hwaddr (*buff_addr)[MAX_PS_BUFFERS])
+{
+    union e1000_rx_desc_packet_split *rxd =
+        (union e1000_rx_desc_packet_split *)desc;
+    int buf;
+
+    for (buf = 0; buf < MAX_PS_BUFFERS; buf++) {
+        (*buff_addr)[buf] = le64_to_cpu(rxd->read.buffer_addr[buf]);
+    }
+
+    trace_e1000e_rx_desc_ps_read((*buff_addr)[0], (*buff_addr)[1],
+                                 (*buff_addr)[2], (*buff_addr)[3]);
+}
+
+/*
+ * Dispatch on the active RX descriptor format and fill *buff_addr.
+ * Non-packet-split formats only carry one address; zero the rest.
+ */
+static void
+read_rx_descriptor(E1000ECore *core, uint8_t *desc,
+                   hwaddr (*buff_addr)[MAX_PS_BUFFERS])
+{
+    bool legacy = _e1000e_rx_use_legacy_descriptor(core);
+
+    if (legacy) {
+        read_legacy_rx_descriptor(core, desc, &(*buff_addr)[0]);
+    } else if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
+        read_ps_rx_descriptor(core, desc, buff_addr);
+        return;
+    } else {
+        read_extended_rx_descriptor(core, desc, &(*buff_addr)[0]);
+    }
+
+    (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0;
+}
+
+/*
+ * Validate L3/L4 checksums in software (used when no usable virtio
+ * header accompanies the packet) and merge the per-layer status/error
+ * bits into *status_flags.  Respects the RXCSUM offload enables.
+ */
+static void
+_e1000e_verify_csum_in_sw(E1000ECore *core,
+                          struct NetRxPkt *pkt,
+                          uint32_t *status_flags,
+                          bool istcp, bool isudp)
+{
+    bool csum_valid;
+    uint32_t csum_error;
+
+    if (_e1000e_rx_l3_cso_enabled(core)) {
+        if (!net_rx_pkt_validate_l3_csum(pkt, &csum_valid)) {
+            /* Validation impossible (e.g. not IPv4): report nothing */
+            trace_e1000e_rx_metadata_l3_csum_validation_failed();
+        } else {
+            csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_IPE;
+            *status_flags |= E1000_RXD_STAT_IPCS | csum_error;
+        }
+    } else {
+        trace_e1000e_rx_metadata_l3_cso_disabled();
+    }
+
+    if (!_e1000e_rx_l4_cso_enabled(core)) {
+        trace_e1000e_rx_metadata_l4_cso_disabled();
+        return;
+    }
+
+    if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) {
+        trace_e1000e_rx_metadata_l4_csum_validation_failed();
+        return;
+    }
+
+    csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_TCPE;
+
+    if (istcp) {
+        *status_flags |= E1000_RXD_STAT_TCPCS |
+                         csum_error;
+    } else if (isudp) {
+        *status_flags |= E1000_RXD_STAT_TCPCS |
+                         E1000_RXD_STAT_UDPCS |
+                         csum_error;
+    }
+}
+
+/*
+ * True for TCP ACK packets; when RFCTL.ACK_DATA_DIS is set, only pure
+ * ACKs (no TCP payload) qualify.
+ */
+static inline bool
+_e1000e_is_tcp_ack(E1000ECore *core, struct NetRxPkt *rx_pkt)
+{
+    bool pure_acks_only = core->mac[RFCTL] & E1000_RFCTL_ACK_DATA_DIS;
+
+    if (!net_rx_pkt_is_tcp_ack(rx_pkt)) {
+        return false;
+    }
+
+    if (pure_acks_only && net_rx_pkt_has_tcp_data(rx_pkt)) {
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Fill the writeback metadata common to every RX descriptor format:
+ * status/error flags, RSS hash and type, IPv4 identification and the
+ * stripped VLAN tag.  Non-EOP descriptors only get DD set.  On exit
+ * *status_flags is converted to little-endian for the guest.
+ */
+static void
+_e1000e_build_rx_metadata(E1000ECore *core,
+                          struct NetRxPkt *pkt,
+                          bool is_eop,
+                          const E1000E_RSSInfo *rss_info,
+                          uint32_t *rss, uint32_t *mrq,
+                          uint32_t *status_flags,
+                          uint16_t *ip_id,
+                          uint16_t *vlan_tag)
+{
+    struct virtio_net_hdr *vhdr;
+    bool isip4, isip6, istcp, isudp;
+    uint32_t pkt_type;
+
+    *status_flags = E1000_RXD_STAT_DD;
+
+    /* No additional metadata needed for non-EOP descriptors */
+    if (!is_eop) {
+        goto func_exit;
+    }
+
+    *status_flags |= E1000_RXD_STAT_EOP;
+
+    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
+    trace_e1000e_rx_metadata_protocols(isip4, isip6, isudp, istcp);
+
+    /* VLAN state */
+    if (net_rx_pkt_is_vlan_stripped(pkt)) {
+        *status_flags |= E1000_RXD_STAT_VP;
+        *vlan_tag = cpu_to_le16(net_rx_pkt_get_vlan_tag(pkt));
+        trace_e1000e_rx_metadata_vlan(*vlan_tag);
+    }
+
+    /* Packet parsing results */
+    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
+        if (rss_info->enabled) {
+            *rss = cpu_to_le32(rss_info->hash);
+            *mrq = cpu_to_le32(rss_info->type | (rss_info->queue << 8));
+            trace_e1000e_rx_metadata_rss(*rss, *mrq);
+        }
+    } else if (isip4) {
+        /* Without RSS the same writeback dword carries the IPv4 ID */
+        *status_flags |= E1000_RXD_STAT_IPIDV;
+        *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
+        trace_e1000e_rx_metadata_ip_id(*ip_id);
+    }
+
+    if (istcp && _e1000e_is_tcp_ack(core, pkt)) {
+        *status_flags |= E1000_RXD_STAT_ACK;
+        trace_e1000e_rx_metadata_ack();
+    }
+
+    /* Packet type classification for the PKT_TYPE status field */
+    if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
+        trace_e1000e_rx_metadata_ipv6_filtering_disabled();
+        pkt_type = E1000_RXD_PKT_MAC;
+    } else if (istcp || isudp) {
+        pkt_type = isip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP;
+    } else if (isip4 || isip6) {
+        pkt_type = isip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6;
+    } else {
+        pkt_type = E1000_RXD_PKT_MAC;
+    }
+
+    *status_flags |= E1000_RXD_PKT_TYPE(pkt_type);
+    trace_e1000e_rx_metadata_pkt_type(pkt_type);
+
+    /* RX CSO information */
+    if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
+        trace_e1000e_rx_metadata_ipv6_sum_disabled();
+        goto func_exit;
+    }
+
+    /* No virtio header: fall back to software checksum validation */
+    if (!net_rx_pkt_has_virt_hdr(pkt)) {
+        trace_e1000e_rx_metadata_no_virthdr();
+        _e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp);
+        goto func_exit;
+    }
+
+    vhdr = net_rx_pkt_get_vhdr(pkt);
+
+    /* Virtio header present but carries no checksum verdict */
+    if (!(vhdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) &&
+        !(vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) {
+        trace_e1000e_rx_metadata_virthdr_no_csum_info();
+        _e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp);
+        goto func_exit;
+    }
+
+    if (_e1000e_rx_l3_cso_enabled(core)) {
+        *status_flags |= isip4 ? E1000_RXD_STAT_IPCS : 0;
+    } else {
+        trace_e1000e_rx_metadata_l3_cso_disabled();
+    }
+
+    if (_e1000e_rx_l4_cso_enabled(core)) {
+        if (istcp) {
+            *status_flags |= E1000_RXD_STAT_TCPCS;
+        } else if (isudp) {
+            *status_flags |= E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS;
+        }
+    } else {
+        trace_e1000e_rx_metadata_l4_cso_disabled();
+    }
+
+    trace_e1000e_rx_metadata_status_flags(*status_flags);
+
+func_exit:
+    *status_flags = cpu_to_le32(*status_flags);
+}
+
+/*
+ * Write back a legacy RX descriptor.  pkt may be NULL (null-address
+ * descriptor skipped per spec); legacy format has no RSS fields, so
+ * rss/mrq/ip_id outputs are discarded locals.
+ */
+static void
+write_legacy_rx_descriptor(E1000ECore *core, uint8_t *desc,
+                           struct NetRxPkt *pkt,
+                           const E1000E_RSSInfo *rss_info,
+                           uint16_t length)
+{
+    uint32_t status_flags, rss, mrq;
+    uint16_t ip_id;
+
+    struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc;
+
+    memset(d, 0, sizeof(*d));
+
+    /* Legacy descriptors cannot report RSS results */
+    assert(!rss_info->enabled);
+
+    d->length = cpu_to_le16(length);
+
+    _e1000e_build_rx_metadata(core, pkt, pkt != NULL,
+                              rss_info,
+                              &rss, &mrq,
+                              &status_flags, &ip_id,
+                              &d->special);
+    /* Split the LE status dword into the legacy errors/status bytes */
+    d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
+    d->status = (uint8_t) le32_to_cpu(status_flags);
+}
+
+/*
+ * Write back an extended RX descriptor: length plus the full metadata
+ * set (RSS, status/error, IP id, VLAN) directly into the writeback
+ * layout.  pkt may be NULL for skipped descriptors.
+ */
+static void
+write_extended_rx_descriptor(E1000ECore *core, uint8_t *desc,
+                             struct NetRxPkt *pkt,
+                             const E1000E_RSSInfo *rss_info,
+                             uint16_t length)
+{
+    union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc;
+
+    memset(d, 0, sizeof(*d));
+
+    d->wb.upper.length = cpu_to_le16(length);
+
+    _e1000e_build_rx_metadata(core, pkt, pkt != NULL,
+                              rss_info,
+                              &d->wb.lower.hi_dword.rss,
+                              &d->wb.lower.mrq,
+                              &d->wb.upper.status_error,
+                              &d->wb.lower.hi_dword.csum_ip.ip_id,
+                              &d->wb.upper.vlan);
+}
+
+/*
+ * Write back a packet-split RX descriptor: per-buffer lengths, the
+ * common metadata, and the header status word (HDRSP set only when a
+ * protocol header was actually split out).
+ */
+static void
+write_ps_rx_descriptor(E1000ECore *core, uint8_t *desc,
+                       struct NetRxPkt *pkt,
+                       const E1000E_RSSInfo *rss_info,
+                       size_t ps_hdr_len,
+                       uint16_t(*written)[MAX_PS_BUFFERS])
+{
+    int i;
+    union e1000_rx_desc_packet_split *d =
+        (union e1000_rx_desc_packet_split *) desc;
+
+    memset(d, 0, sizeof(*d));
+
+    /* Buffer 0 (header) length lives in 'middle'; payload lengths above */
+    d->wb.middle.length0 = cpu_to_le16((*written)[0]);
+
+    for (i = 0; i < PS_PAGE_BUFFERS; i++) {
+        d->wb.upper.length[i] = cpu_to_le16((*written)[i + 1]);
+    }
+
+    _e1000e_build_rx_metadata(core, pkt, pkt != NULL,
+                              rss_info,
+                              &d->wb.lower.hi_dword.rss,
+                              &d->wb.lower.mrq,
+                              &d->wb.middle.status_error,
+                              &d->wb.lower.hi_dword.csum_ip.ip_id,
+                              &d->wb.middle.vlan);
+
+    d->wb.upper.header_status =
+        cpu_to_le16(ps_hdr_len | (ps_hdr_len ? E1000_RXDPS_HDRSTAT_HDRSP : 0));
+
+    trace_e1000e_rx_desc_ps_write((*written)[0], (*written)[1],
+                                  (*written)[2], (*written)[3]);
+}
+
+/*
+ * Dispatch descriptor writeback on the active RX descriptor format.
+ * Only packet-split descriptors may carry a split header.
+ */
+static void
+write_rx_descriptor(E1000ECore *core, uint8_t *desc,
+                    struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
+                    size_t ps_hdr_len, uint16_t (*written)[MAX_PS_BUFFERS])
+{
+    bool legacy = _e1000e_rx_use_legacy_descriptor(core);
+
+    if (legacy) {
+        assert(ps_hdr_len == 0);
+        write_legacy_rx_descriptor(core, desc, pkt, rss_info, (*written)[0]);
+    } else if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
+        write_ps_rx_descriptor(core, desc, pkt, rss_info,
+                               ps_hdr_len, written);
+    } else {
+        assert(ps_hdr_len == 0);
+        write_extended_rx_descriptor(core, desc, pkt, rss_info,
+                                     (*written)[0]);
+    }
+}
+
+/* Fill state of one descriptor's guest buffers during RX DMA */
+typedef struct ba_state_st {
+    uint16_t written[MAX_PS_BUFFERS];   /* bytes written per buffer */
+    uint8_t cur_idx;                    /* buffer currently being filled */
+} ba_state;
+
+/*
+ * DMA (part of) the split-out packet header into buffer 0 of the
+ * descriptor, then direct all subsequent payload writes to buffer 1.
+ * The caller guarantees the header fits into buffer 0.
+ */
+static void
+write_hdr_to_rx_buffers(E1000ECore *core,
+                        hwaddr (*ba)[MAX_PS_BUFFERS],
+                        ba_state *bastate,
+                        const char *data,
+                        dma_addr_t data_len)
+{
+    assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]);
+
+    pci_dma_write(core->owner, (*ba)[0] + bastate->written[0], data, data_len);
+    bastate->written[0] += data_len;
+
+    /* Payload always starts in buffer 1 after a header write */
+    bastate->cur_idx = 1;
+}
+
+/*
+ * DMA 'data_len' bytes of payload into the descriptor's buffers,
+ * advancing to the next buffer whenever the current one fills up.
+ * The caller sizes writes so the buffer array is never exceeded.
+ */
+static void
+write_to_rx_buffers(E1000ECore *core,
+                    hwaddr (*ba)[MAX_PS_BUFFERS],
+                    ba_state *bastate,
+                    const char *data,
+                    dma_addr_t data_len)
+{
+    while (data_len > 0) {
+        uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
+        uint32_t cur_buf_bytes_left = cur_buf_len -
+                                      bastate->written[bastate->cur_idx];
+        uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
+
+        trace_e1000e_rx_desc_buff_write(bastate->cur_idx,
+                                        (*ba)[bastate->cur_idx],
+                                        bastate->written[bastate->cur_idx],
+                                        data,
+                                        bytes_to_write);
+
+        pci_dma_write(core->owner,
+            (*ba)[bastate->cur_idx] + bastate->written[bastate->cur_idx],
+            data, bytes_to_write);
+
+        bastate->written[bastate->cur_idx] += bytes_to_write;
+        data += bytes_to_write;
+        data_len -= bytes_to_write;
+
+        /* Current buffer exhausted: move to the next one */
+        if (bastate->written[bastate->cur_idx] == cur_buf_len) {
+            bastate->cur_idx++;
+        }
+
+        assert(bastate->cur_idx < MAX_PS_BUFFERS);
+    }
+}
+
+/*
+ * Update RX statistics registers for one received packet:
+ * size-bucketed counters, totals, and broadcast/multicast counts.
+ * data_size excludes FCS; data_fcs_size includes it.
+ */
+static void
+_e1000e_update_rx_stats(E1000ECore *core,
+                        size_t data_size,
+                        size_t data_fcs_size)
+{
+    static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
+                                    PRC1023, PRC1522 };
+
+    increase_size_stats(core, PRCregs, data_fcs_size);
+    inc_reg_if_not_full(core, TPR);
+    core->mac[GPRC] = core->mac[TPR];
+    /* TOR - Total Octets Received:
+     * This register includes bytes received in a packet from the <Destination
+     * Address> field through the <CRC> field, inclusively.
+     * Always include FCS length (4) in size.
+     */
+    grow_8reg_if_not_full(core, TORL, data_size + 4);
+    core->mac[GORCL] = core->mac[TORL];
+    core->mac[GORCH] = core->mac[TORH];
+
+    switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
+    case ETH_PKT_BCAST:
+        inc_reg_if_not_full(core, BPRC);
+        break;
+
+    case ETH_PKT_MCAST:
+        inc_reg_if_not_full(core, MPRC);
+        break;
+
+    default:
+        break;
+    }
+}
+
+/* True when free descriptors have dropped to the RDMTS threshold */
+static inline bool
+_e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi)
+{
+    uint32_t threshold = _e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift;
+
+    return _e1000e_ring_free_descr_num(core, rxi) == threshold;
+}
+
+/*
+ * Decide whether the current packet should be delivered using packet
+ * split and, if so, return the header length to split out via *hdr_len.
+ * Split is refused for non-IP packets, for fragments when the guest
+ * disabled fragment split, and when the header would not fit buffer 0.
+ */
+static bool
+_e1000e_do_ps(E1000ECore *core, struct NetRxPkt *pkt, size_t *hdr_len)
+{
+    bool isip4, isip6, isudp, istcp;
+    bool fragment;
+
+    if (!_e1000e_rx_use_ps_descriptor(core)) {
+        return false;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
+
+    if (isip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (isip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        return false;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        return false;
+    }
+
+    /* Unfragmented TCP/UDP: split after L4 header; otherwise after L3 */
+    if (!fragment && (isudp || istcp)) {
+        *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    if ((*hdr_len > core->rxbuf_sizes[0]) ||
+        (*hdr_len > net_rx_pkt_get_total_len(pkt))) {
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * DMA the packet into guest RX buffers, consuming as many descriptors
+ * as needed, writing back each descriptor, and updating statistics.
+ *
+ * NOTE(review): "paket" is a typo for "packet"; kept because the caller
+ * (e1000e_receive_iov) uses this spelling.
+ */
+static bool
+_e1000e_write_paket_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
+                             const E1000E_RxRing *rxr,
+                             const E1000E_RSSInfo *rss_info)
+{
+    PCIDevice *d = core->owner;
+    dma_addr_t base;
+    uint8_t desc[E1000_MAX_RX_DESC_LEN];
+    size_t desc_size;
+    size_t desc_offset = 0;
+    size_t iov_ofs = 0;
+
+    struct iovec *iov = net_rx_pkt_get_iovec(pkt);
+    size_t size = net_rx_pkt_get_total_len(pkt);
+    size_t total_size = size + fcs_len(core);
+    const E1000E_RingInfo *rxi;
+    size_t ps_hdr_len = 0;
+    bool do_ps = _e1000e_do_ps(core, pkt, &ps_hdr_len);
+
+    rxi = rxr->i;
+
+    /* One loop iteration per descriptor consumed */
+    do {
+        hwaddr ba[MAX_PS_BUFFERS];
+        ba_state bastate = { { 0 } };
+        bool is_last = false;
+        bool is_first = true;
+
+        desc_size = total_size - desc_offset;
+
+        if (desc_size > core->rx_desc_buf_size) {
+            desc_size = core->rx_desc_buf_size;
+        }
+
+        base = _e1000e_ring_head_descr(core, rxi);
+
+        pci_dma_read(d, base, &desc, core->rx_desc_len);
+
+        trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
+
+        read_rx_descriptor(core, desc, &ba);
+
+        if (ba[0]) {
+            if (desc_offset < size) {
+                static const uint32_t fcs_pad;
+                size_t iov_copy;
+                size_t copy_size = size - desc_offset;
+                if (copy_size > core->rx_desc_buf_size) {
+                    copy_size = core->rx_desc_buf_size;
+                }
+
+                /* For PS mode copy the packet header first */
+                if (do_ps) {
+                    if (is_first) {
+                        size_t ps_hdr_copied = 0;
+                        do {
+                            iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
+                                           iov->iov_len - iov_ofs);
+
+                            write_hdr_to_rx_buffers(core, &ba, &bastate,
+                                                    iov->iov_base, iov_copy);
+
+                            copy_size -= iov_copy;
+                            ps_hdr_copied += iov_copy;
+
+                            iov_ofs += iov_copy;
+                            if (iov_ofs == iov->iov_len) {
+                                iov++;
+                                iov_ofs = 0;
+                            }
+                        } while (ps_hdr_copied < ps_hdr_len);
+
+                        is_first = false;
+                    } else {
+                        /* Leave buffer 0 of each descriptor except first */
+                        /* empty as per spec 7.1.5.1 */
+                        write_hdr_to_rx_buffers(core, &ba, &bastate, NULL, 0);
+                    }
+                }
+
+                /* Copy packet payload */
+                while (copy_size) {
+                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
+
+                    write_to_rx_buffers(core, &ba, &bastate,
+                                        iov->iov_base + iov_ofs, iov_copy);
+
+                    copy_size -= iov_copy;
+                    iov_ofs += iov_copy;
+                    if (iov_ofs == iov->iov_len) {
+                        iov++;
+                        iov_ofs = 0;
+                    }
+                }
+
+                if (desc_offset + desc_size >= total_size) {
+                    /* Simulate FCS checksum presence in the last descriptor */
+                    write_to_rx_buffers(core, &ba, &bastate,
+                                        (const char *) &fcs_pad, fcs_len(core));
+                }
+            }
+            desc_offset += desc_size;
+            if (desc_offset >= total_size) {
+                is_last = true;
+            }
+        } else { /* as per intel docs; skip descriptors with null buf addr */
+            trace_e1000e_rx_null_descriptor();
+        }
+
+        /* Only the final descriptor carries full packet metadata */
+        write_rx_descriptor(core, desc, is_last ? core->rx_pkt : NULL,
+                            rss_info, do_ps ? ps_hdr_len : 0, &bastate.written);
+        pci_dma_write(d, base, &desc, core->rx_desc_len);
+
+        _e1000e_ring_advance(core, rxi,
+                             core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
+
+    } while (desc_offset < total_size);
+
+    _e1000e_update_rx_stats(core, size, total_size);
+
+    return true;
+}
+
+/*
+ * Main RX entry point: strip the virtio header if present, pad/filter
+ * the frame, select an RX queue via RSS, DMA the packet to the guest
+ * and raise the resulting interrupt causes.
+ *
+ * Returns the number of bytes consumed (0 to request a retry later,
+ * -1 when the link is down).
+ */
+ssize_t
+e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
+{
+    /* this is the size past which hardware will
+       drop packets when setting LPE=0 */
+    static const int MAXIMUM_ETHERNET_VLAN_SIZE = 1522;
+    /* this is the size past which hardware will
+       drop packets when setting LPE=1 */
+    static const int MAXIMUM_ETHERNET_LPE_SIZE = 16384;
+
+    static const int MAXIMUM_ETHERNET_HDR_LEN = (14 + 4);
+
+    /* Min. octets in an ethernet frame sans FCS */
+    static const int MIN_BUF_SIZE = 60;
+
+    uint32_t n = 0;
+    uint8_t min_buf[MIN_BUF_SIZE];
+    struct iovec min_iov;
+    uint8_t *filter_buf;
+    size_t size, orig_size;
+    size_t iov_ofs = 0;
+    E1000E_RxRing rxr;
+    E1000E_RSSInfo rss_info;
+    size_t total_size;
+    ssize_t retval;
+    bool rdmts_hit;
+
+    trace_e1000e_rx_receive_iov(iovcnt);
+
+    if (!(core->mac[STATUS] & E1000_STATUS_LU)) {
+        trace_e1000e_rx_link_down(core->mac[STATUS]);
+        return -1;
+    }
+
+    if (!(core->mac[RCTL] & E1000_RCTL_EN)) {
+        trace_e1000e_rx_disabled(core->mac[RCTL]);
+        return 0;
+    }
+
+    /* Pull virtio header in */
+    if (core->has_vnet) {
+        net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt);
+        iov_ofs = sizeof(struct virtio_net_hdr);
+    }
+
+    /* NOTE(review): assumes the Ethernet header (and any vnet header)
+     * lies within the first iovec element */
+    filter_buf = iov->iov_base + iov_ofs;
+    orig_size = iov_size(iov, iovcnt);
+    size = orig_size - iov_ofs;
+
+    /* Pad to minimum Ethernet frame length */
+    if (size < sizeof(min_buf)) {
+        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
+        memset(&min_buf[size], 0, sizeof(min_buf) - size);
+        inc_reg_if_not_full(core, RUC);
+        min_iov.iov_base = filter_buf = min_buf;
+        min_iov.iov_len = size = sizeof(min_buf);
+        iovcnt = 1;
+        iov = &min_iov;
+        iov_ofs = 0;
+    } else if (iov->iov_len < MAXIMUM_ETHERNET_HDR_LEN) {
+        /* This is very unlikely, but may happen. */
+        iov_to_buf(iov, iovcnt, iov_ofs, min_buf, MAXIMUM_ETHERNET_HDR_LEN);
+        filter_buf = min_buf;
+    }
+
+    /* Discard oversized packets if !LPE and !SBP. */
+    if ((size > MAXIMUM_ETHERNET_LPE_SIZE ||
+        (size > MAXIMUM_ETHERNET_VLAN_SIZE
+            && !(core->mac[RCTL] & E1000_RCTL_LPE)))
+        && !(core->mac[RCTL] & E1000_RCTL_SBP)) {
+        inc_reg_if_not_full(core, ROC);
+        trace_e1000e_rx_oversized(size);
+        return orig_size;
+    }
+
+    net_rx_pkt_set_packet_type(core->rx_pkt,
+        get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf)));
+
+    if (!receive_filter(core, filter_buf, size)) {
+        trace_e1000e_rx_flt_dropped();
+        return orig_size;
+    }
+
+    /* Attach the payload (VLAN tag stripped if enabled) to rx_pkt */
+    net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+                               vlan_enabled(core), core->vet);
+
+    _e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info);
+    _e1000e_rx_ring_init(core, &rxr, rss_info.queue);
+
+    trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx);
+
+    total_size = net_rx_pkt_get_total_len(core->rx_pkt) + fcs_len(core);
+
+    if (_e1000e_has_rxbufs(core, rxr.i, total_size) &&
+        _e1000e_write_paket_to_guest(core, core->rx_pkt, &rxr, &rss_info)) {
+        retval = orig_size;
+
+        /* Perform small receive detection (RSRPD) */
+        if (total_size < core->mac[RSRPD]) {
+            n |= E1000_ICS_SRPD;
+        }
+
+        /* Perform ACK receive detection */
+        if (_e1000e_is_tcp_ack(core, core->rx_pkt)) {
+            n |= E1000_ICS_ACK;
+        }
+
+        /* Check if receive descriptor minimum threshold hit */
+        rdmts_hit = _e1000e_rx_descr_threshold_hit(core, rxr.i);
+        n |= _e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit);
+
+        trace_e1000e_rx_written_to_guest(n);
+    } else {
+        /* No room: report overrun and ask the backend to retry later */
+        n |= E1000_ICS_RXO;
+        retval = 0;
+
+        trace_e1000e_rx_not_written_to_guest(n);
+    }
+
+    if (!_e1000e_intrmgr_delay_rx_causes(core, &n)) {
+        trace_e1000e_rx_interrupt_set(n);
+        set_interrupt_cause(core, n);
+    } else {
+        trace_e1000e_rx_interrupt_delayed(n);
+    }
+
+    return retval;
+}
+
+/* True when the PHY is configured for auto-negotiation */
+static bool
+have_autoneg(E1000ECore *core)
+{
+    return (core->phy[0][PHY_CTRL] & MII_CR_AUTO_NEG_EN) != 0;
+}
+
+/*
+ * Enable TX/RX flow control once auto-negotiation has completed.
+ * NOTE(review): the else path only traces; it does not clear the
+ * TFCE/RFCE bits previously set — confirm this is intended.
+ */
+static void _e1000e_update_flowctl_status(E1000ECore *core)
+{
+    if (have_autoneg(core) &&
+        core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE) {
+        trace_e1000e_link_autoneg_flowctl(true);
+        core->mac[CTRL] |= E1000_CTRL_TFCE | E1000_CTRL_RFCE;
+    } else {
+        trace_e1000e_link_autoneg_flowctl(false);
+    }
+}
+
+/* Drop the link indication in both the MAC and the PHY registers */
+static void
+e1000_link_down(E1000ECore *core)
+{
+    core->mac[STATUS] &= ~E1000_STATUS_LU;
+    core->phy[0][PHY_STATUS] &=
+        ~(MII_SR_LINK_STATUS | MII_SR_AUTONEG_COMPLETE);
+    core->phy[0][PHY_LP_ABILITY] &= ~MII_LPAR_LPACK;
+
+    _e1000e_update_flowctl_status(core);
+}
+
+/* Raise the link indication in both the PHY and the MAC registers */
+static void
+e1000_link_up(E1000ECore *core)
+{
+    core->phy[0][PHY_STATUS] |= MII_SR_LINK_STATUS;
+    core->mac[STATUS] |= E1000_STATUS_LU;
+}
+
+/* Take the link down and schedule the emulated negotiation timer */
+static inline void
+_e1000e_restart_autoneg(E1000ECore *core)
+{
+    if (!have_autoneg(core)) {
+        return;
+    }
+
+    e1000_link_down(core);
+    trace_e1000e_link_negotiation_start();
+    timer_mod(core->autoneg_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
+/* PHY control register write handler */
+static void
+set_phy_ctrl(E1000ECore *core, int index, uint16_t val)
+{
+    /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */
+    uint16_t self_clearing = MII_CR_RESET | MII_CR_RESTART_AUTO_NEG;
+
+    core->phy[0][PHY_CTRL] = val & ~(0x3f | self_clearing);
+
+    if (val & MII_CR_RESTART_AUTO_NEG) {
+        _e1000e_restart_autoneg(core);
+    }
+}
+
+/* PHY OEM bits write handler; bit 10 requests an autoneg restart */
+static void
+set_phy_oem_bits(E1000ECore *core, int index, uint16_t val)
+{
+    bool restart_requested = val & BIT(10);
+
+    core->phy[0][PHY_OEM_BITS] = val & ~BIT(10);
+
+    if (restart_requested) {
+        _e1000e_restart_autoneg(core);
+    }
+}
+
+/* PHY page select write handler; only the writable bits are kept */
+static void
+set_phy_page(E1000ECore *core, int index, uint16_t val)
+{
+    uint16_t page = val & PHY_PAGE_RW_MASK;
+
+    core->phy[0][PHY_PAGE] = page;
+}
+
+/*
+ * Propagate the backend's link state into the emulated device.  When
+ * autoneg is enabled and not yet complete, link-up is deferred to the
+ * negotiation timer.  Raises LSC if the STATUS register changed.
+ */
+void
+e1000e_core_set_link_status(E1000ECore *core)
+{
+    NetClientState *nc = qemu_get_queue(core->owner_nic);
+    uint32_t old_status = core->mac[STATUS];
+
+    if (nc->link_down) {
+        e1000_link_down(core);
+    } else {
+        if (have_autoneg(core) &&
+            !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
+            /* emulate auto-negotiation if supported */
+            timer_mod(core->autoneg_timer,
+                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
+        } else {
+            e1000_link_up(core);
+        }
+    }
+
+    if (core->mac[STATUS] != old_status) {
+        set_interrupt_cause(core, E1000_ICR_LSC);
+    }
+}
+
+/*
+ * Reload RA[0] from the permanent MAC address: the first four bytes go
+ * into RAL, the last two into RAH together with the Address Valid bit.
+ */
+static void
+_e1000e_core_reset_mac(E1000ECore *core)
+{
+    int i;
+
+    core->mac[RA] = 0;
+    core->mac[RA + 1] = E1000_RAH_AV;
+    for (i = 0; i < 4; i++) {
+        core->mac[RA] |= core->permanent_mac[i] << (8 * i);
+        core->mac[RA + 1] |=
+            (i < 2) ? core->permanent_mac[i + 4] << (8 * i) : 0;
+    }
+
+    qemu_format_nic_info_str(qemu_get_queue(core->owner_nic),
+                             core->permanent_mac);
+
+    trace_e1000e_mac_indicate(MAC_ARG(core->permanent_mac));
+}
+
+/*
+ * CTRL register write handler: stores the value (mirrored in CTRL_DUP),
+ * performs a software MAC reset on RST and flags a PHY reset via
+ * STATUS.PHYRA.
+ */
+static void
+set_ctrl(E1000ECore *core, int index, uint32_t val)
+{
+    trace_e1000e_core_ctrl_write(index, val);
+
+    /* RST is self clearing */
+    core->mac[CTRL] = val & ~E1000_CTRL_RST;
+    core->mac[CTRL_DUP] = core->mac[CTRL];
+
+    trace_e1000e_link_set_params(
+        !!(val & E1000_CTRL_ASDE),
+        (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT,
+        !!(val & E1000_CTRL_FRCSPD),
+        !!(val & E1000_CTRL_FRCDPX),
+        !!(val & E1000_CTRL_RFCE),
+        !!(val & E1000_CTRL_TFCE));
+
+    if (val & E1000_CTRL_RST) {
+        trace_e1000e_core_ctrl_sw_reset();
+        _e1000e_core_reset_mac(core);
+    }
+
+    if (val & E1000_CTRL_PHY_RST) {
+        trace_e1000e_core_ctrl_phy_reset();
+        core->mac[STATUS] |= E1000_STATUS_PHYRA;
+    }
+}
+
+/* RFCTL write handler: store the value and warn about the extended
+ * filtering features this emulation does not implement */
+static void
+set_rfctl(E1000ECore *core, int index, uint32_t val)
+{
+    trace_e1000e_rx_set_rfctl(val);
+
+    if (!(val & E1000_RFCTL_ISCSI_DIS)) {
+        trace_e1000e_wrn_iscsi_filtering_not_supported();
+    }
+    if (!(val & E1000_RFCTL_NFSW_DIS)) {
+        trace_e1000e_wrn_nfsw_filtering_not_supported();
+    }
+    if (!(val & E1000_RFCTL_NFSR_DIS)) {
+        trace_e1000e_wrn_nfsr_filtering_not_supported();
+    }
+
+    core->mac[RFCTL] = val;
+}
+
+/* Decode the legacy (e1000-style) RX buffer size encoding in RCTL */
+static uint32_t
+parse_rxbufsize_e1000(uint32_t rctl)
+{
+    uint32_t size_bits = rctl &
+        (E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 |
+         E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 |
+         E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256);
+
+    switch (size_bits) {
+    case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384:
+        return 16384;
+    case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192:
+        return 8192;
+    case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096:
+        return 4096;
+    case E1000_RCTL_SZ_1024:
+        return 1024;
+    case E1000_RCTL_SZ_512:
+        return 512;
+    case E1000_RCTL_SZ_256:
+        return 256;
+    default:
+        /* Covers E1000_RCTL_SZ_2048 and any reserved encoding */
+        return 2048;
+    }
+}
+
+/* Total guest buffer space available per RX descriptor */
+static void
+calc_per_desc_buf_size(E1000ECore *core)
+{
+    uint32_t total = 0;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(core->rxbuf_sizes); i++) {
+        total += core->rxbuf_sizes[i];
+    }
+
+    core->rx_desc_buf_size = total;
+}
+
+/*
+ * Recompute per-descriptor buffer sizes from the current RCTL mode:
+ * packet split (PSRCTL fields), flexible buffer size (FLXBUF), or the
+ * legacy e1000 encoding; then refresh the per-descriptor total.
+ */
+static void
+parse_rxbufsize(E1000ECore *core)
+{
+    uint32_t rctl = core->mac[RCTL];
+
+    memset(core->rxbuf_sizes, 0, sizeof(core->rxbuf_sizes));
+
+    if (rctl & E1000_RCTL_DTYP_MASK) {
+        uint32_t bsize;
+
+        /* Buffer 0 is in 128-byte units, buffers 1-3 in 1 KiB units */
+        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE0_MASK;
+        core->rxbuf_sizes[0] = (bsize >> E1000_PSRCTL_BSIZE0_SHIFT) * 128;
+
+        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE1_MASK;
+        core->rxbuf_sizes[1] = (bsize >> E1000_PSRCTL_BSIZE1_SHIFT) * 1024;
+
+        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE2_MASK;
+        core->rxbuf_sizes[2] = (bsize >> E1000_PSRCTL_BSIZE2_SHIFT) * 1024;
+
+        bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE3_MASK;
+        core->rxbuf_sizes[3] = (bsize >> E1000_PSRCTL_BSIZE3_SHIFT) * 1024;
+    } else if (rctl & E1000_RCTL_FLXBUF_MASK) {
+        int flxbuf = rctl & E1000_RCTL_FLXBUF_MASK;
+        core->rxbuf_sizes[0] = (flxbuf >> E1000_RCTL_FLXBUF_SHIFT) * 1024;
+    } else {
+        core->rxbuf_sizes[0] = parse_rxbufsize_e1000(rctl);
+    }
+
+    trace_e1000e_rx_desc_buff_sizes(core->rxbuf_sizes[0], core->rxbuf_sizes[1],
+                                    core->rxbuf_sizes[2], core->rxbuf_sizes[3]);
+
+    calc_per_desc_buf_size(core);
+}
+
+/* Select the RX descriptor size: legacy, packet-split extended, or
+ * plain extended, depending on RCTL and the legacy-descriptor mode. */
+static void
+calc_rxdesclen(E1000ECore *core)
+{
+ if (_e1000e_rx_use_legacy_descriptor(core)) {
+ core->rx_desc_len = sizeof(struct e1000_rx_desc);
+ } else {
+ if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) {
+ core->rx_desc_len = sizeof(union e1000_rx_desc_packet_split);
+ } else {
+ core->rx_desc_len = sizeof(union e1000_rx_desc_extended);
+ }
+ }
+ trace_e1000e_rx_desc_len(core->rx_desc_len);
+}
+
+/* RCTL write handler. When the receiver is enabled, refresh the
+ * cached buffer/descriptor geometry and the minimum-free-descriptors
+ * threshold shift (RDMTS), then kick reception. */
+static void
+set_rx_control(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[RCTL] = val;
+ trace_e1000e_rx_set_rctl(core->mac[RCTL]);
+
+ if (val & E1000_RCTL_EN) {
+ parse_rxbufsize(core);
+ calc_rxdesclen(core);
+ core->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1 +
+ E1000_RING_DESC_LEN_SHIFT;
+
+ start_recv(core);
+ }
+}
+
+/* Per-page dispatch table for PHY register writes; NULL entries fall
+ * back to a plain store in phy_reg_write(). Only page 0 has special
+ * handlers. */
+static
+void(*phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE])
+(E1000ECore *, int, uint16_t) = {
+ [0] = {
+ [PHY_CTRL] = set_phy_ctrl,
+ [PHY_PAGE] = set_phy_page,
+ [PHY_OEM_BITS] = set_phy_oem_bits
+ }
+};
+
+/* Mask off (disable) the given interrupt cause bits in IMS. */
+static void
+clear_ims_bits(E1000ECore *core, uint32_t bits)
+{
+ trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits);
+ core->mac[IMS] &= ~bits;
+}
+
+/* Interrupt throttling: if the given (E)ITR timer is running, mark
+ * the interrupt pending and report that delivery must be postponed;
+ * otherwise (re)arm the timer when its interval register is nonzero
+ * and let the interrupt through. */
+static bool
+_e1000e_postpone_interrupt(bool *interrupt_pending,
+ E1000IntrDelayTimer *timer)
+{
+ if (timer->running) {
+ trace_e1000e_irq_postponed_by_xitr(timer->delay_reg << 2);
+
+ *interrupt_pending = true;
+ return true;
+ }
+
+ if (timer->core->mac[timer->delay_reg] != 0) {
+ _e1000e_intrmgr_rearm_timer(timer);
+ }
+
+ return false;
+}
+
+/* Throttling check for the legacy/MSI ITR timer. */
+static inline bool
+_e1000e_itr_should_postpone(E1000ECore *core)
+{
+ return _e1000e_postpone_interrupt(&core->itr_intr_pending, &core->itr);
+}
+
+/* Throttling check for the per-vector MSI-X EITR timer 'idx'. */
+static inline bool
+_e1000e_eitr_should_postpone(E1000ECore *core, int idx)
+{
+ return _e1000e_postpone_interrupt(&core->eitr_intr_pending[idx],
+ &core->eitr[idx]);
+}
+
+/* Deliver one MSI-X interrupt for 'cause' using the IVAR entry
+ * 'int_cfg' (validity bit + vector number), honoring EITR throttling.
+ * Also applies EIAME auto-masking of IAM bits and the EIAC
+ * auto-clear of ICR bits. */
+static void
+_e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
+{
+ if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
+ uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
+ if (vec < E1000E_MSIX_VEC_NUM) {
+ if (!_e1000e_eitr_should_postpone(core, vec)) {
+ trace_e1000e_irq_msix_notify_vec(vec);
+ msix_notify(core->owner, vec);
+ }
+ } else {
+ trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
+ }
+ } else {
+ trace_e1000e_wrn_msix_invalid(cause, int_cfg);
+ }
+
+ if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) {
+ trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause);
+ clear_ims_bits(core, core->mac[IAM] & cause);
+ }
+
+ core->mac[ICR] &= ~(core->mac[EIAC] & E1000_EIAC_MASK);
+}
+
+/* Fan out a set of interrupt causes to their MSI-X vectors as
+ * configured in IVAR (RX queues 0/1, TX queues 0/1, OTHER). */
+static void
+_e1000e_msix_notify(E1000ECore *core, uint32_t causes)
+{
+ if (causes & E1000_ICR_RXQ0) {
+ _e1000e_msix_notify_one(core, E1000_ICR_RXQ0,
+ E1000_IVAR_RXQ0(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_RXQ1) {
+ _e1000e_msix_notify_one(core, E1000_ICR_RXQ1,
+ E1000_IVAR_RXQ1(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_TXQ0) {
+ _e1000e_msix_notify_one(core, E1000_ICR_TXQ0,
+ E1000_IVAR_TXQ0(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_TXQ1) {
+ _e1000e_msix_notify_one(core, E1000_ICR_TXQ1,
+ E1000_IVAR_TXQ1(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_OTHER) {
+ _e1000e_msix_notify_one(core, E1000_ICR_OTHER,
+ E1000_IVAR_OTHER(core->mac[IVAR]));
+ }
+}
+
+/* Clear the pending bit of the MSI-X vector configured for 'cause'
+ * in IVAR entry 'int_cfg' (used for PBA-clear-on-IMS-write). */
+static void
+_e1000e_msix_clear_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
+{
+ if (E1000_IVAR_ENTRY_VALID(int_cfg)) {
+ uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg);
+ if (vec < E1000E_MSIX_VEC_NUM) {
+ trace_e1000e_irq_msix_pending_clearing(cause, int_cfg, vec);
+ msix_clr_pending(core->owner, vec);
+ } else {
+ trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg);
+ }
+ } else {
+ trace_e1000e_wrn_msix_invalid(cause, int_cfg);
+ }
+}
+
+/* Clear MSI-X pending bits for every cause present in 'causes',
+ * resolving each cause to its vector via IVAR. */
+static void
+_e1000e_msix_clear(E1000ECore *core, uint32_t causes)
+{
+ if (causes & E1000_ICR_RXQ0) {
+ _e1000e_msix_clear_one(core, E1000_ICR_RXQ0,
+ E1000_IVAR_RXQ0(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_RXQ1) {
+ _e1000e_msix_clear_one(core, E1000_ICR_RXQ1,
+ E1000_IVAR_RXQ1(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_TXQ0) {
+ _e1000e_msix_clear_one(core, E1000_ICR_TXQ0,
+ E1000_IVAR_TXQ0(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_TXQ1) {
+ _e1000e_msix_clear_one(core, E1000_ICR_TXQ1,
+ E1000_IVAR_TXQ1(core->mac[IVAR]));
+ }
+
+ if (causes & E1000_ICR_OTHER) {
+ _e1000e_msix_clear_one(core, E1000_ICR_OTHER,
+ E1000_IVAR_OTHER(core->mac[IVAR]));
+ }
+}
+
+/* Keep ICR's ASSERTED (INT_ASSERTED) bit consistent: set it iff any
+ * other ICR bit is set. */
+static inline void
+_e1000e_fix_icr_asserted(E1000ECore *core)
+{
+ core->mac[ICR] &= ~E1000_ICR_ASSERTED;
+ if (core->mac[ICR]) {
+ core->mac[ICR] |= E1000_ICR_ASSERTED;
+ }
+
+ trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]);
+}
+
+/* Deliver pending, unmasked causes via MSI-X (per-cause vectors) or
+ * plain MSI (single vector, subject to ITR throttling). */
+static void
+_e1000e_send_msi(E1000ECore *core, bool msix)
+{
+ uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED;
+
+ if (msix) {
+ _e1000e_msix_notify(core, causes);
+ } else {
+ if (!_e1000e_itr_should_postpone(core)) {
+ trace_e1000e_irq_msi_notify(causes);
+ msi_notify(core->owner, 0);
+ }
+ }
+}
+
+/* Central interrupt recomputation: fold OTHER causes for MSI-X, fix
+ * up ICR.ASSERTED, mirror ICR into ICS, and raise/lower the
+ * appropriate interrupt mechanism (MSI-X, MSI, or legacy INTx). */
+static void
+_e1000e_update_interrupt_state(E1000ECore *core)
+{
+ bool interrupts_pending;
+ bool is_msix = msix_enabled(core->owner);
+
+ /* Set ICR[OTHER] for MSI-X */
+ if (is_msix) {
+ if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) {
+ core->mac[ICR] |= E1000_ICR_OTHER;
+ trace_e1000e_irq_add_msi_other(core->mac[ICR]);
+ }
+ }
+
+ _e1000e_fix_icr_asserted(core);
+
+ /*
+ * Make sure ICR and ICS registers have the same value.
+ * The spec says that the ICS register is write-only. However in practice,
+ * on real hardware ICS is readable, and for reads it has the same value as
+ * ICR (except that ICS does not have the clear on read behaviour of ICR).
+ *
+ * The VxWorks PRO/1000 driver uses this behaviour.
+ */
+ core->mac[ICS] = core->mac[ICR];
+
+ interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false;
+
+ trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
+ core->mac[ICR], core->mac[IMS]);
+
+ if (is_msix || msi_enabled(core->owner)) {
+ if (interrupts_pending) {
+ _e1000e_send_msi(core, is_msix);
+ }
+ } else {
+ /* Legacy INTx is level-triggered: lower it when nothing pends. */
+ if (interrupts_pending) {
+ if (!_e1000e_itr_should_postpone(core)) {
+ _e1000e_raise_legacy_irq(core);
+ }
+ } else {
+ _e1000e_lower_legacy_irq(core);
+ }
+ }
+}
+
+/* Latch new interrupt cause bits (plus any delayed causes whose
+ * moderation timers have expired) into ICR and re-evaluate interrupt
+ * delivery. */
+static void
+set_interrupt_cause(E1000ECore *core, uint32_t val)
+{
+ trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
+
+ val |= _e1000e_intmgr_collect_delayed_causes(core);
+ core->mac[ICR] |= val;
+
+ trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]);
+
+ _e1000e_update_interrupt_state(core);
+}
+
+/* Auto-negotiation completion timer callback: if the link is up,
+ * finish negotiation (LP ack + autoneg-complete in the PHY), refresh
+ * flow control, and raise a Link Status Change interrupt. */
+static void
+_e1000e_autoneg_timer(void *opaque)
+{
+ E1000ECore *core = opaque;
+ if (!qemu_get_queue(core->owner_nic)->link_down) {
+ e1000_link_up(core);
+ core->phy[0][PHY_LP_ABILITY] |= MII_LPAR_LPACK;
+ core->phy[0][PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
+ _e1000e_update_flowctl_status(core);
+ trace_e1000e_link_negotiation_done();
+ set_interrupt_cause(core, E1000_ICR_LSC); /* signal link status change
+ * to guest */
+ }
+}
+
+/* Translate an MMIO offset to a MAC register index, following the
+ * alias offsets in mac_reg_access[]. The low bit of each table entry
+ * is an access flag (MAC_ACCESS_PARTIAL), so it is masked out here. */
+static inline uint16_t
+_e1000e_get_reg_index_with_offset(const uint16_t *mac_reg_access, hwaddr addr)
+{
+ uint16_t index = (addr & 0x1ffff) >> 2;
+ return index + (mac_reg_access[index] & 0xfffe);
+}
+
+/* Per-page PHY register capability map: PHY_R/PHY_W access rights,
+ * plus PHY_ANYPAGE for registers reachable regardless of the current
+ * PHY_PAGE selection. Sized with E1000E_PHY_PAGE_SIZE (instead of the
+ * magic 0x20) to stay consistent with phyreg_writeops[] and with the
+ * bounds phy_reg_write() asserts. */
+static const char phy_regcap[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE] = {
+ [0] = {
+ [PHY_CTRL] = PHY_ANYPAGE | PHY_RW,
+ [PHY_STATUS] = PHY_ANYPAGE | PHY_R,
+ [PHY_ID1] = PHY_ANYPAGE | PHY_R,
+ [PHY_ID2] = PHY_ANYPAGE | PHY_R,
+ [PHY_AUTONEG_ADV] = PHY_ANYPAGE | PHY_RW,
+ [PHY_LP_ABILITY] = PHY_ANYPAGE | PHY_R,
+ [PHY_AUTONEG_EXP] = PHY_ANYPAGE | PHY_R,
+ [PHY_NEXT_PAGE_TX] = PHY_ANYPAGE | PHY_RW,
+ [PHY_LP_NEXT_PAGE] = PHY_ANYPAGE | PHY_R,
+ [PHY_1000T_CTRL] = PHY_ANYPAGE | PHY_RW,
+ [PHY_1000T_STATUS] = PHY_ANYPAGE | PHY_R,
+ [PHY_EXT_STATUS] = PHY_ANYPAGE | PHY_R,
+ [PHY_PAGE] = PHY_ANYPAGE | PHY_RW,
+
+ [PHY_COPPER_CTRL1] = PHY_RW,
+ [PHY_COPPER_STAT1] = PHY_R,
+ [PHY_COPPER_CTRL3] = PHY_RW,
+ [PHY_RX_ERR_CNTR] = PHY_R,
+ [PHY_OEM_BITS] = PHY_RW,
+ [PHY_BIAS_1] = PHY_RW,
+ [PHY_BIAS_2] = PHY_RW,
+ [PHY_COPPER_INT_ENABLE] = PHY_RW,
+ [PHY_COPPER_STAT2] = PHY_R,
+ [PHY_COPPER_CTRL2] = PHY_RW
+ },
+ [2] = {
+ [PHY_MAC_CTRL1] = PHY_RW,
+ [PHY_MAC_INT_ENABLE] = PHY_RW,
+ [PHY_MAC_STAT] = PHY_R,
+ [PHY_MAC_CTRL2] = PHY_RW
+ },
+ [3] = {
+ [PHY_LED_03_FUNC_CTRL1] = PHY_RW,
+ [PHY_LED_03_POL_CTRL] = PHY_RW,
+ [PHY_LED_TIMER_CTRL] = PHY_RW,
+ [PHY_LED_45_CTRL] = PHY_RW
+ },
+ [5] = {
+ [PHY_1000T_SKEW] = PHY_R,
+ [PHY_1000T_SWAP] = PHY_R
+ },
+ [6] = {
+ [PHY_CRC_COUNTERS] = PHY_R
+ }
+};
+
+/* Check whether PHY register 'addr' permits access 'cap' (PHY_R or
+ * PHY_W). ANYPAGE registers resolve to page 0; otherwise the current
+ * PHY_PAGE selects the page, which is returned via *page. Returns
+ * false for an out-of-range page. */
+static bool
+phy_reg_check_cap(E1000ECore *core, uint32_t addr, char cap, uint8_t *page)
+{
+ *page = (phy_regcap[0][addr] & PHY_ANYPAGE) ? 0 : core->phy[0][PHY_PAGE];
+
+ if (*page >= E1000E_PHY_PAGES) {
+ return false;
+ }
+
+ return phy_regcap[*page][addr] & cap;
+}
+
+/* Write a PHY register, dispatching through phyreg_writeops[] when a
+ * special handler exists, otherwise storing the raw value. */
+static void
+phy_reg_write(E1000ECore *core, uint8_t page, uint32_t addr, uint16_t data)
+{
+ assert(page < E1000E_PHY_PAGES);
+ assert(addr < E1000E_PHY_PAGE_SIZE);
+
+ if (phyreg_writeops[page][addr]) {
+ phyreg_writeops[page][addr](core, addr, data);
+ } else {
+ core->phy[page][addr] = data;
+ }
+}
+
+/* MDIC write handler: emulate an MDIO transaction against the
+ * internal PHY (address 1 only). Performs the read or write (with
+ * capability checking), sets the READY bit, and optionally raises
+ * the MDIO-access-complete interrupt. */
+static void
+set_mdic(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t data = val & E1000_MDIC_DATA_MASK;
+ uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+ uint8_t page;
+
+ if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) { /* phy # */
+ val = core->mac[MDIC] | E1000_MDIC_ERROR;
+ } else if (val & E1000_MDIC_OP_READ) {
+ if (!phy_reg_check_cap(core, addr, PHY_R, &page)) {
+ trace_e1000e_core_mdic_read_unhandled(page, addr);
+ val |= E1000_MDIC_ERROR;
+ } else {
+ /* Clear the data field, then merge in the PHY register value */
+ val = (val ^ data) | core->phy[page][addr];
+ trace_e1000e_core_mdic_read(page, addr, val);
+ }
+ } else if (val & E1000_MDIC_OP_WRITE) {
+ if (!phy_reg_check_cap(core, addr, PHY_W, &page)) {
+ trace_e1000e_core_mdic_write_unhandled(page, addr);
+ val |= E1000_MDIC_ERROR;
+ } else {
+ trace_e1000e_core_mdic_write(page, addr, data);
+ phy_reg_write(core, page, addr, data);
+ }
+ }
+ core->mac[MDIC] = val | E1000_MDIC_READY;
+
+ if (val & E1000_MDIC_INT_EN) {
+ set_interrupt_cause(core, E1000_ICR_MDAC);
+ }
+}
+
+/* RX Descriptor Tail write: new descriptors are available, so try to
+ * receive pending frames. */
+static void
+set_rdt(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[index] = val & 0xffff;
+ trace_e1000e_rx_set_rdt(_e1000e_mq_queue_idx(RDT0, index), val);
+ start_recv(core);
+}
+
+/* STATUS write: only supports clearing PHYRA (write-zero-to-clear). */
+static void
+set_status(E1000ECore *core, int index, uint32_t val)
+{
+ if ((val & E1000_STATUS_PHYRA) == 0) {
+ core->mac[index] &= ~E1000_STATUS_PHYRA;
+ }
+}
+
+/* CTRL_EXT write: trace link parameters and drop the self-clearing
+ * ASDCHK/EE_RST bits before storing. */
+static void
+set_ctrlext(E1000ECore *core, int index, uint32_t val)
+{
+ trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+ !!(val & E1000_CTRL_EXT_SPD_BYPS));
+
+ /* Zero self-clearing bits */
+ val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
+ core->mac[CTRL_EXT] = val;
+}
+
+/* PBACLR write: clear the MSI-X Pending Bit Array entries selected by
+ * the written mask. Only meaningful when MSI-X is enabled.
+ *
+ * Bug fix: the guard was inverted -- it returned early when MSI-X
+ * *was* enabled, so msix_clr_pending() ran only with MSI-X disabled
+ * and the register had no effect in the one mode where it matters. */
+static void
+set_pbaclr(E1000ECore *core, int index, uint32_t val)
+{
+ int i;
+
+ core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK;
+
+ if (!msix_enabled(core->owner)) {
+ return;
+ }
+
+ for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) {
+ if (core->mac[PBACLR] & BIT(i)) {
+ msix_clr_pending(core->owner, i);
+ }
+ }
+}
+
+/* Flow control receive threshold high: bits 2:0 are reserved. */
+static void
+set_fcrth(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[FCRTH] = val & 0xFFF8;
+}
+
+/* Flow control receive threshold low: keeps XONE (bit 31) plus the
+ * 8-byte-aligned threshold. */
+static void
+set_fcrtl(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[FCRTL] = val & 0x8000FFF8;
+}
+
+/* Generic 16-bit-wide register store. */
+static void
+set_16bit(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[index] = val & 0xffff;
+}
+
+/* Generic 12-bit-wide register store. */
+static void
+set_12bit(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[index] = val & 0xfff;
+}
+
+/* VLAN Ether Type: cache the value used for VLAN tag matching.
+ * NOTE(review): mac[VET] was just stored in host order, so the
+ * le16_to_cpu() here looks like a no-op on LE hosts and a byteswap on
+ * BE hosts -- confirm intended endianness handling. */
+static void
+set_vet(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[VET] = val & 0xffff;
+ core->vet = le16_to_cpu(core->mac[VET]);
+ trace_e1000e_vlan_vet(core->vet);
+}
+
+/* Descriptor ring length: 128-byte aligned, 20 bits wide. */
+static void
+set_dlen(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[index] = val & 0xfff80;
+}
+
+/* TCTL write: store and kick transmission on queue 0, and on queue 1
+ * as well when TARC1 has it enabled. */
+static void
+set_tctl(E1000ECore *core, int index, uint32_t val)
+{
+ E1000E_TxRing txr;
+ core->mac[index] = val;
+
+ _e1000e_tx_ring_init(core, &txr, 0);
+ start_xmit(core, &txr);
+
+ if (core->mac[TARC1] & E1000_TARC_ENABLE) {
+ _e1000e_tx_ring_init(core, &txr, 1);
+ start_xmit(core, &txr);
+ }
+}
+
+/* TX Descriptor Tail write: new descriptors were queued, transmit
+ * from the corresponding ring. */
+static void
+set_tdt(E1000ECore *core, int index, uint32_t val)
+{
+ E1000E_TxRing txr;
+
+ core->mac[index] = val & 0xffff;
+
+ _e1000e_tx_ring_init(core, &txr, _e1000e_mq_queue_idx(TDT, index));
+ start_xmit(core, &txr);
+}
+
+/* ICS write: software-requested interrupt causes. */
+static void
+set_ics(E1000ECore *core, int index, uint32_t val)
+{
+ trace_e1000e_irq_write_ics(val);
+ set_interrupt_cause(core, val);
+}
+
+/* ICR write: write-one-to-clear cause bits. With IAME set and an
+ * asserted interrupt, also auto-mask the IAM bits. */
+static void
+set_icr(E1000ECore *core, int index, uint32_t val)
+{
+ if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
+ (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
+ trace_e1000e_irq_icr_process_iame();
+ clear_ims_bits(core, core->mac[IAM]);
+ }
+
+ trace_e1000e_irq_icr_write(val, core->mac[ICR], core->mac[ICR] & ~val);
+ core->mac[ICR] &= ~val;
+ _e1000e_update_interrupt_state(core);
+}
+
+/* IMC write: disable (mask) the given interrupt causes. */
+static void
+set_imc(E1000ECore *core, int index, uint32_t val)
+{
+ trace_e1000e_irq_ims_clear_set_imc(val);
+ clear_ims_bits(core, val);
+ _e1000e_update_interrupt_state(core);
+}
+
+/* IMS write: enable (unmask) interrupt causes. Optionally clears the
+ * MSI-X pending bits for the newly enabled extended causes
+ * (PBA_CLR), and fires all moderation timers when every valid cause
+ * is enabled at once and INT_TIMERS_CLEAR_ENA is set. */
+static void
+set_ims(E1000ECore *core, int index, uint32_t val)
+{
+ static const uint32_t IMS_EXT_MASK =
+ E1000_IMS_RXQ0 | E1000_IMS_RXQ1 |
+ E1000_IMS_TXQ0 | E1000_IMS_TXQ1 |
+ E1000_IMS_OTHER;
+
+ static const uint32_t IMS_VALID_MASK =
+ E1000_IMS_TXDW | E1000_IMS_TXQE | E1000_IMS_LSC |
+ E1000_IMS_RXDMT0 | E1000_IMS_RXO | E1000_IMS_RXT0 |
+ E1000_IMS_MDAC | E1000_IMS_TXD_LOW | E1000_IMS_SRPD |
+ E1000_IMS_ACK | E1000_IMS_MNG | E1000_IMS_RXQ0 |
+ E1000_IMS_RXQ1 | E1000_IMS_TXQ0 | E1000_IMS_TXQ1 |
+ E1000_IMS_OTHER;
+
+ uint32_t valid_val = val & IMS_VALID_MASK;
+
+ trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val);
+ core->mac[IMS] |= valid_val;
+
+ if ((valid_val & IMS_EXT_MASK) &&
+ (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) &&
+ msix_enabled(core->owner)) {
+ _e1000e_msix_clear(core, valid_val);
+ }
+
+ if ((valid_val == IMS_VALID_MASK) &&
+ (core->mac[CTRL_EXT] & E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA)) {
+ trace_e1000e_irq_fire_all_timers(val);
+ _e1000e_intrmgr_fire_all_timers(core);
+ }
+
+ _e1000e_update_interrupt_state(core);
+}
+
+/* RDTR write: RX delay timer; the FPD (Flush Partial Descriptor) bit
+ * immediately fires any delayed interrupts if the timer is running. */
+static void
+set_rdtr(E1000ECore *core, int index, uint32_t val)
+{
+ set_16bit(core, index, val);
+
+ if ((val & E1000_RDTR_FPD) && (core->rdtr.running)) {
+ trace_e1000e_irq_rdtr_fpd_running();
+ _e1000e_intrmgr_fire_delayed_interrupts(core);
+ } else {
+ trace_e1000e_irq_rdtr_fpd_not_running();
+ }
+}
+
+/* TIDV write: TX delay timer; same FPD flush semantics as RDTR. */
+static void
+set_tidv(E1000ECore *core, int index, uint32_t val)
+{
+ set_16bit(core, index, val);
+
+ if ((val & E1000_TIDV_FPD) && (core->tidv.running)) {
+ trace_e1000e_irq_tidv_fpd_running();
+ _e1000e_intrmgr_fire_delayed_interrupts(core);
+ } else {
+ trace_e1000e_irq_tidv_fpd_not_running();
+ }
+}
+
+/* Plain register read. */
+static uint32_t
+mac_readreg(E1000ECore *core, int index)
+{
+ return core->mac[index];
+}
+
+/* ICS read: mirrors ICR (see _e1000e_update_interrupt_state). */
+static uint32_t
+mac_ics_read(E1000ECore *core, int index)
+{
+ trace_e1000e_irq_read_ics(core->mac[ICS]);
+ return core->mac[ICS];
+}
+
+/* IMS read with tracing. */
+static uint32_t
+mac_ims_read(E1000ECore *core, int index)
+{
+ trace_e1000e_irq_read_ims(core->mac[IMS]);
+ return core->mac[IMS];
+}
+
+/* Reads returning only the low 4/6/11/13/16 valid bits. */
+static uint32_t
+mac_low4_read(E1000ECore *core, int index)
+{
+ return core->mac[index] & 0xf;
+}
+
+static uint32_t
+mac_low6_read(E1000ECore *core, int index)
+{
+ return core->mac[index] & 0x3f;
+}
+
+static uint32_t
+mac_low11_read(E1000ECore *core, int index)
+{
+ return core->mac[index] & 0x7ff;
+}
+
+static uint32_t
+mac_low13_read(E1000ECore *core, int index)
+{
+ return core->mac[index] & 0x1fff;
+}
+
+static uint32_t
+mac_low16_read(E1000ECore *core, int index)
+{
+ return core->mac[index] & 0xffff;
+}
+
+/* SWSM read: returns the old value and sets the semaphore bit (SMBI),
+ * emulating the hardware read-to-acquire behaviour. */
+static uint32_t
+mac_swsm_read(E1000ECore *core, int index)
+{
+ uint32_t val = core->mac[SWSM];
+ core->mac[SWSM] = val | 1;
+ return val;
+}
+
+/* ITR read: report the value the guest wrote, not the clamped one. */
+static uint32_t
+mac_itr_read(E1000ECore *core, int index)
+{
+ return core->itr_guest_value;
+}
+
+/* EITR read: same guest-visible semantics, per MSI-X vector. */
+static uint32_t
+mac_eitr_read(E1000ECore *core, int index)
+{
+ return core->eitr_guest_value[index - EITR];
+}
+
+/* ICR read: returns the current causes with the documented
+ * clear-on-read side effects -- cleared entirely when IMS is zero, or
+ * when IAME is set and an interrupt is asserted (which also
+ * auto-masks the IAM bits). */
+static uint32_t
+mac_icr_read(E1000ECore *core, int index)
+{
+ uint32_t ret = core->mac[ICR];
+ trace_e1000e_irq_icr_read_entry(ret);
+
+ if (core->mac[IMS] == 0) {
+ trace_e1000e_irq_icr_clear_zero_ims();
+ core->mac[ICR] = 0;
+ }
+
+ if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
+ (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
+ trace_e1000e_irq_icr_clear_iame();
+ core->mac[ICR] = 0;
+ trace_e1000e_irq_icr_process_iame();
+ clear_ims_bits(core, core->mac[IAM]);
+ }
+
+ trace_e1000e_irq_icr_read_exit(core->mac[ICR]);
+ _e1000e_update_interrupt_state(core);
+ return ret;
+}
+
+/* Clear-on-read statistics counter (32-bit). */
+static uint32_t
+mac_read_clr4(E1000ECore *core, int index)
+{
+ uint32_t ret = core->mac[index];
+
+ core->mac[index] = 0;
+ return ret;
+}
+
+/* Clear-on-read for the high half of a 64-bit counter pair: reading
+ * the high register clears both halves. */
+static uint32_t
+mac_read_clr8(E1000ECore *core, int index)
+{
+ uint32_t ret = core->mac[index];
+
+ core->mac[index] = 0;
+ core->mac[index-1] = 0;
+ return ret;
+}
+
+/* CTRL read with link-parameter tracing. */
+static uint32_t
+get_ctrl(E1000ECore *core, int index)
+{
+ uint32_t val = core->mac[CTRL];
+
+ trace_e1000e_link_read_params(
+ !!(val & E1000_CTRL_ASDE),
+ (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT,
+ !!(val & E1000_CTRL_FRCSPD),
+ !!(val & E1000_CTRL_FRCDPX),
+ !!(val & E1000_CTRL_RFCE),
+ !!(val & E1000_CTRL_TFCE));
+
+ return val;
+}
+
+/* STATUS read: derive GIO master enable, duplex, and speed bits from
+ * CTRL/CTRL_EXT. Defaults are full duplex and 1000 Mb/s unless speed
+ * is forced or bypassed. */
+static uint32_t
+get_status(E1000ECore *core, int index)
+{
+ uint32_t res = core->mac[STATUS];
+
+ if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) {
+ res |= E1000_STATUS_GIO_MASTER_ENABLE;
+ }
+
+ if (core->mac[CTRL] & E1000_CTRL_FRCDPX) {
+ res |= (core->mac[CTRL] & E1000_CTRL_FD) ? E1000_STATUS_FD : 0;
+ } else {
+ res |= E1000_STATUS_FD;
+ }
+
+ if ((core->mac[CTRL] & E1000_CTRL_FRCSPD) ||
+ (core->mac[CTRL_EXT] & E1000_CTRL_EXT_SPD_BYPS)) {
+ switch (core->mac[CTRL] & E1000_CTRL_SPD_SEL) {
+ case E1000_CTRL_SPD_10:
+ res |= E1000_STATUS_SPEED_10;
+ break;
+ case E1000_CTRL_SPD_100:
+ res |= E1000_STATUS_SPEED_100;
+ break;
+ case E1000_CTRL_SPD_1000:
+ default:
+ res |= E1000_STATUS_SPEED_1000;
+ break;
+ }
+ } else {
+ res |= E1000_STATUS_SPEED_1000;
+ }
+
+ trace_e1000e_link_status(
+ !!(res & E1000_STATUS_LU),
+ !!(res & E1000_STATUS_FD),
+ (res & E1000_STATUS_SPEED_MASK) >> E1000_STATUS_SPEED_SHIFT,
+ (res & E1000_STATUS_ASDV) >> E1000_STATUS_ASDV_SHIFT);
+
+ return res;
+}
+
+/* TARC read: expose only the implemented bits (10:0 and 30:27). */
+static uint32_t
+get_tarc(E1000ECore *core, int index)
+{
+ return core->mac[index] & ((BIT(11) - 1) |
+ BIT(27) |
+ BIT(28) |
+ BIT(29) |
+ BIT(30));
+}
+
+/* Plain register write. */
+static void
+mac_writereg(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[index] = val;
+}
+
+/* RA[1] (RAH0) write: store the value and propagate the updated
+ * station MAC address (RAL0/RAH0) to the NIC info string. */
+static void
+mac_setmacaddr(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t macaddr[2];
+
+ core->mac[index] = val;
+
+ macaddr[0] = cpu_to_le32(core->mac[RA]);
+ macaddr[1] = cpu_to_le32(core->mac[RA + 1]);
+ qemu_format_nic_info_str(qemu_get_queue(core->owner_nic),
+ (uint8_t *) macaddr);
+
+ trace_e1000e_mac_set_sw(MAC_ARG(macaddr));
+}
+
+/* EECD write: preserve the hardware-owned read-only bits. */
+static void
+set_eecd(E1000ECore *core, int index, uint32_t val)
+{
+ static const uint32_t ro_bits = E1000_EECD_PRES |
+ E1000_EECD_AUTO_RD |
+ E1000_EECD_SIZE_EX_MASK;
+
+ core->mac[EECD] = (core->mac[EECD] & ro_bits) | (val & ~ro_bits);
+}
+
+/* EERD write: EEPROM read command. On a valid START with an in-range
+ * address, complete immediately and report address, data and DONE in
+ * EERD. */
+static void
+set_eerd(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK;
+ uint32_t flags = 0;
+ uint32_t data = 0;
+
+ if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) {
+ data = core->eeprom[addr];
+ flags = E1000_EERW_DONE;
+ }
+
+ core->mac[EERD] = flags |
+ (addr << E1000_EERW_ADDR_SHIFT) |
+ (data << E1000_EERW_DATA_SHIFT);
+}
+
+/* EEWR write: EEPROM write command. On a valid START with an in-range
+ * address, store the data and report completion.
+ *
+ * Bug fix: the completion status was stored in EERD, so software
+ * polling EEWR (the register it issued the command through, and the
+ * one hooked up as a readable register below) would never observe
+ * the DONE bit. Store it in EEWR instead. */
+static void
+set_eewr(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK;
+ uint32_t data = (val >> E1000_EERW_DATA_SHIFT) & E1000_EERW_DATA_MASK;
+ uint32_t flags = 0;
+
+ if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) {
+ core->eeprom[addr] = data;
+ flags = E1000_EERW_DONE;
+ }
+
+ core->mac[EEWR] = flags |
+ (addr << E1000_EERW_ADDR_SHIFT) |
+ (data << E1000_EERW_DATA_SHIFT);
+}
+
+/* RXDCTL write: both RX queues share the same descriptor control. */
+static void
+set_rxdctl(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[RXDCTL] = core->mac[RXDCTL1] = val;
+}
+
+/* ITR write: remember the guest's raw value but clamp the effective
+ * moderation interval to the emulation minimum. */
+static void
+set_itr(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t interval = val & 0xffff;
+
+ trace_e1000e_irq_itr_set(val);
+
+ core->itr_guest_value = interval;
+ core->mac[index] = MAX(interval, _E1000E_MIN_XITR);
+}
+
+/* EITR write: per-MSI-X-vector variant of set_itr. */
+static void
+set_eitr(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t interval = val & 0xffff;
+ uint32_t eitr_num = index - EITR;
+
+ trace_e1000e_irq_eitr_set(eitr_num, val);
+
+ core->eitr_guest_value[eitr_num] = interval;
+ core->mac[index] = MAX(interval, _E1000E_MIN_XITR);
+}
+
+/* PSRCTL write: zero BSIZE0/BSIZE1 is a guest programming error the
+ * datasheet forbids; treat it as fatal. */
+static void
+set_psrctl(E1000ECore *core, int index, uint32_t val)
+{
+ if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) {
+ hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero");
+ }
+
+ if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) {
+ hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero");
+ }
+
+ core->mac[PSRCTL] = val;
+}
+
+/* Propagate the RX L4 checksum-offload state to the peer backend
+ * (only meaningful when a vnet header is in use). */
+static void
+_e1000e_update_rx_offloads(E1000ECore *core)
+{
+ int cso_state = _e1000e_rx_l4_cso_enabled(core);
+
+ trace_e1000e_rx_set_cso(cso_state);
+
+ if (core->has_vnet) {
+ qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
+ cso_state, 0, 0, 0, 0);
+ }
+}
+
+/* RXCSUM write: store and re-sync offloads with the backend. */
+static void
+set_rxcsum(E1000ECore *core, int index, uint32_t val)
+{
+ core->mac[RXCSUM] = val;
+ _e1000e_update_rx_offloads(core);
+}
+
+/* GCR write: preserve the read-only bits. */
+static void
+set_gcr(E1000ECore *core, int index, uint32_t val)
+{
+ uint32_t ro_bits = core->mac[GCR] & E1000_GCR_RO_BITS;
+ core->mac[GCR] = (val & ~E1000_GCR_RO_BITS) | ro_bits;
+}
+
+
+/* Dispatch table of MAC register read handlers, indexed by register
+ * index (MMIO offset >> 2). Plain registers use mac_readreg; counters
+ * use clear-on-read handlers; the rest have dedicated accessors. */
+#define getreg(x) [x] = mac_readreg
+static uint32_t (*macreg_readops[])(E1000ECore *, int) = {
+ getreg(PBA), getreg(RCTL), getreg(TDH), getreg(TXDCTL),
+ getreg(WUFC), getreg(TDT), getreg(LEDCTL), getreg(FCRTL),
+ getreg(MANC), getreg(MDIC), getreg(STATUS), getreg(TORL),
+ getreg(TOTL), getreg(FCRUC), getreg(TCTL), getreg(RDH0),
+ getreg(RDT0), getreg(VET), getreg(TDBAL), getreg(TDBAH),
+ getreg(RDBAH0), getreg(RDBAL0), getreg(TDLEN), getreg(TDLEN1),
+ getreg(TDBAL1), getreg(TDBAH1), getreg(TDH1), getreg(TDT1),
+ getreg(RDLEN0), getreg(RDTR), getreg(RADV), getreg(TADV),
+ getreg(RDH1), getreg(SCC), getreg(ECOL), getreg(MCC),
+ getreg(LATECOL), getreg(COLC), getreg(DC), getreg(TNCRS),
+ getreg(SEC), getreg(CEXTERR), getreg(RLEC), getreg(XONRXC),
+ getreg(XONTXC), getreg(XOFFRXC), getreg(XOFFTXC), getreg(WUC),
+ getreg(WUS), getreg(IPAV), getreg(RFC), getreg(RJC),
+ getreg(GORCL), getreg(GOTCL), getreg(RNBC), getreg(TSCTFC),
+ getreg(MGTPRC), getreg(MGTPDC), getreg(MGTPTC), getreg(EECD),
+ getreg(EERD), getreg(GCR), getreg(TIMINCA), getreg(IAM),
+ getreg(EIAC), getreg(IVAR), getreg(CTRL_EXT), getreg(RFCTL),
+ getreg(PSRCTL), getreg(POEMB), getreg(MFUTP01), getreg(MFUTP23),
+ getreg(MANC2H), getreg(MFVAL), getreg(FACTPS), getreg(EXTCNF_CTRL),
+ getreg(RXCSUM), getreg(FUNCTAG), getreg(GSCL_1), getreg(GSCL_2),
+ getreg(GSCL_3), getreg(GSCL_4), getreg(GSCN_0), getreg(GSCN_1),
+ getreg(GSCN_2), getreg(GSCN_3), getreg(GCR2), getreg(RAID),
+ getreg(RSRPD), getreg(MRQC), getreg(RDT1), getreg(RDBAH1),
+ getreg(RDBAL1), getreg(RDLEN1), getreg(PBACLR), getreg(FCAL),
+ getreg(FCAH), getreg(FCT), getreg(FCTTV), getreg(FCRTV),
+ getreg(FCRTH), getreg(FLA), getreg(EEWR), getreg(FLSWDATA),
+ getreg(FLOP), getreg(FLOL), getreg(FLSWCTL), getreg(FLSWCNT),
+ getreg(FLASHT), getreg(RXDCTL), getreg(RXDCTL1), getreg(TXDCTL1),
+ getreg(RXSTMPH), getreg(RXSTMPL), getreg(RXSATRL), getreg(RXSATRH),
+ getreg(TXSTMPL), getreg(TXSTMPH), getreg(SYSTIML), getreg(SYSTIMH),
+ getreg(TIMADJL), getreg(TIMADJH), getreg(RXUDP), getreg(RXCFGL),
+ getreg(TSYNCRXCTL),
+ getreg(TSYNCTXCTL),
+ getreg(TIPG),
+ getreg(EXTCNF_SIZE),
+ getreg(EEMNGCTL),
+ getreg(EEMNGDATA),
+ getreg(FLMNGCTL),
+ getreg(FLMNGDATA),
+ getreg(FLMNGCNT),
+
+ [TOTH] = mac_read_clr8, [TORH] = mac_read_clr8,
+ [GOTCH] = mac_read_clr8, [GORCH] = mac_read_clr8,
+ [PRC64] = mac_read_clr4, [PRC127] = mac_read_clr4,
+ [PRC255] = mac_read_clr4, [PRC511] = mac_read_clr4,
+ [PRC1023] = mac_read_clr4, [PRC1522] = mac_read_clr4,
+ [PTC64] = mac_read_clr4, [PTC127] = mac_read_clr4,
+ [PTC255] = mac_read_clr4, [PTC511] = mac_read_clr4,
+ [PTC1023] = mac_read_clr4, [PTC1522] = mac_read_clr4,
+ [GPRC] = mac_read_clr4, [GPTC] = mac_read_clr4,
+ [TPT] = mac_read_clr4, [TPR] = mac_read_clr4,
+ [RUC] = mac_read_clr4, [ROC] = mac_read_clr4,
+ [BPRC] = mac_read_clr4, [MPRC] = mac_read_clr4,
+ [MPTC] = mac_read_clr4, [BPTC] = mac_read_clr4,
+ [IAC] = mac_read_clr4, [TSCTC] = mac_read_clr4,
+ [ICR] = mac_icr_read, [ITR] = mac_itr_read,
+ [ICS] = mac_ics_read, [IMS] = mac_ims_read,
+ [RDFH] = mac_low13_read, [RDFT] = mac_low13_read,
+ [RDFHS] = mac_low13_read, [RDFTS] = mac_low13_read,
+ [RDFPC] = mac_low13_read,
+ [TDFH] = mac_low13_read, [TDFT] = mac_low13_read,
+ [TDFHS] = mac_low13_read, [TDFTS] = mac_low13_read,
+ [TDFPC] = mac_low13_read,
+ [AIT] = mac_low16_read,
+ [STATUS] = get_status, [CTRL] = get_ctrl,
+ [TARC0] = get_tarc, [TARC1] = get_tarc,
+ [PBS] = mac_low6_read, [SWSM] = mac_swsm_read,
+
+ [CRCERRS ... MPC] = &mac_readreg,
+ [IP6AT ... IP6AT+3] = &mac_readreg,
+ [IP4AT ... IP4AT+6] = &mac_readreg,
+ [RA ... RA+31] = &mac_readreg,
+ [WUPM ... WUPM+31] = &mac_readreg,
+ [MTA ... MTA+127] = &mac_readreg,
+ [VFTA ... VFTA+127] = &mac_readreg,
+ [FFMT ... FFMT+254] = &mac_low4_read,
+ [FFVT ... FFVT+254] = &mac_readreg,
+ [MDEF ... MDEF+7] = &mac_readreg,
+ [FFLT ... FFLT+10] = &mac_low11_read,
+ [FTFT ... FTFT+254] = &mac_readreg,
+ [PBM ... PBM+10239] = &mac_readreg,
+ [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = &mac_eitr_read,
+ [RETA ... RETA+31] = &mac_readreg,
+ [RSSRK ... RSSRK+31] = &mac_readreg,
+ [MAVTV0 ... MAVTV3] = &mac_readreg
+};
+enum { NREADOPS = ARRAY_SIZE(macreg_readops) };
+
+/* Dispatch table of MAC register write handlers, indexed by register
+ * index (MMIO offset >> 2). Plain registers use mac_writereg; the
+ * rest have dedicated handlers with side effects.
+ *
+ * Fix: putreg(FLSWDATA) appeared twice (once among the grouped
+ * entries and once stand-alone). A duplicate designated initializer
+ * is dead code and triggers -Woverride-init; the redundant entry is
+ * removed. */
+#define putreg(x) [x] = mac_writereg
+static void (*macreg_writeops[])(E1000ECore *, int, uint32_t) = {
+ putreg(PBA), putreg(SWSM), putreg(WUFC), putreg(RDBAH1),
+ putreg(TDBAL), putreg(TDBAH), putreg(TXDCTL), putreg(RDBAH0),
+ putreg(RDBAL0), putreg(LEDCTL), putreg(FCAL), putreg(FCRUC),
+ putreg(AIT), putreg(TDFH), putreg(TDFT), putreg(TDFHS),
+ putreg(TDFTS), putreg(TDFPC), putreg(WUC), putreg(WUS),
+ putreg(RDFH), putreg(RDFT), putreg(RDFHS), putreg(RDFTS),
+ putreg(RDFPC), putreg(IPAV), putreg(TDBAL1), putreg(TDBAH1),
+ putreg(TIMINCA), putreg(IAM), putreg(EIAC), putreg(IVAR),
+ putreg(RDBAL1), putreg(TARC0), putreg(TARC1), putreg(FLSWDATA),
+ putreg(POEMB), putreg(PBS), putreg(MFUTP01), putreg(MFUTP23),
+ putreg(MANC), putreg(MANC2H), putreg(MFVAL), putreg(EXTCNF_CTRL),
+ putreg(FACTPS), putreg(FUNCTAG), putreg(GSCL_1), putreg(GSCL_2),
+ putreg(GSCL_3), putreg(GSCL_4), putreg(GSCN_0), putreg(GSCN_1),
+ putreg(GSCN_2), putreg(GSCN_3), putreg(GCR2), putreg(MRQC),
+ putreg(FLOP), putreg(FLOL), putreg(FLSWCTL), putreg(FLSWCNT),
+ putreg(FLA), putreg(RXDCTL1), putreg(TXDCTL1), putreg(TIPG),
+ putreg(RXSTMPH), putreg(RXSTMPL), putreg(RXSATRL), putreg(RXSATRH),
+ putreg(TXSTMPL), putreg(TXSTMPH), putreg(SYSTIML), putreg(SYSTIMH),
+ putreg(TIMADJL), putreg(TIMADJH), putreg(RXUDP), putreg(RXCFGL),
+ putreg(TSYNCRXCTL),
+ putreg(TSYNCTXCTL),
+ putreg(EXTCNF_SIZE),
+ putreg(EEMNGCTL),
+
+ [TDLEN1] = set_dlen, [TDH1] = set_16bit, [TDT1] = set_tdt,
+ [TDLEN] = set_dlen, [RDLEN0] = set_dlen, [TCTL] = set_tctl,
+ [TDT] = set_tdt, [MDIC] = set_mdic, [ICS] = set_ics,
+ [TDH] = set_16bit, [RDH0] = set_16bit, [RDT0] = set_rdt,
+ [IMC] = set_imc, [IMS] = set_ims, [ICR] = set_icr,
+ [EECD] = set_eecd, [RCTL] = set_rx_control, [CTRL] = set_ctrl,
+ [RDTR] = set_rdtr, [RADV] = set_16bit, [TADV] = set_16bit,
+ [ITR] = set_itr, [EERD] = set_eerd, [GCR] = set_gcr,
+ [PSRCTL] = set_psrctl, [RXCSUM] = set_rxcsum, [RAID] = set_16bit,
+ [RSRPD] = set_12bit, [TIDV] = set_tidv, [RDLEN1] = set_dlen,
+ [RDH1] = set_16bit, [RDT1] = set_rdt, [STATUS] = set_status,
+ [PBACLR] = set_pbaclr, [CTRL_EXT] = set_ctrlext, [FCAH] = set_16bit,
+ [FCT] = set_16bit, [FCTTV] = set_16bit, [FCRTV] = set_16bit,
+ [FCRTH] = set_fcrth, [FCRTL] = set_fcrtl, [VET] = set_vet,
+ [RXDCTL] = set_rxdctl, [FLASHT] = set_16bit, [EEWR] = set_eewr,
+ [CTRL_DUP] = set_ctrl, [RFCTL] = set_rfctl,
+ [IP6AT ... IP6AT+3] = &mac_writereg, [IP4AT ... IP4AT+6] = &mac_writereg,
+ [RA] = &mac_writereg,
+ [RA + 1] = &mac_setmacaddr,
+ [RA + 2 ... RA + 31] = &mac_writereg,
+ [WUPM ... WUPM+31] = &mac_writereg,
+ [MTA ... MTA+127] = &mac_writereg,
+ [VFTA ... VFTA+127] = &mac_writereg,
+ [FFMT ... FFMT+254] = &mac_writereg, [FFVT ... FFVT+254] = &mac_writereg,
+ [PBM ... PBM+10239] = &mac_writereg,
+ [MDEF ... MDEF+7] = &mac_writereg,
+ [FFLT ... FFLT+10] = &mac_writereg,
+ [FTFT ... FTFT+254] = &mac_writereg,
+ [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = &set_eitr,
+ [RETA ... RETA + 31] = &mac_writereg,
+ [RSSRK ... RSSRK + 31] = &mac_writereg,
+ [MAVTV0 ... MAVTV3] = &mac_writereg
+};
+
+enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
+
+enum { MAC_ACCESS_PARTIAL = 1 };
+
+/* The array below combines alias offsets of the index values for the
+ * MAC registers that have aliases, with the indication of not fully
+ * implemented registers (lowest bit). This combination is possible
+ * because all of the offsets are even. */
+static const uint16_t mac_reg_access[E1000E_MAC_SIZE] = {
+ /* Alias index offsets */
+ [FCRTL_A] = 0x07fe, [FCRTH_A] = 0x0802,
+ [RDH0_A] = 0x09bc, [RDT0_A] = 0x09bc, [RDTR_A] = 0x09c6,
+ [RDFH_A] = 0xe904, [RDFT_A] = 0xe904,
+ [TDH_A] = 0x0cf8, [TDT_A] = 0x0cf8, [TIDV_A] = 0x0cf8,
+ [TDFH_A] = 0xed00, [TDFT_A] = 0xed00,
+ [RA_A ... RA_A+31] = 0x14f0,
+ [VFTA_A ... VFTA_A+127] = 0x1400,
+ [RDBAL0_A ... RDLEN0_A] = 0x09bc,
+ [TDBAL_A ... TDLEN_A] = 0x0cf8,
+ /* Access options */
+ [RDFH] = MAC_ACCESS_PARTIAL, [RDFT] = MAC_ACCESS_PARTIAL,
+ [RDFHS] = MAC_ACCESS_PARTIAL, [RDFTS] = MAC_ACCESS_PARTIAL,
+ [RDFPC] = MAC_ACCESS_PARTIAL,
+ [TDFH] = MAC_ACCESS_PARTIAL, [TDFT] = MAC_ACCESS_PARTIAL,
+ [TDFHS] = MAC_ACCESS_PARTIAL, [TDFTS] = MAC_ACCESS_PARTIAL,
+ [TDFPC] = MAC_ACCESS_PARTIAL, [EECD] = MAC_ACCESS_PARTIAL,
+ [PBM] = MAC_ACCESS_PARTIAL, [FLA] = MAC_ACCESS_PARTIAL,
+ [FCAL] = MAC_ACCESS_PARTIAL, [FCAH] = MAC_ACCESS_PARTIAL,
+ [FCT] = MAC_ACCESS_PARTIAL, [FCTTV] = MAC_ACCESS_PARTIAL,
+ [FCRTV] = MAC_ACCESS_PARTIAL, [FCRTL] = MAC_ACCESS_PARTIAL,
+ [FCRTH] = MAC_ACCESS_PARTIAL, [TXDCTL] = MAC_ACCESS_PARTIAL,
+ [TXDCTL1] = MAC_ACCESS_PARTIAL,
+ [MAVTV0 ... MAVTV3] = MAC_ACCESS_PARTIAL
+};
+
+/* MMIO write entry point: resolve aliases, then dispatch to the write
+ * handler. Writes to read-only or unknown registers are only traced. */
+void
+e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size)
+{
+ uint16_t index = _e1000e_get_reg_index_with_offset(mac_reg_access, addr);
+
+ if (index < NWRITEOPS && macreg_writeops[index]) {
+ if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
+ trace_e1000e_wrn_regs_write_trivial(index<<2);
+ }
+ trace_e1000e_core_write(index << 2, size, val);
+ macreg_writeops[index](core, index, val);
+ } else if (index < NREADOPS && macreg_readops[index]) {
+ trace_e1000e_wrn_regs_write_ro(index << 2, size, val);
+ } else {
+ trace_e1000e_wrn_regs_write_unknown(index << 2, size, val);
+ }
+}
+
+/* MMIO read entry point: resolve aliases, dispatch to the read
+ * handler; unknown registers read as zero. */
+uint64_t
+e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size)
+{
+ uint64_t val;
+ uint16_t index = _e1000e_get_reg_index_with_offset(mac_reg_access, addr);
+
+ if (index < NREADOPS && macreg_readops[index]) {
+ if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) {
+ trace_e1000e_wrn_regs_read_trivial(index<<2);
+ }
+ val = macreg_readops[index](core, index);
+ trace_e1000e_core_read(index << 2, size, val);
+ return val;
+ } else {
+ trace_e1000e_wrn_regs_read_unknown(index << 2, size);
+ }
+ return 0;
+}
+
+/* Fill the emulated EEPROM from a device-specific template and patch in
+ * the per-instance data: the MAC address (words 0-2), the PCI device ID
+ * (words 11 and 13), and the checksum word, chosen so that the 16-bit
+ * sum of words 0..EEPROM_CHECKSUM_REG equals EEPROM_SUM, as guest
+ * drivers expect. 'templ_size' is in bytes. */
+static void
+_e1000e_core_prepare_eeprom(E1000ECore *core,
+ const uint16_t *templ,
+ uint32_t templ_size,
+ const uint8_t *macaddr)
+{
+ PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(core->owner);
+ uint16_t checksum = 0;
+ int i;
+
+ /* The template is a separate const table and can never overlap the
+ * EEPROM array, so memcpy (not memmove) is the right call here. */
+ memcpy(core->eeprom, templ, templ_size);
+
+ /* EEPROM words hold the MAC little-endian: low byte first. */
+ for (i = 0; i < 3; i++) {
+ core->eeprom[i] = (macaddr[2*i+1]<<8) | macaddr[2*i];
+ }
+
+ core->eeprom[11] = core->eeprom[13] = pdc->device_id;
+
+ for (i = 0; i < EEPROM_CHECKSUM_REG; i++) {
+ checksum += core->eeprom[i];
+ }
+
+ checksum = (uint16_t) EEPROM_SUM - checksum;
+
+ core->eeprom[EEPROM_CHECKSUM_REG] = checksum;
+}
+
+/* One-time setup at PCI realize time: create the autonegotiation timer
+ * and the interrupt moderation timers, allocate the per-queue TX and
+ * the shared RX packet abstractions, build the EEPROM contents and set
+ * up the initial RX offloads. Undone by e1000e_core_pci_uninit(). */
+void
+e1000e_core_pci_realize(E1000ECore *core,
+ const uint16_t *eeprom_templ,
+ uint32_t eeprom_size,
+ const uint8_t *macaddr)
+{
+ int i;
+
+ core->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+ _e1000e_autoneg_timer, core);
+ _e1000e_intrmgr_pci_realize(core);
+
+ for (i = 0; i < E1000E_NUM_QUEUES; i++) {
+ /* has_vnet propagates virtio-net header support to the
+ * packet abstractions. */
+ net_tx_pkt_init(&core->tx[i].tx_pkt,
+ E1000E_MAX_TX_FRAGS, core->has_vnet);
+ }
+
+ net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
+
+ _e1000e_core_prepare_eeprom(core, eeprom_templ, eeprom_size, macaddr);
+ _e1000e_update_rx_offloads(core);
+}
+
+/* Tear down everything e1000e_core_pci_realize() created: the
+ * autonegotiation timer, the interrupt moderation timers, and the
+ * per-queue TX / shared RX packet abstractions. */
+void
+e1000e_core_pci_uninit(E1000ECore *core)
+{
+ int i;
+
+ timer_del(core->autoneg_timer);
+ timer_free(core->autoneg_timer);
+
+ /* NOTE(review): helper name is misspelled ("unint"); rename to
+ * _e1000e_intrmgr_pci_uninit together with its definition. */
+ _e1000e_intrmgr_pci_unint(core);
+
+ for (i = 0; i < E1000E_NUM_QUEUES; i++) {
+ /* Reset first so the packet holds no state when freed. */
+ net_tx_pkt_reset(core->tx[i].tx_pkt);
+ net_tx_pkt_uninit(core->tx[i].tx_pkt);
+ }
+
+ net_rx_pkt_uninit(core->rx_pkt);
+}
+
+/* PHY register power-on defaults, by page. Registers and pages not
+ * listed here reset to zero (e1000e_core_reset zeroes core->phy before
+ * copying this table in). */
+static const uint16_t phy_reg_init[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE] = {
+ [0] = { /* basic control/status, IDs, autoneg, copper regs */
+ [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB |
+ MII_CR_FULL_DUPLEX |
+ MII_CR_AUTO_NEG_EN,
+
+ [PHY_STATUS] = MII_SR_EXTENDED_CAPS |
+ MII_SR_LINK_STATUS | /* link initially up */
+ MII_SR_AUTONEG_CAPS |
+ /* MII_SR_AUTONEG_COMPLETE: initially NOT completed */
+ MII_SR_PREAMBLE_SUPPRESS |
+ MII_SR_EXTENDED_STATUS |
+ MII_SR_10T_HD_CAPS |
+ MII_SR_10T_FD_CAPS |
+ MII_SR_100X_HD_CAPS |
+ MII_SR_100X_FD_CAPS,
+
+ [PHY_ID1] = 0x141,
+ [PHY_ID2] = E1000_PHY_ID2_82574x,
+ [PHY_AUTONEG_ADV] = 0xde1,
+ [PHY_LP_ABILITY] = 0x7e0,
+ [PHY_AUTONEG_EXP] = BIT(2),
+ [PHY_NEXT_PAGE_TX] = BIT(0) | BIT(13),
+ [PHY_1000T_CTRL] = BIT(8) | BIT(9) | BIT(10) | BIT(11),
+ [PHY_1000T_STATUS] = 0x3c00,
+ [PHY_EXT_STATUS] = BIT(12) | BIT(13),
+
+ [PHY_COPPER_CTRL1] = BIT(5) | BIT(6) |
+ BIT(8) | BIT(9) |
+ BIT(12) | BIT(13),
+ [PHY_COPPER_STAT1] = BIT(3) | BIT(10) | BIT(11) |
+ BIT(13) | BIT(15)
+ },
+ [2] = { /* MAC-specific control registers */
+ [PHY_MAC_CTRL1] = BIT(3) | BIT(7),
+ [PHY_MAC_CTRL2] = BIT(1) | BIT(2) | BIT(6) | BIT(12)
+ },
+ [3] = { /* LED control */
+ [PHY_LED_TIMER_CTRL] = BIT(0) | BIT(2) | BIT(14)
+ }
+};
+
+/* MAC register power-on defaults. Registers not listed here reset to
+ * zero (e1000e_core_reset zeroes core->mac before copying this table;
+ * the table only extends up to its highest initialized index). */
+static const uint32_t mac_reg_init[] = {
+ [PBA] = 0x00140014,
+ [LEDCTL] = BIT(1) | BIT(8) | BIT(9) | BIT(15) | BIT(17) | BIT(18),
+ [EXTCNF_CTRL] = BIT(3),
+ [EEMNGCTL] = BIT(31),
+ [FLASHT] = 0x2,
+ [FLSWCTL] = BIT(30) | BIT(31),
+ [FLOL] = BIT(0),
+ [RXDCTL] = BIT(16),
+ [RXDCTL1] = BIT(16),
+ [TIPG] = 0x8 | (0x8 << 10) | (0x6 << 20),
+ [RXCFGL] = 0x88F7,
+ [RXUDP] = 0x319,
+ /* Link starts up, full duplex, 1000 Mb/s. */
+ [CTRL] = E1000_CTRL_FD | E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 |
+ E1000_CTRL_SPD_1000 | E1000_CTRL_SLU | E1000_CTRL_ADVD3WUC,
+ [STATUS] = E1000_STATUS_ASDV_1000 | E1000_STATUS_LU,
+ [PSRCTL] = (2 << E1000_PSRCTL_BSIZE0_SHIFT) |
+ (4 << E1000_PSRCTL_BSIZE1_SHIFT) |
+ (4 << E1000_PSRCTL_BSIZE2_SHIFT),
+ [TARC0] = 0x3 | E1000_TARC_ENABLE,
+ [TARC1] = 0x3 | E1000_TARC_ENABLE,
+ /* EEPROM present and auto-read done. */
+ [EECD] = E1000_EECD_AUTO_RD | E1000_EECD_PRES,
+ [EERD] = E1000_EERW_DONE,
+ [EEWR] = E1000_EERW_DONE,
+ [GCR] = E1000_L0S_ADJUST |
+ E1000_L1_ENTRY_LATENCY_MSB |
+ E1000_L1_ENTRY_LATENCY_LSB,
+ [TDFH] = 0x600,
+ [TDFT] = 0x600,
+ [TDFHS] = 0x600,
+ [TDFTS] = 0x600,
+ [POEMB] = 0x30D,
+ [PBS] = 0x028,
+ [MANC] = E1000_MANC_DIS_IP_CHK_ARP,
+ [FACTPS] = E1000_FACTPS_LAN0_ON | 0x20000000,
+ [SWSM] = 1,
+ [RXCSUM] = E1000_RXCSUM_IPOFLD | E1000_RXCSUM_TUOFLD,
+ /* All interrupt throttling registers start at the minimum rate. */
+ [ITR] = _E1000E_MIN_XITR,
+ [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = _E1000E_MIN_XITR,
+};
+
+/* Bring the core back to its power-on state: stop the autonegotiation
+ * timer and pending interrupt moderation, reload the PHY/MAC register
+ * defaults and clear the per-queue TX offload state. The link state is
+ * re-derived from the backend's current link status. */
+void
+e1000e_core_reset(E1000ECore *core)
+{
+ int i;
+
+ timer_del(core->autoneg_timer);
+
+ _e1000e_intrmgr_reset(core);
+
+ /* The init tables are separate static const data and can never
+ * overlap the core state, so memcpy (not memmove) is correct; the
+ * preceding memset clears anything the table does not cover. */
+ memset(core->phy, 0, sizeof core->phy);
+ memcpy(core->phy, phy_reg_init, sizeof phy_reg_init);
+ memset(core->mac, 0, sizeof core->mac);
+ memcpy(core->mac, mac_reg_init, sizeof mac_reg_init);
+
+ core->rxbuf_min_shift = 1 + E1000_RING_DESC_LEN_SHIFT;
+
+ if (qemu_get_queue(core->owner_nic)->link_down) {
+ e1000_link_down(core);
+ }
+
+ _e1000e_core_reset_mac(core);
+
+ /* Drop any half-built TX packet and clear the per-queue checksum/
+ * segmentation context fields. The tx_pkt pointer itself is kept:
+ * it is owned by e1000e_core_pci_realize()/e1000e_core_pci_uninit(). */
+ for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
+ net_tx_pkt_reset(core->tx[i].tx_pkt);
+ core->tx[i].sum_needed = 0;
+ core->tx[i].ipcss = 0;
+ core->tx[i].ipcso = 0;
+ core->tx[i].ipcse = 0;
+ core->tx[i].tucss = 0;
+ core->tx[i].tucso = 0;
+ core->tx[i].tucse = 0;
+ core->tx[i].hdr_len = 0;
+ core->tx[i].mss = 0;
+ core->tx[i].paylen = 0;
+ core->tx[i].ip = 0;
+ core->tx[i].tcp = 0;
+ core->tx[i].tse = 0;
+ core->tx[i].cptse = 0;
+ core->tx[i].skip_cp = 0;
+ }
+}
+
+/* Migration pre-save hook: fold transient runtime state into the
+ * migrated registers so e1000e_core_post_load() can reconstruct it. */
+void e1000e_core_pre_save(E1000ECore *core)
+{
+ int i;
+ NetClientState *nc = qemu_get_queue(core->owner_nic);
+
+ /*
+ * If link is down and auto-negotiation is supported and ongoing,
+ * complete auto-negotiation immediately. This allows us to look
+ * at MII_SR_AUTONEG_COMPLETE to infer link status on load.
+ */
+ if (nc->link_down && have_autoneg(core)) {
+ core->phy[0][PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE;
+ _e1000e_update_flowctl_status(core);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
+ /* NOTE(review): presumably skip_cp makes the TX path skip the
+ * partially assembled packet after load -- confirm against the
+ * TX descriptor processing code. */
+ if (net_tx_pkt_has_fragments(core->tx[i].tx_pkt)) {
+ core->tx[i].skip_cp = true;
+ }
+ }
+}
+
+/* Migration post-load hook: rebuild runtime state that is not carried
+ * in the vmstate (backend link status, in-flight autonegotiation and
+ * the interrupt moderation timers). Always returns 0. */
+int
+e1000e_core_post_load(E1000ECore *core)
+{
+ NetClientState *nc = qemu_get_queue(core->owner_nic);
+
+ /* nc.link_down can't be migrated, so infer link_down according
+ * to link status bit in core.mac[STATUS].
+ * Alternatively, restart link negotiation if it was in progress. */
+ nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0;
+
+ if (have_autoneg(core) &&
+ !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) {
+ nc->link_down = false;
+ /* Finish the interrupted negotiation in 500 ms. */
+ timer_mod(core->autoneg_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500);
+ }
+
+ _e1000e_intrmgr_post_load(core);
+
+ return 0;
+}
diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
new file mode 100644
index 0000000..298d1de
--- /dev/null
+++ b/hw/net/e1000e_core.h
@@ -0,0 +1,230 @@
+/*
+* Core code for QEMU e1000e emulation
+*
+* Software developer's manuals:
+* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf
+*
+* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com)
+* Developed by Daynix Computing LTD (http://www.daynix.com)
+*
+* Authors:
+* Dmitry Fleytman <dmitry@daynix.com>
+* Leonid Bloch <leonid@daynix.com>
+* Yan Vugenfirer <yan@daynix.com>
+*
+* Based on work done by:
+* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc.
+* Copyright (c) 2008 Qumranet
+* Based on work done by:
+* Copyright (c) 2007 Dan Aloni
+* Copyright (c) 2004 Antony T Curtis
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Geometry of the emulated device state. */
+#define E1000E_PHY_PAGE_SIZE (0x20)
+#define E1000E_PHY_PAGES (0x07)
+#define E1000E_MAC_SIZE (0x8000)
+#define E1000E_EEPROM_SIZE (64)
+#define E1000E_MSIX_VEC_NUM (5)
+#define E1000E_NUM_QUEUES (2)
+
+/* NOTE(review): this header has no include guard; add one
+ * (e.g. HW_NET_E1000E_CORE_H with a matching #endif at the end). */
+typedef struct E1000Core_st E1000ECore;
+
+/* PHY register access flags: readable, writable, or both.
+ * NOTE(review): PHY_ANYPAGE presumably marks registers reachable
+ * regardless of the selected page -- confirm in the MDIC handlers. */
+enum { PHY_R = BIT(0),
+ PHY_W = BIT(1),
+ PHY_RW = PHY_R | PHY_W,
+ PHY_ANYPAGE = BIT(2) };
+
+/* One interrupt moderation ("delay") timer instance.
+ * NOTE(review): field semantics inferred from names -- 'delay_reg'
+ * selects the register holding the delay value, scaled by
+ * 'delay_resolution_ns'; confirm against the intrmgr code. */
+typedef struct E1000IntrDelayTimer_st {
+ QEMUTimer *timer;
+ bool running;
+ uint32_t delay_reg;
+ uint32_t delay_resolution_ns;
+ E1000ECore *core;
+} E1000IntrDelayTimer;
+
+/* vmstate fields for an E1000IntrDelayTimer embedded in a parent state
+ * structure ('_f' is the field name, '_s' the parent type). Only the
+ * timer and its running flag are migrated. Note: the previous version
+ * ended with a stray line-continuation backslash, which silently
+ * extended the macro onto the following line. */
+#define VMSTATE_E1000_INTR_DELAY_TIMER(_f, _s) \
+ VMSTATE_TIMER_PTR(_f.timer, _s), \
+ VMSTATE_BOOL(_f.running, _s)
+
+/* Complete device-core state, shared by the PCI glue (e1000e.c) and the
+ * core logic (e1000e_core.c). */
+typedef struct E1000Core_st {
+ uint32_t mac[E1000E_MAC_SIZE]; /* MAC registers, indexed by offset >> 2 */
+ uint16_t phy[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]; /* PHY reg pages */
+ uint16_t eeprom[E1000E_EEPROM_SIZE]; /* emulated EEPROM contents */
+
+ /* RX descriptor/buffer geometry (rxbuf_min_shift is set at reset). */
+ uint32_t rxbuf_sizes[E1000_PSRCTL_BUFFS_PER_DESC];
+ uint32_t rx_desc_buf_size;
+ uint32_t rxbuf_min_shift;
+ uint8_t rx_desc_len;
+
+ QEMUTimer *autoneg_timer; /* completes link auto-negotiation */
+
+ /* Per-queue TX state. NOTE(review): the ipcss..paylen fields appear
+ * to mirror the guest-supplied checksum/TSO offload context --
+ * confirm against the TX descriptor processing code. */
+ struct e1000_tx {
+ unsigned char sum_needed;
+ uint8_t ipcss;
+ uint8_t ipcso;
+ uint16_t ipcse;
+ uint8_t tucss;
+ uint8_t tucso;
+ uint16_t tucse;
+ uint8_t hdr_len;
+ uint16_t mss;
+ uint32_t paylen;
+ int8_t ip;
+ int8_t tcp;
+ bool tse;
+ bool cptse;
+
+ struct NetTxPkt *tx_pkt; /* owned by realize/uninit */
+ bool skip_cp;
+ } tx[E1000E_NUM_QUEUES];
+
+ struct NetRxPkt *rx_pkt; /* shared RX packet abstraction */
+
+ bool has_vnet; /* virtio-net header support (passed to pkt abstractions) */
+ int max_queue_num;
+
+ /* Interrupt moderation management */
+ uint32_t delayed_causes; /* ICR causes postponed by the delay timers */
+
+ /* RX interrupt delay timers (RADV/RDTR/RAID). */
+ E1000IntrDelayTimer radv;
+ E1000IntrDelayTimer rdtr;
+ E1000IntrDelayTimer raid;
+
+ /* TX interrupt delay timers (TADV/TIDV). */
+ E1000IntrDelayTimer tadv;
+ E1000IntrDelayTimer tidv;
+
+ /* Legacy/MSI throttling (ITR) ... */
+ E1000IntrDelayTimer itr;
+ bool itr_intr_pending;
+
+ /* ... and per-MSI-X-vector throttling (EITR). */
+ E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM];
+ bool eitr_intr_pending[E1000E_MSIX_VEC_NUM];
+
+ uint32_t itr_guest_value;
+ uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM];
+
+ uint16_t vet; /* VLAN ethertype (VET register) */
+
+ uint8_t permanent_mac[ETH_ALEN]; /* permanent (HW) MAC address */
+
+ /* Back-references to the owning PCI device / NIC. */
+ NICState *owner_nic;
+ PCIDevice *owner;
+ void (*owner_start_recv)(PCIDevice *d); /* kick owner to resume RX */
+} E1000ECore;
+
+/* Turn a register's byte offset constant (E1000_<x>) into its word
+ * index in the mac[] array (registers are 32 bits wide, hence >> 2). */
+#define defreg(x) x = (E1000_##x>>2)
+enum {
+ defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC),
+ defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC),
+ defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC),
+ defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH0),
+ defreg(RDBAL0), defreg(RDH0), defreg(RDLEN0), defreg(RDT0),
+ defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH),
+ defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT),
+ defreg(TDLEN1), defreg(TDBAL1), defreg(TDBAH1), defreg(TDH1),
+ defreg(TDT1), defreg(TORH), defreg(TORL), defreg(TOTH),
+ defreg(TOTL), defreg(TPR), defreg(TPT), defreg(TXDCTL),
+ defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS),
+ defreg(VFTA), defreg(VET), defreg(RDTR), defreg(RADV),
+ defreg(TADV), defreg(ITR), defreg(SCC), defreg(ECOL),
+ defreg(MCC), defreg(LATECOL), defreg(COLC), defreg(DC),
+ defreg(TNCRS), defreg(SEC), defreg(CEXTERR), defreg(RLEC),
+ defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), defreg(XOFFTXC),
+ defreg(FCRUC), defreg(AIT), defreg(TDFH), defreg(TDFT),
+ defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(WUC),
+ defreg(WUS), defreg(POEMB), defreg(PBS), defreg(RDFH),
+ defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC),
+ defreg(PBM), defreg(IPAV), defreg(IP4AT), defreg(IP6AT),
+ defreg(WUPM), defreg(FFLT), defreg(FFMT), defreg(FFVT),
+ defreg(TARC0), defreg(TARC1), defreg(IAM), defreg(EXTCNF_CTRL),
+ defreg(GCR), defreg(TIMINCA), defreg(EIAC), defreg(CTRL_EXT),
+ defreg(IVAR), defreg(MFUTP01), defreg(MFUTP23), defreg(MANC2H),
+ defreg(MFVAL), defreg(MDEF), defreg(FACTPS), defreg(FTFT),
+ defreg(RUC), defreg(ROC), defreg(RFC), defreg(RJC),
+ defreg(PRC64), defreg(PRC127), defreg(PRC255), defreg(PRC511),
+ defreg(PRC1023), defreg(PRC1522), defreg(PTC64), defreg(PTC127),
+ defreg(PTC255), defreg(PTC511), defreg(PTC1023), defreg(PTC1522),
+ defreg(GORCL), defreg(GORCH), defreg(GOTCL), defreg(GOTCH),
+ defreg(RNBC), defreg(BPRC), defreg(MPRC), defreg(RFCTL),
+ defreg(PSRCTL), defreg(MPTC), defreg(BPTC), defreg(TSCTFC),
+ defreg(IAC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC),
+ defreg(TSCTC), defreg(RXCSUM), defreg(FUNCTAG), defreg(GSCL_1),
+ defreg(GSCL_2), defreg(GSCL_3), defreg(GSCL_4), defreg(GSCN_0),
+ defreg(GSCN_1), defreg(GSCN_2), defreg(GSCN_3), defreg(GCR2),
+ defreg(RAID), defreg(RSRPD), defreg(TIDV), defreg(EITR),
+ defreg(MRQC), defreg(RETA), defreg(RSSRK), defreg(RDBAH1),
+ defreg(RDBAL1), defreg(RDLEN1), defreg(RDH1), defreg(RDT1),
+ defreg(PBACLR), defreg(FCAL), defreg(FCAH), defreg(FCT),
+ defreg(FCRTH), defreg(FCRTL), defreg(FCTTV), defreg(FCRTV),
+ defreg(FLA), defreg(EEWR), defreg(FLOP), defreg(FLOL),
+ defreg(FLSWCTL), defreg(FLSWCNT), defreg(RXDCTL), defreg(RXDCTL1),
+ defreg(MAVTV0), defreg(MAVTV1), defreg(MAVTV2), defreg(MAVTV3),
+ defreg(TXSTMPL), defreg(TXSTMPH), defreg(SYSTIML), defreg(SYSTIMH),
+ defreg(RXCFGL), defreg(RXUDP), defreg(TIMADJL), defreg(TIMADJH),
+ defreg(RXSTMPH), defreg(RXSTMPL), defreg(RXSATRL), defreg(RXSATRH),
+ defreg(FLASHT), defreg(TIPG), defreg(TXDCTL1), defreg(FLSWDATA),
+ defreg(CTRL_DUP),
+ defreg(EXTCNF_SIZE),
+ defreg(EEMNGCTL),
+ defreg(EEMNGDATA),
+ defreg(FLMNGCTL),
+ defreg(FLMNGDATA),
+ defreg(FLMNGCNT),
+ defreg(TSYNCRXCTL),
+ defreg(TSYNCTXCTL),
+
+ /* Aliases */
+ /* <reg>_A constants are the registers' secondary addresses; the
+ * mac_reg_access table in e1000e_core.c folds them onto the
+ * primary index above. */
+ defreg(RDH0_A), defreg(RDT0_A), defreg(RDTR_A), defreg(RDFH_A),
+ defreg(RDFT_A), defreg(TDH_A), defreg(TDT_A), defreg(TIDV_A),
+ defreg(TDFH_A), defreg(TDFT_A), defreg(RA_A), defreg(RDBAL0_A),
+ defreg(TDBAL_A), defreg(TDLEN_A), defreg(VFTA_A), defreg(RDLEN0_A),
+ defreg(FCRTL_A), defreg(FCRTH_A)
+};
+
+/* MMIO register access (dispatched via mac_reg_access tables). */
+void
+e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size);
+
+uint64_t
+e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size);
+
+/* Device lifecycle. */
+void
+e1000e_core_pci_realize(E1000ECore *core,
+ const uint16_t *eeprom_templ,
+ uint32_t eeprom_size,
+ const uint8_t *macaddr);
+
+void
+e1000e_core_reset(E1000ECore *core);
+
+/* Migration hooks. */
+void
+e1000e_core_pre_save(E1000ECore *core);
+
+int
+e1000e_core_post_load(E1000ECore *core);
+
+void
+e1000e_core_set_link_status(E1000ECore *core);
+
+void
+e1000e_core_pci_uninit(E1000ECore *core);
+
+/* Network backend receive path. */
+int
+e1000e_can_receive(E1000ECore *core);
+
+ssize_t
+e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size);
+
+ssize_t
+e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt);
diff --git a/trace-events b/trace-events
index a0844a3..50a29cc 100644
--- a/trace-events
+++ b/trace-events
@@ -1615,3 +1615,173 @@ net_rx_pkt_rss_ip6(void) "Calculating IPv6 RSS hash"
net_rx_pkt_rss_ip6_ex(void) "Calculating IPv6/EX RSS hash"
net_rx_pkt_rss_hash(size_t rss_length, uint32_t rss_hash) "RSS hash for %lu bytes: 0x%X"
net_rx_pkt_rss_add_chunk(void* ptr, size_t size, size_t input_offset) "Add RSS chunk %p, %lu bytes, RSS input offset %lu bytes"
+
+# hw/net/e1000e_core.c
+e1000e_core_write(uint64_t index, uint32_t size, uint64_t val) "Write to register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64
+e1000e_core_read(uint64_t index, uint32_t size, uint64_t val) "Read from register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64
+e1000e_core_mdic_read(uint8_t page, uint32_t addr, uint32_t data) "MDIC READ: PHY[%u][%u] = 0x%x"
+e1000e_core_mdic_read_unhandled(uint8_t page, uint32_t addr) "MDIC READ: PHY[%u][%u] UNHANDLED"
+e1000e_core_mdic_write(uint8_t page, uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u][%u] = 0x%x"
+e1000e_core_mdic_write_unhandled(uint8_t page, uint32_t addr) "MDIC WRITE: PHY[%u][%u] UNHANDLED"
+e1000e_core_eeeprom_write(uint16_t bit_in, uint16_t bit_out, uint16_t reading) "eeprom bitnum in %d out %d, reading %d"
+e1000e_core_ctrl_write(uint64_t index, uint32_t val) "Write CTRL register 0x%"PRIx64", value: 0x%X"
+e1000e_core_ctrl_sw_reset(void) "Doing SW reset"
+e1000e_core_ctrl_phy_reset(void) "Doing PHY reset"
+
+e1000e_link_negotiation_start(void) "Start link auto negotiation"
+e1000e_link_negotiation_done(void) "Auto negotiation is completed"
+e1000e_link_autoneg_flowctl(bool enabled) "Auto-negotiated flow control state is %d"
+e1000e_link_set_params(bool autodetect, uint32_t speed, bool force_spd, bool force_dplx, bool rx_fctl, bool tx_fctl) "Set link params: Autodetect: %d, Speed: %d, Force speed: %d, Force duplex: %d, RX flow control %d, TX flow control %d"
+e1000e_link_read_params(bool autodetect, uint32_t speed, bool force_spd, bool force_dplx, bool rx_fctl, bool tx_fctl) "Get link params: Autodetect: %d, Speed: %d, Force speed: %d, Force duplex: %d, RX flow control %d, TX flow control %d"
+e1000e_link_set_ext_params(bool asd_check, bool speed_select_bypass) "Set extended link params: ASD check: %d, Speed select bypass: %d"
+e1000e_link_status(bool link_up, bool full_dplx, uint32_t speed, uint32_t asdv) "Link up: %d, Duplex: %d, Speed: %d, ASDV: %d"
+
+e1000e_wrn_regs_write_ro(uint64_t index, uint32_t size, uint64_t val) "WARNING: Write to RO register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64
+e1000e_wrn_regs_write_unknown(uint64_t index, uint32_t size, uint64_t val) "WARNING: Write to unknown register 0x%"PRIx64", %d byte(s), value: 0x%"PRIx64
+e1000e_wrn_regs_read_unknown(uint64_t index, uint32_t size) "WARNING: Read from unknown register 0x%"PRIx64", %d byte(s)"
+e1000e_wrn_regs_read_trivial(uint32_t index) "WARNING: Reading register at offset: 0x%05x. It is not fully implemented."
+e1000e_wrn_regs_write_trivial(uint32_t index) "WARNING: Writing to register at offset: 0x%05x. It is not fully implemented."
+e1000e_wrn_no_ts_support(void) "WARNING: Guest requested TX timestamping which is not supported"
+e1000e_wrn_no_snap_support(void) "WARNING: Guest requested TX SNAP header update which is not supported"
+e1000e_wrn_iscsi_filtering_not_supported(void) "WARNING: Guest requested iSCSI filtering which is not supported"
+e1000e_wrn_nfsw_filtering_not_supported(void) "WARNING: Guest requested NFS write filtering which is not supported"
+e1000e_wrn_nfsr_filtering_not_supported(void) "WARNING: Guest requested NFS read filtering which is not supported"
+
+e1000e_tx_disabled(void) "TX Disabled"
+e1000e_tx_descr(void *addr, uint32_t lower, uint32_t upper) "%p : %x %x"
+e1000e_tx_cso_zero(void) "TCP/UDP: cso 0!"
+
+e1000e_ring_free_space(int ridx, uint32_t rdlen, uint32_t rdh, uint32_t rdt) "ring #%d: LEN: %u, DH: %u, DT: %u"
+
+e1000e_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d"
+e1000e_rx_can_recv_rings_full(void) "Cannot receive: all rings are full"
+e1000e_rx_can_recv(void) "Can receive"
+e1000e_rx_has_buffers(int ridx, uint32_t free_desc, size_t total_size, uint32_t desc_buf_size) "ring #%d: free descr: %u, packet size %zu, descr buffer size %u"
+e1000e_rx_null_descriptor(void) "Null RX descriptor!!"
+e1000e_rx_flt_ucast_match(uint32_t idx, uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x"
+e1000e_rx_flt_ucast_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x"
+e1000e_rx_flt_inexact_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5, uint32_t mo, uint32_t mta, uint32_t mta_val) "inexact mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x"
+e1000e_rx_flt_vlan_mismatch(uint16_t vid) "VID mismatch: 0x%X"
+e1000e_rx_flt_vlan_match(uint16_t vid) "VID match: 0x%X"
+e1000e_rx_desc_ps_read(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) "buffers: [0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64"]"
+e1000e_rx_desc_ps_write(uint16_t a0, uint16_t a1, uint16_t a2, uint16_t a3) "bytes written: [%u, %u, %u, %u]"
+e1000e_rx_desc_buff_sizes(uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3) "buffer sizes: [%u, %u, %u, %u]"
+e1000e_rx_desc_len(uint8_t rx_desc_len) "RX descriptor length: %u"
+e1000e_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+e1000e_rx_descr(int ridx, uint64_t base, uint8_t len) "Next RX descriptor: ring #%d, PA: 0x%"PRIx64", length: %u"
+e1000e_rx_set_rctl(uint32_t rctl) "RCTL = 0x%x"
+e1000e_rx_receive_iov(int iovcnt) "Received vector of %d fragments"
+e1000e_rx_packet_size(size_t full, size_t vhdr, size_t data) "Received packet of %zu bytes total, %zu virt header, %zu data"
+e1000e_rx_link_down(uint32_t status_reg) "Received packet dropped because the link is down STATUS = %u"
+e1000e_rx_disabled(uint32_t rctl_reg) "Received packet dropped because receive is disabled RCTL = %u"
+e1000e_rx_oversized(size_t size) "Received packet dropped because it was oversized (%zu bytes)"
+e1000e_rx_flt_dropped(void) "Received packet dropped by RX filter"
+e1000e_rx_written_to_guest(uint32_t causes) "Received packet written to guest (ICR causes %u)"
+e1000e_rx_not_written_to_guest(uint32_t causes) "Received packet NOT written to guest (ICR causes %u)"
+e1000e_rx_interrupt_set(uint32_t causes) "Receive interrupt set (ICR causes %u)"
+e1000e_rx_interrupt_delayed(uint32_t causes) "Receive interrupt delayed (ICR causes %u)"
+e1000e_rx_set_cso(int cso_state) "RX CSO state set to %d"
+e1000e_rx_set_rdt(int queue_idx, uint32_t val) "Setting RDT[%d] = %u"
+e1000e_rx_set_rfctl(uint32_t val) "Setting RFCTL = 0x%X"
+e1000e_rx_start_recv(void) "Starting receive"
+
+e1000e_rx_rss_started(void) "Starting RSS processing"
+e1000e_rx_rss_disabled(void) "RSS is disabled"
+e1000e_rx_rss_type(uint32_t type) "RSS type is %u"
+e1000e_rx_rss_ip4(bool isfragment, bool istcp, uint32_t mrqc, bool tcpipv4_enabled, bool ipv4_enabled) "RSS IPv4: fragment %d, tcp %d, mrqc 0x%X, tcpipv4 enabled %d, ipv4 enabled %d"
+e1000e_rx_rss_ip6(uint32_t rfctl, bool ex_dis, bool new_ex_dis, bool istcp, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: rfctl 0x%X, ex_dis: %d, new_ex_dis: %d, tcp %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6 enabled %d, ipv6ex enabled %d, ipv6 enabled %d"
+e1000e_rx_rss_dispatched_to_queue(int queue_idx) "Packet being dispatched to queue %d"
+
+e1000e_rx_metadata_protocols(bool isip4, bool isip6, bool isudp, bool istcp) "protocols: ip4: %d, ip6: %d, udp: %d, tcp: %d"
+e1000e_rx_metadata_vlan(uint16_t vlan_tag) "VLAN tag is 0x%X"
+e1000e_rx_metadata_rss(uint32_t rss, uint32_t mrq) "RSS data: rss: 0x%X, mrq: 0x%X"
+e1000e_rx_metadata_ip_id(uint16_t ip_id) "the IPv4 ID is 0x%X"
+e1000e_rx_metadata_ack(void) "the packet is TCP ACK"
+e1000e_rx_metadata_pkt_type(uint32_t pkt_type) "the packet type is %u"
+e1000e_rx_metadata_no_virthdr(void) "the packet has no virt-header"
+e1000e_rx_metadata_virthdr_no_csum_info(void) "virt-header does not contain checksum info"
+e1000e_rx_metadata_l3_cso_disabled(void) "IP4 CSO is disabled"
+e1000e_rx_metadata_l4_cso_disabled(void) "TCP/UDP CSO is disabled"
+e1000e_rx_metadata_l3_csum_validation_failed(void) "Cannot validate L3 checksum"
+e1000e_rx_metadata_l4_csum_validation_failed(void) "Cannot validate L4 checksum"
+e1000e_rx_metadata_status_flags(uint32_t status_flags) "status_flags is 0x%X"
+e1000e_rx_metadata_ipv6_sum_disabled(void) "IPv6 RX checksumming disabled by RFCTL"
+e1000e_rx_metadata_ipv6_filtering_disabled(void) "IPv6 RX filtering disabled by RFCTL"
+
+e1000e_vlan_vet(uint16_t vet) "Setting VLAN ethernet type 0x%X"
+e1000e_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X"
+
+e1000e_irq_set_cause(uint32_t cause) "IRQ cause set 0x%x"
+e1000e_irq_msi_notify(uint32_t cause) "MSI notify 0x%x"
+e1000e_irq_throttling_no_pending_interrupts(void) "No pending interrupts to notify"
+e1000e_irq_msi_notify_postponed(void) "Sending MSI postponed by ITR"
+e1000e_irq_legacy_notify_postponed(void) "Raising legacy IRQ postponed by ITR"
+e1000e_irq_throttling_no_pending_vec(int idx) "No pending interrupts for vector %d"
+e1000e_irq_msix_notify_postponed_vec(int idx) "Sending MSI-X postponed by EITR[%d]"
+e1000e_irq_msix_notify(uint32_t cause) "MSI-X notify 0x%x"
+e1000e_irq_legacy_notify(bool level) "IRQ line state: %d"
+e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x"
+e1000e_irq_postponed_by_xitr(uint32_t reg) "Interrupt postponed by [E]ITR register 0x%x"
+e1000e_irq_clear_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Clearing IMS bits 0x%x: 0x%x --> 0x%x"
+e1000e_irq_set_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Setting IMS bits 0x%x: 0x%x --> 0x%x"
+e1000e_irq_fix_icr_asserted(uint32_t new_val) "ICR_ASSERTED bit fixed: 0x%x"
+e1000e_irq_add_msi_other(uint32_t new_val) "ICR_OTHER bit added: 0x%x"
+e1000e_irq_pending_interrupts(uint32_t pending, uint32_t icr, uint32_t ims) "ICR PENDING: 0x%x (ICR: 0x%x, IMS: 0x%x)"
+e1000e_irq_set_cause_entry(uint32_t val, uint32_t icr) "Going to set IRQ cause 0x%x, ICR: 0x%x"
+e1000e_irq_set_cause_exit(uint32_t val, uint32_t icr) "Set IRQ cause 0x%x, ICR: 0x%x"
+e1000e_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x"
+e1000e_irq_write_ics(uint32_t val) "Adding ICR bits 0x%x"
+e1000e_irq_icr_process_iame(void) "Clearing IMS bits due to IAME"
+e1000e_irq_read_ics(uint32_t ics) "Current ICS: 0x%x"
+e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x"
+e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x"
+e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
+e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
+e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
+e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
+e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x"
+e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts"
+e1000e_irq_rearm_timer(uint32_t reg, int64_t delay_ns) "Mitigation timer armed for register 0x%X, delay %"PRId64" ns"
+e1000e_irq_throttling_timer(uint32_t reg) "Mitigation timer shot for register 0x%X"
+e1000e_irq_rdtr_fpd_running(void) "FPD written while RDTR was running"
+e1000e_irq_rdtr_fpd_not_running(void) "FPD written while RDTR was not running"
+e1000e_irq_tidv_fpd_running(void) "FPD written while TIDV was running"
+e1000e_irq_tidv_fpd_not_running(void) "FPD written while TIDV was not running"
+e1000e_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = %u"
+e1000e_irq_itr_set(uint32_t val) "ITR = %u"
+e1000e_irq_fire_all_timers(uint32_t val) "Firing all delay/throttling timers on all interrupts enable (0x%X written to IMS)"
+e1000e_irq_adding_delayed_causes(uint32_t val, uint32_t icr) "Merging delayed causes 0x%X to ICR 0x%X"
+e1000e_irq_msix_pending_clearing(uint32_t cause, uint32_t int_cfg, uint32_t vec) "Clearing MSI-X pending bit for cause 0x%x, IVAR config 0x%x, vector %u"
+
+e1000e_wrn_msix_vec_wrong(uint32_t cause, uint32_t cfg) "Invalid configuration for cause 0x%x: 0x%x"
+e1000e_wrn_msix_invalid(uint32_t cause, uint32_t cfg) "Invalid entry for cause 0x%x: 0x%x"
+
+e1000e_mac_set_permanent(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Set permanent MAC: %02x:%02x:%02x:%02x:%02x:%02x"
+e1000e_mac_set_sw(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Set SW MAC: %02x:%02x:%02x:%02x:%02x:%02x"
+e1000e_mac_indicate(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "Indicating MAC to guest: %02x:%02x:%02x:%02x:%02x:%02x"
+
+# hw/net/e1000e.c
+e1000e_cb_pci_realize(void) "E1000E PCI realize entry"
+e1000e_cb_pci_uninit(void) "E1000E PCI uninit entry"
+e1000e_cb_qdev_reset(void) "E1000E qdev reset entry"
+e1000e_cb_pre_save(void) "E1000E pre save entry"
+e1000e_cb_post_load(void) "E1000E post load entry"
+
+e1000e_io_write_addr(uint64_t addr) "IOADDR write 0x%"PRIx64
+e1000e_io_write_data(uint64_t addr, uint64_t val) "IODATA write 0x%"PRIx64", value: 0x%"PRIx64
+e1000e_io_read_addr(uint64_t addr) "IOADDR read 0x%"PRIx64
+e1000e_io_read_data(uint64_t addr, uint64_t val) "IODATA read 0x%"PRIx64", value: 0x%"PRIx64
+e1000e_wrn_io_write_unknown(uint64_t addr) "IO write unknown address 0x%"PRIx64
+e1000e_wrn_io_read_unknown(uint64_t addr) "IO read unknown address 0x%"PRIx64
+e1000e_wrn_io_addr_undefined(uint64_t addr) "IO undefined register 0x%"PRIx64
+e1000e_wrn_io_addr_flash(uint64_t addr) "IO flash access (0x%"PRIx64") not implemented"
+e1000e_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64
+
+e1000e_wrn_flash_read(uint64_t addr) "BAR1 flash read (0x%"PRIx64") not implemented"
+e1000e_wrn_flash_write(uint64_t addr, uint64_t val) "BAR1 flash write ([0x%"PRIx64"] = 0x%"PRIx64") not implemented"
+
+e1000e_msi_init_fail(int32_t res) "Failed to initialize MSI, error %d"
+e1000e_msix_init_fail(int32_t res) "Failed to initialize MSI-X, error %d"
+e1000e_msix_use_vector_fail(uint32_t vec, int32_t res) "Failed to use MSI-X vector %d, error %d"
+
+e1000e_cfg_support_virtio(bool support) "Virtio header supported: %d"
--
2.4.3
^ permalink raw reply related [flat|nested] 18+ messages in thread