netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: virtualization@lists.linux-foundation.org
Cc: netdev@vger.kernel.org, mst@redhat.com, jasowang@redhat.com,
	richardcochran@gmail.com, Willem de Bruijn <willemb@google.com>
Subject: [PATCH RFC v2 1/4] virtio-net: support transmit hash report
Date: Mon,  8 Feb 2021 13:55:55 -0500	[thread overview]
Message-ID: <20210208185558.995292-2-willemdebruijn.kernel@gmail.com> (raw)
In-Reply-To: <20210208185558.995292-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Virtio-net supports sharing the flow hash from device to driver on rx.
Do the same in the other direction for robust routing and telemetry.

Linux derives ipv6 flowlabel and ECMP multipath from sk->sk_txhash,
and updates this field on error with sk_rethink_txhash. Allow the host
stack to do the same.

Concrete examples of error conditions that are resolved are
mentioned in the commits that add sk_rethink_txhash calls. Such as
commit 7788174e8726 ("tcp: change IPv6 flow-label upon receiving
spurious retransmission").

Experimental results mirror what the theory suggests: where IPv6
FlowLabel is included in path selection (e.g., LAG/ECMP), flowlabel
rotation on TCP timeout avoids the vast majority of TCP disconnects
that would otherwise have occurred during link failures in long-haul
backbones, when an alternative path is available.

Rotation can be applied to various bad connection signals, such as
timeouts and spurious retransmissions. In aggregate, such flow level
signals can help locate network issues. Reserve field hash_state to
share this info. For now, always set VIRTIO_NET_HASH_STATE_DEFAULT.
Passing information between TCP stack and driver is future work.

Changes RFC->RFCv2
  - drop unused VIRTIO_NET_HASH_STATE_TIMEOUT_BIT
  - convert from cpu_to_virtioXX to cpu_to_leXX

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 drivers/net/virtio_net.c        | 26 +++++++++++++++++++++++---
 include/uapi/linux/virtio_net.h |  9 ++++++++-
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ba8e63792549..7f822b2a5205 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -201,6 +201,9 @@ struct virtnet_info {
 	/* Host will merge rx buffers for big packets (shake it! shake it!) */
 	bool mergeable_rx_bufs;
 
+	/* Driver will pass tx path info to the device */
+	bool has_tx_hash;
+
 	/* Has control virtqueue */
 	bool has_cvq;
 
@@ -394,9 +397,9 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
 	hdr_len = vi->hdr_len;
 	if (vi->mergeable_rx_bufs)
-		hdr_padded_len = sizeof(*hdr);
+		hdr_padded_len = max_t(unsigned int, hdr_len, sizeof(*hdr));
 	else
-		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+		hdr_padded_len = ALIGN(hdr_len, 16);
 
 	/* hdr_valid means no XDP, so we can copy the vnet header */
 	if (hdr_valid)
@@ -1528,6 +1531,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
+	struct virtio_net_hdr_v1_hash *ht;
 	int num_sg;
 	unsigned hdr_len = vi->hdr_len;
 	bool can_push;
@@ -1552,6 +1556,16 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 	if (vi->mergeable_rx_bufs)
 		hdr->num_buffers = 0;
 
+	ht = (void *)hdr;
+	if (vi->has_tx_hash) {
+		u16 report = skb->l4_hash ? VIRTIO_NET_HASH_REPORT_L4 :
+					    VIRTIO_NET_HASH_REPORT_OTHER;
+
+		ht->hash_value = cpu_to_le32(skb->hash);
+		ht->hash_report = cpu_to_le16(report);
+		ht->hash_state = cpu_to_le16(VIRTIO_NET_HASH_STATE_DEFAULT);
+	}
+
 	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
 	if (can_push) {
 		__skb_push(skb, hdr_len);
@@ -3050,6 +3064,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 	else
 		vi->hdr_len = sizeof(struct virtio_net_hdr);
 
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_TX_HASH)) {
+		vi->has_tx_hash = true;
+		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
+	}
+
 	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
 	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
 		vi->any_header_sg = true;
@@ -3240,7 +3259,8 @@ static struct virtio_device_id id_table[] = {
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
 	VIRTIO_NET_F_CTRL_MAC_ADDR, \
 	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
-	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY
+	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
+	VIRTIO_NET_F_TX_HASH
 
 static unsigned int features[] = {
 	VIRTNET_FEATURES,
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 3f55a4215f11..273d43c35f59 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -57,6 +57,7 @@
 					 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
 
+#define VIRTIO_NET_F_TX_HASH	  56	/* Driver sends hash report */
 #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
 #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
 #define VIRTIO_NET_F_RSC_EXT	  61	/* extended coalescing info */
@@ -170,8 +171,14 @@ struct virtio_net_hdr_v1_hash {
 #define VIRTIO_NET_HASH_REPORT_IPv6_EX         7
 #define VIRTIO_NET_HASH_REPORT_TCPv6_EX        8
 #define VIRTIO_NET_HASH_REPORT_UDPv6_EX        9
+#define VIRTIO_NET_HASH_REPORT_L4              10
+#define VIRTIO_NET_HASH_REPORT_OTHER           11
 	__le16 hash_report;
-	__le16 padding;
+	union {
+		__le16 padding;
+#define VIRTIO_NET_HASH_STATE_DEFAULT          0
+		__le16 hash_state;
+	};
 };
 
 #ifndef VIRTIO_NET_NO_LEGACY
-- 
2.30.0.478.g8a0d178c01-goog


  reply	other threads:[~2021-02-08 18:58 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-08 18:55 [PATCH RFC v2 0/4] virtio-net: add tx-hash, rx-tstamp, tx-tstamp and tx-time Willem de Bruijn
2021-02-08 18:55 ` Willem de Bruijn [this message]
2021-02-08 18:55 ` [PATCH RFC v2 2/4] virtio-net: support receive timestamp Willem de Bruijn
2021-02-09  4:13   ` Jason Wang
2021-02-09 13:53     ` Willem de Bruijn
2021-02-10  4:10       ` Jason Wang
2021-02-08 18:55 ` [PATCH RFC v2 3/4] virtio-net: support transmit timestamp Willem de Bruijn
2021-02-09  5:45   ` Jason Wang
2021-02-09 16:38     ` Michael S. Tsirkin
2021-02-10  2:36       ` Willem de Bruijn
2021-02-10  4:15         ` Jason Wang
2021-02-10 14:40           ` Willem de Bruijn
2021-02-10  4:12       ` Jason Wang
2021-02-09  9:42   ` Michael S. Tsirkin
2021-02-09 14:45     ` Willem de Bruijn
2021-02-08 18:55 ` [PATCH RFC v2 4/4] virtio-net: support future packet transmit time Willem de Bruijn
2021-05-13 22:49 ` [PATCH RFC v2 0/4] virtio-net: add tx-hash, rx-tstamp, tx-tstamp and tx-time Willem de Bruijn
2021-05-14  7:12   ` Jason Wang
2021-05-14 12:46     ` Willem de Bruijn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210208185558.995292-2-willemdebruijn.kernel@gmail.com \
    --to=willemdebruijn.kernel@gmail.com \
    --cc=jasowang@redhat.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=richardcochran@gmail.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).