netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] virtio-net: put virtio net header inline with data
@ 2013-07-08 10:12 Michael S. Tsirkin
  2013-07-09  2:16 ` Rusty Russell
  0 siblings, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-07-08 10:12 UTC (permalink / raw)
  To: netdev
  Cc: Cong Wang, linux-kernel, virtualization, Dave Jones, David S. Miller

For small packets we can simplify xmit processing
by linearizing buffers with the header:
most packets seem to have enough head room
we can use for this purpose.
Since existing hypervisors require that header
is the first s/g element, we need a feature bit
for this.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

Note: this needs to be applied on top of patch
defining VIRTIO_F_ANY_LAYOUT - bit to be
selected by Rusty.

The following patch should work for any definition of
VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
and squeeze this patch into 3.11?

 drivers/net/virtio_net.c        | 42 +++++++++++++++++++++++++++++++++--------
 include/uapi/linux/virtio_net.h |  4 +++-
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c9e0038..5305bd1 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -106,6 +106,9 @@ struct virtnet_info {
 	/* Has control virtqueue */
 	bool has_cvq;
 
+	/* Host can handle any s/g split between our header and packet data */
+	bool any_header_sg;
+
 	/* enable config space updates */
 	bool config_enable;
 
@@ -668,12 +671,28 @@ static void free_old_xmit_skbs(struct send_queue *sq)
 
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
-	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+	struct skb_vnet_hdr *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	unsigned num_sg;
+	unsigned hdr_len;
+	bool can_push;
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
+	if (vi->mergeable_rx_bufs)
+		hdr_len = sizeof hdr->mhdr;
+	else
+		hdr_len = sizeof hdr->hdr;
+
+	can_push = vi->any_header_sg &&
+		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
+		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
+	/* Even if we can, don't push here yet as this would skew
+	 * csum_start offset below. */
+	if (can_push)
+		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
+	else
+		hdr = skb_vnet_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
@@ -702,15 +721,18 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
 	}
 
-	hdr->mhdr.num_buffers = 0;
-
-	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
-	else
-		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
+		hdr->mhdr.num_buffers = 0;
 
-	num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	if (can_push) {
+		__skb_push(skb, hdr_len);
+		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
+		/* Pull header back to avoid skew in tx bytes calculations. */
+		__skb_pull(skb, hdr_len);
+	} else {
+		sg_set_buf(sq->sg, hdr, hdr_len);
+		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	}
 	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
 }
 
@@ -1554,6 +1576,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
+	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT))
+		vi->any_header_sg = true;
+
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
 		vi->has_cvq = true;
 
@@ -1729,6 +1754,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
 	VIRTIO_NET_F_CTRL_MAC_ADDR,
+	VIRTIO_F_ANY_LAYOUT,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index c520203..bd1993b 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -70,7 +70,9 @@ struct virtio_net_config {
 	__u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
-/* This is the first element of the scatter-gather list.  If you don't
+/* This header comes first in the scatter-gather list.
+ * If VIRTIO_F_ANY_LAYOUT is not negotiated, it must
+ * be the first element of the scatter-gather list.  If you don't
  * specify GSO or CSUM features, you can simply ignore the header. */
 struct virtio_net_hdr {
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	// Use csum_start, csum_offset
-- 
MST

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-08 10:12 [PATCH] virtio-net: put virtio net header inline with data Michael S. Tsirkin
@ 2013-07-09  2:16 ` Rusty Russell
  2013-07-09  5:18   ` Michael S. Tsirkin
  2013-07-11 13:00   ` Michael S. Tsirkin
  0 siblings, 2 replies; 22+ messages in thread
From: Rusty Russell @ 2013-07-09  2:16 UTC (permalink / raw)
  To: Michael S. Tsirkin, netdev
  Cc: Cong Wang, linux-kernel, virtualization, Dave Jones, David S. Miller

"Michael S. Tsirkin" <mst@redhat.com> writes:
> For small packets we can simplify xmit processing
> by linearizing buffers with the header:
> most packets seem to have enough head room
> we can use for this purpose.
> Since existing hypervisors require that header
> is the first s/g element, we need a feature bit
> for this.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>
> Note: this needs to be applied on top of patch
> defining VIRTIO_F_ANY_LAYOUT - bit to be
> selected by Rusty.
>
> The following patch should work for any definition of
> VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
> Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
> and squeeze this patch into 3.11?

Sorry, it's too late.

First, I've been a bit lax in sending patches via DaveM, and this is
definitely his territory (ie. more net than virtio).

Secondly, it needs baking and testing time.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-09  2:16 ` Rusty Russell
@ 2013-07-09  5:18   ` Michael S. Tsirkin
  2013-07-09  8:08     ` Rusty Russell
  2013-07-11 13:00   ` Michael S. Tsirkin
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-07-09  5:18 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Cong Wang, netdev, linux-kernel, virtualization, Dave Jones,
	David S. Miller

On Tue, Jul 09, 2013 at 11:46:23AM +0930, Rusty Russell wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > For small packets we can simplify xmit processing
> > by linearizing buffers with the header:
> > most packets seem to have enough head room
> > we can use for this purpose.
> > Since existing hypervisors require that header
> > is the first s/g element, we need a feature bit
> > for this.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >
> > Note: this needs to be applied on top of patch
> > defining VIRTIO_F_ANY_LAYOUT - bit to be
> > selected by Rusty.
> >
> > The following patch should work for any definition of
> > VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
> > Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
> > and squeeze this patch into 3.11?
> 
> Sorry, it's too late.
> 
> First, I've been a bit lax in sending patches via DaveM, and this is
> definitely his territory (ie. more net than virtio).

Let's do this: I'll send a patch series, you ack and we see
what happens?

> Secondly, it needs baking and testing time.
> 
> Cheers,
> Rusty.

I did some testing on this.  But proper testing just isn't happening out
of tree.

-- 
MST

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-09  5:18   ` Michael S. Tsirkin
@ 2013-07-09  8:08     ` Rusty Russell
  2013-07-10  4:38       ` David Miller
  2013-07-10  6:14       ` Michael S. Tsirkin
  0 siblings, 2 replies; 22+ messages in thread
From: Rusty Russell @ 2013-07-09  8:08 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Cong Wang, netdev, linux-kernel, virtualization, Dave Jones,
	David S. Miller

"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Tue, Jul 09, 2013 at 11:46:23AM +0930, Rusty Russell wrote:
>> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> > For small packets we can simplify xmit processing
>> > by linearizing buffers with the header:
>> > most packets seem to have enough head room
>> > we can use for this purpose.
>> > Since existing hypervisors require that header
>> > is the first s/g element, we need a feature bit
>> > for this.
>> >
>> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> > ---
>> >
>> > Note: this needs to be applied on top of patch
>> > defining VIRTIO_F_ANY_LAYOUT - bit to be
>> > selected by Rusty.
>> >
>> > The following patch should work for any definition of
>> > VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
>> > Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
>> > and squeeze this patch into 3.11?
>> 
>> Sorry, it's too late.
>> 
>> First, I've been a bit lax in sending patches via DaveM, and this is
>> definitely his territory (ie. more net than virtio).
>
> Let's do this: I'll send a patch series, you ack and we see
> what happens?

If you convince DaveM, I won't object :)

>> Secondly, it needs baking and testing time.
>> 
>> Cheers,
>> Rusty.
>
> I did some testing on this.  But proper testing just isn't happening out
> of tree.

But it will get into linux-next for the *next* merge window.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-09  8:08     ` Rusty Russell
@ 2013-07-10  4:38       ` David Miller
  2013-07-15  1:43         ` Rusty Russell
  2013-07-10  6:14       ` Michael S. Tsirkin
  1 sibling, 1 reply; 22+ messages in thread
From: David Miller @ 2013-07-10  4:38 UTC (permalink / raw)
  To: rusty; +Cc: amwang, mst, netdev, linux-kernel, virtualization, davej

From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 09 Jul 2013 17:38:51 +0930

> If you convince DaveM, I won't object :)

Simplifications are great, but not when the merge window opens up.

Sorry, this isn't appropriate now.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-09  8:08     ` Rusty Russell
  2013-07-10  4:38       ` David Miller
@ 2013-07-10  6:14       ` Michael S. Tsirkin
  2013-07-15  1:40         ` Rusty Russell
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-07-10  6:14 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Cong Wang, netdev, linux-kernel, virtualization, Dave Jones,
	David S. Miller

On Tue, Jul 09, 2013 at 05:38:51PM +0930, Rusty Russell wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > On Tue, Jul 09, 2013 at 11:46:23AM +0930, Rusty Russell wrote:
> >> "Michael S. Tsirkin" <mst@redhat.com> writes:
> >> > For small packets we can simplify xmit processing
> >> > by linearizing buffers with the header:
> >> > most packets seem to have enough head room
> >> > we can use for this purpose.
> >> > Since existing hypervisors require that header
> >> > is the first s/g element, we need a feature bit
> >> > for this.
> >> >
> >> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >> > ---
> >> >
> >> > Note: this needs to be applied on top of patch
> >> > defining VIRTIO_F_ANY_LAYOUT - bit to be
> >> > selected by Rusty.
> >> >
> >> > The following patch should work for any definition of
> >> > VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
> >> > Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
> >> > and squeeze this patch into 3.11?
> >> 
> >> Sorry, it's too late.
> >> 
> >> First, I've been a bit lax in sending patches via DaveM, and this is
> >> definitely his territory (ie. more net than virtio).
> >
> > Let's do this: I'll send a patch series, you ack and we see
> > what happens?
> 
> If you convince DaveM, I won't object :)
> 
> >> Secondly, it needs baking and testing time.
> >> 
> >> Cheers,
> >> Rusty.
> >
> > I did some testing on this.  But proper testing just isn't happening out
> > of tree.
> 
> But it will get into linux-next for the *next* merge window.
> 
> Cheers,
> Rusty.

Okay. Can you put it on virtio-next then?
I don't see it there ...

-- 
MST

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-09  2:16 ` Rusty Russell
  2013-07-09  5:18   ` Michael S. Tsirkin
@ 2013-07-11 13:00   ` Michael S. Tsirkin
  2013-07-12  5:57     ` Rusty Russell
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-07-11 13:00 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Cong Wang, qemu-devel, netdev, linux-kernel, virtualization,
	Dave Jones, David S. Miller

On Tue, Jul 09, 2013 at 11:46:23AM +0930, Rusty Russell wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > For small packets we can simplify xmit processing
> > by linearizing buffers with the header:
> > most packets seem to have enough head room
> > we can use for this purpose.
> > Since existing hypervisors require that header
> > is the first s/g element, we need a feature bit
> > for this.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> >
> > Note: this needs to be applied on top of patch
> > defining VIRTIO_F_ANY_LAYOUT - bit to be
> > selected by Rusty.
> >
> > The following patch should work for any definition of
> > VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
> > Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
> > and squeeze this patch into 3.11?
> 
> Sorry, it's too late.
> 
> First, I've been a bit lax in sending patches via DaveM, and this is
> definitely his territory (ie. more net than virtio).
> 
> Secondly, it needs baking and testing time.
> 
> Cheers,
> Rusty.

Okay. But we can already commit the qemu change, right?
Also, can you submit the spec update please?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-11 13:00   ` Michael S. Tsirkin
@ 2013-07-12  5:57     ` Rusty Russell
  0 siblings, 0 replies; 22+ messages in thread
From: Rusty Russell @ 2013-07-12  5:57 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Cong Wang, qemu-devel, netdev, linux-kernel, virtualization,
	Dave Jones, David S. Miller

"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Tue, Jul 09, 2013 at 11:46:23AM +0930, Rusty Russell wrote:
>> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> > For small packets we can simplify xmit processing
>> > by linearizing buffers with the header:
>> > most packets seem to have enough head room
>> > we can use for this purpose.
>> > Since existing hypervisors require that header
>> > is the first s/g element, we need a feature bit
>> > for this.
>> >
>> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> > ---
>> >
>> > Note: this needs to be applied on top of patch
>> > defining VIRTIO_F_ANY_LAYOUT - bit to be
>> > selected by Rusty.
>> >
>> > The following patch should work for any definition of
>> > VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
>> > Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
>> > and squeeze this patch into 3.11?
>> 
>> Sorry, it's too late.
>> 
>> First, I've been a bit lax in sending patches via DaveM, and this is
>> definitely his territory (ie. more net than virtio).
>> 
>> Secondly, it needs baking and testing time.
>> 
>> Cheers,
>> Rusty.
>
> Okay. But we can already commit the qemu change, right?
> Also, can you submit the spec update please?

The spec has been updated, BTW.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-10  6:14       ` Michael S. Tsirkin
@ 2013-07-15  1:40         ` Rusty Russell
  0 siblings, 0 replies; 22+ messages in thread
From: Rusty Russell @ 2013-07-15  1:40 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Cong Wang, netdev, linux-kernel, virtualization, Dave Jones,
	David S. Miller

"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Tue, Jul 09, 2013 at 05:38:51PM +0930, Rusty Russell wrote:
>> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> > On Tue, Jul 09, 2013 at 11:46:23AM +0930, Rusty Russell wrote:
>> >> "Michael S. Tsirkin" <mst@redhat.com> writes:
>> >> > For small packets we can simplify xmit processing
>> >> > by linearizing buffers with the header:
>> >> > most packets seem to have enough head room
>> >> > we can use for this purpose.
>> >> > Since existing hypervisors require that header
>> >> > is the first s/g element, we need a feature bit
>> >> > for this.
>> >> >
>> >> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> >> > ---
>> >> >
>> >> > Note: this needs to be applied on top of patch
>> >> > defining VIRTIO_F_ANY_LAYOUT - bit to be
>> >> > selected by Rusty.
>> >> >
>> >> > The following patch should work for any definition of
>> >> > VIRTIO_F_ANY_LAYOUT - I used bit 31 for testing.
>> >> > Rusty, could you please pick a valid bit for VIRTIO_F_ANY_LAYOUT
>> >> > and squeeze this patch into 3.11?
>> >> 
>> >> Sorry, it's too late.
>> >> 
>> >> First, I've been a bit lax in sending patches via DaveM, and this is
>> >> definitely his territory (ie. more net than virtio).
>> >
>> > Let's do this: I'll send a patch series, you ack and we see
>> > what happens?
>> 
>> If you convince DaveM, I won't object :)
>> 
>> >> Secondly, it needs baking and testing time.
>> >> 
>> >> Cheers,
>> >> Rusty.
>> >
>> > I did some testing on this.  But proper testing just isn't happening out
>> > of tree.
>> 
>> But it will get into linux-next for the *next* merge window.
>> 
>> Cheers,
>> Rusty.
>
> Okay. Can you put it on virtio-next then?
> I don't see it there ...

Here's the general order:
- 1 week before Linus merge window: I stop putting new stuff into xxx-next.
- Linus merge window open: I push stuff to Linus, new stuff stays in -pending.
- Linus merge window close: I ff my -next trees to -rc1, merge stuff
  from -pending.

In this case, your patch will go from pending-rebases to DaveM, not
virtio-next.

Which I will do now...

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] virtio-net: put virtio net header inline with data
  2013-07-10  4:38       ` David Miller
@ 2013-07-15  1:43         ` Rusty Russell
  2013-07-16 19:33           ` David Miller
  0 siblings, 1 reply; 22+ messages in thread
From: Rusty Russell @ 2013-07-15  1:43 UTC (permalink / raw)
  To: David Miller; +Cc: amwang, mst, netdev, linux-kernel, virtualization, davej

From: Michael S. Tsirkin <mst@redhat.com>

For small packets we can simplify xmit processing
by linearizing buffers with the header:
most packets seem to have enough head room
we can use for this purpose.
Since existing hypervisors require that header
is the first s/g element, we need a feature bit
for this.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c        | 42 +++++++++++++++++++++++++++++++++--------
 include/uapi/linux/virtio_net.h |  4 +++-
 2 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3d2a90a..f216002 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -106,6 +106,9 @@ struct virtnet_info {
 	/* Has control virtqueue */
 	bool has_cvq;
 
+	/* Host can handle any s/g split between our header and packet data */
+	bool any_header_sg;
+
 	/* enable config space updates */
 	bool config_enable;
 
@@ -669,12 +672,28 @@ static void free_old_xmit_skbs(struct send_queue *sq)
 
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
-	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+	struct skb_vnet_hdr *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	unsigned num_sg;
+	unsigned hdr_len;
+	bool can_push;
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
+	if (vi->mergeable_rx_bufs)
+		hdr_len = sizeof hdr->mhdr;
+	else
+		hdr_len = sizeof hdr->hdr;
+
+	can_push = vi->any_header_sg &&
+		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
+		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
+	/* Even if we can, don't push here yet as this would skew
+	 * csum_start offset below. */
+	if (can_push)
+		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
+	else
+		hdr = skb_vnet_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
@@ -703,15 +722,18 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
 	}
 
-	hdr->mhdr.num_buffers = 0;
-
-	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
-	else
-		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
+		hdr->mhdr.num_buffers = 0;
 
-	num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	if (can_push) {
+		__skb_push(skb, hdr_len);
+		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
+		/* Pull header back to avoid skew in tx bytes calculations. */
+		__skb_pull(skb, hdr_len);
+	} else {
+		sg_set_buf(sq->sg, hdr, hdr_len);
+		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	}
 	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
 }
 
@@ -1552,6 +1574,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
+	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT))
+		vi->any_header_sg = true;
+
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
 		vi->has_cvq = true;
 
@@ -1727,6 +1752,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
 	VIRTIO_NET_F_CTRL_MAC_ADDR,
+	VIRTIO_F_ANY_LAYOUT,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index c520203..227d4ce 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -70,7 +70,9 @@ struct virtio_net_config {
 	__u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
-/* This is the first element of the scatter-gather list.  If you don't
+/* This header comes first in the scatter-gather list.
+ * If VIRTIO_F_ANY_LAYOUT is not negotiated, it must
+ * be the first element of the scatter-gather list.  If you don't
  * specify GSO or CSUM features, you can simply ignore the header. */
 struct virtio_net_hdr {
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	// Use csum_start, csum_offset

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-15  1:43         ` Rusty Russell
@ 2013-07-16 19:33           ` David Miller
  2013-07-17  0:08             ` Rusty Russell
  2013-07-17  5:00             ` Michael S. Tsirkin
  0 siblings, 2 replies; 22+ messages in thread
From: David Miller @ 2013-07-16 19:33 UTC (permalink / raw)
  To: rusty; +Cc: amwang, mst, netdev, linux-kernel, virtualization, davej

From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 15 Jul 2013 11:13:25 +0930

> From: Michael S. Tsirkin <mst@redhat.com>
> 
> For small packets we can simplify xmit processing
> by linearizing buffers with the header:
> most packets seem to have enough head room
> we can use for this purpose.
> Since existing hypervisors require that header
> is the first s/g element, we need a feature bit
> for this.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

I really think this has to wait until the next merge window, sorry.

Please resubmit this when I open net-next back up, thanks.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-16 19:33           ` David Miller
@ 2013-07-17  0:08             ` Rusty Russell
  2013-07-17  5:00             ` Michael S. Tsirkin
  1 sibling, 0 replies; 22+ messages in thread
From: Rusty Russell @ 2013-07-17  0:08 UTC (permalink / raw)
  To: David Miller; +Cc: amwang, mst, netdev, linux-kernel, virtualization, davej

David Miller <davem@davemloft.net> writes:
> From: Rusty Russell <rusty@rustcorp.com.au>
> Date: Mon, 15 Jul 2013 11:13:25 +0930
>
>> From: Michael S. Tsirkin <mst@redhat.com>
>> 
>> For small packets we can simplify xmit processing
>> by linearizing buffers with the header:
>> most packets seem to have enough head room
>> we can use for this purpose.
>> Since existing hypervisors require that header
>> is the first s/g element, we need a feature bit
>> for this.
>> 
>> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>
> I really think this has to wait until the next merge window, sorry.
>
> Please resubmit this when I open net-next back up, thanks.

Oh, assumed it was already open.    Will re-submit then.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-16 19:33           ` David Miller
  2013-07-17  0:08             ` Rusty Russell
@ 2013-07-17  5:00             ` Michael S. Tsirkin
  2013-07-17  5:05               ` David Miller
  1 sibling, 1 reply; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-07-17  5:00 UTC (permalink / raw)
  To: David Miller; +Cc: amwang, netdev, linux-kernel, virtualization, davej

On Tue, Jul 16, 2013 at 12:33:26PM -0700, David Miller wrote:
> From: Rusty Russell <rusty@rustcorp.com.au>
> Date: Mon, 15 Jul 2013 11:13:25 +0930
> 
> > From: Michael S. Tsirkin <mst@redhat.com>
> > 
> > For small packets we can simplify xmit processing
> > by linearizing buffers with the header:
> > most packets seem to have enough head room
> > we can use for this purpose.
> > Since existing hypervisors require that header
> > is the first s/g element, we need a feature bit
> > for this.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> 
> I really think this has to wait until the next merge window, sorry.
> 
> Please resubmit this when I open net-next back up, thanks.

I assumed since -rc1 is out net-next is already open?

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-17  5:00             ` Michael S. Tsirkin
@ 2013-07-17  5:05               ` David Miller
  2013-07-17  6:02                 ` Rusty Russell
  0 siblings, 1 reply; 22+ messages in thread
From: David Miller @ 2013-07-17  5:05 UTC (permalink / raw)
  To: mst; +Cc: amwang, netdev, linux-kernel, virtualization, davej

From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 17 Jul 2013 08:00:32 +0300

> On Tue, Jul 16, 2013 at 12:33:26PM -0700, David Miller wrote:
>> From: Rusty Russell <rusty@rustcorp.com.au>
>> Date: Mon, 15 Jul 2013 11:13:25 +0930
>> 
>> > From: Michael S. Tsirkin <mst@redhat.com>
>> > 
>> > For small packets we can simplify xmit processing
>> > by linearizing buffers with the header:
>> > most packets seem to have enough head room
>> > we can use for this purpose.
>> > Since existing hypervisors require that header
>> > is the first s/g element, we need a feature bit
>> > for this.
>> > 
>> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>> 
>> I really think this has to wait until the next merge window, sorry.
>> 
>> Please resubmit this when I open net-next back up, thanks.
> 
> I assumed since -rc1 is out net-next is already open?

-rc1 being released never makes net-next open.  Instead, I explicitly
open it up at some point in time after -rc1 when I feel that things
have settled down enough.

And when that happens, I announce so here.

So you have to follow my announcements here on netdev to know
when net-next is actually open.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-17  5:05               ` David Miller
@ 2013-07-17  6:02                 ` Rusty Russell
  2013-07-24 19:44                   ` Michael S. Tsirkin
  0 siblings, 1 reply; 22+ messages in thread
From: Rusty Russell @ 2013-07-17  6:02 UTC (permalink / raw)
  To: David Miller, mst; +Cc: amwang, netdev, linux-kernel, virtualization, davej

David Miller <davem@davemloft.net> writes:
> From: "Michael S. Tsirkin" <mst@redhat.com>
> Date: Wed, 17 Jul 2013 08:00:32 +0300
>
>> On Tue, Jul 16, 2013 at 12:33:26PM -0700, David Miller wrote:
>>> From: Rusty Russell <rusty@rustcorp.com.au>
>>> Date: Mon, 15 Jul 2013 11:13:25 +0930
>>> 
>>> > From: Michael S. Tsirkin <mst@redhat.com>
>>> > 
>>> > For small packets we can simplify xmit processing
>>> > by linearizing buffers with the header:
>>> > most packets seem to have enough head room
>>> > we can use for this purpose.
>>> > Since existing hypervisors require that header
>>> > is the first s/g element, we need a feature bit
>>> > for this.
>>> > 
>>> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>>> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>>> 
>>> I really think this has to wait until the next merge window, sorry.
>>> 
>>> Please resubmit this when I open net-next back up, thanks.
>> 
>> I assumed since -rc1 is out net-next is already open?
>
> -rc1 being released never makes net-next open.  Instead, I explicitly
> open it up at some point in time after -rc1 when I feel that things
> have settled down enough.
>
> And when that happens, I announce so here.
>
> So you have to follow my announcements here on netdev to know
> when net-next is actually open.

Thanks for letting me know.  I'm sure that works well for others, but I
can't follow the mailing lists of every maintainer I deal with.

Fortunately, you're the paragon for acking applied patches, so if I hit
this failure mode again I will know.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-07-17  6:02                 ` Rusty Russell
@ 2013-07-24 19:44                   ` Michael S. Tsirkin
  0 siblings, 0 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-07-24 19:44 UTC (permalink / raw)
  To: Rusty Russell
  Cc: amwang, netdev, linux-kernel, virtualization, davej, David Miller

On Wed, Jul 17, 2013 at 03:32:36PM +0930, Rusty Russell wrote:
> David Miller <davem@davemloft.net> writes:
> > From: "Michael S. Tsirkin" <mst@redhat.com>
> > Date: Wed, 17 Jul 2013 08:00:32 +0300
> >
> >> On Tue, Jul 16, 2013 at 12:33:26PM -0700, David Miller wrote:
> >>> From: Rusty Russell <rusty@rustcorp.com.au>
> >>> Date: Mon, 15 Jul 2013 11:13:25 +0930
> >>> 
> >>> > From: Michael S. Tsirkin <mst@redhat.com>
> >>> > 
> >>> > For small packets we can simplify xmit processing
> >>> > by linearizing buffers with the header:
> >>> > most packets seem to have enough head room
> >>> > we can use for this purpose.
> >>> > Since existing hypervisors require that header
> >>> > is the first s/g element, we need a feature bit
> >>> > for this.
> >>> > 
> >>> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >>> > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> >>> 
> >>> I really think this has to wait until the next merge window, sorry.
> >>> 
> >>> Please resubmit this when I open net-next back up, thanks.
> >> 
> >> I assumed since -rc1 is out net-next is already open?
> >
> > -rc1 being released never makes net-next open.  Instead, I explicitly
> > open it up at some point in time after -rc1 when I feel that things
> > have settled down enough.
> >
> > And when that happens, I announce so here.
> >
> > So you have to follow my announcements here on netdev to know
> > when net-next is actually open.
> 
> Thanks for letting me know.  I'm sure that works well for others, but I
> can't follow the mailing lists of every maintainer I deal with.
> 
> Fortunately, you're the paragon for acking applied patches, so if I hit
> this failure mode again I will know.
> 
> Cheers,
> Rusty.

In case you missed this, net-next opened Fri, 19 Jul 2013.

-- 
MST

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-06-07  2:12 ` Rusty Russell
@ 2013-06-09  7:11   ` Michael S. Tsirkin
  0 siblings, 0 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-06-09  7:11 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Cong Wang, qemu-devel, netdev, linux-kernel, virtualization,
	Dave Jones, David S. Miller

On Fri, Jun 07, 2013 at 11:42:43AM +0930, Rusty Russell wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
> > For small packets we can simplify xmit processing by linearizing buffers
> > with the header: most packets seem to have enough head room we can use
> > for this purpose.
> >
> > Since some older hypervisors (e.g. qemu before version 1.5)
> > required that header is the first s/g element,
> > we need a feature bit for this.
> 
> OK, we know this is horrible.  But I will sleep better knowing that we
> this feature need never make it into a final 1.0 spec, since it can be
> assumed at that point...

Nod. Though if we want to require this for all devices,
virtio-blk scsi command passthrough will need to change -
I sent a spec patch a while ago
	virtio-spec: add field for scsi command size
any comments on it?

> >  	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
> > +	if (vi->mergeable_rx_bufs)
> > +		hdr_len = sizeof hdr->mhdr;
> > +	else
> > +		hdr_len = sizeof hdr->hdr;
> > +
> > +	can_push = vi->any_header_sg &&
> > +		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
> > +		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
> 
> Idle thought: how often does this fail?

I think it's mostly doesn't fail in my testing.

It's probably a good idea to add a counter here, then
if it starts triggering we can optimize.

I think things like skb_header_cloned depend on guest config
really, e.g. tcpdump running on the interface in guest can cause this.

>  Would it suck if we copied
> headers which didn't let us prepend data?

I think it will - copies are generally best avoided,
and header is easily 1K of data.

>  Or could we bump
> dev->hard_header_len appropriately?

Needs some thought, though from experience it's a pain.


> Thanks,
> Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-06-07  2:52 ` Jason Wang
@ 2013-06-09  6:52   ` Michael S. Tsirkin
  0 siblings, 0 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-06-09  6:52 UTC (permalink / raw)
  To: Jason Wang
  Cc: Cong Wang, qemu-devel, netdev, linux-kernel, virtualization,
	Dave Jones, David S. Miller

On Fri, Jun 07, 2013 at 10:52:01AM +0800, Jason Wang wrote:
> On 06/06/2013 05:55 PM, Michael S. Tsirkin wrote:
> > For small packets we can simplify xmit processing by linearizing buffers
> > with the header: most packets seem to have enough head room we can use
> > for this purpose.
> >
> > Since some older hypervisors (e.g. qemu before version 1.5)
> > required that header is the first s/g element,
> > we need a feature bit for this.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> 
> Hi Michael:
> 
> The idea looks good, but there are some questions:
> 
> - What if we just use direct descriptors with sgs <=2 and double the
> ring size?

It's not easy to increase the ring size, we have trouble allocating
large rings on some systems as it is.
So far, any attempts to use direct for sg > 1 seem to hurt some
workloads.

> - I believe we may add more things into vnet header in the future, so
> this trick may not work because of limited header room.
> 
> Thanks

Didn't happen in a long while.
We can come up with more tricks when we run against this limit.

> > This is a repost of my old patch, rebased to latest kernel.
> >
> > Before:
> > Thu Jun  6 05:24:59 EDT 2013
> > TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 11.0.0.4 (11.0.0.4) port 0 AF_INET : demo
> > Local /Remote
> > Socket Size   Request  Resp.   Elapsed  Trans.
> > Send   Recv   Size     Size    Time     Rate         
> > bytes  Bytes  bytes    bytes   secs.    per sec   
> >
> > 16384  87380  1        1       10.00    12931.13   
> >
> >
> > After:
> > TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> > 11.0.0.4 (11.0.0.4) port 0 AF_INET : demo
> > Local /Remote
> > Socket Size   Request  Resp.   Elapsed  Trans.
> > Send   Recv   Size     Size    Time     Rate         
> > bytes  Bytes  bytes    bytes   secs.    per sec   
> >
> > 16384  87380  1        1       10.00    14151.12
> >
> >
> >  drivers/net/virtio_net.c        | 42 +++++++++++++++++++++++++++++++++--------
> >  include/uapi/linux/virtio_net.h |  6 +++++-
> >  2 files changed, 39 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index c9e0038..d35a097 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -106,6 +106,9 @@ struct virtnet_info {
> >  	/* Has control virtqueue */
> >  	bool has_cvq;
> >  
> > +	/* Host can handle any s/g split between our header and packet data */
> > +	bool any_header_sg;
> > +
> >  	/* enable config space updates */
> >  	bool config_enable;
> >  
> > @@ -668,12 +671,28 @@ static void free_old_xmit_skbs(struct send_queue *sq)
> >  
> >  static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
> >  {
> > -	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
> > +	struct skb_vnet_hdr *hdr;
> >  	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
> >  	struct virtnet_info *vi = sq->vq->vdev->priv;
> >  	unsigned num_sg;
> > +	unsigned hdr_len;
> > +	bool can_push;
> >  
> >  	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
> > +	if (vi->mergeable_rx_bufs)
> > +		hdr_len = sizeof hdr->mhdr;
> > +	else
> > +		hdr_len = sizeof hdr->hdr;
> > +
> > +	can_push = vi->any_header_sg &&
> > +		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
> > +		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
> > +	/* Even if we can, don't push here yet as this would skew
> > +	 * csum_start offset below. */
> > +	if (can_push)
> > +		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
> > +	else
> > +		hdr = skb_vnet_hdr(skb);
> >  
> >  	if (skb->ip_summed == CHECKSUM_PARTIAL) {
> >  		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
> > @@ -702,15 +721,18 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
> >  		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
> >  	}
> >  
> > -	hdr->mhdr.num_buffers = 0;
> > -
> > -	/* Encode metadata header at front. */
> >  	if (vi->mergeable_rx_bufs)
> > -		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
> > -	else
> > -		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
> > +		hdr->mhdr.num_buffers = 0;
> >  
> > -	num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
> > +	if (can_push) {
> > +		__skb_push(skb, hdr_len);
> > +		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
> > +		/* Pull header back to avoid skew in tx bytes calculations. */
> > +		__skb_pull(skb, hdr_len);
> > +	} else {
> > +		sg_set_buf(sq->sg, hdr, hdr_len);
> > +		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
> > +	}
> >  	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
> >  }
> >  
> > @@ -1554,6 +1576,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> >  		vi->mergeable_rx_bufs = true;
> >  
> > +	if (virtio_has_feature(vdev, VIRTIO_NET_F_ANY_HEADER_SG))
> > +		vi->any_header_sg = true;
> > +
> >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
> >  		vi->has_cvq = true;
> >  
> > @@ -1729,6 +1754,7 @@ static unsigned int features[] = {
> >  	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
> >  	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
> >  	VIRTIO_NET_F_CTRL_MAC_ADDR,
> > +	VIRTIO_NET_F_ANY_HEADER_SG,
> >  };
> >  
> >  static struct virtio_driver virtio_net_driver = {
> > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> > index c520203..9c98b7d 100644
> > --- a/include/uapi/linux/virtio_net.h
> > +++ b/include/uapi/linux/virtio_net.h
> > @@ -55,6 +55,8 @@
> >  					 * Steering */
> >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
> >  
> > +#define VIRTIO_NET_F_ANY_HEADER_SG 25	/* Host can handle any header s/g */
> > +
> >  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
> >  #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
> >  
> > @@ -70,7 +72,9 @@ struct virtio_net_config {
> >  	__u16 max_virtqueue_pairs;
> >  } __attribute__((packed));
> >  
> > -/* This is the first element of the scatter-gather list.  If you don't
> > +/* This header comes first in the scatter-gather list.
> > + * If VIRTIO_NET_F_ANY_HEADER_SG is not negotiated, it must
> > + * be the first element of the scatter-gather list.  If you don't
> >   * specify GSO or CSUM features, you can simply ignore the header. */
> >  struct virtio_net_hdr {
> >  #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	// Use csum_start, csum_offset

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-06-06  9:55 Michael S. Tsirkin
  2013-06-06 19:59 ` [Qemu-devel] " Jesse Larrew
  2013-06-07  2:12 ` Rusty Russell
@ 2013-06-07  2:52 ` Jason Wang
  2013-06-09  6:52   ` Michael S. Tsirkin
  2 siblings, 1 reply; 22+ messages in thread
From: Jason Wang @ 2013-06-07  2:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Cong Wang, qemu-devel, netdev, linux-kernel, virtualization,
	Dave Jones, David S. Miller

On 06/06/2013 05:55 PM, Michael S. Tsirkin wrote:
> For small packets we can simplify xmit processing by linearizing buffers
> with the header: most packets seem to have enough head room we can use
> for this purpose.
>
> Since some older hypervisors (e.g. qemu before version 1.5)
> required that header is the first s/g element,
> we need a feature bit for this.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---

Hi Michael:

The idea looks good, but there are some questions:

- What if we just use direct descriptors with sgs <=2 and double the
ring size?
- I believe we may add more things into vnet header in the future, so
this trick may not work because of limited header room.

Thanks
> This is a repost of my old patch, rebased to latest kernel.
>
> Before:
> Thu Jun  6 05:24:59 EDT 2013
> TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 11.0.0.4 (11.0.0.4) port 0 AF_INET : demo
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size     Size    Time     Rate         
> bytes  Bytes  bytes    bytes   secs.    per sec   
>
> 16384  87380  1        1       10.00    12931.13   
>
>
> After:
> TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
> 11.0.0.4 (11.0.0.4) port 0 AF_INET : demo
> Local /Remote
> Socket Size   Request  Resp.   Elapsed  Trans.
> Send   Recv   Size     Size    Time     Rate         
> bytes  Bytes  bytes    bytes   secs.    per sec   
>
> 16384  87380  1        1       10.00    14151.12
>
>
>  drivers/net/virtio_net.c        | 42 +++++++++++++++++++++++++++++++++--------
>  include/uapi/linux/virtio_net.h |  6 +++++-
>  2 files changed, 39 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index c9e0038..d35a097 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -106,6 +106,9 @@ struct virtnet_info {
>  	/* Has control virtqueue */
>  	bool has_cvq;
>  
> +	/* Host can handle any s/g split between our header and packet data */
> +	bool any_header_sg;
> +
>  	/* enable config space updates */
>  	bool config_enable;
>  
> @@ -668,12 +671,28 @@ static void free_old_xmit_skbs(struct send_queue *sq)
>  
>  static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
>  {
> -	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
> +	struct skb_vnet_hdr *hdr;
>  	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
>  	struct virtnet_info *vi = sq->vq->vdev->priv;
>  	unsigned num_sg;
> +	unsigned hdr_len;
> +	bool can_push;
>  
>  	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
> +	if (vi->mergeable_rx_bufs)
> +		hdr_len = sizeof hdr->mhdr;
> +	else
> +		hdr_len = sizeof hdr->hdr;
> +
> +	can_push = vi->any_header_sg &&
> +		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
> +		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
> +	/* Even if we can, don't push here yet as this would skew
> +	 * csum_start offset below. */
> +	if (can_push)
> +		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
> +	else
> +		hdr = skb_vnet_hdr(skb);
>  
>  	if (skb->ip_summed == CHECKSUM_PARTIAL) {
>  		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
> @@ -702,15 +721,18 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
>  		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
>  	}
>  
> -	hdr->mhdr.num_buffers = 0;
> -
> -	/* Encode metadata header at front. */
>  	if (vi->mergeable_rx_bufs)
> -		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
> -	else
> -		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
> +		hdr->mhdr.num_buffers = 0;
>  
> -	num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
> +	if (can_push) {
> +		__skb_push(skb, hdr_len);
> +		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
> +		/* Pull header back to avoid skew in tx bytes calculations. */
> +		__skb_pull(skb, hdr_len);
> +	} else {
> +		sg_set_buf(sq->sg, hdr, hdr_len);
> +		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
> +	}
>  	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
>  }
>  
> @@ -1554,6 +1576,9 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
>  		vi->mergeable_rx_bufs = true;
>  
> +	if (virtio_has_feature(vdev, VIRTIO_NET_F_ANY_HEADER_SG))
> +		vi->any_header_sg = true;
> +
>  	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
>  		vi->has_cvq = true;
>  
> @@ -1729,6 +1754,7 @@ static unsigned int features[] = {
>  	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
>  	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
>  	VIRTIO_NET_F_CTRL_MAC_ADDR,
> +	VIRTIO_NET_F_ANY_HEADER_SG,
>  };
>  
>  static struct virtio_driver virtio_net_driver = {
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index c520203..9c98b7d 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -55,6 +55,8 @@
>  					 * Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
>  
> +#define VIRTIO_NET_F_ANY_HEADER_SG 25	/* Host can handle any header s/g */
> +
>  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
>  #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
>  
> @@ -70,7 +72,9 @@ struct virtio_net_config {
>  	__u16 max_virtqueue_pairs;
>  } __attribute__((packed));
>  
> -/* This is the first element of the scatter-gather list.  If you don't
> +/* This header comes first in the scatter-gather list.
> + * If VIRTIO_NET_F_ANY_HEADER_SG is not negotiated, it must
> + * be the first element of the scatter-gather list.  If you don't
>   * specify GSO or CSUM features, you can simply ignore the header. */
>  struct virtio_net_hdr {
>  #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	// Use csum_start, csum_offset

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-06-06  9:55 Michael S. Tsirkin
  2013-06-06 19:59 ` [Qemu-devel] " Jesse Larrew
@ 2013-06-07  2:12 ` Rusty Russell
  2013-06-09  7:11   ` Michael S. Tsirkin
  2013-06-07  2:52 ` Jason Wang
  2 siblings, 1 reply; 22+ messages in thread
From: Rusty Russell @ 2013-06-07  2:12 UTC (permalink / raw)
  To: Michael S. Tsirkin, David S. Miller, Jason Wang, Cong Wang,
	Amos Kong, Dave Jones, virtualization, netdev, linux-kernel
  Cc: qemu-devel

"Michael S. Tsirkin" <mst@redhat.com> writes:
> For small packets we can simplify xmit processing by linearizing buffers
> with the header: most packets seem to have enough head room we can use
> for this purpose.
>
> Since some older hypervisors (e.g. qemu before version 1.5)
> required that header is the first s/g element,
> we need a feature bit for this.

OK, we know this is horrible.  But I will sleep better knowing that we
this feature need never make it into a final 1.0 spec, since it can be
assumed at that point...

>  	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
> +	if (vi->mergeable_rx_bufs)
> +		hdr_len = sizeof hdr->mhdr;
> +	else
> +		hdr_len = sizeof hdr->hdr;
> +
> +	can_push = vi->any_header_sg &&
> +		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
> +		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;

Idle thought: how often does this fail?  Would it suck if we copied
headers which didn't let us prepend data?  Or could we bump
dev->hard_header_len appropriately?

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] virtio-net: put virtio net header inline with data
  2013-06-06 20:09   ` Dave Jones
@ 2013-06-06 20:18     ` Jesse Larrew
  0 siblings, 0 replies; 22+ messages in thread
From: Jesse Larrew @ 2013-06-06 20:18 UTC (permalink / raw)
  To: Dave Jones, Michael S. Tsirkin, Rusty Russell, David S. Miller,
	Jason Wang, Cong Wang, Amos Kong, virtualization, netdev,
	linux-kernel, qemu-devel

On 06/06/2013 03:09 PM, Dave Jones wrote:
> On Thu, Jun 06, 2013 at 02:59:44PM -0500, Jesse Larrew wrote:
> 
>  > >  	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
>  > > +	if (vi->mergeable_rx_bufs)
>  > > +		hdr_len = sizeof hdr->mhdr;
>  > > +	else
>  > > +		hdr_len = sizeof hdr->hdr;
>  > 
>  > All conditionals need braces.
> 
> Documentation/CodingStyle disagrees:
> 
>  "Do not unnecessarily use braces where a single statement will do."
> 
> 	Dave 
> 

Ah, yes. This is kernel code. My mistake. :)

Jesse Larrew
Software Engineer, KVM Team
IBM Linux Technology Center
Phone: (512) 973-2052 (T/L: 363-2052)
jlarrew@linux.vnet.ibm.com

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] virtio-net: put virtio net header inline with data
@ 2013-06-06  9:55 Michael S. Tsirkin
  2013-06-06 19:59 ` [Qemu-devel] " Jesse Larrew
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Michael S. Tsirkin @ 2013-06-06  9:55 UTC (permalink / raw)
  To: Rusty Russell, David S. Miller, Jason Wang, Cong Wang, Amos Kong,
	Dave Jones, virtualization, netdev, linux-kernel
  Cc: qemu-devel

For small packets we can simplify xmit processing by linearizing buffers
with the header: most packets seem to have enough head room we can use
for this purpose.

Since some older hypervisors (e.g. qemu before version 1.5)
required that header is the first s/g element,
we need a feature bit for this.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

This is a repost of my old patch, rebased to latest kernel.

Before:
Thu Jun  6 05:24:59 EDT 2013
TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
11.0.0.4 (11.0.0.4) port 0 AF_INET : demo
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate         
bytes  Bytes  bytes    bytes   secs.    per sec   

16384  87380  1        1       10.00    12931.13   


After:
TCP REQUEST/RESPONSE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
11.0.0.4 (11.0.0.4) port 0 AF_INET : demo
Local /Remote
Socket Size   Request  Resp.   Elapsed  Trans.
Send   Recv   Size     Size    Time     Rate         
bytes  Bytes  bytes    bytes   secs.    per sec   

16384  87380  1        1       10.00    14151.12


 drivers/net/virtio_net.c        | 42 +++++++++++++++++++++++++++++++++--------
 include/uapi/linux/virtio_net.h |  6 +++++-
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c9e0038..d35a097 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -106,6 +106,9 @@ struct virtnet_info {
 	/* Has control virtqueue */
 	bool has_cvq;
 
+	/* Host can handle any s/g split between our header and packet data */
+	bool any_header_sg;
+
 	/* enable config space updates */
 	bool config_enable;
 
@@ -668,12 +671,28 @@ static void free_old_xmit_skbs(struct send_queue *sq)
 
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
-	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
+	struct skb_vnet_hdr *hdr;
 	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	unsigned num_sg;
+	unsigned hdr_len;
+	bool can_push;
 
 	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
+	if (vi->mergeable_rx_bufs)
+		hdr_len = sizeof hdr->mhdr;
+	else
+		hdr_len = sizeof hdr->hdr;
+
+	can_push = vi->any_header_sg &&
+		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
+		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
+	/* Even if we can, don't push here yet as this would skew
+	 * csum_start offset below. */
+	if (can_push)
+		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
+	else
+		hdr = skb_vnet_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
@@ -702,15 +721,18 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
 	}
 
-	hdr->mhdr.num_buffers = 0;
-
-	/* Encode metadata header at front. */
 	if (vi->mergeable_rx_bufs)
-		sg_set_buf(sq->sg, &hdr->mhdr, sizeof hdr->mhdr);
-	else
-		sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
+		hdr->mhdr.num_buffers = 0;
 
-	num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	if (can_push) {
+		__skb_push(skb, hdr_len);
+		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
+		/* Pull header back to avoid skew in tx bytes calculations. */
+		__skb_pull(skb, hdr_len);
+	} else {
+		sg_set_buf(sq->sg, hdr, hdr_len);
+		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+	}
 	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
 }
 
@@ -1554,6 +1576,9 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_ANY_HEADER_SG))
+		vi->any_header_sg = true;
+
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
 		vi->has_cvq = true;
 
@@ -1729,6 +1754,7 @@ static unsigned int features[] = {
 	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
 	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
 	VIRTIO_NET_F_CTRL_MAC_ADDR,
+	VIRTIO_NET_F_ANY_HEADER_SG,
 };
 
 static struct virtio_driver virtio_net_driver = {
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index c520203..9c98b7d 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -55,6 +55,8 @@
 					 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
 
+#define VIRTIO_NET_F_ANY_HEADER_SG 25	/* Host can handle any header s/g */
+
 #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
 #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
 
@@ -70,7 +72,9 @@ struct virtio_net_config {
 	__u16 max_virtqueue_pairs;
 } __attribute__((packed));
 
-/* This is the first element of the scatter-gather list.  If you don't
+/* This header comes first in the scatter-gather list.
+ * If VIRTIO_NET_F_ANY_HEADER_SG is not negotiated, it must
+ * be the first element of the scatter-gather list.  If you don't
  * specify GSO or CSUM features, you can simply ignore the header. */
 struct virtio_net_hdr {
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM	1	// Use csum_start, csum_offset
-- 
MST

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2013-07-24 19:44 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-08 10:12 [PATCH] virtio-net: put virtio net header inline with data Michael S. Tsirkin
2013-07-09  2:16 ` Rusty Russell
2013-07-09  5:18   ` Michael S. Tsirkin
2013-07-09  8:08     ` Rusty Russell
2013-07-10  4:38       ` David Miller
2013-07-15  1:43         ` Rusty Russell
2013-07-16 19:33           ` David Miller
2013-07-17  0:08             ` Rusty Russell
2013-07-17  5:00             ` Michael S. Tsirkin
2013-07-17  5:05               ` David Miller
2013-07-17  6:02                 ` Rusty Russell
2013-07-24 19:44                   ` Michael S. Tsirkin
2013-07-10  6:14       ` Michael S. Tsirkin
2013-07-15  1:40         ` Rusty Russell
2013-07-11 13:00   ` Michael S. Tsirkin
2013-07-12  5:57     ` Rusty Russell
  -- strict thread matches above, loose matches on Subject: below --
2013-06-06  9:55 Michael S. Tsirkin
2013-06-06 19:59 ` [Qemu-devel] " Jesse Larrew
2013-06-06 20:09   ` Dave Jones
2013-06-06 20:18     ` Jesse Larrew
2013-06-07  2:12 ` Rusty Russell
2013-06-09  7:11   ` Michael S. Tsirkin
2013-06-07  2:52 ` Jason Wang
2013-06-09  6:52   ` Michael S. Tsirkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).