All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: "Michael S. Tsirkin" <mst@redhat.com>,
	Thomas Lendacky <tahm@linux.vnet.ibm.com>
Cc: Sasha Levin <levinsasha928@gmail.com>,
	virtualization@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, avi@redhat.com,
	kvm@vger.kernel.org, netdev@vger.kernel.org
Subject: Re: [PATCH 0/3] virtio-net: inline header support
Date: Wed, 03 Oct 2012 16:14:17 +0930	[thread overview]
Message-ID: <87vces2gxq.fsf@rustcorp.com.au> (raw)
In-Reply-To: <cover.1348824232.git.mst@redhat.com>

"Michael S. Tsirkin" <mst@redhat.com> writes:

> Thinking about Sasha's patches, we can reduce ring usage
> for virtio net small packets dramatically if we put
> virtio net header inline with the data.
> This can be done for free in case guest net stack allocated
> extra head room for the packet, and I don't see
> why would this have any downsides.

I've been wanting to do this for the longest time... but...

> Even though with my recent patches qemu
> no longer requires header to be the first s/g element,
> we need a new feature bit to detect this.
> A trivial qemu patch will be sent separately.

There's a reason I haven't done this.  I really, really dislike "my
implemention isn't broken" feature bits.  We could have an infinite
number of them, for each bug in each device.

So my plan was to tie this assumption to the new PCI layout.  And have a
stress-testing patch like the one below in the kernel (see my virtio-wip
branch for stuff like this).  Turn it on at boot with
"virtio_ring.torture" on the kernel commandline.

BTW, I've fixed lguest, but my kvm here (Ubuntu precise, kvm-qemu 1.0)
is too old.  Building the latest git now...

Cheers,
Rusty.

Subject: virtio: CONFIG_VIRTIO_DEVICE_TORTURE

Virtio devices are not supposed to depend on the framing of the scatter-gather
lists, but various implementations did.  Safeguard this in future by adding
an option to deliberately create perverse descriptors.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index 8d5bddb..930a4ea 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -5,6 +5,15 @@ config VIRTIO
 	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
 	  CONFIG_RPMSG or CONFIG_S390_GUEST.
 
+config VIRTIO_DEVICE_TORTURE
+	bool "Virtio device torture tests"
+	depends on VIRTIO && DEBUG_KERNEL
+	help
+	  This makes the virtio_ring implementation creatively change
+	  the format of requests to make sure that devices are
+	  properly implemented.  This will make your virtual machine
+	  slow *and* unreliable!  Say N.
+
 menu "Virtio drivers"
 
 config VIRTIO_PCI
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e639584..8893753 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -124,6 +124,149 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+#ifdef CONFIG_VIRTIO_DEVICE_TORTURE
+static bool torture;
+module_param(torture, bool, 0644);
+
+struct torture {
+	unsigned int orig_out, orig_in;
+	void *orig_data;
+	struct scatterlist sg[4];
+	struct scatterlist orig_sg[];
+};
+
+static size_t tot_len(struct scatterlist sg[], unsigned num)
+{
+	size_t len, i;
+
+	for (len = 0, i = 0; i < num; i++)
+		len += sg[i].length;
+
+	return len;
+}
+
+static void copy_sg_data(const struct scatterlist *dst, unsigned dnum,
+			 const struct scatterlist *src, unsigned snum)
+{
+	unsigned len;
+	struct scatterlist s, d;
+
+	s = *src;
+	d = *dst;
+
+	while (snum && dnum) {
+		len = min(s.length, d.length);
+		memcpy(sg_virt(&d), sg_virt(&s), len);
+		d.offset += len;
+		d.length -= len;
+		s.offset += len;
+		s.length -= len;
+		if (!s.length) {
+			BUG_ON(snum == 0);
+			src++;
+			snum--;
+			s = *src;
+		}
+		if (!d.length) {
+			BUG_ON(dnum == 0);
+			dst++;
+			dnum--;
+			d = *dst;
+		}
+	}
+}
+
+static bool torture_replace(struct scatterlist **sg,
+			     unsigned int *out,
+			     unsigned int *in,
+			     void **data,
+			     gfp_t gfp)
+{
+	static size_t seed;
+	struct torture *t;
+	size_t outlen, inlen, ourseed, len1;
+	void *buf;
+
+	if (!torture)
+		return true;
+
+	outlen = tot_len(*sg, *out);
+	inlen = tot_len(*sg + *out, *in);
+
+	/* This will break horribly on large block requests. */
+	t = kmalloc(sizeof(*t) + (*out + *in) * sizeof(t->orig_sg[1])
+		    + outlen + 1 + inlen + 1, gfp);
+	if (!t)
+		return false;
+
+	sg_init_table(t->sg, 4);
+	buf = &t->orig_sg[*out + *in];
+
+	memcpy(t->orig_sg, *sg, sizeof(**sg) * (*out + *in));
+	t->orig_out = *out;
+	t->orig_in = *in;
+	t->orig_data = *data;
+	*data = t;
+
+	ourseed = ACCESS_ONCE(seed);
+	seed++;
+
+	*sg = t->sg;
+	if (outlen) {
+		/* Split outbuf into two parts, one byte apart. */
+		*out = 2;
+		len1 = ourseed % (outlen + 1);
+		sg_set_buf(&t->sg[0], buf, len1);
+		buf += len1 + 1;
+		sg_set_buf(&t->sg[1], buf, outlen - len1);
+		buf += outlen - len1;
+		copy_sg_data(t->sg, *out, t->orig_sg, t->orig_out);
+	}
+
+	if (inlen) {
+		/* Split inbuf into two parts, one byte apart. */
+		*in = 2;
+		len1 = ourseed % (inlen + 1);
+		sg_set_buf(&t->sg[*out], buf, len1);
+		buf += len1 + 1;
+		sg_set_buf(&t->sg[*out + 1], buf, inlen - len1);
+		buf += inlen - len1;
+	}
+	return true;
+}
+
+static void *torture_done(struct torture *t)
+{
+	void *data;
+
+	if (!torture)
+		return t;
+
+	if (t->orig_in)
+		copy_sg_data(t->orig_sg + t->orig_out, t->orig_in,
+			     t->sg + (t->orig_out ? 2 : 0), 2);
+
+	data = t->orig_data;
+	kfree(t);
+	return data;
+}
+
+#else
+static bool torture_replace(struct scatterlist **sg,
+			     unsigned int *out,
+			     unsigned int *in,
+			     void **data,
+			     gfp_t gfp)
+{
+	return true;
+}
+
+static void *torture_done(void *data)
+{
+	return data;
+}
+#endif /* CONFIG_VIRTIO_DEVICE_TORTURE */
+
 /* Set up an indirect table of descriptors and add it to the queue. */
 static int vring_add_indirect(struct vring_virtqueue *vq,
 			      struct scatterlist sg[],
@@ -213,6 +356,9 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 
 	BUG_ON(data == NULL);
 
+	if (!torture_replace(&sg, &out, &in, &data, gfp))
+		return -ENOMEM;
+
 #ifdef DEBUG
 	{
 		ktime_t now = ktime_get();
@@ -246,6 +392,7 @@ int virtqueue_add_buf(struct virtqueue *_vq,
 		if (out)
 			vq->notify(&vq->vq);
 		END_USE(vq);
+		torture_done(data);
 		return -ENOSPC;
 	}
 
@@ -476,7 +623,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 #endif
 
 	END_USE(vq);
-	return ret;
+	return torture_done(ret);
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 

  parent reply	other threads:[~2012-10-03  7:02 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-28  9:26 [PATCH 0/3] virtio-net: inline header support Michael S. Tsirkin
2012-09-28  9:26 ` Michael S. Tsirkin
2012-09-28  9:26 ` [PATCH 1/3] virtio: add API to query ring capacity Michael S. Tsirkin
2012-09-28  9:26   ` Michael S. Tsirkin
2012-09-28  9:26 ` [PATCH 2/3] virtio-net: correct capacity math on ring full Michael S. Tsirkin
2012-09-28  9:26   ` Michael S. Tsirkin
2012-10-04  0:24   ` Rusty Russell
2012-09-28  9:26 ` [PATCH 3/3] virtio-net: put virtio net header inline with data Michael S. Tsirkin
2012-09-28  9:26   ` Michael S. Tsirkin
2012-10-03  6:44 ` Rusty Russell [this message]
2012-10-03  7:10   ` [PATCH 0/3] virtio-net: inline header support Rusty Russell
2012-10-04  1:24   ` Anthony Liguori
2012-10-04  3:34     ` Rusty Russell
2012-10-04  4:29       ` Anthony Liguori
2012-10-04  7:44         ` Rusty Russell
2012-10-05  7:47           ` Paolo Bonzini
2012-10-05  7:47             ` Paolo Bonzini
2012-10-08 21:31       ` Michael S. Tsirkin
2012-10-08 21:31         ` Michael S. Tsirkin
2012-10-04  1:35   ` Anthony Liguori
2012-10-04  5:17     ` Rusty Russell
2012-10-08 20:41   ` Michael S. Tsirkin
2012-10-08 20:41     ` Michael S. Tsirkin
2012-10-03  6:44 ` Rusty Russell
2012-10-03 10:53   ` Paolo Bonzini
2012-10-03 10:53     ` Paolo Bonzini
2012-10-04  0:11     ` Rusty Russell
2012-10-04  7:09       ` Paolo Bonzini
2012-10-04 12:51         ` Rusty Russell
2012-10-04 13:23           ` Paolo Bonzini
2012-10-05  5:43             ` Rusty Russell
2012-10-06 12:54               ` Paolo Bonzini
2012-10-06 12:54                 ` Paolo Bonzini
2012-10-09  4:59                 ` Rusty Russell
2012-10-09  4:59                   ` Rusty Russell
2012-10-09  7:27                   ` Paolo Bonzini
2012-10-09  7:27                     ` Paolo Bonzini
2012-10-11  0:03                     ` Rusty Russell
2012-10-11  0:03                       ` Rusty Russell
2012-10-11 11:04                       ` Michael S. Tsirkin
2012-10-11 11:04                         ` Michael S. Tsirkin
2012-10-11 22:37                         ` Rusty Russell
2012-10-11 22:37                           ` Rusty Russell
2012-10-12  7:38                           ` Paolo Bonzini
2012-10-12  7:38                             ` Paolo Bonzini
2012-10-12 11:52                           ` Cornelia Huck
2012-10-12 11:52                             ` Cornelia Huck
2012-10-05  5:43             ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87vces2gxq.fsf@rustcorp.com.au \
    --to=rusty@rustcorp.com.au \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=levinsasha928@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=tahm@linux.vnet.ibm.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.