All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
To: <qemu-devel@nongnu.org>
Cc: <kvm@vger.kernel.org>, Anthony Liguori <aliguori@us.ibm.com>,
	Kevin Wolf <kwolf@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Asias He <asias@redhat.com>, Khoa Huynh <khoa@us.ibm.com>,
	Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Subject: [RFC v9 11/27] virtio-blk: Indirect vring and flush support
Date: Wed, 18 Jul 2012 16:07:38 +0100	[thread overview]
Message-ID: <1342624074-24650-12-git-send-email-stefanha@linux.vnet.ibm.com> (raw)
In-Reply-To: <1342624074-24650-1-git-send-email-stefanha@linux.vnet.ibm.com>

RHEL6 and other new guest kernels use indirect vring descriptors to
increase the number of requests that can be batched.  This fundamentally
changes vring from a scheme that requires fixed resources to something
more dynamic (although there is still an absolute maximum number of
descriptors).  Cope with indirect vrings by taking on as many requests
as we can in one go and then postponing the remaining requests until the
first batch completes.

It would be possible to switch to dynamic resource management, where the
iovec and iocb structs are allocated with malloc.  This would allow the
entire ring to be processed even with indirect descriptors, but would
probably hit a bottleneck when io_submit() refuses to queue more requests.
Therefore, stick with the simpler scheme for now.

Unfortunately, Linux AIO does not support asynchronous fsync/fdatasync on
all files.  In particular, a file opened with O_DIRECT on ext4 does not
support the Linux AIO fdsync operation.  Work around this by calling
fdatasync() synchronously for now.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/dataplane/ioq.h   |   18 ++++-----
 hw/dataplane/vring.h |  103 +++++++++++++++++++++++++++++++++++++++++++-------
 hw/virtio-blk.c      |   75 ++++++++++++++++++++++--------------
 3 files changed, 144 insertions(+), 52 deletions(-)

diff --git a/hw/dataplane/ioq.h b/hw/dataplane/ioq.h
index 7200e87..d1545d6 100644
--- a/hw/dataplane/ioq.h
+++ b/hw/dataplane/ioq.h
@@ -3,7 +3,7 @@
 
 typedef struct {
     int fd;                         /* file descriptor */
-    unsigned int max_reqs;           /* max length of freelist and queue */
+    unsigned int max_reqs;          /* max length of freelist and queue */
 
     io_context_t io_ctx;            /* Linux AIO context */
     EventNotifier io_notifier;      /* Linux AIO eventfd */
@@ -91,18 +91,16 @@ static struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov, unsigne
     return iocb;
 }
 
-static struct iocb *ioq_fdsync(IOQueue *ioq)
-{
-    struct iocb *iocb = ioq_get_iocb(ioq);
-
-    io_prep_fdsync(iocb, ioq->fd);
-    io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
-    return iocb;
-}
-
 static int ioq_submit(IOQueue *ioq)
 {
     int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
+    if (unlikely(rc < 0)) {
+        unsigned int i;
+        fprintf(stderr, "io_submit io_ctx=%#lx nr=%d iovecs=%p\n", (uint64_t)ioq->io_ctx, ioq->queue_idx, ioq->queue);
+        for (i = 0; i < ioq->queue_idx; i++) {
+            fprintf(stderr, "[%u] type=%#x fd=%d\n", i, ioq->queue[i]->aio_lio_opcode, ioq->queue[i]->aio_fildes);
+        }
+    }
     ioq->queue_idx = 0; /* reset */
     return rc;
 }
diff --git a/hw/dataplane/vring.h b/hw/dataplane/vring.h
index 70675e5..3eab4b4 100644
--- a/hw/dataplane/vring.h
+++ b/hw/dataplane/vring.h
@@ -64,6 +64,86 @@ static void vring_setup(Vring *vring, VirtIODevice *vdev, int n)
             vring->vr.desc, vring->vr.avail, vring->vr.used);
 }
 
+static bool vring_more_avail(Vring *vring)
+{
+	return vring->vr.avail->idx != vring->last_avail_idx;
+}
+
+/* This is stolen from linux-2.6/drivers/vhost/vhost.c. */
+static bool get_indirect(Vring *vring,
+			struct iovec iov[], struct iovec *iov_end,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vring_desc *indirect)
+{
+	struct vring_desc desc;
+	unsigned int i = 0, count, found = 0;
+
+	/* Sanity check */
+	if (unlikely(indirect->len % sizeof desc)) {
+		fprintf(stderr, "Invalid length in indirect descriptor: "
+		       "len 0x%llx not multiple of 0x%zx\n",
+		       (unsigned long long)indirect->len,
+		       sizeof desc);
+		exit(1);
+	}
+
+	count = indirect->len / sizeof desc;
+	/* Buffers are chained via a 16 bit next field, so
+	 * we can have at most 2^16 of these. */
+	if (unlikely(count > USHRT_MAX + 1)) {
+		fprintf(stderr, "Indirect buffer length too big: %d\n",
+		       indirect->len);
+        exit(1);
+	}
+
+    /* Point to translate indirect desc chain */
+    indirect = phys_to_host(vring, indirect->addr);
+
+	/* We will use the result as an address to read from, so most
+	 * architectures only need a compiler barrier here. */
+	__sync_synchronize(); /* read_barrier_depends(); */
+
+	do {
+		if (unlikely(++found > count)) {
+			fprintf(stderr, "Loop detected: last one at %u "
+			       "indirect size %u\n",
+			       i, count);
+			exit(1);
+		}
+
+        desc = *indirect++;
+		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+			fprintf(stderr, "Nested indirect descriptor\n");
+            exit(1);
+		}
+
+        /* Stop for now if there are not enough iovecs available. */
+        if (iov >= iov_end) {
+            return false;
+        }
+
+        iov->iov_base = phys_to_host(vring, desc.addr);
+        iov->iov_len  = desc.len;
+        iov++;
+
+		/* If this is an input descriptor, increment that count. */
+		if (desc.flags & VRING_DESC_F_WRITE) {
+			*in_num += 1;
+		} else {
+			/* If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors. */
+			if (unlikely(*in_num)) {
+				fprintf(stderr, "Indirect descriptor "
+				       "has out after in: idx %d\n", i);
+                exit(1);
+			}
+			*out_num += 1;
+		}
+        i = desc.next;
+	} while (desc.flags & VRING_DESC_F_NEXT);
+    return true;
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -129,23 +209,20 @@ static unsigned int vring_pop(Vring *vring,
 		}
         desc = vring->vr.desc[i];
 		if (desc.flags & VRING_DESC_F_INDIRECT) {
-/*			ret = get_indirect(dev, vq, iov, iov_size,
-					   out_num, in_num,
-					   log, log_num, &desc);
-			if (unlikely(ret < 0)) {
-				vq_err(vq, "Failure detected "
-				       "in indirect descriptor at idx %d\n", i);
-				return ret;
-			}
-			continue; */
-            fprintf(stderr, "Indirect vring not supported\n");
-            exit(1);
+			if (!get_indirect(vring, iov, iov_end, out_num, in_num, &desc)) {
+                return num; /* not enough iovecs, stop for now */
+            }
+            continue;
 		}
 
+        /* If there are not enough iovecs left, stop for now.  The caller
+         * should check if there are more descs available once they have dealt
+         * with the current set.
+         */
         if (iov >= iov_end) {
-            fprintf(stderr, "Not enough vring iovecs\n");
-            exit(1);
+            return num;
         }
+
         iov->iov_base = phys_to_host(vring, desc.addr);
         iov->iov_len  = desc.len;
         iov++;
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 52ea601..591eace 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -62,6 +62,14 @@ static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
     return (VirtIOBlock *)vdev;
 }
 
+/* Normally the block driver passes down the fd, there's no way to get it from
+ * above.
+ */
+static int get_raw_posix_fd_hack(VirtIOBlock *s)
+{
+    return *(int*)s->bs->file->opaque;
+}
+
 static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
 {
     VirtIOBlock *s = opaque;
@@ -83,18 +91,6 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
     vring_push(&s->vring, req->head, len + sizeof req->status);
 }
 
-static bool handle_io(EventHandler *handler)
-{
-    VirtIOBlock *s = container_of(handler, VirtIOBlock, io_handler);
-
-    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
-        /* TODO is this thread-safe and can it be done faster? */
-        virtio_irq(s->vq);
-    }
-
-    return true;
-}
-
 static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_num, unsigned int in_num, unsigned int head)
 {
     /* Virtio block requests look like this: */
@@ -117,13 +113,16 @@ static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_n
             outhdr->type, outhdr->sector);
     */
 
-    if (unlikely(outhdr->type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH))) {
+    /* TODO Linux sets the barrier bit even when not advertised! */
+    uint32_t type = outhdr->type & ~VIRTIO_BLK_T_BARRIER;
+
+    if (unlikely(type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH))) {
         fprintf(stderr, "virtio-blk unsupported request type %#x\n", outhdr->type);
         exit(1);
     }
 
     struct iocb *iocb;
-    switch (outhdr->type & (VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH)) {
+    switch (type & (VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH)) {
     case VIRTIO_BLK_T_IN:
         if (unlikely(out_num != 1)) {
             fprintf(stderr, "virtio-blk invalid read request\n");
@@ -145,8 +144,16 @@ static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_n
             fprintf(stderr, "virtio-blk invalid flush request\n");
             exit(1);
         }
-        iocb = ioq_fdsync(ioq);
-        break;
+
+        /* TODO fdsync is not supported by all backends, do it synchronously here! */
+        {
+            VirtIOBlock *s = container_of(ioq, VirtIOBlock, ioqueue);
+            fdatasync(get_raw_posix_fd_hack(s));
+            inhdr->status = VIRTIO_BLK_S_OK;
+            vring_push(&s->vring, head, sizeof *inhdr);
+            virtio_irq(s->vq);
+        }
+        return;
 
     default:
         fprintf(stderr, "virtio-blk multiple request type bits set\n");
@@ -199,11 +206,29 @@ static bool handle_notify(EventHandler *handler)
     }
 
     /* Submit requests, if any */
-    if (likely(iov != iovec)) {
-        if (unlikely(ioq_submit(&s->ioqueue) < 0)) {
-            fprintf(stderr, "ioq_submit failed\n");
-            exit(1);
-        }
+    int rc = ioq_submit(&s->ioqueue);
+    if (unlikely(rc < 0)) {
+        fprintf(stderr, "ioq_submit failed %d\n", rc);
+        exit(1);
+    }
+    return true;
+}
+
+static bool handle_io(EventHandler *handler)
+{
+    VirtIOBlock *s = container_of(handler, VirtIOBlock, io_handler);
+
+    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
+        /* TODO is this thread-safe and can it be done faster? */
+        virtio_irq(s->vq);
+    }
+
+    /* If there were more requests than iovecs, the vring will not be empty yet
+     * so check again.  There should now be enough resources to process more
+     * requests.
+     */
+    if (vring_more_avail(&s->vring)) {
+        return handle_notify(&s->notify_handler);
     }
 
     return true;
@@ -217,14 +242,6 @@ static void *data_plane_thread(void *opaque)
     return NULL;
 }
 
-/* Normally the block driver passes down the fd, there's no way to get it from
- * above.
- */
-static int get_raw_posix_fd_hack(VirtIOBlock *s)
-{
-    return *(int*)s->bs->file->opaque;
-}
-
 static void data_plane_start(VirtIOBlock *s)
 {
     int i;
-- 
1.7.10.4


WARNING: multiple messages have this Message-ID (diff)
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
	Anthony Liguori <aliguori@us.ibm.com>,
	Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>,
	kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>,
	Khoa Huynh <khoa@us.ibm.com>, Paolo Bonzini <pbonzini@redhat.com>,
	Asias He <asias@redhat.com>
Subject: [Qemu-devel] [RFC v9 11/27] virtio-blk: Indirect vring and flush support
Date: Wed, 18 Jul 2012 16:07:38 +0100	[thread overview]
Message-ID: <1342624074-24650-12-git-send-email-stefanha@linux.vnet.ibm.com> (raw)
In-Reply-To: <1342624074-24650-1-git-send-email-stefanha@linux.vnet.ibm.com>

RHEL6 and other new guest kernels use indirect vring descriptors to
increase the number of requests that can be batched.  This fundamentally
changes vring from a scheme that requires fixed resources to something
more dynamic (although there is still an absolute maximum number of
descriptors).  Cope with indirect vrings by taking on as many requests
as we can in one go and then postponing the remaining requests until the
first batch completes.

It would be possible to switch to dynamic resource management, where the
iovec and iocb structs are allocated with malloc.  This would allow the
entire ring to be processed even with indirect descriptors, but would
probably hit a bottleneck when io_submit() refuses to queue more requests.
Therefore, stick with the simpler scheme for now.

Unfortunately, Linux AIO does not support asynchronous fsync/fdatasync on
all files.  In particular, a file opened with O_DIRECT on ext4 does not
support the Linux AIO fdsync operation.  Work around this by calling
fdatasync() synchronously for now.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/dataplane/ioq.h   |   18 ++++-----
 hw/dataplane/vring.h |  103 +++++++++++++++++++++++++++++++++++++++++++-------
 hw/virtio-blk.c      |   75 ++++++++++++++++++++++--------------
 3 files changed, 144 insertions(+), 52 deletions(-)

diff --git a/hw/dataplane/ioq.h b/hw/dataplane/ioq.h
index 7200e87..d1545d6 100644
--- a/hw/dataplane/ioq.h
+++ b/hw/dataplane/ioq.h
@@ -3,7 +3,7 @@
 
 typedef struct {
     int fd;                         /* file descriptor */
-    unsigned int max_reqs;           /* max length of freelist and queue */
+    unsigned int max_reqs;          /* max length of freelist and queue */
 
     io_context_t io_ctx;            /* Linux AIO context */
     EventNotifier io_notifier;      /* Linux AIO eventfd */
@@ -91,18 +91,16 @@ static struct iocb *ioq_rdwr(IOQueue *ioq, bool read, struct iovec *iov, unsigne
     return iocb;
 }
 
-static struct iocb *ioq_fdsync(IOQueue *ioq)
-{
-    struct iocb *iocb = ioq_get_iocb(ioq);
-
-    io_prep_fdsync(iocb, ioq->fd);
-    io_set_eventfd(iocb, event_notifier_get_fd(&ioq->io_notifier));
-    return iocb;
-}
-
 static int ioq_submit(IOQueue *ioq)
 {
     int rc = io_submit(ioq->io_ctx, ioq->queue_idx, ioq->queue);
+    if (unlikely(rc < 0)) {
+        unsigned int i;
+        fprintf(stderr, "io_submit io_ctx=%#lx nr=%d iovecs=%p\n", (uint64_t)ioq->io_ctx, ioq->queue_idx, ioq->queue);
+        for (i = 0; i < ioq->queue_idx; i++) {
+            fprintf(stderr, "[%u] type=%#x fd=%d\n", i, ioq->queue[i]->aio_lio_opcode, ioq->queue[i]->aio_fildes);
+        }
+    }
     ioq->queue_idx = 0; /* reset */
     return rc;
 }
diff --git a/hw/dataplane/vring.h b/hw/dataplane/vring.h
index 70675e5..3eab4b4 100644
--- a/hw/dataplane/vring.h
+++ b/hw/dataplane/vring.h
@@ -64,6 +64,86 @@ static void vring_setup(Vring *vring, VirtIODevice *vdev, int n)
             vring->vr.desc, vring->vr.avail, vring->vr.used);
 }
 
+static bool vring_more_avail(Vring *vring)
+{
+	return vring->vr.avail->idx != vring->last_avail_idx;
+}
+
+/* This is stolen from linux-2.6/drivers/vhost/vhost.c. */
+static bool get_indirect(Vring *vring,
+			struct iovec iov[], struct iovec *iov_end,
+			unsigned int *out_num, unsigned int *in_num,
+			struct vring_desc *indirect)
+{
+	struct vring_desc desc;
+	unsigned int i = 0, count, found = 0;
+
+	/* Sanity check */
+	if (unlikely(indirect->len % sizeof desc)) {
+		fprintf(stderr, "Invalid length in indirect descriptor: "
+		       "len 0x%llx not multiple of 0x%zx\n",
+		       (unsigned long long)indirect->len,
+		       sizeof desc);
+		exit(1);
+	}
+
+	count = indirect->len / sizeof desc;
+	/* Buffers are chained via a 16 bit next field, so
+	 * we can have at most 2^16 of these. */
+	if (unlikely(count > USHRT_MAX + 1)) {
+		fprintf(stderr, "Indirect buffer length too big: %d\n",
+		       indirect->len);
+        exit(1);
+	}
+
+    /* Point to translate indirect desc chain */
+    indirect = phys_to_host(vring, indirect->addr);
+
+	/* We will use the result as an address to read from, so most
+	 * architectures only need a compiler barrier here. */
+	__sync_synchronize(); /* read_barrier_depends(); */
+
+	do {
+		if (unlikely(++found > count)) {
+			fprintf(stderr, "Loop detected: last one at %u "
+			       "indirect size %u\n",
+			       i, count);
+			exit(1);
+		}
+
+        desc = *indirect++;
+		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+			fprintf(stderr, "Nested indirect descriptor\n");
+            exit(1);
+		}
+
+        /* Stop for now if there are not enough iovecs available. */
+        if (iov >= iov_end) {
+            return false;
+        }
+
+        iov->iov_base = phys_to_host(vring, desc.addr);
+        iov->iov_len  = desc.len;
+        iov++;
+
+		/* If this is an input descriptor, increment that count. */
+		if (desc.flags & VRING_DESC_F_WRITE) {
+			*in_num += 1;
+		} else {
+			/* If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors. */
+			if (unlikely(*in_num)) {
+				fprintf(stderr, "Indirect descriptor "
+				       "has out after in: idx %d\n", i);
+                exit(1);
+			}
+			*out_num += 1;
+		}
+        i = desc.next;
+	} while (desc.flags & VRING_DESC_F_NEXT);
+    return true;
+}
+
 /* This looks in the virtqueue and for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
@@ -129,23 +209,20 @@ static unsigned int vring_pop(Vring *vring,
 		}
         desc = vring->vr.desc[i];
 		if (desc.flags & VRING_DESC_F_INDIRECT) {
-/*			ret = get_indirect(dev, vq, iov, iov_size,
-					   out_num, in_num,
-					   log, log_num, &desc);
-			if (unlikely(ret < 0)) {
-				vq_err(vq, "Failure detected "
-				       "in indirect descriptor at idx %d\n", i);
-				return ret;
-			}
-			continue; */
-            fprintf(stderr, "Indirect vring not supported\n");
-            exit(1);
+			if (!get_indirect(vring, iov, iov_end, out_num, in_num, &desc)) {
+                return num; /* not enough iovecs, stop for now */
+            }
+            continue;
 		}
 
+        /* If there are not enough iovecs left, stop for now.  The caller
+         * should check if there are more descs available once they have dealt
+         * with the current set.
+         */
         if (iov >= iov_end) {
-            fprintf(stderr, "Not enough vring iovecs\n");
-            exit(1);
+            return num;
         }
+
         iov->iov_base = phys_to_host(vring, desc.addr);
         iov->iov_len  = desc.len;
         iov++;
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 52ea601..591eace 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -62,6 +62,14 @@ static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
     return (VirtIOBlock *)vdev;
 }
 
+/* Normally the block driver passes down the fd, there's no way to get it from
+ * above.
+ */
+static int get_raw_posix_fd_hack(VirtIOBlock *s)
+{
+    return *(int*)s->bs->file->opaque;
+}
+
 static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
 {
     VirtIOBlock *s = opaque;
@@ -83,18 +91,6 @@ static void complete_request(struct iocb *iocb, ssize_t ret, void *opaque)
     vring_push(&s->vring, req->head, len + sizeof req->status);
 }
 
-static bool handle_io(EventHandler *handler)
-{
-    VirtIOBlock *s = container_of(handler, VirtIOBlock, io_handler);
-
-    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
-        /* TODO is this thread-safe and can it be done faster? */
-        virtio_irq(s->vq);
-    }
-
-    return true;
-}
-
 static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_num, unsigned int in_num, unsigned int head)
 {
     /* Virtio block requests look like this: */
@@ -117,13 +113,16 @@ static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_n
             outhdr->type, outhdr->sector);
     */
 
-    if (unlikely(outhdr->type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH))) {
+    /* TODO Linux sets the barrier bit even when not advertised! */
+    uint32_t type = outhdr->type & ~VIRTIO_BLK_T_BARRIER;
+
+    if (unlikely(type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH))) {
         fprintf(stderr, "virtio-blk unsupported request type %#x\n", outhdr->type);
         exit(1);
     }
 
     struct iocb *iocb;
-    switch (outhdr->type & (VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH)) {
+    switch (type & (VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_FLUSH)) {
     case VIRTIO_BLK_T_IN:
         if (unlikely(out_num != 1)) {
             fprintf(stderr, "virtio-blk invalid read request\n");
@@ -145,8 +144,16 @@ static void process_request(IOQueue *ioq, struct iovec iov[], unsigned int out_n
             fprintf(stderr, "virtio-blk invalid flush request\n");
             exit(1);
         }
-        iocb = ioq_fdsync(ioq);
-        break;
+
+        /* TODO fdsync is not supported by all backends, do it synchronously here! */
+        {
+            VirtIOBlock *s = container_of(ioq, VirtIOBlock, ioqueue);
+            fdatasync(get_raw_posix_fd_hack(s));
+            inhdr->status = VIRTIO_BLK_S_OK;
+            vring_push(&s->vring, head, sizeof *inhdr);
+            virtio_irq(s->vq);
+        }
+        return;
 
     default:
         fprintf(stderr, "virtio-blk multiple request type bits set\n");
@@ -199,11 +206,29 @@ static bool handle_notify(EventHandler *handler)
     }
 
     /* Submit requests, if any */
-    if (likely(iov != iovec)) {
-        if (unlikely(ioq_submit(&s->ioqueue) < 0)) {
-            fprintf(stderr, "ioq_submit failed\n");
-            exit(1);
-        }
+    int rc = ioq_submit(&s->ioqueue);
+    if (unlikely(rc < 0)) {
+        fprintf(stderr, "ioq_submit failed %d\n", rc);
+        exit(1);
+    }
+    return true;
+}
+
+static bool handle_io(EventHandler *handler)
+{
+    VirtIOBlock *s = container_of(handler, VirtIOBlock, io_handler);
+
+    if (ioq_run_completion(&s->ioqueue, complete_request, s) > 0) {
+        /* TODO is this thread-safe and can it be done faster? */
+        virtio_irq(s->vq);
+    }
+
+    /* If there were more requests than iovecs, the vring will not be empty yet
+     * so check again.  There should now be enough resources to process more
+     * requests.
+     */
+    if (vring_more_avail(&s->vring)) {
+        return handle_notify(&s->notify_handler);
     }
 
     return true;
@@ -217,14 +242,6 @@ static void *data_plane_thread(void *opaque)
     return NULL;
 }
 
-/* Normally the block driver passes down the fd, there's no way to get it from
- * above.
- */
-static int get_raw_posix_fd_hack(VirtIOBlock *s)
-{
-    return *(int*)s->bs->file->opaque;
-}
-
 static void data_plane_start(VirtIOBlock *s)
 {
     int i;
-- 
1.7.10.4

  parent reply	other threads:[~2012-07-18 15:08 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-18 15:07 [RFC v9 00/27] virtio: virtio-blk data plane Stefan Hajnoczi
2012-07-18 15:07 ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 01/27] virtio-blk: Remove virtqueue request handling code Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 02/27] virtio-blk: Set up host notifier for data plane Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 03/27] virtio-blk: Data plane thread event loop Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 04/27] virtio-blk: Map vring Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 05/27] virtio-blk: Do cheapest possible memory mapping Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 06/27] virtio-blk: Take PCI memory range into account Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 18:29   ` Michael S. Tsirkin
2012-07-18 18:29     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-19  9:14     ` Stefan Hajnoczi
2012-07-19  9:14       ` [Qemu-devel] " Stefan Hajnoczi
2012-07-19  9:16       ` Stefan Hajnoczi
2012-07-19  9:16         ` Stefan Hajnoczi
2012-07-19  9:29         ` Avi Kivity
2012-07-19  9:29           ` Avi Kivity
2012-07-18 15:07 ` [RFC v9 07/27] virtio-blk: Put dataplane code into its own directory Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 08/27] virtio-blk: Read requests from the vring Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 09/27] virtio-blk: Add Linux AIO queue Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 10/27] virtio-blk: Stop data plane thread cleanly Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` Stefan Hajnoczi [this message]
2012-07-18 15:07   ` [Qemu-devel] [RFC v9 11/27] virtio-blk: Indirect vring and flush support Stefan Hajnoczi
2012-07-18 18:28   ` Michael S. Tsirkin
2012-07-18 18:28     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-18 19:02   ` Michael S. Tsirkin
2012-07-18 19:02     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-18 15:07 ` [RFC v9 12/27] virtio-blk: Add workaround for BUG_ON() dependency in virtio_ring.h Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 19:03   ` Michael S. Tsirkin
2012-07-18 19:03     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-18 15:07 ` [RFC v9 13/27] virtio-blk: Increase max requests for indirect vring Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 14/27] virtio-blk: Use pthreads instead of qemu-thread Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 15/27] notifier: Add a function to set the notifier Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 16/27] virtio-blk: Kick data plane thread using event notifier set Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 17/27] virtio-blk: Use guest notifier to raise interrupts Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 18/27] virtio-blk: Call ioctl() directly instead of irqfd Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:40   ` Michael S. Tsirkin
2012-07-18 15:40     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-19  9:11     ` Stefan Hajnoczi
2012-07-19  9:11       ` Stefan Hajnoczi
2012-07-19  9:19       ` Michael S. Tsirkin
2012-07-19  9:19         ` Michael S. Tsirkin
2012-07-18 15:07 ` [RFC v9 19/27] virtio-blk: Disable guest->host notifies while processing vring Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 20/27] virtio-blk: Add ioscheduler to detect mergable requests Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 21/27] virtio-blk: Add basic request merging Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 22/27] virtio-blk: Fix " Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 19:04   ` Michael S. Tsirkin
2012-07-18 19:04     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-18 15:07 ` [RFC v9 23/27] virtio-blk: Stub out SCSI commands Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 19:05   ` Michael S. Tsirkin
2012-07-18 19:05     ` [Qemu-devel] " Michael S. Tsirkin
2012-07-18 15:07 ` [RFC v9 24/27] virtio-blk: fix incorrect length Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 25/27] msix: fix irqchip breakage in msix_try_notify_from_thread() Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 26/27] msix: use upstream kvm_irqchip_set_irq() Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:07 ` [RFC v9 27/27] virtio-blk: add EVENT_IDX support to dataplane Stefan Hajnoczi
2012-07-18 15:07   ` [Qemu-devel] " Stefan Hajnoczi
2012-07-18 15:43 ` [RFC v9 00/27] virtio: virtio-blk data plane Michael S. Tsirkin
2012-07-18 15:43   ` [Qemu-devel] " Michael S. Tsirkin
2012-07-18 16:18   ` Khoa Huynh
2012-07-18 16:18     ` [Qemu-devel] " Khoa Huynh
2012-07-18 16:41   ` Khoa Huynh
2012-07-18 16:41     ` [Qemu-devel] " Khoa Huynh
2012-07-18 15:49 ` Michael S. Tsirkin
2012-07-18 15:49   ` [Qemu-devel] " Michael S. Tsirkin
2012-07-19  9:48   ` Stefan Hajnoczi
2012-07-19  9:48     ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1342624074-24650-12-git-send-email-stefanha@linux.vnet.ibm.com \
    --to=stefanha@linux.vnet.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=asias@redhat.com \
    --cc=khoa@us.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=kwolf@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.