* [PATCH 0/1] vhost: parallel virtqueue handling
@ 2018-11-02 16:07 Vitaly Mayatskikh
  2018-11-02 16:07 ` [PATCH 1/1] vhost: add per-vq worker thread Vitaly Mayatskikh
  2018-11-05  2:51 ` [PATCH 0/1] vhost: parallel virtqueue handling Jason Wang
  0 siblings, 2 replies; 8+ messages in thread
From: Vitaly Mayatskikh @ 2018-11-02 16:07 UTC (permalink / raw)
  To: Michael S . Tsirkin
  Cc: Jason Wang, kvm, virtualization, netdev, linux-kernel, Vitaly Mayatskikh

Hi,

I stumbled across poor performance of virtio-blk while working on a
high-performance network storage protocol. Moving virtio-blk's host
side into the kernel did increase single-queue IOPS, but a multiqueue
disk still was not scaling well. It turned out that vhost handles
events from all virtqueues in a single helper thread, which is a
major serialization point.
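
For context, the pre-patch dispatch path boils down to the following
(a condensed sketch of the current code, not a literal copy; see the
full diff in patch 1/1):

  /* Pre-patch model (simplified): every virtqueue's kick lands on the same
   * device-wide list, which is drained by a single "vhost-<pid>" kthread. */
  void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
  {
          if (!dev->worker)
                  return;

          if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
                  llist_add(&work->node, &dev->work_list);  /* one list per device    */
                  wake_up_process(dev->worker);             /* one kthread per device */
          }
  }

With many virtqueues kicking at once, that one kthread is where
everything serializes; the patch moves both the list and the kthread
into struct vhost_virtqueue.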

The following patch handles events in per-queue worker threads and
increases I/O concurrency; see the IOPS numbers below:

num-queues   bare metal   virtio-blk   vhost-blk
    1          171k         148k         195k
    2          328k         249k         349k
    3          479k         179k         501k
    4          622k         143k         620k
    5          755k         136k         737k
    6          887k         131k         830k
    7         1004k         126k         926k
    8         1099k         117k        1001k
    9         1194k         115k        1055k
   10         1278k         109k        1130k
   11         1345k         110k        1119k
   12         1411k         104k        1201k
   13         1466k         106k        1260k
   14         1517k         103k        1296k
   15         1552k         102k        1322k
   16         1480k         101k        1346k

Vitaly Mayatskikh (1):
  vhost: add per-vq worker thread

 drivers/vhost/vhost.c | 123 +++++++++++++++++++++++++++++++-----------
 drivers/vhost/vhost.h |  11 +++-
 2 files changed, 100 insertions(+), 34 deletions(-)

-- 
2.17.1



* [PATCH 1/1] vhost: add per-vq worker thread
  2018-11-02 16:07 [PATCH 0/1] vhost: parallel virtqueue handling Vitaly Mayatskikh
@ 2018-11-02 16:07 ` Vitaly Mayatskikh
  2018-11-05  2:53   ` Jason Wang
  2018-11-05  2:51 ` [PATCH 0/1] vhost: parallel virtqueue handling Jason Wang
  1 sibling, 1 reply; 8+ messages in thread
From: Vitaly Mayatskikh @ 2018-11-02 16:07 UTC (permalink / raw)
  To: Michael S . Tsirkin
  Cc: Jason Wang, kvm, virtualization, netdev, linux-kernel, Vitaly Mayatskikh

This enables near-linear scaling in multiqueue cases.

The first virtqueue still gets its worker created unconditionally;
workers for the remaining queues are created lazily, when polling
actually starts on the queue.

Signed-off-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
---
 drivers/vhost/vhost.c | 123 +++++++++++++++++++++++++++++++-----------
 drivers/vhost/vhost.h |  11 +++-
 2 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3a5f81a66d34..523dcfac4541 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -185,18 +185,27 @@ void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
 }
 EXPORT_SYMBOL_GPL(vhost_work_init);
 
-/* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
-		     __poll_t mask, struct vhost_dev *dev)
+
+static void vhost_vq_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+			       __poll_t mask, struct vhost_virtqueue *vq)
 {
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
-	poll->dev = dev;
+	poll->dev = vq->dev;
+	poll->vq = vq;
 	poll->wqh = NULL;
 
 	vhost_work_init(&poll->work, fn);
 }
+EXPORT_SYMBOL_GPL(vhost_vq_poll_init);
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     __poll_t mask, struct vhost_dev *dev)
+{
+	vhost_vq_poll_init(poll, fn, mask, dev->vqs[0]);
+}
 EXPORT_SYMBOL_GPL(vhost_poll_init);
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -232,31 +241,74 @@ void vhost_poll_stop(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_stop);
 
-void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+
+static void vhost_vq_poll_start_work(struct vhost_work *w)
+{
+	struct vhost_virtqueue *vq = container_of(w, struct vhost_virtqueue,
+						  work);
+
+	vhost_poll_start(&vq->poll, vq->kick);
+}
+
+static int vhost_vq_worker(void *data);
+
+static int vhost_vq_poll_start(struct vhost_virtqueue *vq)
+{
+	if (!vq->worker) {
+		vq->worker = kthread_create(vhost_vq_worker, vq, "vhost-%d/%i",
+					    vq->dev->pid, vq->index);
+		if (IS_ERR(vq->worker)) {
+			int ret = PTR_ERR(vq->worker);
+
+			pr_err("%s: can't create vq worker: %d\n", __func__,
+			       ret);
+			vq->worker = NULL;
+			return ret;
+		}
+	}
+	vhost_work_init(&vq->work, vhost_vq_poll_start_work);
+	vhost_vq_work_queue(vq, &vq->work);
+	return 0;
+}
+
+static void vhost_vq_work_flush(struct vhost_virtqueue *vq,
+				struct vhost_work *work)
 {
 	struct vhost_flush_struct flush;
 
-	if (dev->worker) {
+	if (vq->worker) {
 		init_completion(&flush.wait_event);
 		vhost_work_init(&flush.work, vhost_flush_work);
 
-		vhost_work_queue(dev, &flush.work);
+		vhost_vq_work_queue(vq, &flush.work);
 		wait_for_completion(&flush.wait_event);
 	}
 }
+EXPORT_SYMBOL_GPL(vhost_vq_work_flush);
+
+void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+{
+	vhost_vq_work_flush(dev->vqs[0], work);
+}
 EXPORT_SYMBOL_GPL(vhost_work_flush);
 
 /* Flush any work that has been scheduled. When calling this, don't hold any
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	vhost_work_flush(poll->dev, &poll->work);
+	vhost_vq_work_flush(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-	if (!dev->worker)
+	return vhost_vq_work_queue(dev->vqs[0], work);
+}
+EXPORT_SYMBOL_GPL(vhost_work_queue);
+
+void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
+{
+	if (!vq->worker)
 		return;
 
 	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -264,22 +316,22 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 		 * sure it was not in the list.
 		 * test_and_set_bit() implies a memory barrier.
 		 */
-		llist_add(&work->node, &dev->work_list);
-		wake_up_process(dev->worker);
+		llist_add(&work->node, &vq->work_list);
+		wake_up_process(vq->worker);
 	}
 }
-EXPORT_SYMBOL_GPL(vhost_work_queue);
+EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
 
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return !llist_empty(&dev->work_list);
+	return !llist_empty(&dev->vqs[0]->work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	vhost_work_queue(poll->dev, &poll->work);
+	vhost_vq_work_queue(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
@@ -333,9 +385,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	__vhost_vq_meta_reset(vq);
 }
 
-static int vhost_worker(void *data)
+static int vhost_vq_worker(void *data)
 {
-	struct vhost_dev *dev = data;
+	struct vhost_virtqueue *vq = data;
+	struct vhost_dev *dev = vq->dev;
 	struct vhost_work *work, *work_next;
 	struct llist_node *node;
 	mm_segment_t oldfs = get_fs();
@@ -351,8 +404,7 @@ static int vhost_worker(void *data)
 			__set_current_state(TASK_RUNNING);
 			break;
 		}
-
-		node = llist_del_all(&dev->work_list);
+		node = llist_del_all(&vq->work_list);
 		if (!node)
 			schedule();
 
@@ -429,25 +481,26 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->umem = NULL;
 	dev->iotlb = NULL;
 	dev->mm = NULL;
-	dev->worker = NULL;
-	init_llist_head(&dev->work_list);
 	init_waitqueue_head(&dev->wait);
 	INIT_LIST_HEAD(&dev->read_list);
 	INIT_LIST_HEAD(&dev->pending_list);
 	spin_lock_init(&dev->iotlb_lock);
 
-
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
+		vq->index = i;
 		vq->log = NULL;
 		vq->indirect = NULL;
 		vq->heads = NULL;
 		vq->dev = dev;
+		vq->worker = NULL;
 		mutex_init(&vq->mutex);
 		vhost_vq_reset(dev, vq);
+		init_llist_head(&vq->work_list);
+		vq->worker = NULL;
 		if (vq->handle_kick)
-			vhost_poll_init(&vq->poll, vq->handle_kick,
-					EPOLLIN, dev);
+			vhost_vq_poll_init(&vq->poll, vq->handle_kick,
+					   EPOLLIN, vq);
 	}
 }
 EXPORT_SYMBOL_GPL(vhost_dev_init);
@@ -506,14 +559,16 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
-	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+	dev->pid = current->pid;
+	worker = kthread_create(vhost_vq_worker, dev->vqs[0], "vhost-%d/0",
+				current->pid);
 	if (IS_ERR(worker)) {
 		err = PTR_ERR(worker);
 		goto err_worker;
 	}
 
-	dev->worker = worker;
-	wake_up_process(worker);	/* avoid contributing to loadavg */
+	dev->vqs[0]->worker = worker;
+	wake_up_process(worker);        /* avoid contributing to loadavg */
 
 	err = vhost_attach_cgroups(dev);
 	if (err)
@@ -526,7 +581,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 	return 0;
 err_cgroup:
 	kthread_stop(worker);
-	dev->worker = NULL;
+	dev->vqs[0]->worker = NULL;
 err_worker:
 	if (dev->mm)
 		mmput(dev->mm);
@@ -638,11 +693,15 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	dev->iotlb = NULL;
 	vhost_clear_msg(dev);
 	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
-	WARN_ON(!llist_empty(&dev->work_list));
-	if (dev->worker) {
-		kthread_stop(dev->worker);
-		dev->worker = NULL;
+
+	for (i = 0; i < dev->nvqs; ++i) {
+		WARN_ON(!llist_empty(&dev->vqs[i]->work_list));
+		if (dev->vqs[i]->worker) {
+			kthread_stop(dev->vqs[i]->worker);
+			dev->vqs[i]->worker = NULL;
+		}
 	}
+
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
@@ -1564,7 +1623,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 		fput(filep);
 
 	if (pollstart && vq->handle_kick)
-		r = vhost_poll_start(&vq->poll, vq->kick);
+		r = vhost_vq_poll_start(vq);
 
 	mutex_unlock(&vq->mutex);
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 466ef7542291..c00733fac49f 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -32,6 +32,7 @@ struct vhost_poll {
 	struct vhost_work	  work;
 	__poll_t		  mask;
 	struct vhost_dev	 *dev;
+	struct vhost_virtqueue   *vq;
 };
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
@@ -86,6 +87,7 @@ struct vhost_virtqueue {
 
 	/* The actual ring of buffers. */
 	struct mutex mutex;
+	unsigned int index;
 	unsigned int num;
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
@@ -145,6 +147,10 @@ struct vhost_virtqueue {
 	bool user_be;
 #endif
 	u32 busyloop_timeout;
+
+	struct llist_head work_list;
+	struct task_struct *worker;
+	struct vhost_work work;
 };
 
 struct vhost_msg_node {
@@ -158,12 +164,11 @@ struct vhost_msg_node {
 
 struct vhost_dev {
 	struct mm_struct *mm;
+	pid_t pid;
 	struct mutex mutex;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct eventfd_ctx *log_ctx;
-	struct llist_head work_list;
-	struct task_struct *worker;
 	struct vhost_umem *umem;
 	struct vhost_umem *iotlb;
 	spinlock_t iotlb_lock;
@@ -185,6 +190,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
 bool vhost_log_access_ok(struct vhost_dev *);
 
+void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
+
 int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
 		      unsigned int *out_num, unsigned int *in_num,
-- 
2.17.1



* Re: [PATCH 0/1] vhost: parallel virtqueue handling
  2018-11-02 16:07 [PATCH 0/1] vhost: parallel virtqueue handling Vitaly Mayatskikh
  2018-11-02 16:07 ` [PATCH 1/1] vhost: add per-vq worker thread Vitaly Mayatskikh
@ 2018-11-05  2:51 ` Jason Wang
  2018-11-05  3:40   ` Vitaly Mayatskih
  1 sibling, 1 reply; 8+ messages in thread
From: Jason Wang @ 2018-11-05  2:51 UTC (permalink / raw)
  To: Vitaly Mayatskikh, Michael S . Tsirkin
  Cc: kvm, virtualization, netdev, linux-kernel


On 2018/11/3 12:07 AM, Vitaly Mayatskikh wrote:
> Hi,
>
> I stumbled across poor performance of virtio-blk while working on a
> high-performance network storage protocol. Moving virtio-blk's host
> side to kernel did increase single queue IOPS, but multiqueue disk
> still was not scaling well. It turned out that vhost handles events
> from all virtio queues in one helper thread, and that's pretty much a
> big serialization point.
>
> The following patch enables events handling in per-queue thread and
> increases IO concurrency, see IOPS numbers:


Thanks a lot for the patches. Here are some thoughts:

- This is not the first attempt at parallelizing vhost workers, so we
need a comparison among them:

1) Multiple vhost workers from Anthony, 
https://www.spinics.net/lists/netdev/msg189432.html

2) ELVIS from IBM, http://www.mulix.org/pubs/eli/elvis-h319.pdf

3) CMWQ from Bandan, 
http://www.linux-kvm.org/images/5/52/02x08-Aspen-Bandan_Das-vhost-sharing_is_better.pdf

- vhost-net uses a different multiqueue model: each vhost device on the
host deals only with a specific queue pair instead of a whole device.
This allows great flexibility, and multiqueue could be implemented
without touching the vhost code.

- the current vhost-net implementation depends heavily on the
single-thread assumption, especially in its busy-polling code, and
would be broken by this attempt (see the sketch after this list). If
we decide to go this way, that needs to be fixed, and we do need
networking performance results.

- Having more threads is not necessarily a win; at the very least we
need a module parameter or some other knob to control the number of
threads, I believe (a sketch of such a knob follows below).
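
To make the busy-polling point concrete: after this patch, the
lockless hint that vhost-net's busy-poll loop relies on only looks at
the first virtqueue, so work queued on the other per-vq lists is not
visible to it:

  /* As changed by the patch (see the vhost_has_work() hunk): */
  bool vhost_has_work(struct vhost_dev *dev)
  {
          return !llist_empty(&dev->vqs[0]->work_list);   /* other vqs ignored */
  }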
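
And a minimal sketch of what such a knob could look like, assuming a
simple per-device cap; the parameter name and semantics are invented
here, nothing like it exists in the patch:

  #include <linux/module.h>

  /* Hypothetical knob: cap how many worker kthreads a vhost device may
   * create (1 == today's single-worker behaviour). */
  static unsigned int max_vq_workers = 1;
  module_param(max_vq_workers, uint, 0644);
  MODULE_PARM_DESC(max_vq_workers,
                   "Maximum number of worker kthreads per vhost device");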


Thanks



* Re: [PATCH 1/1] vhost: add per-vq worker thread
  2018-11-02 16:07 ` [PATCH 1/1] vhost: add per-vq worker thread Vitaly Mayatskikh
@ 2018-11-05  2:53   ` Jason Wang
  2018-11-05  3:28     ` Vitaly Mayatskih
  0 siblings, 1 reply; 8+ messages in thread
From: Jason Wang @ 2018-11-05  2:53 UTC (permalink / raw)
  To: Vitaly Mayatskikh, Michael S . Tsirkin
  Cc: kvm, virtualization, netdev, linux-kernel


On 2018/11/3 12:07 AM, Vitaly Mayatskikh wrote:
> +
> +static int vhost_vq_poll_start(struct vhost_virtqueue *vq)
> +{
> +	if (!vq->worker) {
> +		vq->worker = kthread_create(vhost_vq_worker, vq, "vhost-%d/%i",
> +					    vq->dev->pid, vq->index);
> +		if (IS_ERR(vq->worker)) {
> +			int ret = PTR_ERR(vq->worker);
> +
> +			pr_err("%s: can't create vq worker: %d\n", __func__,
> +			       ret);
> +			vq->worker = NULL;
> +			return ret;
> +		}
> +	}
> +	vhost_work_init(&vq->work, vhost_vq_poll_start_work);
> +	vhost_vq_work_queue(vq, &vq->work);
> +	return 0;
> +}
> +


I wonder whether it's better to allow the device to specify the
worker here instead of forcing a per-vq worker model. Then we can
keep the behavior of the existing implementation and do optimization
on top?
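
A rough sketch of that idea, purely illustrative (none of these names
exist in the tree, and the mapping policy would be up to each device):

  /* Hypothetical: pull the work list and the kthread out into a worker
   * object that a device can share between virtqueues however it likes. */
  struct vhost_worker {
          struct llist_head work_list;
          struct task_struct *task;
  };

  /* A device could then attach all vqs to a single worker (today's
   * behaviour), one worker per vq (this patch), or anything in between. */
  static void vhost_vq_attach_worker(struct vhost_virtqueue *vq,
                                     struct vhost_worker *worker)
  {
          vq->worker_obj = worker;        /* hypothetical field */
  }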

Thanks




* Re: [PATCH 1/1] vhost: add per-vq worker thread
  2018-11-05  2:53   ` Jason Wang
@ 2018-11-05  3:28     ` Vitaly Mayatskih
  2018-11-06  2:48       ` Jason Wang
  0 siblings, 1 reply; 8+ messages in thread
From: Vitaly Mayatskih @ 2018-11-05  3:28 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S . Tsirkin, kvm, virtualization, netdev, linux-kernel

On Sun, Nov 4, 2018 at 9:53 PM Jason Wang <jasowang@redhat.com> wrote:

> I wonder whether it's better to allow the device to specify the
> worker here instead of forcing a per-vq worker model. Then we can
> keep the behavior of the existing implementation and do optimization
> on top?

I was thinking about that too, but for the sake of simplicity it
seems valid that if the user asked for 8 parallel queues for a disk,
they had better actually be parallel, i.e. one worker per queue. The
rest of the disks, which don't need high performance, can be given a
single queue.

-- 
wbr, Vitaly


* Re: [PATCH 0/1] vhost: parallel virtqueue handling
  2018-11-05  2:51 ` [PATCH 0/1] vhost: parallel virtqueue handling Jason Wang
@ 2018-11-05  3:40   ` Vitaly Mayatskih
  0 siblings, 0 replies; 8+ messages in thread
From: Vitaly Mayatskih @ 2018-11-05  3:40 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S . Tsirkin, kvm, virtualization, netdev, linux-kernel

On Sun, Nov 4, 2018 at 9:52 PM Jason Wang <jasowang@redhat.com> wrote:

> Thanks a lot for the patches. Here are some thoughts:
>
> - This is not the first attempt at parallelizing vhost workers, so we
> need a comparison among them:
>
> 1) Multiple vhost workers from Anthony,
> https://www.spinics.net/lists/netdev/msg189432.html
>
> 2) ELVIS from IBM, http://www.mulix.org/pubs/eli/elvis-h319.pdf
>
> 3) CMWQ from Bandan,
> http://www.linux-kvm.org/images/5/52/02x08-Aspen-Bandan_Das-vhost-sharing_is_better.pdf
>
> - vhost-net uses a different multiqueue model: each vhost device on the
> host deals only with a specific queue pair instead of a whole device.
> This allows great flexibility, and multiqueue could be implemented
> without touching the vhost code.

I'm in no way a network expert, but I think this is because it follows
the NIC's combined-queue model. Having a TX/RX queue pair looks like a
natural choice for this case.

> - the current vhost-net implementation depends heavily on the
> single-thread assumption, especially in its busy-polling code, and
> would be broken by this attempt. If we decide to go this way, that
> needs to be fixed, and we do need networking performance results.

Thanks for noting that; I'm missing a lot of the historical
background. Will read up on that.

> - Having more threads is not necessarily a win; at the very least we
> need a module parameter or some other knob to control the number of
> threads, I believe.

I agree I didn't think fully about other cases, but for disks it is
already under control: QEMU's num-queues disk parameter.

There's a saturation point beyond which adding more threads does not
yield much more performance. For my environment it's about 12 queues.

So, how does this sound: the default behaviour stays at 1 worker per
vhost device, and if the user needs per-vq workers, he issues a new
VHOST_SET_ ioctl?
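
Purely as an illustration of that proposal (the ioctl name, request
number and argument are invented here; only the "VHOST_SET_" prefix
comes from the paragraph above), it could look roughly like:

  /* Hypothetical uapi addition: opt a vhost device in to per-vq worker
   * kthreads. */
  #define VHOST_SET_VQ_WORKERS    _IOW(VHOST_VIRTIO, 0x7f, int)

  /* Userspace (e.g. QEMU) side: */
  int one_worker_per_vq = 1;
  ioctl(vhost_fd, VHOST_SET_VQ_WORKERS, &one_worker_per_vq);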
-- 
wbr, Vitaly


* Re: [PATCH 1/1] vhost: add per-vq worker thread
  2018-11-05  3:28     ` Vitaly Mayatskih
@ 2018-11-06  2:48       ` Jason Wang
  2018-11-06  2:59         ` Vitaly Mayatskih
  0 siblings, 1 reply; 8+ messages in thread
From: Jason Wang @ 2018-11-06  2:48 UTC (permalink / raw)
  To: Vitaly Mayatskih
  Cc: Michael S . Tsirkin, kvm, virtualization, netdev, linux-kernel


On 2018/11/5 11:28 AM, Vitaly Mayatskih wrote:
> On Sun, Nov 4, 2018 at 9:53 PM Jason Wang <jasowang@redhat.com> wrote:
>
>> I wonder whether it's better to allow the device to specify the
>> worker here instead of forcing a per-vq worker model. Then we can
>> keep the behavior of the existing implementation and do optimization
>> on top?
> I was thinking about that too, but for the sake of simplicity it
> seems valid that if the user asked for 8 parallel queues for a disk,
> they had better actually be parallel, i.e. one worker per queue. The
> rest of the disks, which don't need high performance, can be given a
> single queue.
>

If you allow the device to specify the worker itself, you can do any
kind of mapping between work and worker kthreads, I think. The
advantage of doing this is that you can keep vhost-net untouched.
This makes things a little bit easier, and proving that two kthreads
are better than one for the -net workload is probably not as easy as
it looks. We may get a boost in some cases but a degradation in the
rest.


Thanks



* Re: [PATCH 1/1] vhost: add per-vq worker thread
  2018-11-06  2:48       ` Jason Wang
@ 2018-11-06  2:59         ` Vitaly Mayatskih
  0 siblings, 0 replies; 8+ messages in thread
From: Vitaly Mayatskih @ 2018-11-06  2:59 UTC (permalink / raw)
  To: Jason Wang; +Cc: Michael S . Tsirkin, kvm, virtualization, netdev, linux-kernel

On Mon, Nov 5, 2018 at 9:49 PM Jason Wang <jasowang@redhat.com> wrote:

> If you allow the device to specify the worker itself, you can do any
> kind of mapping between work and worker kthreads, I think. The
> advantage of doing this is that you can keep vhost-net untouched.
> This makes things a little bit easier, and proving that two kthreads
> are better than one for the -net workload is probably not as easy as
> it looks. We may get a boost in some cases but a degradation in the
> rest.

Sounds good. One worker is created by vhost as it is today; all the
other workers are added on demand from userspace via an ioctl.
-- 
wbr, Vitaly


Thread overview: 8+ messages
2018-11-02 16:07 [PATCH 0/1] vhost: parallel virtqueue handling Vitaly Mayatskikh
2018-11-02 16:07 ` [PATCH 1/1] vhost: add per-vq worker thread Vitaly Mayatskikh
2018-11-05  2:53   ` Jason Wang
2018-11-05  3:28     ` Vitaly Mayatskih
2018-11-06  2:48       ` Jason Wang
2018-11-06  2:59         ` Vitaly Mayatskih
2018-11-05  2:51 ` [PATCH 0/1] vhost: parallel virtqueue handling Jason Wang
2018-11-05  3:40   ` Vitaly Mayatskih
