target-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: Mike Christie <michael.christie@oracle.com>
Cc: target-devel@vger.kernel.org, linux-scsi@vger.kernel.org,
	pbonzini@redhat.com, jasowang@redhat.com, mst@redhat.com,
	sgarzare@redhat.com, virtualization@lists.linux-foundation.org
Subject: Re: [PATCH 7/9] vhost: allow userspace to create workers
Date: Thu, 3 Jun 2021 15:30:43 +0100	[thread overview]
Message-ID: <YLjnk5GpFaCCOqCU@stefanha-x1.localdomain> (raw)
In-Reply-To: <20210525180600.6349-8-michael.christie@oracle.com>

[-- Attachment #1: Type: text/plain, Size: 8691 bytes --]

On Tue, May 25, 2021 at 01:05:58PM -0500, Mike Christie wrote:
> This patch allows userspace to create workers and bind them to vqs, so you
> can have N workers per dev and also share N workers with M vqs. The next
> patch will allow sharing across devices.
> 
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/vhost/vhost.c            | 94 +++++++++++++++++++++++++++++++-
>  drivers/vhost/vhost.h            |  3 +
>  include/uapi/linux/vhost.h       |  6 ++
>  include/uapi/linux/vhost_types.h | 12 ++++
>  4 files changed, 113 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 345ade0af133..981e9bac7a31 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -30,6 +30,7 @@
>  #include <linux/interval_tree_generic.h>
>  #include <linux/nospec.h>
>  #include <linux/kcov.h>
> +#include <linux/hashtable.h>
>  
>  #include "vhost.h"
>  
> @@ -42,6 +43,9 @@ module_param(max_iotlb_entries, int, 0444);
>  MODULE_PARM_DESC(max_iotlb_entries,
>  	"Maximum number of iotlb entries. (default: 2048)");
>  
> +static DEFINE_HASHTABLE(vhost_workers, 5);
> +static DEFINE_SPINLOCK(vhost_workers_lock);
> +
>  enum {
>  	VHOST_MEMORY_F_LOG = 0x1,
>  };
> @@ -617,8 +621,17 @@ static void vhost_detach_mm(struct vhost_dev *dev)
>  	dev->mm = NULL;
>  }
>  
> -static void vhost_worker_free(struct vhost_worker *worker)
> +static void vhost_worker_put(struct vhost_worker *worker)
>  {
> +	spin_lock(&vhost_workers_lock);
> +	if (!refcount_dec_and_test(&worker->refcount)) {
> +		spin_unlock(&vhost_workers_lock);
> +		return;
> +	}
> +
> +	hash_del(&worker->h_node);
> +	spin_unlock(&vhost_workers_lock);
> +
>  	WARN_ON(!llist_empty(&worker->work_list));
>  	kthread_stop(worker->task);
>  	kfree(worker);
> @@ -632,7 +645,7 @@ static void vhost_workers_free(struct vhost_dev *dev)
>  		return;
>  
>  	for (i = 0; i < dev->num_workers; i++)
> -		vhost_worker_free(dev->workers[i]);
> +		vhost_worker_put(dev->workers[i]);
>  
>  	kfree(dev->workers);
>  	dev->num_workers = 0;
> @@ -652,6 +665,8 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>  	worker->id = dev->num_workers;
>  	worker->dev = dev;
>  	init_llist_head(&worker->work_list);
> +	INIT_HLIST_NODE(&worker->h_node);
> +	refcount_set(&worker->refcount, 1);
>  
>  	task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
>  	if (IS_ERR(task))
> @@ -664,6 +679,9 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>  	if (ret)
>  		goto stop_worker;
>  
> +	spin_lock(&vhost_workers_lock);
> +	hash_add(vhost_workers, &worker->h_node, worker->task->pid);
> +	spin_unlock(&vhost_workers_lock);
>  	return worker;
>  
>  stop_worker:
> @@ -673,6 +691,67 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>  	return NULL;
>  }
>  
> +static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
> +{
> +	struct vhost_worker *worker, *found_worker = NULL;
> +
> +	spin_lock(&vhost_workers_lock);
> +	hash_for_each_possible(vhost_workers, worker, h_node, pid) {
> +		if (worker->task->pid == pid) {
> +			/* tmp - next patch allows sharing across devs */
> +			if (worker->dev != dev)
> +				break;
> +
> +			found_worker = worker;
> +			refcount_inc(&worker->refcount);
> +			break;
> +		}
> +	}
> +	spin_unlock(&vhost_workers_lock);
> +	return found_worker;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_vq_set_worker(struct vhost_virtqueue *vq,
> +			       struct vhost_vring_worker *info)
> +{
> +	struct vhost_dev *dev = vq->dev;
> +	struct vhost_worker *worker;
> +
> +	if (vq->worker) {
> +		/* TODO - support changing while works are running */
> +		return -EBUSY;
> +	}
> +
> +	if (info->pid == VHOST_VRING_NEW_WORKER) {
> +		worker = vhost_worker_create(dev);

The maximum number of kthreads created is limited by
vhost_dev_init(nvqs)? For example VHOST_SCSI_MAX_VQ 128.

IIUC kthread_create is not limited by or accounted against the current
task, so I'm a little worried that a process can create a lot of
kthreads.

I haven't investigated other kthread_create() users reachable from
userspace applications to see how they bound the number of threads
effectively.

Any thoughts?

> +		if (!worker)
> +			return -ENOMEM;
> +
> +		info->pid = worker->task->pid;
> +	} else {
> +		worker = vhost_worker_find(dev, info->pid);
> +		if (!worker)
> +			return -ENODEV;
> +	}
> +
> +	if (!dev->workers) {
> +		dev->workers = kcalloc(vq->dev->nvqs,
> +				       sizeof(struct vhost_worker *),
> +				       GFP_KERNEL);

Another candidate for GFP_KERNEL_ACCOUNT.

> +		if (!dev->workers) {
> +			vhost_worker_put(worker);
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	vq->worker = worker;
> +
> +	dev->workers[dev->num_workers] = worker;
> +	dev->num_workers++;

Hmm...should we really append to workers[] in the vhost_worker_find()
case?

> +	return 0;
> +}
> +
>  /* Caller must have device mutex */
>  static int vhost_worker_try_create_def(struct vhost_dev *dev)
>  {
> @@ -1680,6 +1759,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>  	struct eventfd_ctx *ctx = NULL;
>  	u32 __user *idxp = argp;
>  	struct vhost_virtqueue *vq;
> +	struct vhost_vring_worker w;
>  	struct vhost_vring_state s;
>  	struct vhost_vring_file f;
>  	u32 idx;
> @@ -1794,6 +1874,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>  		if (copy_to_user(argp, &s, sizeof(s)))
>  			r = -EFAULT;
>  		break;
> +	case VHOST_SET_VRING_WORKER:
> +		if (copy_from_user(&w, argp, sizeof(w))) {
> +			r = -EFAULT;
> +			break;
> +		}
> +		r = vhost_vq_set_worker(vq, &w);
> +		if (!r && copy_to_user(argp, &w, sizeof(w)))
> +			r = -EFAULT;
> +		break;
>  	default:
>  		r = -ENOIOCTLCMD;
>  	}
> @@ -2726,6 +2815,7 @@ EXPORT_SYMBOL_GPL(vhost_set_backend_features);
>  
>  static int __init vhost_init(void)
>  {
> +	hash_init(vhost_workers);
>  	return 0;
>  }
>  
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 0a252dd45101..75b884ad1f17 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -14,6 +14,7 @@
>  #include <linux/atomic.h>
>  #include <linux/vhost_iotlb.h>
>  #include <linux/irqbypass.h>
> +#include <linux/refcount.h>
>  
>  struct vhost_work;
>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
> @@ -28,6 +29,8 @@ struct vhost_work {
>  struct vhost_worker {
>  	struct task_struct	*task;
>  	struct llist_head	work_list;
> +	struct hlist_node	h_node;

h_node is a generic name. If you're willing to use a longer name then
vhost_workers_node would make it clear which hlist this is associated
with.

> +	refcount_t		refcount;
>  	struct vhost_dev	*dev;
>  	int			id;
>  };
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index c998860d7bbc..ce32119cb139 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -70,6 +70,12 @@
>  #define VHOST_VRING_BIG_ENDIAN 1
>  #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
>  #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> +/* Create/bind a vhost worker to a virtqueue. If pid > 0 and matches an existing
> + * vhost_worker thread it will be bound to the vq. If pid is
> + * VHOST_VRING_NEW_WORKER, then a new worker will be created and bound to the
> + * vq.
> + */

Please document when this ioctl must be called (before kick is set up).

> +#define VHOST_SET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x15, struct vhost_vring_worker)
>  
>  /* The following ioctls use eventfd file descriptors to signal and poll
>   * for events. */
> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> index f7f6a3a28977..5113baa8bc3e 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -47,6 +47,18 @@ struct vhost_vring_addr {
>  	__u64 log_guest_addr;
>  };
>  
> +#define VHOST_VRING_NEW_WORKER -1
> +
> +struct vhost_vring_worker {
> +	unsigned int index;
> +	/*
> +	 * The pid of the vhost worker that the vq will be bound to. If
> +	 * pid is VHOST_VRING_NEW_WORKER a new worker will be created and it's

s/it's/its/

> +	 * pid will be returned in pid.
> +	 */
> +	__kernel_pid_t pid;
> +};
> +
>  /* no alignment requirement */
>  struct vhost_iotlb_msg {
>  	__u64 iova;
> -- 
> 2.25.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2021-06-03 14:30 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-25 18:05 vhost: multiple worker support Mike Christie
2021-05-25 18:05 ` [PATCH 1/9] vhost: move worker thread fields to new struct Mike Christie
2021-06-03 10:16   ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 2/9] vhost: move vhost worker creation to kick setup Mike Christie
2021-06-03 10:28   ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 3/9] vhost: modify internal functions to take a vhost_worker Mike Christie
2021-06-03 10:45   ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 4/9] vhost: allow vhost_polls to use different vhost_workers Mike Christie
2021-06-03 13:51   ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 5/9] vhost-scsi: flush IO vqs then send TMF rsp Mike Christie
2021-06-03 13:54   ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 6/9] vhost-scsi: make SCSI cmd completion per vq Mike Christie
2021-06-03 13:57   ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 7/9] vhost: allow userspace to create workers Mike Christie
2021-06-03 14:30   ` Stefan Hajnoczi [this message]
2021-06-05 23:53     ` michael.christie
2021-06-07 15:19       ` Stefan Hajnoczi
2021-06-09 21:03         ` Mike Christie
2021-06-10  8:06           ` Stefan Hajnoczi
2021-06-18  2:49             ` Mike Christie
2021-06-21 13:41               ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 8/9] vhost: add vhost_dev pointer to vhost_work Mike Christie
2021-06-03 14:31   ` Stefan Hajnoczi
2021-05-25 18:06 ` [PATCH 9/9] vhost: support sharing workers across devs Mike Christie
2021-06-03 14:32   ` Stefan Hajnoczi
2021-06-07  2:18     ` Jason Wang
2021-06-03 10:13 ` vhost: multiple worker support Stefan Hajnoczi
2021-06-03 18:45   ` Mike Christie
2021-06-03 14:37 ` Stefan Hajnoczi
2021-06-03 22:16   ` Mike Christie
2021-06-05 22:40     ` michael.christie
2021-06-07 15:23       ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YLjnk5GpFaCCOqCU@stefanha-x1.localdomain \
    --to=stefanha@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=michael.christie@oracle.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=sgarzare@redhat.com \
    --cc=target-devel@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).