From: Stefan Hajnoczi <stefanha@redhat.com>
To: Mike Christie <michael.christie@oracle.com>
Cc: target-devel@vger.kernel.org, linux-scsi@vger.kernel.org,
pbonzini@redhat.com, jasowang@redhat.com, mst@redhat.com,
sgarzare@redhat.com, virtualization@lists.linux-foundation.org
Subject: Re: [PATCH 7/9] vhost: allow userspace to create workers
Date: Thu, 3 Jun 2021 15:30:43 +0100 [thread overview]
Message-ID: <YLjnk5GpFaCCOqCU@stefanha-x1.localdomain> (raw)
In-Reply-To: <20210525180600.6349-8-michael.christie@oracle.com>
[-- Attachment #1: Type: text/plain, Size: 8691 bytes --]
On Tue, May 25, 2021 at 01:05:58PM -0500, Mike Christie wrote:
> This patch allows userspace to create workers and bind them to vqs, so you
> can have N workers per dev and also share N workers with M vqs. The next
> patch will allow sharing across devices.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
> drivers/vhost/vhost.c | 94 +++++++++++++++++++++++++++++++-
> drivers/vhost/vhost.h | 3 +
> include/uapi/linux/vhost.h | 6 ++
> include/uapi/linux/vhost_types.h | 12 ++++
> 4 files changed, 113 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 345ade0af133..981e9bac7a31 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -30,6 +30,7 @@
> #include <linux/interval_tree_generic.h>
> #include <linux/nospec.h>
> #include <linux/kcov.h>
> +#include <linux/hashtable.h>
>
> #include "vhost.h"
>
> @@ -42,6 +43,9 @@ module_param(max_iotlb_entries, int, 0444);
> MODULE_PARM_DESC(max_iotlb_entries,
> "Maximum number of iotlb entries. (default: 2048)");
>
> +static DEFINE_HASHTABLE(vhost_workers, 5);
> +static DEFINE_SPINLOCK(vhost_workers_lock);
> +
> enum {
> VHOST_MEMORY_F_LOG = 0x1,
> };
> @@ -617,8 +621,17 @@ static void vhost_detach_mm(struct vhost_dev *dev)
> dev->mm = NULL;
> }
>
> -static void vhost_worker_free(struct vhost_worker *worker)
> +static void vhost_worker_put(struct vhost_worker *worker)
> {
> + spin_lock(&vhost_workers_lock);
> + if (!refcount_dec_and_test(&worker->refcount)) {
> + spin_unlock(&vhost_workers_lock);
> + return;
> + }
> +
> + hash_del(&worker->h_node);
> + spin_unlock(&vhost_workers_lock);
> +
> WARN_ON(!llist_empty(&worker->work_list));
> kthread_stop(worker->task);
> kfree(worker);
> @@ -632,7 +645,7 @@ static void vhost_workers_free(struct vhost_dev *dev)
> return;
>
> for (i = 0; i < dev->num_workers; i++)
> - vhost_worker_free(dev->workers[i]);
> + vhost_worker_put(dev->workers[i]);
>
> kfree(dev->workers);
> dev->num_workers = 0;
> @@ -652,6 +665,8 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> worker->id = dev->num_workers;
> worker->dev = dev;
> init_llist_head(&worker->work_list);
> + INIT_HLIST_NODE(&worker->h_node);
> + refcount_set(&worker->refcount, 1);
>
> task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
> if (IS_ERR(task))
> @@ -664,6 +679,9 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> if (ret)
> goto stop_worker;
>
> + spin_lock(&vhost_workers_lock);
> + hash_add(vhost_workers, &worker->h_node, worker->task->pid);
> + spin_unlock(&vhost_workers_lock);
> return worker;
>
> stop_worker:
> @@ -673,6 +691,67 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> return NULL;
> }
>
> +static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
> +{
> + struct vhost_worker *worker, *found_worker = NULL;
> +
> + spin_lock(&vhost_workers_lock);
> + hash_for_each_possible(vhost_workers, worker, h_node, pid) {
> + if (worker->task->pid == pid) {
> + /* tmp - next patch allows sharing across devs */
> + if (worker->dev != dev)
> + break;
> +
> + found_worker = worker;
> + refcount_inc(&worker->refcount);
> + break;
> + }
> + }
> + spin_unlock(&vhost_workers_lock);
> + return found_worker;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_vq_set_worker(struct vhost_virtqueue *vq,
> + struct vhost_vring_worker *info)
> +{
> + struct vhost_dev *dev = vq->dev;
> + struct vhost_worker *worker;
> +
> + if (vq->worker) {
> + /* TODO - support changing while works are running */
> + return -EBUSY;
> + }
> +
> + if (info->pid == VHOST_VRING_NEW_WORKER) {
> + worker = vhost_worker_create(dev);
Is the maximum number of kthreads created limited only by the nvqs value
passed to vhost_dev_init()? For example, VHOST_SCSI_MAX_VQ is 128.
IIUC, kthread_create() is not limited by or accounted against the current
task, so I'm a little worried that a process can create a lot of
kthreads.
I haven't investigated other kthread_create() users reachable from
userspace applications to see how they bound the number of threads
effectively.
Any thoughts?
> + if (!worker)
> + return -ENOMEM;
> +
> + info->pid = worker->task->pid;
> + } else {
> + worker = vhost_worker_find(dev, info->pid);
> + if (!worker)
> + return -ENODEV;
> + }
> +
> + if (!dev->workers) {
> + dev->workers = kcalloc(vq->dev->nvqs,
> + sizeof(struct vhost_worker *),
> + GFP_KERNEL);
Another candidate for GFP_KERNEL_ACCOUNT.
> + if (!dev->workers) {
> + vhost_worker_put(worker);
> + return -ENOMEM;
> + }
> + }
> +
> + vq->worker = worker;
> +
> + dev->workers[dev->num_workers] = worker;
> + dev->num_workers++;
Hmm...should we really append to workers[] in the vhost_worker_find()
case?
> + return 0;
> +}
> +
> /* Caller must have device mutex */
> static int vhost_worker_try_create_def(struct vhost_dev *dev)
> {
> @@ -1680,6 +1759,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> struct eventfd_ctx *ctx = NULL;
> u32 __user *idxp = argp;
> struct vhost_virtqueue *vq;
> + struct vhost_vring_worker w;
> struct vhost_vring_state s;
> struct vhost_vring_file f;
> u32 idx;
> @@ -1794,6 +1874,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
> if (copy_to_user(argp, &s, sizeof(s)))
> r = -EFAULT;
> break;
> + case VHOST_SET_VRING_WORKER:
> + if (copy_from_user(&w, argp, sizeof(w))) {
> + r = -EFAULT;
> + break;
> + }
> + r = vhost_vq_set_worker(vq, &w);
> + if (!r && copy_to_user(argp, &w, sizeof(w)))
> + r = -EFAULT;
> + break;
> default:
> r = -ENOIOCTLCMD;
> }
> @@ -2726,6 +2815,7 @@ EXPORT_SYMBOL_GPL(vhost_set_backend_features);
>
> static int __init vhost_init(void)
> {
> + hash_init(vhost_workers);
> return 0;
> }
>
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 0a252dd45101..75b884ad1f17 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -14,6 +14,7 @@
> #include <linux/atomic.h>
> #include <linux/vhost_iotlb.h>
> #include <linux/irqbypass.h>
> +#include <linux/refcount.h>
>
> struct vhost_work;
> typedef void (*vhost_work_fn_t)(struct vhost_work *work);
> @@ -28,6 +29,8 @@ struct vhost_work {
> struct vhost_worker {
> struct task_struct *task;
> struct llist_head work_list;
> + struct hlist_node h_node;
h_node is a generic name. If you're willing to use a longer name, then
vhost_workers_node would make it clear which hlist this field is
associated with.
> + refcount_t refcount;
> struct vhost_dev *dev;
> int id;
> };
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index c998860d7bbc..ce32119cb139 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -70,6 +70,12 @@
> #define VHOST_VRING_BIG_ENDIAN 1
> #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
> #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
> +/* Create/bind a vhost worker to a virtqueue. If pid > 0 and matches an existing
> + * vhost_worker thread it will be bound to the vq. If pid is
> + * VHOST_VRING_NEW_WORKER, then a new worker will be created and bound to the
> + * vq.
> + */
Please document when this ioctl must be called (before kick is set up).
> +#define VHOST_SET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x15, struct vhost_vring_worker)
>
> /* The following ioctls use eventfd file descriptors to signal and poll
> * for events. */
> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> index f7f6a3a28977..5113baa8bc3e 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -47,6 +47,18 @@ struct vhost_vring_addr {
> __u64 log_guest_addr;
> };
>
> +#define VHOST_VRING_NEW_WORKER -1
> +
> +struct vhost_vring_worker {
> + unsigned int index;
> + /*
> + * The pid of the vhost worker that the vq will be bound to. If
> + * pid is VHOST_VRING_NEW_WORKER a new worker will be created and it's
s/it's/its/
> + * pid will be returned in pid.
> + */
> + __kernel_pid_t pid;
> +};
> +
> /* no alignment requirement */
> struct vhost_iotlb_msg {
> __u64 iova;
> --
> 2.25.1
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
next prev parent reply other threads:[~2021-06-03 14:30 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-25 18:05 vhost: multiple worker support Mike Christie
2021-05-25 18:05 ` [PATCH 1/9] vhost: move worker thread fields to new struct Mike Christie
2021-06-03 10:16 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 2/9] vhost: move vhost worker creation to kick setup Mike Christie
2021-06-03 10:28 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 3/9] vhost: modify internal functions to take a vhost_worker Mike Christie
2021-06-03 10:45 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 4/9] vhost: allow vhost_polls to use different vhost_workers Mike Christie
2021-06-03 13:51 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 5/9] vhost-scsi: flush IO vqs then send TMF rsp Mike Christie
2021-06-03 13:54 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 6/9] vhost-scsi: make SCSI cmd completion per vq Mike Christie
2021-06-03 13:57 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 7/9] vhost: allow userspace to create workers Mike Christie
2021-06-03 14:30 ` Stefan Hajnoczi [this message]
2021-06-05 23:53 ` michael.christie
2021-06-07 15:19 ` Stefan Hajnoczi
2021-06-09 21:03 ` Mike Christie
2021-06-10 8:06 ` Stefan Hajnoczi
2021-06-18 2:49 ` Mike Christie
2021-06-21 13:41 ` Stefan Hajnoczi
2021-05-25 18:05 ` [PATCH 8/9] vhost: add vhost_dev pointer to vhost_work Mike Christie
2021-06-03 14:31 ` Stefan Hajnoczi
2021-05-25 18:06 ` [PATCH 9/9] vhost: support sharing workers across devs Mike Christie
2021-06-03 14:32 ` Stefan Hajnoczi
2021-06-07 2:18 ` Jason Wang
2021-06-03 10:13 ` vhost: multiple worker support Stefan Hajnoczi
2021-06-03 18:45 ` Mike Christie
2021-06-03 14:37 ` Stefan Hajnoczi
2021-06-03 22:16 ` Mike Christie
2021-06-05 22:40 ` michael.christie
2021-06-07 15:23 ` Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YLjnk5GpFaCCOqCU@stefanha-x1.localdomain \
--to=stefanha@redhat.com \
--cc=jasowang@redhat.com \
--cc=linux-scsi@vger.kernel.org \
--cc=michael.christie@oracle.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=sgarzare@redhat.com \
--cc=target-devel@vger.kernel.org \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).