linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mike Christie <michael.christie@oracle.com>
To: hdanton@sina.com, hch@infradead.org, stefanha@redhat.com,
	jasowang@redhat.com, mst@redhat.com, sgarzare@redhat.com,
	virtualization@lists.linux-foundation.org,
	christian.brauner@ubuntu.com, axboe@kernel.dk,
	linux-kernel@vger.kernel.org
Cc: Mike Christie <michael.christie@oracle.com>
Subject: [PATCH V2 9/9] vhost: use kernel_worker to check RLIMITs and inherit v2 cgroups
Date: Tue, 21 Sep 2021 16:52:18 -0500	[thread overview]
Message-ID: <20210921215218.89844-10-michael.christie@oracle.com> (raw)
In-Reply-To: <20210921215218.89844-1-michael.christie@oracle.com>

For vhost workers we use the kthread API which inherit's its values from
and checks against the kthreadd thread. This results in cgroups v2 not
working and the wrong RLIMITs being checked. This patch has us use the
kernel_copy_process function which will inherit its values/checks from the
thread that owns the device.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/vhost/vhost.c | 68 ++++++++++++++++---------------------------
 drivers/vhost/vhost.h |  7 ++++-
 2 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c9a1f706989c..7a5142dcde1b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -22,7 +22,6 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-#include <linux/cgroup.h>
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <linux/sched/mm.h>
@@ -344,17 +343,14 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 static int vhost_worker(void *data)
 {
 	struct vhost_worker *worker = data;
-	struct vhost_dev *dev = worker->dev;
 	struct vhost_work *work, *work_next;
 	struct llist_node *node;
 
-	kthread_use_mm(dev->mm);
-
 	for (;;) {
 		/* mb paired w/ kthread_stop */
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (kthread_should_stop()) {
+		if (test_bit(VHOST_WORKER_FLAG_STOP, &worker->flags)) {
 			__set_current_state(TASK_RUNNING);
 			break;
 		}
@@ -376,8 +372,9 @@ static int vhost_worker(void *data)
 				schedule();
 		}
 	}
-	kthread_unuse_mm(dev->mm);
-	return 0;
+
+	complete(worker->exit_done);
+	do_exit(0);
 }
 
 static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
@@ -517,31 +514,6 @@ long vhost_dev_check_owner(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
 
-struct vhost_attach_cgroups_struct {
-	struct vhost_work work;
-	struct task_struct *owner;
-	int ret;
-};
-
-static void vhost_attach_cgroups_work(struct vhost_work *work)
-{
-	struct vhost_attach_cgroups_struct *s;
-
-	s = container_of(work, struct vhost_attach_cgroups_struct, work);
-	s->ret = cgroup_attach_task_all(s->owner, current);
-}
-
-static int vhost_attach_cgroups(struct vhost_dev *dev)
-{
-	struct vhost_attach_cgroups_struct attach;
-
-	attach.owner = current;
-	vhost_work_init(&attach.work, vhost_attach_cgroups_work);
-	vhost_work_queue(dev, &attach.work);
-	vhost_work_dev_flush(dev);
-	return attach.ret;
-}
-
 /* Caller should have device mutex */
 bool vhost_dev_has_owner(struct vhost_dev *dev)
 {
@@ -579,6 +551,16 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 	dev->mm = NULL;
 }
 
+static void vhost_worker_stop(struct vhost_worker *worker)
+{
+	DECLARE_COMPLETION_ONSTACK(exit_done);
+
+	worker->exit_done = &exit_done;
+	set_bit(VHOST_WORKER_FLAG_STOP, &worker->flags);
+	wake_up_process(worker->task);
+	wait_for_completion(worker->exit_done);
+}
+
 static void vhost_worker_free(struct vhost_dev *dev)
 {
 	struct vhost_worker *worker = dev->worker;
@@ -588,7 +570,7 @@ static void vhost_worker_free(struct vhost_dev *dev)
 
 	dev->worker = NULL;
 	WARN_ON(!llist_empty(&worker->work_list));
-	kthread_stop(worker->task);
+	vhost_worker_stop(worker);
 	kfree(worker);
 }
 
@@ -603,27 +585,27 @@ static int vhost_worker_create(struct vhost_dev *dev)
 		return -ENOMEM;
 
 	dev->worker = worker;
-	worker->dev = dev;
 	worker->kcov_handle = kcov_common_handle();
 	init_llist_head(&worker->work_list);
 
-	task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
+	/*
+	 * vhost used to use the kthread API which ignores all signals by
+	 * default and the drivers expect this behavior. So we do not want to
+	 * ineherit the parent's signal handlers and set our worker to ignore
+	 * everything below.
+	 */
+	task = kernel_worker(vhost_worker, worker, NUMA_NO_NODE,
+			     CLONE_FS | CLONE_CLEAR_SIGHAND,
+			     KERN_WORKER_NO_FILES | KERN_WORKER_NO_SIGS);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
 		goto free_worker;
 	}
 
 	worker->task = task;
-	wake_up_process(task); /* avoid contributing to loadavg */
-
-	ret = vhost_attach_cgroups(dev);
-	if (ret)
-		goto stop_worker;
-
+	kernel_worker_start(task, "vhost-%d", current->pid);
 	return 0;
 
-stop_worker:
-	kthread_stop(worker->task);
 free_worker:
 	kfree(worker);
 	dev->worker = NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 102ce25e4e13..09748694cb66 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -25,11 +25,16 @@ struct vhost_work {
 	unsigned long		flags;
 };
 
+enum {
+	VHOST_WORKER_FLAG_STOP,
+};
+
 struct vhost_worker {
 	struct task_struct	*task;
+	struct completion	*exit_done;
 	struct llist_head	work_list;
-	struct vhost_dev	*dev;
 	u64			kcov_handle;
+	unsigned long		flags;
 };
 
 /* Poll a file (eventfd or socket) */
-- 
2.25.1


  parent reply	other threads:[~2021-09-21 21:53 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-21 21:52 [PATCH V2 0/9] Use copy_process/create_io_thread in vhost layer Mike Christie
2021-09-21 21:52 ` [PATCH V2 1/9] fork: Make IO worker options flag based Mike Christie
2021-09-22 12:44   ` Christian Brauner
2021-09-21 21:52 ` [PATCH V2 2/9] fork: pass worker_flags to copy_thread Mike Christie
2021-09-22 12:42   ` Christian Brauner
2021-09-22 14:18   ` Geert Uytterhoeven
2021-09-21 21:52 ` [PATCH V2 3/9] fork: move PF_IO_WORKER's kernel frame setup to new flag Mike Christie
2021-09-22 12:32   ` Geert Uytterhoeven
2021-09-22 12:39     ` Christian Brauner
2021-09-22 12:45     ` Christian Brauner
2021-09-21 21:52 ` [PATCH V2 4/9] fork: add option to not clone or dup files Mike Christie
2021-09-22 12:46   ` Christian Brauner
2021-09-21 21:52 ` [PATCH V2 5/9] fork: add helper to clone a process Mike Christie
2021-09-22 12:50   ` Christian Brauner
2021-09-21 21:52 ` [PATCH V2 6/9] io_uring: switch to kernel_worker Mike Christie
2021-09-22 12:53   ` Christian Brauner
2021-09-21 21:52 ` [PATCH V2 7/9] fork: Add worker flag to ignore signals Mike Christie
2021-09-22 12:52   ` Christian Brauner
2021-09-21 21:52 ` [PATCH V2 8/9] vhost: move worker thread fields to new struct Mike Christie
2021-10-04 13:12   ` Michael S. Tsirkin
2021-09-21 21:52 ` Mike Christie [this message]
2021-10-04 13:12   ` [PATCH V2 9/9] vhost: use kernel_worker to check RLIMITs and inherit v2 cgroups Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210921215218.89844-10-michael.christie@oracle.com \
    --to=michael.christie@oracle.com \
    --cc=axboe@kernel.dk \
    --cc=christian.brauner@ubuntu.com \
    --cc=hch@infradead.org \
    --cc=hdanton@sina.com \
    --cc=jasowang@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=sgarzare@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).