From f11913b472cdc46082466dbb6cd56f105e5dcdd7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 4 Mar 2021 12:39:36 -0700
Subject: [PATCH 2/2] io_uring: move to using create_io_thread()

This allows us to do task creation and setup without needing to use
completions to try and synchronize with the starting thread. Get rid of
the old io_wq_fork_thread() wrapper, and the 'wq' and 'worker' startup
completion events - we can now do setup before the task is running.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c    | 69 ++++++++++++++-------------------------------------
 fs/io-wq.h    |  2 --
 fs/io_uring.c | 38 +++++++++++++++++---------------
 3 files changed, 37 insertions(+), 72 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 19f18389ead2..cee41b81747c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -54,7 +54,6 @@ struct io_worker {
 	spinlock_t lock;
 
 	struct completion ref_done;
-	struct completion started;
 
 	struct rcu_head rcu;
 };
@@ -116,7 +115,6 @@ struct io_wq {
 	struct io_wq_hash *hash;
 
 	refcount_t refs;
-	struct completion started;
 	struct completion exited;
 
 	atomic_t worker_refs;
@@ -273,14 +271,6 @@ static void io_wqe_dec_running(struct io_worker *worker)
 		io_wqe_wake_worker(wqe, acct);
 }
 
-static void io_worker_start(struct io_worker *worker)
-{
-	current->flags |= PF_NOFREEZE;
-	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
-	io_wqe_inc_running(worker);
-	complete(&worker->started);
-}
-
 /*
  * Worker will start processing some work. Move it to the busy list, if
  * it's currently on the freelist
@@ -490,8 +480,6 @@ static int io_wqe_worker(void *data)
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
 
-	io_worker_start(worker);
-
 	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 loop:
@@ -576,12 +564,6 @@ static int task_thread(void *data, int index)
 	sprintf(buf, "iou-wrk-%d", wq->task_pid);
 	set_task_comm(current, buf);
 
-	current->pf_io_worker = worker;
-	worker->task = current;
-
-	set_cpus_allowed_ptr(current, cpumask_of_node(wqe->node));
-	current->flags |= PF_NO_SETAFFINITY;
-
 	raw_spin_lock_irq(&wqe->lock);
 	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
 	list_add_tail_rcu(&worker->all_list, &wqe->all_list);
@@ -607,25 +589,10 @@ static int task_thread_unbound(void *data)
 	return task_thread(data, IO_WQ_ACCT_UNBOUND);
 }
 
-pid_t io_wq_fork_thread(int (*fn)(void *), void *arg)
-{
-	unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
-				CLONE_IO|SIGCHLD;
-	struct kernel_clone_args args = {
-		.flags		= ((lower_32_bits(flags) | CLONE_VM |
-				    CLONE_UNTRACED) & ~CSIGNAL),
-		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
-		.stack		= (unsigned long)fn,
-		.stack_size	= (unsigned long)arg,
-	};
-
-	return kernel_clone(&args);
-}
-
 static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 {
 	struct io_worker *worker;
-	pid_t pid;
+	struct task_struct *tsk;
 
 	__set_current_state(TASK_RUNNING);
 
@@ -638,21 +605,26 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 	worker->wqe = wqe;
 	spin_lock_init(&worker->lock);
 	init_completion(&worker->ref_done);
-	init_completion(&worker->started);
 
 	atomic_inc(&wq->worker_refs);
 
 	if (index == IO_WQ_ACCT_BOUND)
-		pid = io_wq_fork_thread(task_thread_bound, worker);
+		tsk = create_io_thread(task_thread_bound, worker, wqe->node);
 	else
-		pid = io_wq_fork_thread(task_thread_unbound, worker);
-	if (pid < 0) {
+		tsk = create_io_thread(task_thread_unbound, worker, wqe->node);
+	if (IS_ERR(tsk)) {
 		if (atomic_dec_and_test(&wq->worker_refs))
 			complete(&wq->worker_done);
 		kfree(worker);
 		return false;
 	}
-	wait_for_completion(&worker->started);
+	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+	io_wqe_inc_running(worker);
+	tsk->pf_io_worker = worker;
+	worker->task = tsk;
+	set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+	tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY;
+	wake_up_new_task(tsk);
 	return true;
 }
 
@@ -696,6 +668,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
+	set_notify_signal(worker->task);
 	wake_up_process(worker->task);
 	return false;
 }
@@ -752,10 +725,6 @@ static int io_wq_manager(void *data)
 	sprintf(buf, "iou-mgr-%d", wq->task_pid);
 	set_task_comm(current, buf);
 
-	current->flags |= PF_IO_WORKER;
-	wq->manager = get_task_struct(current);
-
-	complete(&wq->started);
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -815,21 +784,20 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
 
 static int io_wq_fork_manager(struct io_wq *wq)
 {
-	int ret;
+	struct task_struct *tsk;
 
 	if (wq->manager)
 		return 0;
 
 	reinit_completion(&wq->worker_done);
-	current->flags |= PF_IO_WORKER;
-	ret = io_wq_fork_thread(io_wq_manager, wq);
-	current->flags &= ~PF_IO_WORKER;
-	if (ret >= 0) {
-		wait_for_completion(&wq->started);
+	tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
+	if (!IS_ERR(tsk)) {
+		wq->manager = get_task_struct(tsk);
+		wake_up_new_task(tsk);
 		return 0;
 	}
 
-	return ret;
+	return PTR_ERR(tsk);
 }
 
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
@@ -1062,7 +1030,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 	}
 
 	wq->task_pid = current->pid;
-	init_completion(&wq->started);
 	init_completion(&wq->exited);
 	refcount_set(&wq->refs, 1);
 
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 42f0be64a84d..5fbf7997149e 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -119,8 +119,6 @@ void io_wq_put_and_exit(struct io_wq *wq);
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
-pid_t io_wq_fork_thread(int (*fn)(void *), void *arg);
-
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e55369555e5c..d885fbd53bbc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6668,7 +6668,6 @@ static int io_sq_thread(void *data)
 
 	sprintf(buf, "iou-sqp-%d", sqd->task_pid);
 	set_task_comm(current, buf);
-	sqd->thread = current;
 	current->pf_io_worker = NULL;
 
 	if (sqd->sq_cpu != -1)
@@ -6677,8 +6676,6 @@ static int io_sq_thread(void *data)
 		set_cpus_allowed_ptr(current, cpu_online_mask);
 	current->flags |= PF_NO_SETAFFINITY;
 
-	complete(&sqd->completion);
-
 	wait_for_completion(&sqd->startup);
 
 	while (!io_sq_thread_should_stop(sqd)) {
@@ -7818,21 +7815,22 @@ void __io_uring_free(struct task_struct *tsk)
 
 static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
 {
+	struct task_struct *tsk;
 	int ret;
 
 	clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 	reinit_completion(&sqd->completion);
 	ctx->sqo_exec = 0;
 	sqd->task_pid = current->pid;
-	current->flags |= PF_IO_WORKER;
-	ret = io_wq_fork_thread(io_sq_thread, sqd);
-	current->flags &= ~PF_IO_WORKER;
-	if (ret < 0) {
-		sqd->thread = NULL;
-		return ret;
-	}
-	wait_for_completion(&sqd->completion);
-	return io_uring_alloc_task_context(sqd->thread, ctx);
+	tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+	if (IS_ERR(tsk))
+		return PTR_ERR(tsk);
+	ret = io_uring_alloc_task_context(tsk, ctx);
+	if (ret)
+		set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+	sqd->thread = tsk;
+	wake_up_new_task(tsk);
+	return ret;
 }
 
 static int io_sq_offload_create(struct io_ring_ctx *ctx,
@@ -7855,6 +7853,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		fdput(f);
 	}
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		struct task_struct *tsk;
 		struct io_sq_data *sqd;
 
 		ret = -EPERM;
@@ -7896,15 +7895,16 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		}
 
 		sqd->task_pid = current->pid;
-		current->flags |= PF_IO_WORKER;
-		ret = io_wq_fork_thread(io_sq_thread, sqd);
-		current->flags &= ~PF_IO_WORKER;
-		if (ret < 0) {
-			sqd->thread = NULL;
+		tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+		if (IS_ERR(tsk)) {
+			ret = PTR_ERR(tsk);
 			goto err;
 		}
-		wait_for_completion(&sqd->completion);
-		ret = io_uring_alloc_task_context(sqd->thread, ctx);
+		ret = io_uring_alloc_task_context(tsk, ctx);
+		if (ret)
+			set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+		sqd->thread = tsk;
+		wake_up_new_task(tsk);
 		if (ret)
 			goto err;
 	} else if (p->flags & IORING_SETUP_SQ_AFF) {
-- 
2.30.1
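
A note on the pattern for readers coming to this series cold:
create_io_thread(), added in patch 1/2, returns the new task in a
stopped state, and its thread function does not run until
wake_up_new_task() is called. That ordering is what lets this patch
delete the startup completions: the parent can initialize the
task_struct race-free before the thread ever executes. Below is a
minimal sketch of the idiom in kernel context; the wrapper name
start_io_thread() and its parameters are illustrative only, not
something this patch adds:

	/*
	 * Old model: io_wq_fork_thread() cloned a child that was
	 * runnable immediately, so the child had to signal completion
	 * of its own setup back to the parent via a completion.
	 *
	 * New model: create the task stopped, let the parent do all
	 * of the setup, then make it runnable.
	 */
	static int start_io_thread(int (*fn)(void *), void *data, int node)
	{
		struct task_struct *tsk;

		tsk = create_io_thread(fn, data, node);
		if (IS_ERR(tsk))
			return PTR_ERR(tsk);

		/* tsk cannot be running yet; unsynchronized setup is safe */
		tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY;

		wake_up_new_task(tsk);	/* fn(data) starts after this */
		return 0;
	}

This is also why the PF_IO_WORKER toggling on 'current' around the old
clone call goes away: create_io_thread() marks the new task itself as
an io thread, so there is no longer any window in which the child runs
before the parent has finished setting it up.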