Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
From: "Zhang, Qiang" <Qiang.Zhang@windriver.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: "viro@zeniv.linux.org.uk" <viro@zeniv.linux.org.uk>,
	"io-uring@vger.kernel.org" <io-uring@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>
Subject: 回复: Question on io-wq
Date: Fri, 23 Oct 2020 03:55:04 +0000
Message-ID: <BYAPR11MB2632F2892640FCF08997B36AFF1A0@BYAPR11MB2632.namprd11.prod.outlook.com> (raw)
In-Reply-To: <8dd4bc4c-9d8e-fb5a-6931-3e861ad9b4bf@kernel.dk>



________________________________________
发件人: Jens Axboe <axboe@kernel.dk>
发送时间: 2020年10月22日 22:08
收件人: Zhang, Qiang
抄送: viro@zeniv.linux.org.uk; io-uring@vger.kernel.org; linux-kernel@vger.kernel.org; linux-fsdevel@vger.kernel.org
主题: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There is a problem with the 'io_wqe_worker' thread. When an
> 'io_wqe_worker' is created, its affinity is set to the CPUs of its NUMA
> node. Due to CPU hotplug, when the last CPU in the node goes down, the
> 'io_wqe_worker' thread will run anywhere. When a CPU in the node comes
> online again, should we restore the worker's CPU binding?

>Something like the below should help in ensuring affinities are
>always correct - trigger an affinity set for an online CPU event. We
>should not need to do it for offlining. Can you test it?


>diff --git a/fs/io-wq.c b/fs/io-wq.c
>index 4012ff541b7b..3bf029d1170e 100644
>--- a/fs/io-wq.c
>+++ b/fs/io-wq.c
>@@ -19,6 +19,7 @@
 >#include <linux/task_work.h>
 >#include <linux/blk-cgroup.h>
 >#include <linux/audit.h>
>+#include <linux/cpu.h>

 >#include "io-wq.h"
>
>@@ -123,9 +124,13 @@ struct io_wq {
 >       refcount_t refs;
  >      struct completion done;
>
>+       struct hlist_node cpuhp_node;
>+
 >       refcount_t use_refs;
 >};
>
>+static enum cpuhp_state io_wq_online;
>+
 >static bool io_worker_get(struct io_worker *worker)
 >{
   >     return refcount_inc_not_zero(&worker->ref);
>@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 >               return ERR_PTR(-ENOMEM);
  >      }
>
>+       ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+       if (ret) {
>+               kfree(wq->wqes);
>+               kfree(wq);
>+               return ERR_PTR(ret);
>+       }
>+
>        wq->free_work = data->free_work;
>        wq->do_work = data->do_work;
>
>@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 >       ret = PTR_ERR(wq->manager);
 >       complete(&wq->done);
 >err:
>+       cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
  >      for_each_node(node)
 >               kfree(wq->wqes[node]);
 >       kfree(wq->wqes);
>@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
 >{
 >       int node;
>
>+       cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
>+
   >     set_bit(IO_WQ_BIT_EXIT, &wq->state);
  >      if (wq->manager)
 >               kthread_stop(wq->manager);
>@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
 >{
 >      return wq->manager;
 >}
>+
>+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
>+{
>+       struct task_struct *task = worker->task;
>+       unsigned long flags;
>+
           struct rq_flags rf;


>+       raw_spin_lock_irqsave(&task->pi_lock, flags);
>+       do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
>+       task->flags |= PF_NO_SETAFFINITY;
>+       raw_spin_unlock_irqrestore(&task->pi_lock, flags);


>+       return false;
>+}
>+
>+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
>+{
>+       struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
>+       int i;
>+
>+       rcu_read_lock();
>+       for_each_node(i)
>+               io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
>+       rcu_read_unlock();
>+       return 0;
>+}
>+
>+static __init int io_wq_init(void)
>+{
>+       int ret;
>+
>+       ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
>+                                       io_wq_cpu_online, NULL);
>+       if (ret < 0)
>+               return ret;
>+       io_wq_online = ret;
>+       return 0;
>+}
>+subsys_initcall(io_wq_init);
>
>--
>Jens Axboe


  reply index

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-22  9:02 Zhang,Qiang
2020-10-22 14:08 ` Jens Axboe
2020-10-23  3:55   ` Zhang, Qiang [this message]
2020-10-23  3:57     ` 回复: " Zhang, Qiang
     [not found] ` <20201023020514.2230-1-hdanton@sina.com>
2020-10-23  2:24   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=BYAPR11MB2632F2892640FCF08997B36AFF1A0@BYAPR11MB2632.namprd11.prod.outlook.com \
    --to=qiang.zhang@windriver.com \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git