From: Peter Zijlstra <peterz@infradead.org>
To: Juri Lelli <juri.lelli@redhat.com>
Cc: mingo@redhat.com, rostedt@goodmis.org, tj@kernel.org,
linux-kernel@vger.kernel.org, luca.abeni@santannapisa.it,
claudio@evidence.eu.com, tommaso.cucinotta@santannapisa.it,
bristot@redhat.com, mathieu.poirier@linaro.org,
lizefan@huawei.com, cgroups@vger.kernel.org,
Prateek Sood <prsood@codeaurora.org>
Subject: Re: [PATCH v8 6/8] cgroup/cpuset: Change cpuset_rwsem and hotplug lock order
Date: Fri, 28 Jun 2019 15:03:08 +0200 [thread overview]
Message-ID: <20190628130308.GU3419@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20190628080618.522-7-juri.lelli@redhat.com>
On Fri, Jun 28, 2019 at 10:06:16AM +0200, Juri Lelli wrote:
> cpuset_rwsem is going to be acquired from sched_setscheduler() with a
> following patch. There are however paths (e.g., spawn_ksoftirqd) in
> which sched_setscheduler() is eventually called while holding hotplug lock;
> this creates a dependency between hotplug lock (to be always acquired
> first) and cpuset_rwsem (to be always acquired after hotplug lock).
>
> Fix paths which currently take the two locks in the wrong order (after
> a following patch is applied).
> Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
This all reminds me of this:
https://lkml.kernel.org/r/1510755615-25906-1-git-send-email-prsood@codeaurora.org
Which sadly got reverted again. If we do this now (I've always been a
proponent), then we can make that rebuild synchronous again, which
should also help here IIRC.
> ---
> include/linux/cpuset.h | 8 ++++----
> kernel/cgroup/cpuset.c | 22 +++++++++++++++++-----
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
> index 934633a05d20..7f1478c26a33 100644
> --- a/include/linux/cpuset.h
> +++ b/include/linux/cpuset.h
> @@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
>
> static inline void cpuset_inc(void)
> {
> - static_branch_inc(&cpusets_pre_enable_key);
> - static_branch_inc(&cpusets_enabled_key);
> + static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
> + static_branch_inc_cpuslocked(&cpusets_enabled_key);
> }
>
> static inline void cpuset_dec(void)
> {
> - static_branch_dec(&cpusets_enabled_key);
> - static_branch_dec(&cpusets_pre_enable_key);
> + static_branch_dec_cpuslocked(&cpusets_enabled_key);
> + static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
> }
>
> extern int cpuset_init(void);
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index a7c0c8d8f132..d92b351f89e3 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1026,8 +1026,8 @@ static void rebuild_sched_domains_locked(void)
> cpumask_var_t *doms;
> int ndoms;
>
> + lockdep_assert_cpus_held();
> percpu_rwsem_assert_held(&cpuset_rwsem);
> - get_online_cpus();
>
> /*
> * We have raced with CPU hotplug. Don't do anything to avoid
> @@ -1036,19 +1036,17 @@ static void rebuild_sched_domains_locked(void)
> */
> if (!top_cpuset.nr_subparts_cpus &&
> !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
> - goto out;
> + return;
>
> if (top_cpuset.nr_subparts_cpus &&
> !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask))
> - goto out;
> + return;
>
> /* Generate domain masks and attrs */
> ndoms = generate_sched_domains(&doms, &attr);
>
> /* Have scheduler rebuild the domains */
> partition_and_rebuild_sched_domains(ndoms, doms, attr);
> -out:
> - put_online_cpus();
> }
> #else /* !CONFIG_SMP */
> static void rebuild_sched_domains_locked(void)
> @@ -1058,9 +1056,11 @@ static void rebuild_sched_domains_locked(void)
>
> void rebuild_sched_domains(void)
> {
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> rebuild_sched_domains_locked();
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> }
>
> /**
> @@ -2298,6 +2298,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
> cpuset_filetype_t type = cft->private;
> int retval = 0;
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs)) {
> retval = -ENODEV;
> @@ -2335,6 +2336,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
> }
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> return retval;
> }
>
> @@ -2345,6 +2347,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
> cpuset_filetype_t type = cft->private;
> int retval = -ENODEV;
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs))
> goto out_unlock;
> @@ -2359,6 +2362,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
> }
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> return retval;
> }
>
> @@ -2397,6 +2401,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
> kernfs_break_active_protection(of->kn);
> flush_work(&cpuset_hotplug_work);
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs))
> goto out_unlock;
> @@ -2422,6 +2427,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
> free_cpuset(trialcs);
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> kernfs_unbreak_active_protection(of->kn);
> css_put(&cs->css);
> flush_workqueue(cpuset_migrate_mm_wq);
> @@ -2552,6 +2558,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
> return -EINVAL;
>
> css_get(&cs->css);
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs))
> goto out_unlock;
> @@ -2559,6 +2566,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
> retval = update_prstate(cs, val);
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> css_put(&cs->css);
> return retval ?: nbytes;
> }
> @@ -2764,6 +2772,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
> if (!parent)
> return 0;
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
>
> set_bit(CS_ONLINE, &cs->flags);
> @@ -2816,6 +2825,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
> spin_unlock_irq(&callback_lock);
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> return 0;
> }
>
> @@ -2834,6 +2844,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
> {
> struct cpuset *cs = css_cs(css);
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
>
> if (is_partition_root(cs))
> @@ -2854,6 +2865,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
> clear_bit(CS_ONLINE, &cs->flags);
>
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> }
>
> static void cpuset_css_free(struct cgroup_subsys_state *css)
> --
> 2.17.2
>
next prev parent reply other threads:[~2019-06-28 13:03 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-28 8:06 [PATCH v8 0/8] sched/deadline: fix cpusets bandwidth accounting Juri Lelli
2019-06-28 8:06 ` [PATCH v8 1/8] sched/topology: Adding function partition_sched_domains_locked() Juri Lelli
2019-06-28 8:06 ` [PATCH v8 2/8] sched/core: Streamlining calls to task_rq_unlock() Juri Lelli
2019-06-28 8:06 ` [PATCH v8 3/8] cpuset: Rebuild root domain deadline accounting information Juri Lelli
2019-06-28 8:06 ` [PATCH v8 4/8] sched/deadline: Fix bandwidth accounting at all levels after offline migration Juri Lelli
2019-06-28 8:06 ` [PATCH v8 5/8] cgroup/cpuset: convert cpuset_mutex to percpu_rwsem Juri Lelli
2019-06-28 12:45 ` Peter Zijlstra
2019-06-28 14:31 ` Juri Lelli
2019-06-28 8:06 ` [PATCH v8 6/8] cgroup/cpuset: Change cpuset_rwsem and hotplug lock order Juri Lelli
2019-06-28 13:03 ` Peter Zijlstra [this message]
2019-07-01 6:52 ` Juri Lelli
2019-07-01 8:27 ` Peter Zijlstra
2019-07-01 14:51 ` Tejun Heo
2019-07-04 8:49 ` Juri Lelli
2019-07-12 14:04 ` Juri Lelli
2019-07-16 15:36 ` Tejun Heo
2019-06-28 8:06 ` [PATCH v8 7/8] sched/core: Prevent race condition between cpuset and __sched_setscheduler() Juri Lelli
2019-07-01 19:11 ` Peter Zijlstra
2019-07-02 7:01 ` Juri Lelli
2019-06-28 8:06 ` [PATCH v8 8/8] rcu/tree: Setschedule gp ktread to SCHED_FIFO outside of atomic region Juri Lelli
2019-07-01 19:13 ` Peter Zijlstra
2019-07-02 7:01 ` Juri Lelli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190628130308.GU3419@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=bristot@redhat.com \
--cc=cgroups@vger.kernel.org \
--cc=claudio@evidence.eu.com \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lizefan@huawei.com \
--cc=luca.abeni@santannapisa.it \
--cc=mathieu.poirier@linaro.org \
--cc=mingo@redhat.com \
--cc=prsood@codeaurora.org \
--cc=rostedt@goodmis.org \
--cc=tj@kernel.org \
--cc=tommaso.cucinotta@santannapisa.it \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).