All of lore.kernel.org
 help / color / mirror / Atom feed
From: Juri Lelli <juri.lelli@redhat.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>, Qais Yousef <qyousef@layalina.io>,
	Waiman Long <longman@redhat.com>, Tejun Heo <tj@kernel.org>,
	Zefan Li <lizefan.x@bytedance.com>,
	Johannes Weiner <hannes@cmpxchg.org>, Hao Luo <haoluo@google.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	linux-kernel@vger.kernel.org, luca.abeni@santannapisa.it,
	claudio@evidence.eu.com, tommaso.cucinotta@santannapisa.it,
	bristot@redhat.com, mathieu.poirier@linaro.org,
	cgroups@vger.kernel.org,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Wei Wang <wvw@google.com>, Rick Yiu <rickyiu@google.com>,
	Quentin Perret <qperret@google.com>,
	Heiko Carstens <hca@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Alexander Gordeev <agordeev@linux.ibm.com>,
	Sudeep Holla <sudeep.holla@arm.com>,
	Juri Lelli <juri.lelli@redhat.com>
Subject: [RFC PATCH 2/3] sched/cpuset: Keep track of SCHED_DEADLINE tasks in cpusets
Date: Wed, 15 Mar 2023 12:18:11 +0000	[thread overview]
Message-ID: <20230315121812.206079-3-juri.lelli@redhat.com> (raw)
In-Reply-To: <20230315121812.206079-1-juri.lelli@redhat.com>

Qais reported that iterating over all tasks when rebuilding root domains
for finding out which ones are DEADLINE and need their bandwidth
correctly restored on such root domains can be a costly operation (10+
ms delays on suspend-resume).

To fix the problem keep track of the number of DEADLINE tasks belonging
to each cpuset and then use this information (followup patch) to only
perform the above iteration if DEADLINE tasks are actually present in
the cpuset for which a corresponding root domain is being rebuilt.

Reported-by: Qais Yousef <qyousef@layalina.io>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
---
 include/linux/cpuset.h |  4 ++++
 kernel/cgroup/cgroup.c |  4 ++++
 kernel/cgroup/cpuset.c | 25 +++++++++++++++++++++++++
 kernel/sched/core.c    | 10 ++++++++++
 4 files changed, 43 insertions(+)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 355f796c5f07..0348dba5680e 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -71,6 +71,8 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void inc_dl_tasks_cs(struct task_struct *task);
+extern void dec_dl_tasks_cs(struct task_struct *task);
 extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
@@ -196,6 +198,8 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void inc_dl_tasks_cs(struct task_struct *task) { }
+static inline void dec_dl_tasks_cs(struct task_struct *task) { }
 static inline void cpuset_lock(void) { }
 static inline void cpuset_unlock(void) { }
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c099cf3fa02d..357925e1e4af 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/file.h>
 #include <linux/fs_parser.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/deadline.h>
 #include <linux/psi.h>
 #include <net/sock.h>
 
@@ -6673,6 +6674,9 @@ void cgroup_exit(struct task_struct *tsk)
 	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
 	cset->nr_tasks--;
 
+	if (dl_task(tsk))
+		dec_dl_tasks_cs(tsk);
+
 	WARN_ON_ONCE(cgroup_task_frozen(tsk));
 	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
 		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d82d66d432b..57bc60112618 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -193,6 +193,12 @@ struct cpuset {
 	int use_parent_ecpus;
 	int child_ecpus_count;
 
+	/*
+	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
+	 * know when to rebuild associated root domain bandwidth information.
+	 */
+	int nr_deadline_tasks;
+
 	/* Invalid partition error code, not lock protected */
 	enum prs_errcode prs_err;
 
@@ -245,6 +251,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
 	return css_cs(cs->css.parent);
 }
 
+void inc_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks++;
+}
+
+void dec_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks--;
+}
+
 /* bits in struct cpuset flags field */
 typedef enum {
 	CS_ONLINE,
@@ -2472,6 +2492,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 		ret = security_task_setscheduler(task);
 		if (ret)
 			goto out_unlock;
+
+		if (dl_task(task)) {
+			cs->nr_deadline_tasks++;
+			cpuset_attach_old_cs->nr_deadline_tasks--;
+		}
 	}
 
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5902cbb5e751..d586a8440348 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7683,6 +7683,16 @@ static int __sched_setscheduler(struct task_struct *p,
 		goto unlock;
 	}
 
+	/*
+	 * In case a task is setscheduled to SCHED_DEADLINE, or if a task is
+	 * moved to a different sched policy, we need to keep track of that on
+	 * its cpuset (for correct bandwidth tracking).
+	 */
+	if (dl_policy(policy) && !dl_task(p))
+		inc_dl_tasks_cs(p);
+	else if (dl_task(p) && !dl_policy(policy))
+		dec_dl_tasks_cs(p);
+
 	p->sched_reset_on_fork = reset_on_fork;
 	oldprio = p->prio;
 
-- 
2.39.2


WARNING: multiple messages have this Message-ID (diff)
From: Juri Lelli <juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
To: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>,
	Ingo Molnar <mingo-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Qais Yousef <qyousef-wp2msK0BRk8tq7phqP6ubQ@public.gmane.org>,
	Waiman Long <longman-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>,
	Zefan Li <lizefan.x-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>,
	Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>,
	Hao Luo <haoluo-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Cc: Dietmar Eggemann <dietmar.eggemann-5wv7dgnIgG8@public.gmane.org>,
	Steven Rostedt <rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org>,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	luca.abeni-5rdYK369eBLQB0XuIGIEkQ@public.gmane.org,
	claudio-YOzL5CV4y4YG1A2ADO40+w@public.gmane.org,
	tommaso.cucinotta-5rdYK369eBLQB0XuIGIEkQ@public.gmane.org,
	bristot-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	mathieu.poirier-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Vincent Guittot
	<vincent.guittot-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>,
	Wei Wang <wvw-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Rick Yiu <rickyiu-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Quentin Perret <qperret-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Heiko Carstens <hca-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>,
	Vasily Gorbik <gor-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>,
	Alexander Gordeev
	<agordeev-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>,
	Sudeep Holla <sudeep.holla-5wv7dgnIgG8@public.gmane.org>,
	Juri Lelli <juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Subject: [RFC PATCH 2/3] sched/cpuset: Keep track of SCHED_DEADLINE tasks in cpusets
Date: Wed, 15 Mar 2023 12:18:11 +0000	[thread overview]
Message-ID: <20230315121812.206079-3-juri.lelli@redhat.com> (raw)
In-Reply-To: <20230315121812.206079-1-juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Qais reported that iterating over all tasks when rebuilding root domains
for finding out which ones are DEADLINE and need their bandwidth
correctly restored on such root domains can be a costly operation (10+
ms delays on suspend-resume).

To fix the problem keep track of the number of DEADLINE tasks belonging
to each cpuset and then use this information (followup patch) to only
perform the above iteration if DEADLINE tasks are actually present in
the cpuset for which a corresponding root domain is being rebuilt.

Reported-by: Qais Yousef <qyousef-wp2msK0BRk8tq7phqP6ubQ@public.gmane.org>
Signed-off-by: Juri Lelli <juri.lelli-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 include/linux/cpuset.h |  4 ++++
 kernel/cgroup/cgroup.c |  4 ++++
 kernel/cgroup/cpuset.c | 25 +++++++++++++++++++++++++
 kernel/sched/core.c    | 10 ++++++++++
 4 files changed, 43 insertions(+)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 355f796c5f07..0348dba5680e 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -71,6 +71,8 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void inc_dl_tasks_cs(struct task_struct *task);
+extern void dec_dl_tasks_cs(struct task_struct *task);
 extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
@@ -196,6 +198,8 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void inc_dl_tasks_cs(struct task_struct *task) { }
+static inline void dec_dl_tasks_cs(struct task_struct *task) { }
 static inline void cpuset_lock(void) { }
 static inline void cpuset_unlock(void) { }
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index c099cf3fa02d..357925e1e4af 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -57,6 +57,7 @@
 #include <linux/file.h>
 #include <linux/fs_parser.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/deadline.h>
 #include <linux/psi.h>
 #include <net/sock.h>
 
@@ -6673,6 +6674,9 @@ void cgroup_exit(struct task_struct *tsk)
 	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
 	cset->nr_tasks--;
 
+	if (dl_task(tsk))
+		dec_dl_tasks_cs(tsk);
+
 	WARN_ON_ONCE(cgroup_task_frozen(tsk));
 	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
 		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8d82d66d432b..57bc60112618 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -193,6 +193,12 @@ struct cpuset {
 	int use_parent_ecpus;
 	int child_ecpus_count;
 
+	/*
+	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
+	 * know when to rebuild associated root domain bandwidth information.
+	 */
+	int nr_deadline_tasks;
+
 	/* Invalid partition error code, not lock protected */
 	enum prs_errcode prs_err;
 
@@ -245,6 +251,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
 	return css_cs(cs->css.parent);
 }
 
+void inc_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks++;
+}
+
+void dec_dl_tasks_cs(struct task_struct *p)
+{
+	struct cpuset *cs = task_cs(p);
+
+	cs->nr_deadline_tasks--;
+}
+
 /* bits in struct cpuset flags field */
 typedef enum {
 	CS_ONLINE,
@@ -2472,6 +2492,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 		ret = security_task_setscheduler(task);
 		if (ret)
 			goto out_unlock;
+
+		if (dl_task(task)) {
+			cs->nr_deadline_tasks++;
+			cpuset_attach_old_cs->nr_deadline_tasks--;
+		}
 	}
 
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5902cbb5e751..d586a8440348 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7683,6 +7683,16 @@ static int __sched_setscheduler(struct task_struct *p,
 		goto unlock;
 	}
 
+	/*
+	 * In case a task is setscheduled to SCHED_DEADLINE, or if a task is
+	 * moved to a different sched policy, we need to keep track of that on
+	 * its cpuset (for correct bandwidth tracking).
+	 */
+	if (dl_policy(policy) && !dl_task(p))
+		inc_dl_tasks_cs(p);
+	else if (dl_task(p) && !dl_policy(policy))
+		dec_dl_tasks_cs(p);
+
 	p->sched_reset_on_fork = reset_on_fork;
 	oldprio = p->prio;
 
-- 
2.39.2


  parent reply	other threads:[~2023-03-15 12:20 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-15 12:18 [RFC PATCH 0/3] sched/deadline: cpuset: Rework DEADLINE bandwidth restoration Juri Lelli
2023-03-15 12:18 ` Juri Lelli
2023-03-15 12:18 ` [RFC PATCH 1/3] sched/cpuset: Bring back cpuset_mutex Juri Lelli
2023-03-15 12:18   ` Juri Lelli
2023-03-15 12:18 ` Juri Lelli [this message]
2023-03-15 12:18   ` [RFC PATCH 2/3] sched/cpuset: Keep track of SCHED_DEADLINE tasks in cpusets Juri Lelli
2023-03-15 14:49   ` Qais Yousef
2023-03-15 14:49     ` Qais Yousef
2023-03-15 17:18     ` Juri Lelli
2023-03-15 17:18       ` Juri Lelli
2023-03-15 19:25       ` Qais Yousef
2023-03-15 19:25         ` Qais Yousef
2023-03-15 15:46   ` Waiman Long
2023-03-15 15:46     ` Waiman Long
2023-03-15 17:14     ` Juri Lelli
2023-03-15 17:14       ` Juri Lelli
2023-03-15 18:01       ` Waiman Long
2023-03-15 18:01         ` Waiman Long
2023-03-15 18:10         ` Waiman Long
2023-03-15 18:10           ` Waiman Long
2023-03-15 23:27         ` Waiman Long
2023-03-15 23:27           ` Waiman Long
2023-03-22 14:05         ` Dietmar Eggemann
2023-03-22 14:05           ` Dietmar Eggemann
2023-03-22 13:18       ` Dietmar Eggemann
2023-03-22 13:18         ` Dietmar Eggemann
2023-03-15 12:18 ` [RFC PATCH 3/3] cgroup/cpuset: Iterate only if DEADLINE tasks are present Juri Lelli
2023-03-15 12:18   ` Juri Lelli
2023-03-15 14:55 ` [RFC PATCH 0/3] sched/deadline: cpuset: Rework DEADLINE bandwidth restoration Qais Yousef
2023-03-15 14:55   ` Qais Yousef
2023-03-15 17:10   ` Juri Lelli
2023-03-15 17:10     ` Juri Lelli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230315121812.206079-3-juri.lelli@redhat.com \
    --to=juri.lelli@redhat.com \
    --cc=agordeev@linux.ibm.com \
    --cc=bristot@redhat.com \
    --cc=cgroups@vger.kernel.org \
    --cc=claudio@evidence.eu.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=gor@linux.ibm.com \
    --cc=hannes@cmpxchg.org \
    --cc=haoluo@google.com \
    --cc=hca@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan.x@bytedance.com \
    --cc=longman@redhat.com \
    --cc=luca.abeni@santannapisa.it \
    --cc=mathieu.poirier@linaro.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=qperret@google.com \
    --cc=qyousef@layalina.io \
    --cc=rickyiu@google.com \
    --cc=rostedt@goodmis.org \
    --cc=sudeep.holla@arm.com \
    --cc=tj@kernel.org \
    --cc=tommaso.cucinotta@santannapisa.it \
    --cc=vincent.guittot@linaro.org \
    --cc=wvw@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.