All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18  2:02 Suren Baghdasaryan
  2021-05-18 18:08   ` Shakeel Butt
  2021-05-18 18:52   ` Peter Zijlstra
  0 siblings, 2 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-18  2:02 UTC (permalink / raw)
  To: tj
  Cc: hannes, lizefan.x, mingo, peterz, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, minchan, corbet,
	bristot, paulmck, rdunlap, akpm, tglx, macro, viresh.kumar,
	mike.kravetz, linux-doc, linux-kernel, cgroups, surenb,
	kernel-team

PSI accounts stalls for each cgroup separately and aggregates it at each
level of the hierarchy. This causes additional overhead with psi_avgs_work
being called for each cgroup in the hierarchy. psi_avgs_work has been
highly optimized, however on systems with large number of cgroups the
overhead becomes noticeable.
Systems which use PSI only at the system level could avoid this overhead
if PSI can be configured to skip per-cgroup stall accounting.
Add "cgroup_disable=pressure" kernel command-line option to allow
requesting system-wide only pressure stall accounting. When set, it
keeps system-wide accounting under /proc/pressure/ but skips accounting
for individual cgroups and does not expose PSI nodes in cgroup hierarchy.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
changes in v2:
- Added psi_cgroups_disabled static key, per PeterZ
- Refactored iterate_groups, per Johannes

 .../admin-guide/kernel-parameters.txt         |  9 +++-
 include/linux/cgroup-defs.h                   |  1 +
 include/linux/cgroup.h                        |  7 +++
 kernel/cgroup/cgroup.c                        | 46 +++++++++++++++++++
 kernel/sched/psi.c                            | 30 ++++++------
 5 files changed, 78 insertions(+), 15 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc46..653c62142f07 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -497,16 +497,21 @@
 	ccw_timeout_log	[S390]
 			See Documentation/s390/common_io.rst for details.
 
-	cgroup_disable=	[KNL] Disable a particular controller
-			Format: {name of the controller(s) to disable}
+	cgroup_disable=	[KNL] Disable a particular controller or optional feature
+			Format: {name of the controller(s) or feature(s) to disable}
 			The effects of cgroup_disable=foo are:
 			- foo isn't auto-mounted if you mount all cgroups in
 			  a single hierarchy
 			- foo isn't visible as an individually mountable
 			  subsystem
+			- if foo is an optional feature then the feature is
+			  disabled and corresponding cgroup files are not
+			  created
 			{Currently only "memory" controller deal with this and
 			cut the overhead, others just disable the usage. So
 			only cgroup_disable=memory is actually worthy}
+			Specifying "pressure" disables per-cgroup pressure
+			stall information accounting feature
 
 	cgroup_no_v1=	[KNL] Disable cgroup controllers and named hierarchies in v1
 			Format: { { controller | "all" | "named" }
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 559ee05f86b2..671f55cac0f0 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -110,6 +110,7 @@ enum {
 	CFTYPE_NO_PREFIX	= (1 << 3),	/* (DON'T USE FOR NEW FILES) no subsys prefix */
 	CFTYPE_WORLD_WRITABLE	= (1 << 4),	/* (DON'T USE FOR NEW FILES) S_IWUGO */
 	CFTYPE_DEBUG		= (1 << 5),	/* create when cgroup_debug */
+	CFTYPE_PRESSURE		= (1 << 6),	/* only if pressure feature is enabled */
 
 	/* internal flags, do not use outside cgroup core proper */
 	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4f2f79de083e..b929f589968b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -676,6 +676,8 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
 	return &cgrp->psi;
 }
 
+bool cgroup_psi_enabled(void);
+
 static inline void cgroup_init_kthreadd(void)
 {
 	/*
@@ -735,6 +737,11 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
 	return NULL;
 }
 
+static inline bool cgroup_psi_enabled(void)
+{
+	return false;
+}
+
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
 {
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e049edd66776..c4b16c82e199 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -209,6 +209,22 @@ struct cgroup_namespace init_cgroup_ns = {
 static struct file_system_type cgroup2_fs_type;
 static struct cftype cgroup_base_files[];
 
+/* cgroup optional features */
+enum cgroup_opt_features {
+#ifdef CONFIG_PSI
+	OPT_FEATURE_PRESSURE,
+#endif
+	OPT_FEATURE_COUNT
+};
+
+static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
+#ifdef CONFIG_PSI
+	"pressure",
+#endif
+};
+
+static u16 cgroup_feature_disable_mask __read_mostly;
+
 static int cgroup_apply_control(struct cgroup *cgrp);
 static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
 static void css_task_iter_skip(struct css_task_iter *it,
@@ -3631,6 +3647,18 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
 {
 	psi_trigger_replace(&of->priv, NULL);
 }
+
+bool cgroup_psi_enabled(void)
+{
+	return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0;
+}
+
+#else /* CONFIG_PSI */
+bool cgroup_psi_enabled(void)
+{
+	return false;
+}
+
 #endif /* CONFIG_PSI */
 
 static int cgroup_freeze_show(struct seq_file *seq, void *v)
@@ -3881,6 +3909,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
 restart:
 	for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
 		/* does cft->flags tell us to skip this file on @cgrp? */
+		if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
+			continue;
 		if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
 			continue;
 		if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
@@ -3958,6 +3988,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 
 		WARN_ON(cft->ss || cft->kf_ops);
 
+		if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
+			continue;
+
 		if (cft->seq_start)
 			kf_ops = &cgroup_kf_ops;
 		else
@@ -4866,6 +4899,7 @@ static struct cftype cgroup_base_files[] = {
 #ifdef CONFIG_PSI
 	{
 		.name = "io.pressure",
+		.flags = CFTYPE_PRESSURE,
 		.seq_show = cgroup_io_pressure_show,
 		.write = cgroup_io_pressure_write,
 		.poll = cgroup_pressure_poll,
@@ -4873,6 +4907,7 @@ static struct cftype cgroup_base_files[] = {
 	},
 	{
 		.name = "memory.pressure",
+		.flags = CFTYPE_PRESSURE,
 		.seq_show = cgroup_memory_pressure_show,
 		.write = cgroup_memory_pressure_write,
 		.poll = cgroup_pressure_poll,
@@ -4880,6 +4915,7 @@ static struct cftype cgroup_base_files[] = {
 	},
 	{
 		.name = "cpu.pressure",
+		.flags = CFTYPE_PRESSURE,
 		.seq_show = cgroup_cpu_pressure_show,
 		.write = cgroup_cpu_pressure_write,
 		.poll = cgroup_pressure_poll,
@@ -6216,6 +6252,13 @@ static int __init cgroup_disable(char *str)
 				continue;
 			cgroup_disable_mask |= 1 << i;
 		}
+
+		for (i = 0; i < OPT_FEATURE_COUNT; i++) {
+			if (strcmp(token, cgroup_opt_feature_names[i]))
+				continue;
+			cgroup_feature_disable_mask |= 1 << i;
+			break;
+		}
 	}
 	return 1;
 }
@@ -6514,6 +6557,9 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf,
 		if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
 			continue;
 
+		if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
+			continue;
+
 		if (prefix)
 			ret += snprintf(buf + ret, size - ret, "%s.", prefix);
 
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index cc25a3cff41f..4b8e72640ac9 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -148,6 +148,7 @@
 static int psi_bug __read_mostly;
 
 DEFINE_STATIC_KEY_FALSE(psi_disabled);
+DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);
 
 #ifdef CONFIG_PSI_DEFAULT_DISABLED
 static bool psi_enable;
@@ -211,6 +212,9 @@ void __init psi_init(void)
 		return;
 	}
 
+	if (!cgroup_psi_enabled())
+		static_branch_enable(&psi_cgroups_disabled);
+
 	psi_period = jiffies_to_nsecs(PSI_FREQ);
 	group_init(&psi_system);
 }
@@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
 
 static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
 {
+	if (*iter == &psi_system)
+		return NULL;
+
 #ifdef CONFIG_CGROUPS
-	struct cgroup *cgroup = NULL;
+	if (!static_branch_likely(&psi_cgroups_disabled)) {
+		struct cgroup *cgroup = NULL;
 
-	if (!*iter)
-		cgroup = task->cgroups->dfl_cgrp;
-	else if (*iter == &psi_system)
-		return NULL;
-	else
-		cgroup = cgroup_parent(*iter);
+		if (!*iter)
+			cgroup = task->cgroups->dfl_cgrp;
+		else
+			cgroup = cgroup_parent(*iter);
 
-	if (cgroup && cgroup_parent(cgroup)) {
-		*iter = cgroup;
-		return cgroup_psi(cgroup);
+		if (cgroup && cgroup_parent(cgroup)) {
+			*iter = cgroup;
+			return cgroup_psi(cgroup);
+		}
 	}
-#else
-	if (*iter)
-		return NULL;
 #endif
 	*iter = &psi_system;
 	return &psi_system;
-- 
2.31.1.751.gd2f1c929bd-goog


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:08   ` Shakeel Butt
  0 siblings, 0 replies; 11+ messages in thread
From: Shakeel Butt @ 2021-05-18 18:08 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar,
	Peter Zijlstra (Intel),
	Juri Lelli, Vincent Guittot, dietmar.eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	bristot, paulmck, Randy Dunlap, Andrew Morton, Thomas Gleixner,
	macro, viresh.kumar, Mike Kravetz, linux-doc, LKML, Cgroups,
	kernel-team

On Mon, May 17, 2021 at 7:02 PM Suren Baghdasaryan <surenb@google.com> wrote:
>
> PSI accounts stalls for each cgroup separately and aggregates it at each
> level of the hierarchy. This causes additional overhead with psi_avgs_work
> being called for each cgroup in the hierarchy. psi_avgs_work has been
> highly optimized, however on systems with large number of cgroups the
> overhead becomes noticeable.
> Systems which use PSI only at the system level could avoid this overhead
> if PSI can be configured to skip per-cgroup stall accounting.
> Add "cgroup_disable=pressure" kernel command-line option to allow
> requesting system-wide only pressure stall accounting. When set, it
> keeps system-wide accounting under /proc/pressure/ but skips accounting
> for individual cgroups and does not expose PSI nodes in cgroup hierarchy.
>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

I am assuming that this is for Android and at the moment Android is
only interested in system level pressure. I am wondering if there is
any plan for Android to have cgroup hierarchies with explicit limits
in future?

If yes, then I think we should follow up (this patch is fine
independently) with making this feature more general by explicitly
enabling psi for each cgroup level similar to how we enable
controllers through cgroup.subtree_control.

Something like:

$ echo "+psi" > cgroup.subtree_control

This definitely would be helpful for server use cases where jobs do
sub-containers but might not be interested in psi but the admin is
interested in the top level job's psi.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:08   ` Shakeel Butt
  0 siblings, 0 replies; 11+ messages in thread
From: Shakeel Butt @ 2021-05-18 18:08 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar,
	Peter Zijlstra (Intel),
	Juri Lelli, Vincent Guittot, dietmar.eggemann-5wv7dgnIgG8,
	Steven Rostedt, Benjamin Segall, Mel Gorman, Minchan Kim,
	Jonathan Corbet, bristot-H+wXaHxf7aLQT0dZR+AlfA,
	paulmck-DgEjT+Ai2ygdnm+yROfE0A, Randy Dunlap, Andrew Morton,
	Thomas Gleixner, macro-fOkVWZO6G+f10XsdtD+oqA,
	viresh.kumar-QSEj5FYQhm4dnm+yROfE0A, Mike Kravetz,
	linux-doc-u79uwXL29TY76Z2rM5mHXA, LKML, Cgrou

On Mon, May 17, 2021 at 7:02 PM Suren Baghdasaryan <surenb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> wrote:
>
> PSI accounts stalls for each cgroup separately and aggregates it at each
> level of the hierarchy. This causes additional overhead with psi_avgs_work
> being called for each cgroup in the hierarchy. psi_avgs_work has been
> highly optimized, however on systems with large number of cgroups the
> overhead becomes noticeable.
> Systems which use PSI only at the system level could avoid this overhead
> if PSI can be configured to skip per-cgroup stall accounting.
> Add "cgroup_disable=pressure" kernel command-line option to allow
> requesting system-wide only pressure stall accounting. When set, it
> keeps system-wide accounting under /proc/pressure/ but skips accounting
> for individual cgroups and does not expose PSI nodes in cgroup hierarchy.
>
> Signed-off-by: Suren Baghdasaryan <surenb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>

I am assuming that this is for Android and at the moment Android is
only interested in system level pressure. I am wondering if there is
any plan for Android to have cgroup hierarchies with explicit limits
in future?

If yes, then I think we should follow up (this patch is fine
independently) with making this feature more general by explicitly
enabling psi for each cgroup level similar to how we enable
controllers through cgroup.subtree_control.

Something like:

$ echo "+psi" > cgroup.subtree_control

This definitely would be helpful for server use cases where jobs do
sub-containers but might not be interested in psi but the admin is
interested in the top level job's psi.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
  2021-05-18 18:08   ` Shakeel Butt
@ 2021-05-18 18:23     ` Suren Baghdasaryan
  -1 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-18 18:23 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar,
	Peter Zijlstra (Intel),
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	Daniel Bristot de Oliveira, Paul E . McKenney, Randy Dunlap,
	Andrew Morton, Thomas Gleixner, macro, Viresh Kumar,
	Mike Kravetz, linux-doc, LKML, Cgroups, kernel-team

On Tue, May 18, 2021 at 11:08 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Mon, May 17, 2021 at 7:02 PM Suren Baghdasaryan <surenb@google.com> wrote:
> >
> > PSI accounts stalls for each cgroup separately and aggregates it at each
> > level of the hierarchy. This causes additional overhead with psi_avgs_work
> > being called for each cgroup in the hierarchy. psi_avgs_work has been
> > highly optimized, however on systems with large number of cgroups the
> > overhead becomes noticeable.
> > Systems which use PSI only at the system level could avoid this overhead
> > if PSI can be configured to skip per-cgroup stall accounting.
> > Add "cgroup_disable=pressure" kernel command-line option to allow
> > requesting system-wide only pressure stall accounting. When set, it
> > keeps system-wide accounting under /proc/pressure/ but skips accounting
> > for individual cgroups and does not expose PSI nodes in cgroup hierarchy.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>
> I am assuming that this is for Android and at the moment Android is
> only interested in system level pressure. I am wondering if there is
> any plan for Android to have cgroup hierarchies with explicit limits
> in future?

Correct and yes, we would like to use memcgs to limit memory in the
future, however we do not plan on using per-cgroup psi so far.

>
> If yes, then I think we should follow up (this patch is fine
> independently) with making this feature more general by explicitly
> enabling psi for each cgroup level similar to how we enable
> controllers through cgroup.subtree_control.
>
> Something like:
>
> $ echo "+psi" > cgroup.subtree_control
>
> This definitely would be helpful for server use cases where jobs do
> sub-containers but might not be interested in psi but the admin is
> interested in the top level job's psi.

Haven't thought about it before but that makes sense to me.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:23     ` Suren Baghdasaryan
  0 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-18 18:23 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar,
	Peter Zijlstra (Intel),
	Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	Daniel Bristot de Oliveira, Paul E . McKenney, Randy Dunlap,
	Andrew Morton, Thomas Gleixner, macro, Viresh Kumar, Mike

On Tue, May 18, 2021 at 11:08 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Mon, May 17, 2021 at 7:02 PM Suren Baghdasaryan <surenb@google.com> wrote:
> >
> > PSI accounts stalls for each cgroup separately and aggregates it at each
> > level of the hierarchy. This causes additional overhead with psi_avgs_work
> > being called for each cgroup in the hierarchy. psi_avgs_work has been
> > highly optimized, however on systems with large number of cgroups the
> > overhead becomes noticeable.
> > Systems which use PSI only at the system level could avoid this overhead
> > if PSI can be configured to skip per-cgroup stall accounting.
> > Add "cgroup_disable=pressure" kernel command-line option to allow
> > requesting system-wide only pressure stall accounting. When set, it
> > keeps system-wide accounting under /proc/pressure/ but skips accounting
> > for individual cgroups and does not expose PSI nodes in cgroup hierarchy.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>
> I am assuming that this is for Android and at the moment Android is
> only interested in system level pressure. I am wondering if there is
> any plan for Android to have cgroup hierarchies with explicit limits
> in future?

Correct and yes, we would like to use memcgs to limit memory in the
future, however we do not plan on using per-cgroup psi so far.

>
> If yes, then I think we should follow up (this patch is fine
> independently) with making this feature more general by explicitly
> enabling psi for each cgroup level similar to how we enable
> controllers through cgroup.subtree_control.
>
> Something like:
>
> $ echo "+psi" > cgroup.subtree_control
>
> This definitely would be helpful for server use cases where jobs do
> sub-containers but might not be interested in psi but the admin is
> interested in the top level job's psi.

Haven't thought about it before but that makes sense to me.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:52   ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2021-05-18 18:52 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: tj, hannes, lizefan.x, mingo, juri.lelli, vincent.guittot,
	dietmar.eggemann, rostedt, bsegall, mgorman, minchan, corbet,
	bristot, paulmck, rdunlap, akpm, tglx, macro, viresh.kumar,
	mike.kravetz, linux-doc, linux-kernel, cgroups, kernel-team

On Mon, May 17, 2021 at 07:02:00PM -0700, Suren Baghdasaryan wrote:

> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index cc25a3cff41f..4b8e72640ac9 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -148,6 +148,7 @@
>  static int psi_bug __read_mostly;
>  
>  DEFINE_STATIC_KEY_FALSE(psi_disabled);
> +DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);

I'm thinking the whole thing will be easier/clearer when you make this:

DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);

>  
>  #ifdef CONFIG_PSI_DEFAULT_DISABLED
>  static bool psi_enable;
> @@ -211,6 +212,9 @@ void __init psi_init(void)
>  		return;
>  	}
>  
> +	if (!cgroup_psi_enabled())
> +		static_branch_enable(&psi_cgroups_disabled);

	if (!cgroup_psi_enabled())
		static_branch_disable(&psi_cgroups_enabled);

> +
>  	psi_period = jiffies_to_nsecs(PSI_FREQ);
>  	group_init(&psi_system);
>  }
> @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
>  
>  static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
>  {
> +	if (*iter == &psi_system)
> +		return NULL;
> +
>  #ifdef CONFIG_CGROUPS
> +	if (!static_branch_likely(&psi_cgroups_disabled)) {

	if (static_branch_likely(&psi_cgroups_enabled)) {

> +		struct cgroup *cgroup = NULL;
>  
> +		if (!*iter)
> +			cgroup = task->cgroups->dfl_cgrp;
> +		else
> +			cgroup = cgroup_parent(*iter);
>  
> +		if (cgroup && cgroup_parent(cgroup)) {
> +			*iter = cgroup;
> +			return cgroup_psi(cgroup);
> +		}
>  	}
>  #endif
>  	*iter = &psi_system;
>  	return &psi_system;

But yes, very nice.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:52   ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2021-05-18 18:52 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: tj-DgEjT+Ai2ygdnm+yROfE0A, hannes-druUgvl0LCNAfugRpC6u6w,
	lizefan.x-EC8Uxl6Npydl57MIdRCFDg, mingo-H+wXaHxf7aLQT0dZR+AlfA,
	juri.lelli-H+wXaHxf7aLQT0dZR+AlfA,
	vincent.guittot-QSEj5FYQhm4dnm+yROfE0A,
	dietmar.eggemann-5wv7dgnIgG8, rostedt-nx8X9YLhiw1AfugRpC6u6w,
	bsegall-hpIqsD4AKlfQT0dZR+AlfA, mgorman-l3A5Bk7waGM,
	minchan-DgEjT+Ai2ygdnm+yROfE0A, corbet-T1hC0tSOHrs,
	bristot-H+wXaHxf7aLQT0dZR+AlfA, paulmck-DgEjT+Ai2ygdnm+yROfE0A,
	rdunlap-wEGCiKHe2LqWVfeAwA7xHQ,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
	tglx-hfZtesqFncYOwBW4kG4KsQ, macro-fOkVWZO6G+f10XsdtD+oqA,
	viresh.kumar-QSEj5FYQhm4dnm+yROfE0A,
	mike.kravetz-QHcLZuEGTsvQT0dZR+AlfA,
	linux-doc-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	cgroups-u79uwXL29TY76Z2rM5mHXA,
	kernel-team-z5hGa2qSFaRBDgjK7y7TUQ

On Mon, May 17, 2021 at 07:02:00PM -0700, Suren Baghdasaryan wrote:

> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index cc25a3cff41f..4b8e72640ac9 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -148,6 +148,7 @@
>  static int psi_bug __read_mostly;
>  
>  DEFINE_STATIC_KEY_FALSE(psi_disabled);
> +DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);

I'm thinking the whole thing will be easier/clearer when you make this:

DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);

>  
>  #ifdef CONFIG_PSI_DEFAULT_DISABLED
>  static bool psi_enable;
> @@ -211,6 +212,9 @@ void __init psi_init(void)
>  		return;
>  	}
>  
> +	if (!cgroup_psi_enabled())
> +		static_branch_enable(&psi_cgroups_disabled);

	if (!cgroup_psi_enabled())
		static_branch_disable(&psi_cgroups_enabled);

> +
>  	psi_period = jiffies_to_nsecs(PSI_FREQ);
>  	group_init(&psi_system);
>  }
> @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
>  
>  static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
>  {
> +	if (*iter == &psi_system)
> +		return NULL;
> +
>  #ifdef CONFIG_CGROUPS
> +	if (!static_branch_likely(&psi_cgroups_disabled)) {

	if (static_branch_likely(&psi_cgroups_enabled)) {

> +		struct cgroup *cgroup = NULL;
>  
> +		if (!*iter)
> +			cgroup = task->cgroups->dfl_cgrp;
> +		else
> +			cgroup = cgroup_parent(*iter);
>  
> +		if (cgroup && cgroup_parent(cgroup)) {
> +			*iter = cgroup;
> +			return cgroup_psi(cgroup);
> +		}
>  	}
>  #endif
>  	*iter = &psi_system;
>  	return &psi_system;

But yes, very nice.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:55     ` Suren Baghdasaryan
  0 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-18 18:55 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	Daniel Bristot de Oliveira, Paul E . McKenney, Randy Dunlap,
	Andrew Morton, Thomas Gleixner, macro, Viresh Kumar,
	Mike Kravetz, linux-doc, LKML, cgroups mailinglist, kernel-team

On Tue, May 18, 2021 at 11:52 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Mon, May 17, 2021 at 07:02:00PM -0700, Suren Baghdasaryan wrote:
>
> > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > index cc25a3cff41f..4b8e72640ac9 100644
> > --- a/kernel/sched/psi.c
> > +++ b/kernel/sched/psi.c
> > @@ -148,6 +148,7 @@
> >  static int psi_bug __read_mostly;
> >
> >  DEFINE_STATIC_KEY_FALSE(psi_disabled);
> > +DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);
>
> I'm thinking the whole thing will be easier/clearer when you make this:
>
> DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
>

Sounds good. Will respin another version. Thanks for reviewing!


> >
> >  #ifdef CONFIG_PSI_DEFAULT_DISABLED
> >  static bool psi_enable;
> > @@ -211,6 +212,9 @@ void __init psi_init(void)
> >               return;
> >       }
> >
> > +     if (!cgroup_psi_enabled())
> > +             static_branch_enable(&psi_cgroups_disabled);
>
>         if (!cgroup_psi_enabled())
>                 static_branch_disable(&psi_cgroups_enabled);
>
> > +
> >       psi_period = jiffies_to_nsecs(PSI_FREQ);
> >       group_init(&psi_system);
> >  }
> > @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
> >
> >  static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
> >  {
> > +     if (*iter == &psi_system)
> > +             return NULL;
> > +
> >  #ifdef CONFIG_CGROUPS
> > +     if (!static_branch_likely(&psi_cgroups_disabled)) {
>
>         if (static_branch_likely(&psi_cgroups_enabled)) {
>
> > +             struct cgroup *cgroup = NULL;
> >
> > +             if (!*iter)
> > +                     cgroup = task->cgroups->dfl_cgrp;
> > +             else
> > +                     cgroup = cgroup_parent(*iter);
> >
> > +             if (cgroup && cgroup_parent(cgroup)) {
> > +                     *iter = cgroup;
> > +                     return cgroup_psi(cgroup);
> > +             }
> >       }
> >  #endif
> >       *iter = &psi_system;
> >       return &psi_system;
>
> But yes, very nice.
>
> --
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-18 18:55     ` Suren Baghdasaryan
  0 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-18 18:55 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	Daniel Bristot de Oliveira, Paul E . McKenney, Randy Dunlap,
	Andrew Morton, Thomas Gleixner, macro-fOkVWZO6G+f10XsdtD+oqA,
	Viresh Kumar, Mike Kravetz, linux-doc-fy+rA21nqHI

On Tue, May 18, 2021 at 11:52 AM Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org> wrote:
>
> On Mon, May 17, 2021 at 07:02:00PM -0700, Suren Baghdasaryan wrote:
>
> > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > index cc25a3cff41f..4b8e72640ac9 100644
> > --- a/kernel/sched/psi.c
> > +++ b/kernel/sched/psi.c
> > @@ -148,6 +148,7 @@
> >  static int psi_bug __read_mostly;
> >
> >  DEFINE_STATIC_KEY_FALSE(psi_disabled);
> > +DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);
>
> I'm thinking the whole thing will be easier/clearer when you make this:
>
> DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
>

Sounds good. Will respin another version. Thanks for reviewing!


> >
> >  #ifdef CONFIG_PSI_DEFAULT_DISABLED
> >  static bool psi_enable;
> > @@ -211,6 +212,9 @@ void __init psi_init(void)
> >               return;
> >       }
> >
> > +     if (!cgroup_psi_enabled())
> > +             static_branch_enable(&psi_cgroups_disabled);
>
>         if (!cgroup_psi_enabled())
>                 static_branch_disable(&psi_cgroups_enabled);
>
> > +
> >       psi_period = jiffies_to_nsecs(PSI_FREQ);
> >       group_init(&psi_system);
> >  }
> > @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
> >
> >  static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
> >  {
> > +     if (*iter == &psi_system)
> > +             return NULL;
> > +
> >  #ifdef CONFIG_CGROUPS
> > +     if (!static_branch_likely(&psi_cgroups_disabled)) {
>
>         if (static_branch_likely(&psi_cgroups_enabled)) {
>
> > +             struct cgroup *cgroup = NULL;
> >
> > +             if (!*iter)
> > +                     cgroup = task->cgroups->dfl_cgrp;
> > +             else
> > +                     cgroup = cgroup_parent(*iter);
> >
> > +             if (cgroup && cgroup_parent(cgroup)) {
> > +                     *iter = cgroup;
> > +                     return cgroup_psi(cgroup);
> > +             }
> >       }
> >  #endif
> >       *iter = &psi_system;
> >       return &psi_system;
>
> But yes, very nice.
>
> --
> To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe-z5hGa2qSFaTowKkBSvOlow@public.gmane.org
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
  2021-05-18 18:55     ` Suren Baghdasaryan
@ 2021-05-24 19:58       ` Suren Baghdasaryan
  -1 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-24 19:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	Daniel Bristot de Oliveira, Paul E . McKenney, Randy Dunlap,
	Andrew Morton, Thomas Gleixner, macro, Viresh Kumar,
	Mike Kravetz, linux-doc, LKML, cgroups mailinglist, kernel-team

On Tue, May 18, 2021 at 11:55 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Tue, May 18, 2021 at 11:52 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, May 17, 2021 at 07:02:00PM -0700, Suren Baghdasaryan wrote:
> >
> > > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > > index cc25a3cff41f..4b8e72640ac9 100644
> > > --- a/kernel/sched/psi.c
> > > +++ b/kernel/sched/psi.c
> > > @@ -148,6 +148,7 @@
> > >  static int psi_bug __read_mostly;
> > >
> > >  DEFINE_STATIC_KEY_FALSE(psi_disabled);
> > > +DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);
> >
> > I'm thinking the whole thing will be easier/clearer when you make this:
> >
> > DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
> >
>
> Sounds good. Will respin another version. Thanks for reviewing!

v3 is posted at https://lore.kernel.org/patchwork/patch/1435705

>
>
> > >
> > >  #ifdef CONFIG_PSI_DEFAULT_DISABLED
> > >  static bool psi_enable;
> > > @@ -211,6 +212,9 @@ void __init psi_init(void)
> > >               return;
> > >       }
> > >
> > > +     if (!cgroup_psi_enabled())
> > > +             static_branch_enable(&psi_cgroups_disabled);
> >
> >         if (!cgroup_psi_enabled())
> >                 static_branch_disable(&psi_cgroups_enabled);
> >
> > > +
> > >       psi_period = jiffies_to_nsecs(PSI_FREQ);
> > >       group_init(&psi_system);
> > >  }
> > > @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
> > >
> > >  static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
> > >  {
> > > +     if (*iter == &psi_system)
> > > +             return NULL;
> > > +
> > >  #ifdef CONFIG_CGROUPS
> > > +     if (!static_branch_likely(&psi_cgroups_disabled)) {
> >
> >         if (static_branch_likely(&psi_cgroups_enabled)) {
> >
> > > +             struct cgroup *cgroup = NULL;
> > >
> > > +             if (!*iter)
> > > +                     cgroup = task->cgroups->dfl_cgrp;
> > > +             else
> > > +                     cgroup = cgroup_parent(*iter);
> > >
> > > +             if (cgroup && cgroup_parent(cgroup)) {
> > > +                     *iter = cgroup;
> > > +                     return cgroup_psi(cgroup);
> > > +             }
> > >       }
> > >  #endif
> > >       *iter = &psi_system;
> > >       return &psi_system;
> >
> > But yes, very nice.
> >
> > --
> > To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable
@ 2021-05-24 19:58       ` Suren Baghdasaryan
  0 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2021-05-24 19:58 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Tejun Heo, Johannes Weiner, Zefan Li, Ingo Molnar, Juri Lelli,
	Vincent Guittot, Dietmar Eggemann, Steven Rostedt,
	Benjamin Segall, Mel Gorman, Minchan Kim, Jonathan Corbet,
	Daniel Bristot de Oliveira, Paul E . McKenney, Randy Dunlap,
	Andrew Morton, Thomas Gleixner, macro, Viresh Kumar,
	Mike Kravetz, linux-doc

On Tue, May 18, 2021 at 11:55 AM Suren Baghdasaryan <surenb@google.com> wrote:
>
> On Tue, May 18, 2021 at 11:52 AM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Mon, May 17, 2021 at 07:02:00PM -0700, Suren Baghdasaryan wrote:
> >
> > > diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> > > index cc25a3cff41f..4b8e72640ac9 100644
> > > --- a/kernel/sched/psi.c
> > > +++ b/kernel/sched/psi.c
> > > @@ -148,6 +148,7 @@
> > >  static int psi_bug __read_mostly;
> > >
> > >  DEFINE_STATIC_KEY_FALSE(psi_disabled);
> > > +DEFINE_STATIC_KEY_FALSE(psi_cgroups_disabled);
> >
> > I'm thinking the whole thing will be easier/clearer when you make this:
> >
> > DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
> >
>
> Sounds good. Will respin another version. Thanks for reviewing!

v3 is posted at https://lore.kernel.org/patchwork/patch/1435705

>
>
> > >
> > >  #ifdef CONFIG_PSI_DEFAULT_DISABLED
> > >  static bool psi_enable;
> > > @@ -211,6 +212,9 @@ void __init psi_init(void)
> > >               return;
> > >       }
> > >
> > > +     if (!cgroup_psi_enabled())
> > > +             static_branch_enable(&psi_cgroups_disabled);
> >
> >         if (!cgroup_psi_enabled())
> >                 static_branch_disable(&psi_cgroups_enabled);
> >
> > > +
> > >       psi_period = jiffies_to_nsecs(PSI_FREQ);
> > >       group_init(&psi_system);
> > >  }
> > > @@ -744,23 +748,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
> > >
> > >  static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
> > >  {
> > > +     if (*iter == &psi_system)
> > > +             return NULL;
> > > +
> > >  #ifdef CONFIG_CGROUPS
> > > +     if (!static_branch_likely(&psi_cgroups_disabled)) {
> >
> >         if (static_branch_likely(&psi_cgroups_enabled)) {
> >
> > > +             struct cgroup *cgroup = NULL;
> > >
> > > +             if (!*iter)
> > > +                     cgroup = task->cgroups->dfl_cgrp;
> > > +             else
> > > +                     cgroup = cgroup_parent(*iter);
> > >
> > > +             if (cgroup && cgroup_parent(cgroup)) {
> > > +                     *iter = cgroup;
> > > +                     return cgroup_psi(cgroup);
> > > +             }
> > >       }
> > >  #endif
> > >       *iter = &psi_system;
> > >       return &psi_system;
> >
> > But yes, very nice.
> >
> > --
> > To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com.
> >

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-05-24 19:58 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-18  2:02 [PATCH v2 1/1] cgroup: make per-cgroup pressure stall tracking configurable Suren Baghdasaryan
2021-05-18 18:08 ` Shakeel Butt
2021-05-18 18:08   ` Shakeel Butt
2021-05-18 18:23   ` Suren Baghdasaryan
2021-05-18 18:23     ` Suren Baghdasaryan
2021-05-18 18:52 ` Peter Zijlstra
2021-05-18 18:52   ` Peter Zijlstra
2021-05-18 18:55   ` Suren Baghdasaryan
2021-05-18 18:55     ` Suren Baghdasaryan
2021-05-24 19:58     ` Suren Baghdasaryan
2021-05-24 19:58       ` Suren Baghdasaryan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.