linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Add hierarchical accounting to cpu accounting controller
@ 2008-10-23  5:43 Bharata B Rao
  2008-10-23  6:41 ` Bharata B Rao
  2008-10-23 15:49 ` Paul Menage
  0 siblings, 2 replies; 28+ messages in thread
From: Bharata B Rao @ 2008-10-23  5:43 UTC (permalink / raw)
  To: linux-kernel; +Cc: Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c |   30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,10 +9131,15 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
+static struct cpuacct init_cpuacct_group;
 struct cgroup_subsys cpuacct_subsys;
 
+#define for_each_cpuacct_group(ca) \
+		for (; ca; ca = ca->parent)
+
 /* return cpu accounting group corresponding to this container */
 static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
 {
@@ -9153,17 +9158,28 @@ static inline struct cpuacct *task_ca(st
 static struct cgroup_subsys_state *cpuacct_create(
 	struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
-	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+	struct cpuacct *ca;
 
-	if (!ca)
-		return ERR_PTR(-ENOMEM);
+	if (!cgrp->parent) {
+		/* This is early initialization for the top cgroup */
+		ca = &init_cpuacct_group;
+	} else {
+		ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+		if (!ca)
+			return ERR_PTR(-ENOMEM);
+	}
 
 	ca->cpuusage = alloc_percpu(u64);
 	if (!ca->cpuusage) {
-		kfree(ca);
+		if (cgrp->parent)
+			kfree(ca);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent) {
+		ca->css.cgroup = cgrp;
+		ca->parent = cgroup_ca(cgrp->parent);
+	}
 	return &ca->css;
 }
 
@@ -9243,14 +9259,16 @@ static int cpuacct_populate(struct cgrou
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for_each_cpuacct_group(ca) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-23  5:43 [PATCH] Add hierarchical accounting to cpu accounting controller Bharata B Rao
@ 2008-10-23  6:41 ` Bharata B Rao
  2008-10-23 15:49 ` Paul Menage
  1 sibling, 0 replies; 28+ messages in thread
From: Bharata B Rao @ 2008-10-23  6:41 UTC (permalink / raw)
  To: linux-kernel; +Cc: Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

I realized from the patch present at http://lkml.org/lkml/2008/10/5/277,
that it is not necessary to initialize css.group in cpuacct_create()
as it is done in cgroup core. Present below is the updated patch:

Regards,
Bharata.

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c |   29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,10 +9131,15 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
+static struct cpuacct init_cpuacct_group;
 struct cgroup_subsys cpuacct_subsys;
 
+#define for_each_cpuacct_group(ca) \
+		for (; ca; ca = ca->parent)
+
 /* return cpu accounting group corresponding to this container */
 static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
 {
@@ -9153,17 +9158,27 @@ static inline struct cpuacct *task_ca(st
 static struct cgroup_subsys_state *cpuacct_create(
 	struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
-	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+	struct cpuacct *ca;
 
-	if (!ca)
-		return ERR_PTR(-ENOMEM);
+	if (!cgrp->parent) {
+		/* This is early initialization for the top cgroup */
+		ca = &init_cpuacct_group;
+	} else {
+		ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+		if (!ca)
+			return ERR_PTR(-ENOMEM);
+	}
 
 	ca->cpuusage = alloc_percpu(u64);
 	if (!ca->cpuusage) {
-		kfree(ca);
+		if (cgrp->parent)
+			kfree(ca);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9243,14 +9258,16 @@ static int cpuacct_populate(struct cgrou
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for_each_cpuacct_group(ca) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-23  5:43 [PATCH] Add hierarchical accounting to cpu accounting controller Bharata B Rao
  2008-10-23  6:41 ` Bharata B Rao
@ 2008-10-23 15:49 ` Paul Menage
  2008-10-24  5:08   ` Bharata B Rao
  1 sibling, 1 reply; 28+ messages in thread
From: Paul Menage @ 2008-10-23 15:49 UTC (permalink / raw)
  To: bharata; +Cc: linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Wed, Oct 22, 2008 at 10:43 PM, Bharata B Rao
<bharata@linux.vnet.ibm.com> wrote:
> ---
>  kernel/sched.c |   30 ++++++++++++++++++++++++------
>  1 file changed, 24 insertions(+), 6 deletions(-)
>
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9131,10 +9131,15 @@ struct cpuacct {
>        struct cgroup_subsys_state css;
>        /* cpuusage holds pointer to a u64-type object on every cpu */
>        u64 *cpuusage;
> +       struct cpuacct *parent;
>  };
>
> +static struct cpuacct init_cpuacct_group;

Why are you making the root state static?

>  struct cgroup_subsys cpuacct_subsys;
>
> +#define for_each_cpuacct_group(ca) \
> +               for (; ca; ca = ca->parent)
> +

This is only used once, so doesn't seem worth creating a new macro for.

>
> +       if (cgrp->parent) {
> +               ca->css.cgroup = cgrp;

The cgroup pointer in the css is maintained by cgroups - you don't
need to set it.

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-23 15:49 ` Paul Menage
@ 2008-10-24  5:08   ` Bharata B Rao
  2008-10-24 17:37     ` Paul Menage
  0 siblings, 1 reply; 28+ messages in thread
From: Bharata B Rao @ 2008-10-24  5:08 UTC (permalink / raw)
  To: Paul Menage; +Cc: linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Thu, Oct 23, 2008 at 08:49:19AM -0700, Paul Menage wrote:
> On Wed, Oct 22, 2008 at 10:43 PM, Bharata B Rao
> <bharata@linux.vnet.ibm.com> wrote:
> > ---
> >  kernel/sched.c |   30 ++++++++++++++++++++++++------
> >  1 file changed, 24 insertions(+), 6 deletions(-)
> >
> > --- a/kernel/sched.c
> > +++ b/kernel/sched.c
> > @@ -9131,10 +9131,15 @@ struct cpuacct {
> >        struct cgroup_subsys_state css;
> >        /* cpuusage holds pointer to a u64-type object on every cpu */
> >        u64 *cpuusage;
> > +       struct cpuacct *parent;
> >  };
> >
> > +static struct cpuacct init_cpuacct_group;
> 
> Why are you making the root state static?

Because it is not used anywhere outside sched.c. Is that a problem ?
> 
> >  struct cgroup_subsys cpuacct_subsys;
> >
> > +#define for_each_cpuacct_group(ca) \
> > +               for (; ca; ca = ca->parent)
> > +
> 
> This is only used once, so doesn't seem worth creating a new macro for.

Ok. Removed.

> 
> >
> > +       if (cgrp->parent) {
> > +               ca->css.cgroup = cgrp;
> 
> The cgroup pointer in the css is maintained by cgroups - you don't
> need to set it.

Yes, I realized this immediately after my 1st post and posted
a corrected version subsequently.

Paul, thanks for your review.

Updated patch below:

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched.c |   26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,8 +9131,10 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
+static struct cpuacct init_cpuacct_group;
 struct cgroup_subsys cpuacct_subsys;
 
 /* return cpu accounting group corresponding to this container */
@@ -9153,17 +9155,27 @@ static inline struct cpuacct *task_ca(st
 static struct cgroup_subsys_state *cpuacct_create(
 	struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
-	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+	struct cpuacct *ca;
 
-	if (!ca)
-		return ERR_PTR(-ENOMEM);
+	if (!cgrp->parent) {
+		/* This is early initialization for the top cgroup */
+		ca = &init_cpuacct_group;
+	} else {
+		ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+		if (!ca)
+			return ERR_PTR(-ENOMEM);
+	}
 
 	ca->cpuusage = alloc_percpu(u64);
 	if (!ca->cpuusage) {
-		kfree(ca);
+		if (cgrp->parent)
+			kfree(ca);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9243,14 +9255,16 @@ static int cpuacct_populate(struct cgrou
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-24  5:08   ` Bharata B Rao
@ 2008-10-24 17:37     ` Paul Menage
  2008-10-25  6:01       ` Bharata B Rao
  0 siblings, 1 reply; 28+ messages in thread
From: Paul Menage @ 2008-10-24 17:37 UTC (permalink / raw)
  To: bharata; +Cc: linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Thu, Oct 23, 2008 at 10:08 PM, Bharata B Rao
<bharata@linux.vnet.ibm.com> wrote:
>> > +static struct cpuacct init_cpuacct_group;
>>
>> Why are you making the root state static?
>
> Because it is not used anywhere outside sched.c. Is that a problem ?

Sorry, I wasn't clear - I mean, why are you changing it from
dynamically-allocated in cpuacct_create to statically-allocated in the
BSS?

Paul

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-24 17:37     ` Paul Menage
@ 2008-10-25  6:01       ` Bharata B Rao
  2008-10-25 15:38         ` Paul Menage
  2008-10-27  5:54         ` Li Zefan
  0 siblings, 2 replies; 28+ messages in thread
From: Bharata B Rao @ 2008-10-25  6:01 UTC (permalink / raw)
  To: Paul Menage; +Cc: linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Fri, Oct 24, 2008 at 10:37:34AM -0700, Paul Menage wrote:
> On Thu, Oct 23, 2008 at 10:08 PM, Bharata B Rao
> <bharata@linux.vnet.ibm.com> wrote:
> >> > +static struct cpuacct init_cpuacct_group;
> >>
> >> Why are you making the root state static?
> >
> > Because it is not used anywhere outside sched.c. Is that a problem ?
> 
> Sorry, I wasn't clear - I mean, why are you changing it from
> dynamically-allocated in cpuacct_create to statically-allocated in the
> BSS?

Ok. I realized that defining init_cpuacct_group explicitly and statically
isn't needed. I was just influenced by init_task_group and init_mem_cgroup.
I guess those controllers have different reasons to have their init groups
statically defined.

Here is the updated (hopefully final) patch:

Add hierarchical accounting to cpu accounting controller

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Ingo Molnar <mingo@elte.hu>
CC: Paul Menage <menage@google.com>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
---
 kernel/sched.c |   10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9131,6 +9131,7 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9164,6 +9165,9 @@ static struct cgroup_subsys_state *cpuac
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9243,14 +9247,16 @@ static int cpuacct_populate(struct cgrou
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-25  6:01       ` Bharata B Rao
@ 2008-10-25 15:38         ` Paul Menage
  2008-10-27  1:17           ` KAMEZAWA Hiroyuki
  2008-10-27  5:54         ` Li Zefan
  1 sibling, 1 reply; 28+ messages in thread
From: Paul Menage @ 2008-10-25 15:38 UTC (permalink / raw)
  To: bharata; +Cc: linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
<bharata@linux.vnet.ibm.com> wrote:
>
> Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>

Reviewed-by: Paul Menage <menage@google.com>

So in technical terms this patch looks fine now. There's still the
question of whether it's OK to change the existing API, since it's
been in the kernel in its currently (non-hierarchical) form for
several releases now.

> CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Ingo Molnar <mingo@elte.hu>
> CC: Paul Menage <menage@google.com>
> CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> ---
>  kernel/sched.c |   10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9131,6 +9131,7 @@ struct cpuacct {
>        struct cgroup_subsys_state css;
>        /* cpuusage holds pointer to a u64-type object on every cpu */
>        u64 *cpuusage;
> +       struct cpuacct *parent;
>  };
>
>  struct cgroup_subsys cpuacct_subsys;
> @@ -9164,6 +9165,9 @@ static struct cgroup_subsys_state *cpuac
>                return ERR_PTR(-ENOMEM);
>        }
>
> +       if (cgrp->parent)
> +               ca->parent = cgroup_ca(cgrp->parent);
> +
>        return &ca->css;
>  }
>
> @@ -9243,14 +9247,16 @@ static int cpuacct_populate(struct cgrou
>  static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
>  {
>        struct cpuacct *ca;
> +       int cpu;
>
>        if (!cpuacct_subsys.active)
>                return;
>
> +       cpu = task_cpu(tsk);
>        ca = task_ca(tsk);
> -       if (ca) {
> -               u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
>
> +       for (; ca; ca = ca->parent) {
> +               u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
>                *cpuusage += cputime;
>        }
>  }
>

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-25 15:38         ` Paul Menage
@ 2008-10-27  1:17           ` KAMEZAWA Hiroyuki
  2008-10-27  4:43             ` Bharata B Rao
  0 siblings, 1 reply; 28+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-10-27  1:17 UTC (permalink / raw)
  To: Paul Menage
  Cc: bharata, linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Sat, 25 Oct 2008 08:38:52 -0700
"Paul Menage" <menage@google.com> wrote:

> On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
> <bharata@linux.vnet.ibm.com> wrote:
> >
> > Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> 
> Reviewed-by: Paul Menage <menage@google.com>
> 
> So in technical terms this patch looks fine now. There's still the
> question of whether it's OK to change the existing API, since it's
> been in the kernel in its currently (non-hierarchical) form for
> several releases now.
> 
Hmm..how about having 2 params as "aggregated usage" and "private usage" ?

 cpuacct.usage.
 cpuacct.all_subtree_usage.

heavy ?

-Kame


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-27  1:17           ` KAMEZAWA Hiroyuki
@ 2008-10-27  4:43             ` Bharata B Rao
  2008-10-27  6:57               ` Balbir Singh
  0 siblings, 1 reply; 28+ messages in thread
From: Bharata B Rao @ 2008-10-27  4:43 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Paul Menage, linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra,
	Ingo Molnar

On Mon, Oct 27, 2008 at 10:17:03AM +0900, KAMEZAWA Hiroyuki wrote:
> On Sat, 25 Oct 2008 08:38:52 -0700
> "Paul Menage" <menage@google.com> wrote:
> 
> > On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
> > <bharata@linux.vnet.ibm.com> wrote:
> > >
> > > Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > 
> > Reviewed-by: Paul Menage <menage@google.com>

Thanks Paul.

> > 
> > So in technical terms this patch looks fine now. There's still the
> > question of whether it's OK to change the existing API, since it's
> > been in the kernel in its currently (non-hierarchical) form for
> > several releases now.

Hmm... Can we consider this as an API change ? Currently cpuacct.usage
readers of a parent accounting group are missing the usage contributions
from its children groups. I would consider this patch as fixing the
above problem by correctly reflecting the cpu usage for every accounting
group.

> > 
> Hmm..how about having 2 params as "aggregated usage" and "private usage" ?
> 
>  cpuacct.usage.
>  cpuacct.all_subtree_usage.

Is there really a need to differentiate between aggregated and private
usage other than to maintain the current behaviour ?

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-25  6:01       ` Bharata B Rao
  2008-10-25 15:38         ` Paul Menage
@ 2008-10-27  5:54         ` Li Zefan
  1 sibling, 0 replies; 28+ messages in thread
From: Li Zefan @ 2008-10-27  5:54 UTC (permalink / raw)
  To: bharata
  Cc: Paul Menage, linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra,
	Ingo Molnar

> Ok. I realized that defining init_cpuacct_group explicitly and statically
> isn't needed. I was just influenced by init_task_group and init_mem_cgroup.
> I guess those controllers have different reasons to have their init groups
> statically defined.
> 

I think one reason is, cpu_cgroup_subsys has to be initialized very early,
and at that time kmalloc() is not usable, so we use statically defined
init_task_group.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-27  4:43             ` Bharata B Rao
@ 2008-10-27  6:57               ` Balbir Singh
  2008-10-27  8:25                 ` Li Zefan
  0 siblings, 1 reply; 28+ messages in thread
From: Balbir Singh @ 2008-10-27  6:57 UTC (permalink / raw)
  To: bharata
  Cc: KAMEZAWA Hiroyuki, Paul Menage, linux-kernel, Srivatsa Vaddagiri,
	Peter Zijlstra, Ingo Molnar

On Mon, Oct 27, 2008 at 10:13 AM, Bharata B Rao
<bharata@linux.vnet.ibm.com> wrote:
> On Mon, Oct 27, 2008 at 10:17:03AM +0900, KAMEZAWA Hiroyuki wrote:
>> On Sat, 25 Oct 2008 08:38:52 -0700
>> "Paul Menage" <menage@google.com> wrote:
>>
>> > On Fri, Oct 24, 2008 at 11:01 PM, Bharata B Rao
>> > <bharata@linux.vnet.ibm.com> wrote:
>> > >
>> > > Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
>> > > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>> >
>> > Reviewed-by: Paul Menage <menage@google.com>
>
> Thanks Paul.
>
>> >
>> > So in technical terms this patch looks fine now. There's still the
>> > question of whether it's OK to change the existing API, since it's
>> > been in the kernel in its currently (non-hierarchical) form for
>> > several releases now.
>
> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
> readers of a parent accounting group are missing the usage contributions
> from its children groups. I would consider this patch as fixing the
> above problem by correctly reflecting the cpu usage for every accounting
> group.
>

If a particular application desires to derive the usage of its
immediate tasks and does not care about subcgroups, it is a simple
iteration (after this fix)

cpuacct - sigma(cpuacct_child)

and currently if we cared about child accounting, we could do

cpuacct + recursively(sigma(cpuacct_child))

In that sense this fix makes more sense, but like Paul said we need to
figure out if it is an API change. My take is that it is a BUG fix,
since we do care about child subgroups in accounting.


>> >
>> Hmm..how about having 2 params as "aggregated usage" and "private usage" ?
>>
>>  cpuacct.usage.
>>  cpuacct.all_subtree_usage.
>
> Is there really a need to differentiate between aggregated and private
> usage other than to maintain the current behaviour ?
>

That might be useful to have, but as above it can always be derived.

Balbir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-27  6:57               ` Balbir Singh
@ 2008-10-27  8:25                 ` Li Zefan
  2008-10-30 17:16                   ` Dhaval Giani
  0 siblings, 1 reply; 28+ messages in thread
From: Li Zefan @ 2008-10-27  8:25 UTC (permalink / raw)
  To: Balbir Singh
  Cc: bharata, KAMEZAWA Hiroyuki, Paul Menage, linux-kernel,
	Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

>>>> So in technical terms this patch looks fine now. There's still the
>>>> question of whether it's OK to change the existing API, since it's
>>>> been in the kernel in its currently (non-hierarchical) form for
>>>> several releases now.
>> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
>> readers of a parent accounting group are missing the usage contributions
>> from its children groups. I would consider this patch as fixing the
>> above problem by correctly reflecting the cpu usage for every accounting
>> group.
>>
> 
> If a particular application desires to derive the usage of its
> immediate tasks and does not care about subcgroups, it is a simple
> iteration (after this fix)
> 
> cpuacct - sigma(cpuacct_child)
> 
> and currently if we cared about child accounting, we could do
> 
> cpuacct + recursively(sigma(cpuacct_child))
> 
> In that sense this fix makes more sense, but like Paul said we need to
> figure out if it is an API change. My take is that it is a BUG fix,
> since we do care about child subgroups in accounting.
> 

cpuacct was designed to count cpu usage of a group of tasks, and now some people
want it to also take child group's usage into account, so I think this is a feature
request but not a bug fix.

How about add a flag to disable/enable hierarchical accounting?

=====

From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 27 Oct 2008 16:00:21 +0800
Subject: [PATCH] cpuacct: add hierarchical accouning

Add hierarchical accouning to cpu accouting subsystem, so the cputime
of a task is chareged to its accounting group and all it's parent
accouning groups.

Also add 'cpuacct.hierarchy' control file, so we can enable/disable
hierarchical accounting. The default is disabled, so we reserve the
original behavior of cpuacct.

Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 kernel/sched.c |   75 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 59 insertions(+), 16 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c..1c997bd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9232,15 +9232,22 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/*
+ * Track cpu usage of a group of tasks.
+ *
+ * If cpuacct_hierarchy is set, it's children's usage is also accounted.
+ */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
 
+static int cpuacct_hierarchy;
+
 /* return cpu accounting group corresponding to this container */
 static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
 {
@@ -9256,8 +9263,8 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk)
 }
 
 /* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_create(
-	struct cgroup_subsys *ss, struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpuacct_create(struct cgroup_subsys *ss,
+						  struct cgroup *cgrp)
 {
 	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
 
@@ -9270,12 +9277,14 @@ static struct cgroup_subsys_state *cpuacct_create(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
 /* destroy an existing cpu accounting group */
-static void
-cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
 
@@ -9306,7 +9315,7 @@ static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
 }
 
 static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
-								u64 reset)
+			  u64 reset)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
 	int err = 0;
@@ -9328,17 +9337,42 @@ out:
 	return err;
 }
 
-static struct cftype files[] = {
-	{
-		.name = "usage",
-		.read_u64 = cpuusage_read,
-		.write_u64 = cpuusage_write,
-	},
+static u64 cpuacct_hierarchy_read(struct cgroup *cgrp, struct cftype *cft)
+{
+	return cpuacct_hierarchy;
+}
+
+static int cpuacct_hierarchy_write(struct cgroup *cgrp, struct cftype *cftype,
+				   u64 val)
+{
+	cpuacct_hierarchy = !!val;
+	return 0;
+}
+
+static struct cftype cft_cpuusage = {
+	.name = "usage",
+	.read_u64 = cpuusage_read,
+	.write_u64 = cpuusage_write,
+};
+
+static struct cftype cft_hierarchy = {
+	.name = "hierarchy",
+	.read_u64 = cpuacct_hierarchy_read,
+	.write_u64 = cpuacct_hierarchy_write,
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
-	return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
+	int ret;
+
+	ret = cgroup_add_file(cgrp, ss, &cft_cpuusage);
+	if (ret)
+		return ret;
+
+	if (!cgrp->parent)
+		ret = cgroup_add_file(cgrp, ss, &cft_hierarchy);
+
+	return ret;
 }
 
 /*
@@ -9349,15 +9383,24 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
-		*cpuusage += cputime;
+	if (cpuacct_hierarchy) {
+		for (; ca; ca = ca->parent) {
+			u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+			*cpuusage += cputime;
+		}
+	} else {
+		if (ca) {
+			u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
+			*cpuusage += cputime;
+		}
 	}
 }
 
-- 
1.5.4.rc3



^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-27  8:25                 ` Li Zefan
@ 2008-10-30 17:16                   ` Dhaval Giani
  2008-10-31  0:40                     ` KAMEZAWA Hiroyuki
  2008-11-05  3:31                     ` Li Zefan
  0 siblings, 2 replies; 28+ messages in thread
From: Dhaval Giani @ 2008-10-30 17:16 UTC (permalink / raw)
  To: Li Zefan
  Cc: Balbir Singh, bharata, KAMEZAWA Hiroyuki, Paul Menage,
	linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Mon, Oct 27, 2008 at 04:25:01PM +0800, Li Zefan wrote:
> >>>> So in technical terms this patch looks fine now. There's still the
> >>>> question of whether it's OK to change the existing API, since it's
> >>>> been in the kernel in its currently (non-hierarchical) form for
> >>>> several releases now.
> >> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
> >> readers of a parent accounting group are missing the usage contributions
> >> from its children groups. I would consider this patch as fixing the
> >> above problem by correctly reflecting the cpu usage for every accounting
> >> group.
> >>
> > 
> > If a particular application desires to derive the usage of its
> > immediate tasks and does not care about subcgroups, it is a simple
> > iteration (after this fix)
> > 
> > cpuacct - sigma(cpuacct_child)
> > 
> > and currently if we cared about child accounting, we could do
> > 
> > cpuacct + recursively(sigma(cpuacct_child))
> > 
> > In that sense this fix makes more sense, but like Paul said we need to
> > figure out if it is an API change. My take is that it is a BUG fix,
> > since we do care about child subgroups in accounting.
> > 
> 
> cpuacct was designed to count cpu usage of a group of tasks, and now some people
> want it to also take child group's usage into account, so I think this is a feature
> request but not a bug fix.
> 

I disagree. The child is a part of the parent's hierarchy, and therefore
its usage should reflect in the parent's usage.

Thanks,
-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-30 17:16                   ` Dhaval Giani
@ 2008-10-31  0:40                     ` KAMEZAWA Hiroyuki
  2008-11-04 12:49                       ` Bharata B Rao
  2008-11-05  3:31                     ` Li Zefan
  1 sibling, 1 reply; 28+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-10-31  0:40 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Li Zefan, Balbir Singh, bharata, Paul Menage, linux-kernel,
	Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Thu, 30 Oct 2008 22:46:22 +0530
Dhaval Giani <dhaval@linux.vnet.ibm.com> wrote:

> On Mon, Oct 27, 2008 at 04:25:01PM +0800, Li Zefan wrote:
> > >>>> So in technical terms this patch looks fine now. There's still the
> > >>>> question of whether it's OK to change the existing API, since it's
> > >>>> been in the kernel in its currently (non-hierarchical) form for
> > >>>> several releases now.
> > >> Hmm... Can we consider this as an API change ? Currently cpuacct.usage
> > >> readers of a parent accounting group are missing the usage contributions
> > >> from its children groups. I would consider this patch as fixing the
> > >> above problem by correctly reflecting the cpu usage for every accounting
> > >> group.
> > >>
> > > 
> > > If a particular application desires to derive the usage of its
> > > immediate tasks and does not care about subcgroups, it is a simple
> > > iteration (after this fix)
> > > 
> > > cpuacct - sigma(cpuacct_child)
> > > 
> > > and currently if we cared about child accounting, we could do
> > > 
> > > cpuacct + recursively(sigma(cpuacct_child))
> > > 
> > > In that sense this fix makes more sense, but like Paul said we need to
> > > figure out if it is an API change. My take is that it is a BUG fix,
> > > since we do care about child subgroups in accounting.
> > > 
> > 
> > cpuacct was designed to count cpu usage of a group of tasks, and now some people
> > want it to also take child group's usage into account, so I think this is a feature
> > request but not a bug fix.
> > 
> 
> I disagree. The child is a part of the parent's hierarchy, and therefore
> its usage should reflect in the parent's usage.
> 

In my point of view, there is no big difference. It's just whether we need a tool
in userland or not. If there is no performance impact, I have no objections.

One request from me is add Documentation/controllers/cpuacct.txt or some to explain
"what we see".

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-31  0:40                     ` KAMEZAWA Hiroyuki
@ 2008-11-04 12:49                       ` Bharata B Rao
  2008-11-04 17:20                         ` Randy Dunlap
                                           ` (3 more replies)
  0 siblings, 4 replies; 28+ messages in thread
From: Bharata B Rao @ 2008-11-04 12:49 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: Dhaval Giani, Li Zefan, Balbir Singh, Paul Menage, linux-kernel,
	Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Fri, Oct 31, 2008 at 09:40:41AM +0900, KAMEZAWA Hiroyuki wrote:
> On Thu, 30 Oct 2008 22:46:22 +0530
> Dhaval Giani <dhaval@linux.vnet.ibm.com> wrote:
> > I disagree. The child is a part of the parent's hierarchy, and therefore
> > its usage should reflect in the parent's usage.
> > 
> 
> In my point of view, there is no big difference. It's just whether we need a tool
> in userland or not. If there is no performance impact, I have no objections.
> 
> One request from me is add Documentation/controllers/cpuacct.txt or some to explain
> "what we see".

I am not sure which version (mine or Li Zefan's) Paul prefers. I am
resending my patch anyway with documentation and performance numbers
included. I don't see any significant improvement or degradation in
hackbench, lmbench and volanomark numbers with this patch.

Regards,
Bharata.

Add hierarchical accounting to cpu accounting controller and cpuacct
documentation.

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Ingo Molnar <mingo@elte.hu>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
---
 Documentation/controllers/cpuacct.txt |   32 ++++++++++++++++++++++++++++++++
 kernel/sched.c                        |   10 ++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these group of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of it's child groups and
+the tasks directly present in it's group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group comprises of all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and it's children
+can be obtained from g1/cpuacct.usage and the same gets accumulated in
+/cgroups/cpuacct.usage also.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9236,6 +9236,7 @@ struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9269,6 +9270,9 @@ static struct cgroup_subsys_state *cpuac
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9348,14 +9352,16 @@ static int cpuacct_populate(struct cgrou
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }

Performance numbers
==================
2x2HT=4CPU i386 running 2.6.28-rc3

All benchmarks were run from bash which belonged to the grand
child group of the topmost accounting group. The tests were
first run on 2628rc3 and then on 2628rc3+this patch and the
normalized difference between the two runs is shown below.

I. hackbench
============
# ./hackbench 100 process 100
Running with 100*40 (== 4000) tasks.
Normalized time difference for avg of 5 runs: 1.0059

# ./hackbench 25 thread 100
Running with 25*40 (== 1000) tasks.
Normalized time difference for avg of 5 runs: 1.0139

II. lmbench
===========
---------------------
4 threads
---------------------
size	Normalized
(kb)	Change (Time)
---------------------
16	1.1017
64	1.1168
128	1.1072
256	1.0085
--------------------
8 threads
-------------------
16	1.1835
64	1.0617
128	0.9980
256	0.9682
-------------------
16 threads
-------------------
16	1.1186
64	0.9921
128	0.9505
256	1.0043
-------------------
32 threads
-------------------
16	1.0005
64	1.0089
128	1.0019
256	1.0226
-------------------
64 threads
-------------------
16	1.0207
64	1.0385
128	1.0109
256	1.0159
-------------------

III. volanomark
===============
Normalized average throughput difference for loopback test

------------------------------------------------------------
Nr runs		Normalized average throughput difference
------------------------------------------------------------
10		0.9579
------------------------------------------------------------
4		1.1465
------------------------------------------------------------
9		0.9451
------------------------------------------------------------
19		1.0133
------------------------------------------------------------

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-04 12:49                       ` Bharata B Rao
@ 2008-11-04 17:20                         ` Randy Dunlap
  2008-11-05  3:24                           ` Bharata B Rao
  2008-11-05  6:17                         ` Li Zefan
                                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 28+ messages in thread
From: Randy Dunlap @ 2008-11-04 17:20 UTC (permalink / raw)
  To: bharata
  Cc: KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan, Balbir Singh,
	Paul Menage, linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra,
	Ingo Molnar

On Tue, 4 Nov 2008 18:19:37 +0530 Bharata B Rao wrote:

> Add hierarchical accounting to cpu accounting controller and cpuacct
> documentation.
> 
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
> 
> Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Ingo Molnar <mingo@elte.hu>
> CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Reviewed-by: Paul Menage <menage@google.com>
> ---
>  Documentation/controllers/cpuacct.txt |   32 ++++++++++++++++++++++++++++++++
>  kernel/sched.c                        |   10 ++++++++--
>  2 files changed, 40 insertions(+), 2 deletions(-)
> 
> --- /dev/null
> +++ b/Documentation/controllers/cpuacct.txt
> @@ -0,0 +1,32 @@
> +CPU Accounting Controller
> +-------------------------
> +
> +The CPU accounting controller is used to group tasks using cgroups and
> +account the CPU usage of these group of tasks.
> +
> +The CPU accounting controller supports multi-hierarchy groups. An accounting
> +group accumulates the CPU usage of all of it's child groups and

                                             its  {it's means "it is"}

> +the tasks directly present in it's group.

                                 its

> +
> +Accounting groups can be created by first mounting the cgroup filesystem.
> +
> +# mkdir /cgroups
> +# mount -t cgroup -ocpuacct none /cgroups
> +
> +With the above step, the initial or the parent accounting group
> +becomes visible at /cgroups. At bootup, this group comprises of all the

   "comprises of all" seems awkward to me.  Maybe "includes all" or
   "comprises all" if you have to use that word.  ;)

> +tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
> +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
> +this group which is essentially the CPU time obtained by all the tasks
> +in the system.
> +
> +New accounting groups can be created under the parent group /cgroups.
> +
> +# cd /cgroups
> +# mkdir g1
> +# echo $$ > g1
> +
> +The above steps create a new group g1 and move the current shell
> +process (bash) into it. CPU time consumed by this bash and it's children

                                                              its

> +can be obtained from g1/cpuacct.usage and the same gets accumulated in

                                                      is accumulated in

> +/cgroups/cpuacct.usage also.



> Performance numbers
> ==================
> 2x2HT=4CPU i386 running 2.6.28-rc3
> 
> All benchmarks were run from bash which belonged to the grand

                                                          grandchild (?)
Otherwise it says that the "child group" is "grand."
Oh, never mind.  I thought that this was part of a doc file, but it's not.


> child group of the topmost accounting group. The tests were
> first run on 2628rc3 and then on 2628rc3+this patch and the
> normalized difference between the two runs is shown below.
> 
> I. hackbench
> ============
> # ./hackbench 100 process 100
> Running with 100*40 (== 4000) tasks.
> Normalized time difference for avg of 5 runs: 1.0059
> 
> # ./hackbench 25 thread 100
> Running with 25*40 (== 1000) tasks.
> Normalized time difference for avg of 5 runs: 1.0139
> 
> II. lmbench
> ===========
> ---------------------
> 4 threads
> ---------------------
> size	Normalized
> (kb)	Change (Time)
> ---------------------
> 16	1.1017
> 64	1.1168
> 128	1.1072
> 256	1.0085
> --------------------
> 8 threads
> -------------------
> 16	1.1835
> 64	1.0617
> 128	0.9980
> 256	0.9682
> -------------------
> 16 threads
> -------------------
> 16	1.1186
> 64	0.9921
> 128	0.9505
> 256	1.0043
> -------------------
> 32 threads
> -------------------
> 16	1.0005
> 64	1.0089
> 128	1.0019
> 256	1.0226
> -------------------
> 64 threads
> -------------------
> 16	1.0207
> 64	1.0385
> 128	1.0109
> 256	1.0159
> -------------------
> 
> III. volanomark
> ===============
> Normalized average throughput difference for loopback test
> 
> ------------------------------------------------------------
> Nr runs		Normalized average throughput difference
> ------------------------------------------------------------
> 10		0.9579
> ------------------------------------------------------------
> 4		1.1465
> ------------------------------------------------------------
> 9		0.9451
> ------------------------------------------------------------
> 19		1.0133
> ------------------------------------------------------------
> --


---
~Randy

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-04 17:20                         ` Randy Dunlap
@ 2008-11-05  3:24                           ` Bharata B Rao
  2008-11-05  3:29                             ` Srivatsa Vaddagiri
  2008-11-10 13:09                             ` Ingo Molnar
  0 siblings, 2 replies; 28+ messages in thread
From: Bharata B Rao @ 2008-11-05  3:24 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan, Balbir Singh,
	Paul Menage, linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra,
	Ingo Molnar

On Tue, Nov 04, 2008 at 09:20:40AM -0800, Randy Dunlap wrote:
> On Tue, 4 Nov 2008 18:19:37 +0530 Bharata B Rao wrote:
> 
> > Add hierarchical accounting to cpu accounting controller and cpuacct
> > documentation.
> > 
> > Currently, while charging the task's cputime to its accounting group,
> > the accounting group hierarchy isn't updated. This patch charges the cputime
> > of a task to its accounting group and all its parent accounting groups.
> > 
> > Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> > CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > CC: Ingo Molnar <mingo@elte.hu>
> > CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> > Reviewed-by: Paul Menage <menage@google.com>
> > ---
> >  Documentation/controllers/cpuacct.txt |   32 ++++++++++++++++++++++++++++++++
> >  kernel/sched.c                        |   10 ++++++++--
> >  2 files changed, 40 insertions(+), 2 deletions(-)
> > 
> > --- /dev/null
> > +++ b/Documentation/controllers/cpuacct.txt
> > @@ -0,0 +1,32 @@
> > +CPU Accounting Controller
> > +-------------------------
> > +
> > +The CPU accounting controller is used to group tasks using cgroups and
> > +account the CPU usage of these group of tasks.
> > +
> > +The CPU accounting controller supports multi-hierarchy groups. An accounting
> > +group accumulates the CPU usage of all of it's child groups and
> 
>                                              its  {it's means "it is"}
> 
> > +the tasks directly present in it's group.
> 
>                                  its

You are right, I was under the wrong impression that possessive case of
"it" is still "it's".

I will resend the patch with other corrections you mentioned only if
Paul/Ingo want this patch to go in.

Thanks for your comments.

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-05  3:24                           ` Bharata B Rao
@ 2008-11-05  3:29                             ` Srivatsa Vaddagiri
  2008-11-10 10:21                               ` Dhaval Giani
  2008-11-10 13:09                             ` Ingo Molnar
  1 sibling, 1 reply; 28+ messages in thread
From: Srivatsa Vaddagiri @ 2008-11-05  3:29 UTC (permalink / raw)
  To: Bharata B Rao
  Cc: Randy Dunlap, KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan,
	Balbir Singh, Paul Menage, linux-kernel, Peter Zijlstra,
	Ingo Molnar

On Wed, Nov 05, 2008 at 08:54:40AM +0530, Bharata B Rao wrote:
> I will resend the patch with other corrections you mentioned only if
> Paul/Ingo want this patch to go in.

I strongly recommend his patch to go in, as otherwise group accounting
will be broken where control hierarchy is not same as accounting
hierarchy. In that sense, I would treat this patch as more of a bug fix
than anything else.

- vatsa

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-10-30 17:16                   ` Dhaval Giani
  2008-10-31  0:40                     ` KAMEZAWA Hiroyuki
@ 2008-11-05  3:31                     ` Li Zefan
  2008-11-05  3:48                       ` KAMEZAWA Hiroyuki
  1 sibling, 1 reply; 28+ messages in thread
From: Li Zefan @ 2008-11-05  3:31 UTC (permalink / raw)
  To: Dhaval Giani
  Cc: Balbir Singh, bharata, KAMEZAWA Hiroyuki, Paul Menage,
	linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

>> cpuacct was designed to count cpu usage of a group of tasks, and now some people
>> want it to also take child group's usage into account, so I think this is a feature
>> request but not a bug fix.
>>
> 
> I disagree. The child is a part of the parent's hierarchy, and therefore
> its usage should reflect in the parent's usage.
> 

In memcg the child's usage doesn't reflect in its parent's usage. ;)

Balbir just posted a patchset to add hierarchy support in memcg, and added memory.feature
to disable/enable this feature. Is it for performance only or also for keeping the user
interface/behavior unchanged?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-05  3:31                     ` Li Zefan
@ 2008-11-05  3:48                       ` KAMEZAWA Hiroyuki
  0 siblings, 0 replies; 28+ messages in thread
From: KAMEZAWA Hiroyuki @ 2008-11-05  3:48 UTC (permalink / raw)
  To: Li Zefan
  Cc: Dhaval Giani, Balbir Singh, bharata, Paul Menage, linux-kernel,
	Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

On Wed, 05 Nov 2008 11:31:32 +0800
Li Zefan <lizf@cn.fujitsu.com> wrote:

> >> cpuacct was designed to count cpu usage of a group of tasks, and now some people
> >> want it to also take child group's usage into account, so I think this is a feature
> >> request but not a bug fix.
> >>
> > 
> > I disagree. The child is a part of the parent's hierarchy, and therefore
> > its usage should reflect in the parent's usage.
> > 
> 
> In memcg the child's usage doesn't reflect in its parent's usage. ;)
> 
> Balbir just posted a patchset to add hierarchy support in memcg, and added memory.feature
> to disable/enable this feature. Is it for performance only or also for keeping the user
> interface/behavior unchanged?

The main reason is performance.

And one of memcg's purpose is isolating resource usage of groups. Sum of usage
is not very important sometimes. (we have /proc/meminfo ;)
Sum of usage can be easily calculcated by user land and we don't have to pay
the cost for it in the kernel.

Thanks,
-Kame


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-04 12:49                       ` Bharata B Rao
  2008-11-04 17:20                         ` Randy Dunlap
@ 2008-11-05  6:17                         ` Li Zefan
  2008-11-06  6:43                         ` Balbir Singh
  2008-11-10 10:22                         ` Peter Zijlstra
  3 siblings, 0 replies; 28+ messages in thread
From: Li Zefan @ 2008-11-05  6:17 UTC (permalink / raw)
  To: bharata
  Cc: KAMEZAWA Hiroyuki, Dhaval Giani, Balbir Singh, Paul Menage,
	linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

> +The CPU accounting controller is used to group tasks using cgroups and
> +account the CPU usage of these group of tasks.

this group or these groups?

...

> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9236,6 +9236,7 @@ struct cpuacct {
>  	struct cgroup_subsys_state css;
>  	/* cpuusage holds pointer to a u64-type object on every cpu */
>  	u64 *cpuusage;
> +	struct cpuacct *parent;
>  };
>  

Maybe the comment needs to be updated?

/* track cpu usage of a group of tasks */
struct cpuacct {
	...
};

->

/* track cpu usage of a group of tasks and its child groups */

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-04 12:49                       ` Bharata B Rao
  2008-11-04 17:20                         ` Randy Dunlap
  2008-11-05  6:17                         ` Li Zefan
@ 2008-11-06  6:43                         ` Balbir Singh
  2008-11-10 10:22                         ` Peter Zijlstra
  3 siblings, 0 replies; 28+ messages in thread
From: Balbir Singh @ 2008-11-06  6:43 UTC (permalink / raw)
  To: bharata
  Cc: KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan, Paul Menage,
	linux-kernel, Srivatsa Vaddagiri, Peter Zijlstra, Ingo Molnar

Bharata B Rao wrote:
> On Fri, Oct 31, 2008 at 09:40:41AM +0900, KAMEZAWA Hiroyuki wrote:
>> On Thu, 30 Oct 2008 22:46:22 +0530
>> Dhaval Giani <dhaval@linux.vnet.ibm.com> wrote:
>>> I disagree. The child is a part of the parent's hierarchy, and therefore
>>> its usage should reflect in the parent's usage.
>>>
>> In my point of view, there is no big difference. It's just whether we need a tool
>> in userland or not. If there is no performance impact, I have no objections.
>>
>> One request from me is add Documentation/controllers/cpuacct.txt or some to explain
>> "what we see".
> 
> I am not sure which version (mine or Li Zefan's) Paul prefers. I am
> resending my patch anyway with documentation and performance numbers
> included. I don't see any significant improvement or degradation in
> hackbench, lmbench and volanomark numbers with this patch.
> 
> Regards,
> Bharata.
> 
> Add hierarchical accounting to cpu accounting controller and cpuacct
> documentation.
> 
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
> 
> Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Ingo Molnar <mingo@elte.hu>
> CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Reviewed-by: Paul Menage <menage@google.com>

Looks good and straight forward.

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>

-- 
	Balbir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-05  3:29                             ` Srivatsa Vaddagiri
@ 2008-11-10 10:21                               ` Dhaval Giani
  0 siblings, 0 replies; 28+ messages in thread
From: Dhaval Giani @ 2008-11-10 10:21 UTC (permalink / raw)
  To: Srivatsa Vaddagiri
  Cc: Bharata B Rao, Randy Dunlap, KAMEZAWA Hiroyuki, Li Zefan,
	Balbir Singh, Paul Menage, linux-kernel, Peter Zijlstra,
	Ingo Molnar

On Wed, Nov 05, 2008 at 08:59:11AM +0530, Srivatsa Vaddagiri wrote:
> On Wed, Nov 05, 2008 at 08:54:40AM +0530, Bharata B Rao wrote:
> > I will resend the patch with other corrections you mentioned only if
> > Paul/Ingo want this patch to go in.
> 
> I strongly recommend his patch to go in, as otherwise group accounting
> will be broken where control hierarchy is not same as accounting
> hierarchy. In that sense, I would treat this patch as more of a bug fix
> than anything else.
> 

Wondering what the status of this patch is?

thanks,
-- 
regards,
Dhaval

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-04 12:49                       ` Bharata B Rao
                                           ` (2 preceding siblings ...)
  2008-11-06  6:43                         ` Balbir Singh
@ 2008-11-10 10:22                         ` Peter Zijlstra
  2008-11-10 13:05                           ` Balbir Singh
  3 siblings, 1 reply; 28+ messages in thread
From: Peter Zijlstra @ 2008-11-10 10:22 UTC (permalink / raw)
  To: bharata
  Cc: KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan, Balbir Singh,
	Paul Menage, linux-kernel, Srivatsa Vaddagiri, Ingo Molnar

On Tue, 2008-11-04 at 18:19 +0530, Bharata B Rao wrote:

> Add hierarchical accounting to cpu accounting controller and cpuacct
> documentation.
> 
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
> 
> Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Ingo Molnar <mingo@elte.hu>
> CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Reviewed-by: Paul Menage <menage@google.com>

Seems sane and simple enough.

Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

Ingo?

> ---
>  Documentation/controllers/cpuacct.txt |   32 ++++++++++++++++++++++++++++++++
>  kernel/sched.c                        |   10 ++++++++--
>  2 files changed, 40 insertions(+), 2 deletions(-)
> 
> --- /dev/null
> +++ b/Documentation/controllers/cpuacct.txt
> @@ -0,0 +1,32 @@
> +CPU Accounting Controller
> +-------------------------
> +
> +The CPU accounting controller is used to group tasks using cgroups and
> +account the CPU usage of these group of tasks.
> +
> +The CPU accounting controller supports multi-hierarchy groups. An accounting
> +group accumulates the CPU usage of all of it's child groups and
> +the tasks directly present in it's group.
> +
> +Accounting groups can be created by first mounting the cgroup filesystem.
> +
> +# mkdir /cgroups
> +# mount -t cgroup -ocpuacct none /cgroups
> +
> +With the above step, the initial or the parent accounting group
> +becomes visible at /cgroups. At bootup, this group comprises of all the
> +tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
> +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
> +this group which is essentially the CPU time obtained by all the tasks
> +in the system.
> +
> +New accounting groups can be created under the parent group /cgroups.
> +
> +# cd /cgroups
> +# mkdir g1
> +# echo $$ > g1
> +
> +The above steps create a new group g1 and move the current shell
> +process (bash) into it. CPU time consumed by this bash and it's children
> +can be obtained from g1/cpuacct.usage and the same gets accumulated in
> +/cgroups/cpuacct.usage also.
> --- a/kernel/sched.c
> +++ b/kernel/sched.c
> @@ -9236,6 +9236,7 @@ struct cpuacct {
>  	struct cgroup_subsys_state css;
>  	/* cpuusage holds pointer to a u64-type object on every cpu */
>  	u64 *cpuusage;
> +	struct cpuacct *parent;
>  };
>  
>  struct cgroup_subsys cpuacct_subsys;
> @@ -9269,6 +9270,9 @@ static struct cgroup_subsys_state *cpuac
>  		return ERR_PTR(-ENOMEM);
>  	}
>  
> +	if (cgrp->parent)
> +		ca->parent = cgroup_ca(cgrp->parent);
> +
>  	return &ca->css;
>  }
>  
> @@ -9348,14 +9352,16 @@ static int cpuacct_populate(struct cgrou
>  static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
>  {
>  	struct cpuacct *ca;
> +	int cpu;
>  
>  	if (!cpuacct_subsys.active)
>  		return;
>  
> +	cpu = task_cpu(tsk);
>  	ca = task_ca(tsk);
> -	if (ca) {
> -		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
>  
> +	for (; ca; ca = ca->parent) {
> +		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
>  		*cpuusage += cputime;
>  	}
>  }



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-10 10:22                         ` Peter Zijlstra
@ 2008-11-10 13:05                           ` Balbir Singh
  0 siblings, 0 replies; 28+ messages in thread
From: Balbir Singh @ 2008-11-10 13:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: bharata, KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan, Paul Menage,
	linux-kernel, Srivatsa Vaddagiri, Ingo Molnar

Peter Zijlstra wrote:
> On Tue, 2008-11-04 at 18:19 +0530, Bharata B Rao wrote:
> 
>> Add hierarchical accounting to cpu accounting controller and cpuacct
>> documentation.
>>
>> Currently, while charging the task's cputime to its accounting group,
>> the accounting group hierarchy isn't updated. This patch charges the cputime
>> of a task to its accounting group and all its parent accounting groups.
>>
>> Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
>> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
>> CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> CC: Ingo Molnar <mingo@elte.hu>
>> CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
>> Reviewed-by: Paul Menage <menage@google.com>
> 
> Seems sane and simple enough.
> 
> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

Just in case it was missed

Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>

-- 
	Balbir

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-05  3:24                           ` Bharata B Rao
  2008-11-05  3:29                             ` Srivatsa Vaddagiri
@ 2008-11-10 13:09                             ` Ingo Molnar
  2008-11-10 15:11                               ` Bharata B Rao
  1 sibling, 1 reply; 28+ messages in thread
From: Ingo Molnar @ 2008-11-10 13:09 UTC (permalink / raw)
  To: Bharata B Rao
  Cc: Randy Dunlap, KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan,
	Balbir Singh, Paul Menage, linux-kernel, Srivatsa Vaddagiri,
	Peter Zijlstra


* Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> I will resend the patch with other corrections you mentioned only if 
> Paul/Ingo want this patch to go in.

yes, please resend the fixed version.

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-10 13:09                             ` Ingo Molnar
@ 2008-11-10 15:11                               ` Bharata B Rao
  2008-11-11 11:13                                 ` Ingo Molnar
  0 siblings, 1 reply; 28+ messages in thread
From: Bharata B Rao @ 2008-11-10 15:11 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Randy Dunlap, KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan,
	Balbir Singh, Paul Menage, linux-kernel, Srivatsa Vaddagiri,
	Peter Zijlstra

On Mon, Nov 10, 2008 at 02:09:37PM +0100, Ingo Molnar wrote:
> 
> * Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> 
> > I will resend the patch with other corrections you mentioned only if 
> > Paul/Ingo want this patch to go in.
> 
> yes, please resend the fixed version.

Here it is:

Add hierarchical accounting to cpu accounting controller and include
cpuacct documentation.

Currently, while charging the task's cputime to its accounting group,
the accounting group hierarchy isn't updated. This patch charges the cputime
of a task to its accounting group and all its parent accounting groups.

Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
CC: Ingo Molnar <mingo@elte.hu>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Reviewed-by: Paul Menage <menage@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 Documentation/controllers/cpuacct.txt |   32 ++++++++++++++++++++++++++++++++
 kernel/sched.c                        |   12 +++++++++---
 2 files changed, 41 insertions(+), 3 deletions(-)

--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group includes all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/cgroups/cpuacct.usage also.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9231,11 +9231,12 @@ struct cgroup_subsys cpu_cgroup_subsys =
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9269,6 +9270,9 @@ static struct cgroup_subsys_state *cpuac
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9348,14 +9352,16 @@ static int cpuacct_populate(struct cgrou
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }

Regards,
Bharata.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] Add hierarchical accounting to cpu accounting controller
  2008-11-10 15:11                               ` Bharata B Rao
@ 2008-11-11 11:13                                 ` Ingo Molnar
  0 siblings, 0 replies; 28+ messages in thread
From: Ingo Molnar @ 2008-11-11 11:13 UTC (permalink / raw)
  To: Bharata B Rao
  Cc: Randy Dunlap, KAMEZAWA Hiroyuki, Dhaval Giani, Li Zefan,
	Balbir Singh, Paul Menage, linux-kernel, Srivatsa Vaddagiri,
	Peter Zijlstra


* Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:

> On Mon, Nov 10, 2008 at 02:09:37PM +0100, Ingo Molnar wrote:
> > 
> > * Bharata B Rao <bharata@linux.vnet.ibm.com> wrote:
> > 
> > > I will resend the patch with other corrections you mentioned only if 
> > > Paul/Ingo want this patch to go in.
> > 
> > yes, please resend the fixed version.
> 
> Here it is:
> 
> Add hierarchical accounting to cpu accounting controller and include
> cpuacct documentation.
> 
> Currently, while charging the task's cputime to its accounting group,
> the accounting group hierarchy isn't updated. This patch charges the cputime
> of a task to its accounting group and all its parent accounting groups.
> 
> Reported-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> CC: Ingo Molnar <mingo@elte.hu>
> CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
> Reviewed-by: Paul Menage <menage@google.com>
> Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  Documentation/controllers/cpuacct.txt |   32 ++++++++++++++++++++++++++++++++
>  kernel/sched.c                        |   12 +++++++++---
>  2 files changed, 41 insertions(+), 3 deletions(-)

applied to tip/sched/core, thanks Bharata!

	Ingo

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2008-11-11 11:14 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-10-23  5:43 [PATCH] Add hierarchical accounting to cpu accounting controller Bharata B Rao
2008-10-23  6:41 ` Bharata B Rao
2008-10-23 15:49 ` Paul Menage
2008-10-24  5:08   ` Bharata B Rao
2008-10-24 17:37     ` Paul Menage
2008-10-25  6:01       ` Bharata B Rao
2008-10-25 15:38         ` Paul Menage
2008-10-27  1:17           ` KAMEZAWA Hiroyuki
2008-10-27  4:43             ` Bharata B Rao
2008-10-27  6:57               ` Balbir Singh
2008-10-27  8:25                 ` Li Zefan
2008-10-30 17:16                   ` Dhaval Giani
2008-10-31  0:40                     ` KAMEZAWA Hiroyuki
2008-11-04 12:49                       ` Bharata B Rao
2008-11-04 17:20                         ` Randy Dunlap
2008-11-05  3:24                           ` Bharata B Rao
2008-11-05  3:29                             ` Srivatsa Vaddagiri
2008-11-10 10:21                               ` Dhaval Giani
2008-11-10 13:09                             ` Ingo Molnar
2008-11-10 15:11                               ` Bharata B Rao
2008-11-11 11:13                                 ` Ingo Molnar
2008-11-05  6:17                         ` Li Zefan
2008-11-06  6:43                         ` Balbir Singh
2008-11-10 10:22                         ` Peter Zijlstra
2008-11-10 13:05                           ` Balbir Singh
2008-11-05  3:31                     ` Li Zefan
2008-11-05  3:48                       ` KAMEZAWA Hiroyuki
2008-10-27  5:54         ` Li Zefan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).