All of lore.kernel.org
 help / color / mirror / Atom feed
From: Venkatesh Pallipadi <venki@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@elte.hu>, "H. Peter Anvin" <hpa@zytor.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Venkatesh Pallipadi <venki@google.com>,
	Paul Menage <menage@google.com>,
	linux-kernel@vger.kernel.org, Paul Turner <pjt@google.com>,
	Martin Schwidefsky <schwidefsky@de.ibm.com>,
	Heiko Carstens <heiko.carstens@de.ibm.com>,
	Paul Mackerras <paulus@samba.org>,
	Tony Luck <tony.luck@intel.com>
Subject: [PATCH 4/4] sched: Export irq times through cpuacct cgroup
Date: Mon, 19 Jul 2010 16:57:15 -0700	[thread overview]
Message-ID: <1279583835-22854-5-git-send-email-venki@google.com> (raw)
In-Reply-To: <1279583835-22854-1-git-send-email-venki@google.com>

Adds hi_time, si_time, hi_time_percpu and si_time_percpu info to the cpuacct
cgroup.

The values will be fine-granularity timings when either
CONFIG_IRQ_TIME_ACCOUNTING or CONFIG_VIRT_CPU_ACCOUNTING is enabled.
Otherwise they will be based on tick samples.

I looked at adding this under cpuacct.stat, but this information is most
useful to the administrator in percpu format, so that any hi or si activity
on a particular CPU can be noted and some resource reallocation
(moving the irq away, assigning a different CPU to this cgroup, etc.)
can be done based on that info.

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
---
 Documentation/cgroups/cpuacct.txt |    5 +++
 kernel/sched.c                    |   73 +++++++++++++++++++++++++++++++------
 2 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
index 8b93094..817435e 100644
--- a/Documentation/cgroups/cpuacct.txt
+++ b/Documentation/cgroups/cpuacct.txt
@@ -48,3 +48,8 @@ system times. This has two side effects:
   against concurrent writes.
 - It is possible to see slightly outdated values for user and system times
   due to the batch processing nature of percpu_counter.
+
+cpuacct.hi_time and cpuacct.si_time provide information about the hardirq
+and softirq processing time that was accounted to this cgroup. There are also
+percpu variants of hi_time and si_time that split the info at percpu level.
+All these times are in USER_HZ units.
diff --git a/kernel/sched.c b/kernel/sched.c
index c12c8ea..7198041 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1398,6 +1398,8 @@ enum cpuacct_stat_index {
 
 enum cpuacct_charge_index {
 	CPUACCT_CHARGE_USAGE,	/* ... execution time */
+	CPUACCT_CHARGE_SI_TIME,	/* ... softirq time */
+	CPUACCT_CHARGE_HI_TIME,	/* ... hardirq time */
 
 	CPUACCT_CHARGE_NCHARGES,
 };
@@ -3226,9 +3228,11 @@ void enable_sched_clock_irqtime(void)
 #endif
 
 #if defined(CONFIG_VIRT_CPU_ACCOUNTING)
-static void account_task_irqtime(cputime64_t *task_irqtime, cputime64_t irqtime)
+static void account_task_irqtime(struct task_struct *p,
+		cputime64_t *task_irqtime, int idx, cputime64_t irqtime)
 {
 	*task_irqtime = cputime64_add(*task_irqtime, irqtime);
+	cpuacct_charge(p, idx, irqtime);
 }
 #else
 /*
@@ -3236,10 +3240,13 @@ static void account_task_irqtime(cputime64_t *task_irqtime, cputime64_t irqtime)
  * We handle !sched_clock_irqtime case here as when sched_clock_irqtime is set,
  * this accounting is done in account_system_vtime() below.
  */
-static void account_task_irqtime(cputime64_t *task_irqtime, cputime64_t irqtime)
+static void account_task_irqtime(struct task_struct *p,
+		cputime64_t *task_irqtime, int idx, cputime64_t irqtime)
 {
-	if (!sched_clock_irqtime)
+	if (!sched_clock_irqtime) {
 		*task_irqtime = cputime64_add(*task_irqtime, TICK_NSEC);
+		cpuacct_charge(p, idx, TICK_NSEC);
+	}
 }
 #endif
 
@@ -3270,10 +3277,12 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 	tmp = cputime_to_cputime64(cputime);
 	if (hardirq_count() - hardirq_offset) {
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
-		account_task_irqtime(&p->hi_time, tmp);
+		account_task_irqtime(p, &p->hi_time,
+					CPUACCT_CHARGE_HI_TIME, tmp);
 	} else if (softirq_count()) {
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-		account_task_irqtime(&p->si_time, tmp);
+		account_task_irqtime(p, &p->si_time,
+					CPUACCT_CHARGE_SI_TIME, tmp);
 	} else {
 		cpustat->system = cputime64_add(cpustat->system, tmp);
 	}
@@ -8737,6 +8746,22 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	kfree(ca);
 }
 
+static u64 cpuacct_cpuusage_convert(u64 data, enum cpuacct_charge_index idx)
+{
+	switch (idx) {
+	case CPUACCT_CHARGE_SI_TIME:
+	case CPUACCT_CHARGE_HI_TIME:
+		/*
+		 * irqtime is stored either in ns or cputime64, depending
+		 * on CONFIG_VIRT_CPU_ACCOUNTING. Convert it to clock_t
+		 * before returning to user.
+		 */
+		return irqtime_to_clock_t(data);
+	default:
+		return data;
+	}
+}
+
 static u64 cpuacct_cpuusage_read(struct cpuacct *ca,
 		enum cpuacct_charge_index idx, int cpu)
 {
@@ -8754,7 +8779,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca,
 	data = *cpuusage;
 #endif
 
-	return data;
+	return cpuacct_cpuusage_convert(data, idx);
 }
 
 static void cpuacct_cpuusage_write(struct cpuacct *ca,
@@ -8853,6 +8878,26 @@ static struct cftype files[] = {
 		.private = CPUACCT_CHARGE_USAGE,
 	},
 	{
+		.name = "si_time",
+		.read_u64 = cpuusage_read,
+		.private = CPUACCT_CHARGE_SI_TIME,
+	},
+	{
+		.name = "si_time_percpu",
+		.read_seq_string = cpuacct_percpu_seq_read,
+		.private = CPUACCT_CHARGE_SI_TIME,
+	},
+	{
+		.name = "hi_time",
+		.read_u64 = cpuusage_read,
+		.private = CPUACCT_CHARGE_HI_TIME,
+	},
+	{
+		.name = "hi_time_percpu",
+		.read_seq_string = cpuacct_percpu_seq_read,
+		.private = CPUACCT_CHARGE_HI_TIME,
+	},
+	{
 		.name = "stat",
 		.read_map = cpuacct_stats_show,
 	},
@@ -9017,7 +9062,7 @@ void account_system_vtime(struct task_struct *tsk)
 {
 	unsigned long flags;
 	int cpu;
-	u64 now;
+	u64 now, delta;
 
 	if (!sched_clock_irqtime)
 		return;
@@ -9025,12 +9070,16 @@ void account_system_vtime(struct task_struct *tsk)
 	local_irq_save(flags);
 	cpu = task_cpu(tsk);
 	now = sched_clock_cpu(cpu);
-	if (hardirq_count())
-		tsk->hi_time += now - per_cpu(irq_start_time, cpu);
-	else if (softirq_count())
-		tsk->si_time += now - per_cpu(irq_start_time, cpu);
-
+	delta = now - per_cpu(irq_start_time, cpu);
 	per_cpu(irq_start_time, cpu) = now;
+	if (hardirq_count()) {
+		tsk->hi_time += delta;
+		cpuacct_charge(tsk, CPUACCT_CHARGE_HI_TIME, delta);
+	} else if (softirq_count()) {
+		tsk->si_time += delta;
+		cpuacct_charge(tsk, CPUACCT_CHARGE_SI_TIME, delta);
+	}
+
 	local_irq_restore(flags);
 }
 
-- 
1.7.1


  parent reply	other threads:[~2010-07-19 23:58 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-07-19 23:57 [PATCH 0/4] Finer granularity and task/cgroup irq time accounting Venkatesh Pallipadi
2010-07-19 23:57 ` [PATCH 1/4] sched: Track and export per task [hard|soft]irq time Venkatesh Pallipadi
2010-07-19 23:57 ` [PATCH 2/4] x86: Add IRQ_TIME_ACCOUNTING, finer accounting of irq time to task Venkatesh Pallipadi
2010-07-19 23:57 ` [PATCH 3/4] sched: Generalize cpuacct usage tracking making it simpler to add new stats Venkatesh Pallipadi
2010-07-19 23:57 ` Venkatesh Pallipadi [this message]
2010-07-20  7:55 ` [PATCH 0/4] Finer granularity and task/cgroup irq time accounting Martin Schwidefsky
2010-07-20 16:55   ` Venkatesh Pallipadi
2010-07-22 11:12     ` Martin Schwidefsky
2010-07-23  2:12       ` Venkatesh Pallipadi
2010-08-24  7:51         ` Peter Zijlstra
2010-08-24  8:05           ` Balbir Singh
2010-08-24  9:09             ` Peter Zijlstra
2010-08-24 11:38               ` Balbir Singh
2010-08-24 11:49                 ` Peter Zijlstra
2010-08-24 11:53                 ` Peter Zijlstra
2010-08-24 12:06                   ` Martin Schwidefsky
2010-08-24 12:39                     ` Peter Zijlstra
2010-08-24 12:47                   ` Balbir Singh
2010-08-24 13:08                     ` Peter Zijlstra
2010-08-24 19:20                   ` Venkatesh Pallipadi
2010-08-24 20:39                     ` Peter Zijlstra
2010-08-25  2:02                       ` Venkatesh Pallipadi
2010-08-25  7:20                         ` Martin Schwidefsky
2010-09-08 11:12                         ` Peter Zijlstra
2010-08-24  8:14           ` Ingo Molnar
2010-08-24  8:49             ` Peter Zijlstra
2010-08-24  0:56 ` Venkatesh Pallipadi
2010-08-24  7:52   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1279583835-22854-5-git-send-email-venki@google.com \
    --to=venki@google.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=heiko.carstens@de.ibm.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=schwidefsky@de.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.