All of lore.kernel.org
 help / color / mirror / Atom feed
From: riel@redhat.com
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@kernel.org, pbonzini@redhat.com,
	fweisbec@redhat.com, wanpeng.li@hotmail.com, efault@gmx.de,
	tglx@linutronix.de, rkrcmar@redhat.com
Subject: [PATCH 1/5] sched,time: count actually elapsed irq & softirq time
Date: Wed, 22 Jun 2016 22:25:47 -0400	[thread overview]
Message-ID: <1466648751-7958-2-git-send-email-riel@redhat.com> (raw)
In-Reply-To: <1466648751-7958-1-git-send-email-riel@redhat.com>

From: Rik van Riel <riel@redhat.com>

Currently, if there was any irq or softirq time during 'ticks'
jiffies, the entire period will be accounted as irq or softirq
time.

This is inaccurate if only a subset of 'ticks' jiffies was
actually spent handling irqs, and could conceivably mis-count
all of the ticks during a period as irq time, when there was
some irq and some softirq time.

This can actually happen when irqtime_account_process_tick
is called from account_idle_ticks, which can pass a larger
number of ticks down all at once.

Fix this by changing irqtime_account_hi_update and
irqtime_account_si_update to round elapsed irq and softirq
time to jiffies, and return the number of jiffies spent in
each mode, similar to how steal time is handled.

Additionally, have irqtime_account_process_tick take into
account how much time was spent in each of the steal, irq,
and softirq modes.

The latter could help improve the accuracy of timekeeping
when returning from idle on a NO_HZ_IDLE CPU.

Properly accounting how much time was spent handling hardirqs
and softirqs will also allow the NO_HZ_FULL code to re-use
these same functions for hardirq and softirq accounting.

Signed-off-by: Rik van Riel <riel@redhat.com>
---
 kernel/sched/cputime.c | 81 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3d60e5d76fdb..15b813c014be 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -79,40 +79,54 @@ void irqtime_account_irq(struct task_struct *curr)
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static int irqtime_account_hi_update(void)
+static unsigned long irqtime_account_hi_update(unsigned long max_jiffies)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	unsigned long irq_jiffies = 0;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	u64 irq;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_hardirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
-		ret = 1;
+	irq = this_cpu_read(cpu_hardirq_time) - cpustat[CPUTIME_IRQ];
+	if (irq > cputime_one_jiffy) {
+		irq_jiffies = min(max_jiffies, cputime_to_jiffies(irq));
+		cpustat[CPUTIME_IRQ] += jiffies_to_cputime(irq_jiffies);
+	}
 	local_irq_restore(flags);
-	return ret;
+	return irq_jiffies;
 }
 
-static int irqtime_account_si_update(void)
+static unsigned long irqtime_account_si_update(unsigned long max_jiffies)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	unsigned long si_jiffies = 0;
 	unsigned long flags;
-	u64 latest_ns;
-	int ret = 0;
+	u64 softirq;
 
 	local_irq_save(flags);
-	latest_ns = this_cpu_read(cpu_softirq_time);
-	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
-		ret = 1;
+	softirq = this_cpu_read(cpu_softirq_time) - cpustat[CPUTIME_SOFTIRQ];
+	if (softirq > cputime_one_jiffy) {
+		si_jiffies = min(max_jiffies, cputime_to_jiffies(softirq));
+		cpustat[CPUTIME_SOFTIRQ] += jiffies_to_cputime(si_jiffies);
+	}
 	local_irq_restore(flags);
-	return ret;
+	return si_jiffies;
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #define sched_clock_irqtime	(0)
 
+static unsigned long irqtime_account_hi_update(unsigned long dummy)
+{
+	return 0;
+}
+
+static unsigned long irqtime_account_si_update(unsigned long dummy)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
 
 static inline void task_group_account_field(struct task_struct *p, int index,
@@ -283,6 +297,26 @@ static __always_inline unsigned long steal_account_process_tick(unsigned long ma
 }
 
 /*
+ * Account how much elapsed time was spent in steal, irq, or softirq time.
+ * Due to rounding errors, the calculated amount can sometimes exceed
+ * max_jiffies; be careful not to account more than max_jiffies.
+ */
+static inline int account_other_ticks(unsigned long max_jiffies)
+{
+	unsigned long accounted;
+
+	accounted = steal_account_process_tick(max_jiffies);
+
+	if (accounted < max_jiffies)
+		accounted += irqtime_account_hi_update(max_jiffies - accounted);
+
+	if (accounted < max_jiffies)
+		accounted += irqtime_account_si_update(max_jiffies - accounted);
+
+	return accounted;
+}
+
+/*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
  */
@@ -344,19 +378,24 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 {
 	cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
 	u64 cputime = (__force u64) cputime_one_jiffy;
-	u64 *cpustat = kcpustat_this_cpu->cpustat;
+	unsigned long other;
 
-	if (steal_account_process_tick(ULONG_MAX))
+	/*
+	 * When returning from idle, many ticks can get accounted at
+	 * once, including some ticks of steal, irq, and softirq time.
+	 * Subtract those ticks from the amount of time accounted to
+	 * idle, or potentially user or system time. Due to rounding,
+	 * other time can exceed ticks occasionally.
+	 */
+	other = account_other_ticks(ticks);
+	if (other >= ticks)
 		return;
+	ticks -= other;
 
 	cputime *= ticks;
 	scaled *= ticks;
 
-	if (irqtime_account_hi_update()) {
-		cpustat[CPUTIME_IRQ] += cputime;
-	} else if (irqtime_account_si_update()) {
-		cpustat[CPUTIME_SOFTIRQ] += cputime;
-	} else if (this_cpu_ksoftirqd() == p) {
+	if (this_cpu_ksoftirqd() == p) {
 		/*
 		 * ksoftirqd time do not get accounted in cpu_softirq_time.
 		 * So, we have to handle it separately here.
-- 
2.5.5

  reply	other threads:[~2016-06-23  2:26 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-23  2:25 [PATCH v2 0/5] sched,time: fix irq time accounting with nohz_idle riel
2016-06-23  2:25 ` riel [this message]
2016-06-27 12:25   ` [PATCH 1/5] sched,time: count actually elapsed irq & softirq time Frederic Weisbecker
2016-06-27 12:50     ` Rik van Riel
2016-06-28 20:56       ` Frederic Weisbecker
2016-06-23  2:25 ` [PATCH 2/5] nohz,cputime: remove VTIME_GEN vtime irq time code riel
2016-06-27 23:21   ` Frederic Weisbecker
2016-06-27 23:31     ` Rik van Riel
2016-06-27 23:51       ` Frederic Weisbecker
2016-06-23  2:25 ` [PATCH 3/5] cputime: allow irq time accounting to be selected as an option riel
2016-06-26  3:03   ` kbuild test robot
2016-06-23  2:25 ` [PATCH 4/5] irqtime: add irq type parameter to irqtime_account_irq riel
2016-06-23  2:25 ` [PATCH 5/5] irqtime: drop local_irq_save/restore from irqtime_account_irq riel
  -- strict thread matches above, loose matches on Subject: below --
2016-07-13 14:50 [GIT PULL] cputime fixes and cleanups Frederic Weisbecker
2016-07-13 14:50 ` [PATCH 1/5] sched,time: Count actually elapsed irq & softirq time Frederic Weisbecker
2016-08-09  3:59   ` Wanpeng Li
2016-08-09 14:06     ` Rik van Riel
2016-08-09 23:07       ` Wanpeng Li
2016-08-10  7:51         ` Wanpeng Li
2016-08-09 23:25       ` Wanpeng Li
2016-08-09 23:31         ` Wanpeng Li
2016-08-09 23:35         ` Wanpeng Li
2016-08-09 23:39         ` Wanpeng Li
2016-08-10  5:07           ` Rik van Riel
2016-08-10  6:33             ` Wanpeng Li
2016-06-16 16:06 [PATCH 0/5] sched,time: fix irq time accounting with nohz_idle riel
2016-06-16 16:06 ` [PATCH 1/5] sched,time: count actually elapsed irq & softirq time riel
2016-06-16 16:22   ` kbuild test robot
2016-06-21 21:21   ` Peter Zijlstra
2016-06-21 22:20     ` Rik van Riel
2016-06-22 10:40     ` Paolo Bonzini
2016-06-22 10:52       ` Peter Zijlstra
2016-06-08  2:29 [PATCH RFC 0/5] sched,time: make irq time accounting work for nohz_idle riel
2016-06-08  2:30 ` [PATCH 1/5] sched,time: count actually elapsed irq & softirq time riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1466648751-7958-2-git-send-email-riel@redhat.com \
    --to=riel@redhat.com \
    --cc=efault@gmx.de \
    --cc=fweisbec@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rkrcmar@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=wanpeng.li@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.