From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754673AbcFPQGY (ORCPT ); Thu, 16 Jun 2016 12:06:24 -0400 Received: from shelob.surriel.com ([74.92.59.67]:34855 "EHLO shelob.surriel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754610AbcFPQGU (ORCPT ); Thu, 16 Jun 2016 12:06:20 -0400 From: riel@redhat.com To: linux-kernel@vger.kernel.org Cc: mingo@kernel.org, pbonzini@redhat.com, peterz@infradead.org, fweisbec@gmail.com, wanpeng.li@hotmail.com, efault@gmx.de, tglx@linutronix.de, rkrcmar@redhat.com Subject: [PATCH 1/5] sched,time: count actually elapsed irq & softirq time Date: Thu, 16 Jun 2016 12:06:03 -0400 Message-Id: <1466093167-27653-2-git-send-email-riel@redhat.com> X-Mailer: git-send-email 2.5.5 In-Reply-To: <1466093167-27653-1-git-send-email-riel@redhat.com> References: <1466093167-27653-1-git-send-email-riel@redhat.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Rik van Riel Currently, if there was any irq or softirq time during 'ticks' jiffies, the entire period will be accounted as irq or softirq time. This is inaccurate if only a subset of 'ticks' jiffies was actually spent handling irqs, and could conceivably mis-count all of the ticks during a period as irq time, when there was some irq and some softirq time. This can actually happen when irqtime_account_process_tick is called from account_idle_ticks, which can pass a larger number of ticks down all at once. Fix this by changing irqtime_account_hi_update and irqtime_account_si_update to round elapsed irq and softirq time to jiffies, and return the number of jiffies spent in each mode, similar to how steal time is handled. Additionally, have irqtime_account_process_tick take into account how much time was spent in each of steal, irq, and softirq time. The latter could help improve the accuracy of timekeeping when returning from idle on a NO_HZ_IDLE CPU. Properly accounting how much time was spent in hardirq and softirq time will also allow the NO_HZ_FULL code to re-use these same functions for hardirq and softirq accounting. Signed-off-by: Rik van Riel --- kernel/sched/cputime.c | 69 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3d60e5d76fdb..9bd2d4f42037 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -79,34 +79,36 @@ void irqtime_account_irq(struct task_struct *curr) } EXPORT_SYMBOL_GPL(irqtime_account_irq); -static int irqtime_account_hi_update(void) +static unsigned long irqtime_account_hi_update(unsigned long max_jiffies) { u64 *cpustat = kcpustat_this_cpu->cpustat; + unsigned long irq_jiffies; unsigned long flags; - u64 latest_ns; - int ret = 0; + u64 irq; local_irq_save(flags); - latest_ns = this_cpu_read(cpu_hardirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) - ret = 1; + irq = this_cpu_read(cpu_hardirq_time) - cpustat[CPUTIME_IRQ]; + irq_jiffies = min(cputime_to_jiffies(irq), max_jiffies); + if (irq_jiffies) + cpustat[CPUTIME_IRQ] += jiffies_to_cputime(irq_jiffies); local_irq_restore(flags); - return ret; + return irq_jiffies; } -static int irqtime_account_si_update(void) +static unsigned long irqtime_account_si_update(unsigned long max_jiffies) { u64 *cpustat = kcpustat_this_cpu->cpustat; + unsigned long si_jiffies; unsigned long flags; - u64 latest_ns; - int ret = 0; + u64 softirq; local_irq_save(flags); - latest_ns = this_cpu_read(cpu_softirq_time); - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) - ret = 1; + softirq = this_cpu_read(cpu_softirq_time) - cpustat[CPUTIME_SOFTIRQ]; + si_jiffies = min(cputime_to_jiffies(softirq), max_jiffies); + if (si_jiffies) + cpustat[CPUTIME_SOFTIRQ] += jiffies_to_cputime(si_jiffies); local_irq_restore(flags); - return ret; + return si_jiffies; } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ @@ -283,6 +285,26 @@ static __always_inline unsigned long steal_account_process_tick(unsigned long ma } /* + * Account how much elapsed time was spent in steal, irq, or softirq time. + * Due to rounding errors, the calculated amount can sometimes exceed + * max_jiffies; be careful not to account more than max_jiffies. + */ +static inline int account_other_ticks(unsigned long max_jiffies) +{ + unsigned long accounted; + + accounted = steal_account_process_tick(max_jiffies); + + if (accounted < max_jiffies) + accounted += irqtime_account_hi_update(max_jiffies - accounted); + + if (accounted < max_jiffies) + accounted += irqtime_account_si_update(max_jiffies - accounted); + + return accounted; +} + +/* * Accumulate raw cputime values of dead tasks (sig->[us]time) and live * tasks (sum on group iteration) belonging to @tsk's group. */ @@ -344,19 +366,24 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, { cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); u64 cputime = (__force u64) cputime_one_jiffy; - u64 *cpustat = kcpustat_this_cpu->cpustat; + unsigned long other; - if (steal_account_process_tick(ULONG_MAX)) + /* + * When returning from idle, many ticks can get accounted at + * once, including some ticks of steal, irq, and softirq time. + * Subtract those ticks from the amount of time accounted to + * idle, or potentially user or system time. Due to rounding, + * other time can exceed ticks occasionally. + */ + other = account_other_ticks(ticks); + if (other >= ticks) return; + ticks -= other; cputime *= ticks; scaled *= ticks; - if (irqtime_account_hi_update()) { - cpustat[CPUTIME_IRQ] += cputime; - } else if (irqtime_account_si_update()) { - cpustat[CPUTIME_SOFTIRQ] += cputime; - } else if (this_cpu_ksoftirqd() == p) { + if (this_cpu_ksoftirqd() == p) { /* * ksoftirqd time do not get accounted in cpu_softirq_time. * So, we have to handle it separately here. -- 2.5.5