From: Frederic Weisbecker <frederic@kernel.org>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Tony Luck <tony.luck@intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Fenghua Yu <fenghua.yu@intel.com>,
	Heiko Carstens <hca@linux.ibm.com>
Subject: [PATCH 4/5] irqtime: Move irqtime entry accounting after irq offset incrementation
Date: Tue,  1 Dec 2020 01:12:25 +0100
Message-ID: <20201201001226.65107-5-frederic@kernel.org>
In-Reply-To: <20201201001226.65107-1-frederic@kernel.org>

IRQ time entry is currently accounted before HARDIRQ_OFFSET or
SOFTIRQ_OFFSET is incremented. This is convenient for deciding which
index the accounted cputime should be dispatched to.

Unfortunately it prevents tick_irq_enter() from being called under
HARDIRQ_OFFSET, because tick_irq_enter() has to run before the IRQ entry
accounting in order to catch up the clock. As a result we don't benefit
from appropriate lockdep coverage on tick_irq_enter().

To prepare for fixing this, move the IRQ entry cputime accounting after
the preempt offset is incremented. This requires the cputime dispatch
code to handle the extra offset.
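
To make the extra offset easier to reason about, here is a small
standalone userspace sketch (not kernel code: only the offset constants
mirror the kernel's preempt counter layout, the helpers are purely
illustrative) of how the "a hardirq interrupted a softirq" case has to
be detected once HARDIRQ_OFFSET is already folded into the counter:

	#include <stdio.h>

	#define SOFTIRQ_SHIFT	8
	#define HARDIRQ_SHIFT	16
	#define SOFTIRQ_OFFSET	(1U << SOFTIRQ_SHIFT)
	#define HARDIRQ_OFFSET	(1U << HARDIRQ_SHIFT)
	#define SOFTIRQ_MASK	(0xffU << SOFTIRQ_SHIFT)
	#define HARDIRQ_MASK	(0xfU << HARDIRQ_SHIFT)

	static unsigned int preempt_count;

	static unsigned int irq_count(void)
	{
		return preempt_count & (HARDIRQ_MASK | SOFTIRQ_MASK);
	}

	/*
	 * Old ordering: accounting ran before the hardirq offset was
	 * added, so the interrupted softirq was directly visible.
	 */
	static int softirq_interrupted_old(void)
	{
		return !(preempt_count & HARDIRQ_MASK) &&
			(preempt_count & SOFTIRQ_MASK);
	}

	/*
	 * New ordering: HARDIRQ_OFFSET is already in, so the same
	 * situation shows up as exactly one hardirq level plus the
	 * serving-softirq bit.
	 */
	static int softirq_interrupted_new(void)
	{
		return irq_count() == (SOFTIRQ_OFFSET | HARDIRQ_OFFSET);
	}

	int main(void)
	{
		/* A hardirq fires while a softirq is being served. */
		preempt_count = SOFTIRQ_OFFSET;
		printf("old order sees softirq: %d\n",
		       softirq_interrupted_old());

		/* __irq_enter() has already added HARDIRQ_OFFSET. */
		preempt_count += HARDIRQ_OFFSET;
		printf("new order sees softirq: %d\n",
		       softirq_interrupted_new());
		return 0;
	}

Both checks report the interrupted softirq, but the second one only does
so because it expects HARDIRQ_OFFSET to already be present, which is the
irq_count() == (SOFTIRQ_OFFSET | HARDIRQ_OFFSET) test used by
irqtime_account_enter() below.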

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/hardirq.h |  4 ++--
 include/linux/vtime.h   | 10 ++++----
 kernel/sched/cputime.c  | 53 +++++++++++++++++++++++++++++++----------
 kernel/softirq.c        |  2 +-
 4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 754f67ac4326..02499c10fbf7 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -32,9 +32,9 @@ static __always_inline void rcu_irq_enter_check_tick(void)
  */
 #define __irq_enter()					\
 	do {						\
+		preempt_count_add(HARDIRQ_OFFSET);	\
+		lockdep_hardirq_enter();		\
 		account_irq_enter_time(current);	\
-		preempt_count_add(HARDIRQ_OFFSET);	\
-		lockdep_hardirq_enter();		\
 	} while (0)
 
 /*
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index f827b38c3bb7..cad8ff530273 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -96,21 +96,23 @@ static inline void vtime_flush(struct task_struct *tsk) { }
 
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-extern void irqtime_account_irq(struct task_struct *tsk);
+extern void irqtime_account_enter(struct task_struct *tsk);
+extern void irqtime_account_exit(struct task_struct *tsk);
 #else
-static inline void irqtime_account_irq(struct task_struct *tsk) { }
+static inline void irqtime_account_enter(struct task_struct *tsk) { }
+static inline void irqtime_account_exit(struct task_struct *tsk) { }
 #endif
 
 static inline void account_irq_enter_time(struct task_struct *tsk)
 {
 	vtime_account_irq_enter(tsk);
-	irqtime_account_irq(tsk);
+	irqtime_account_enter(tsk);
 }
 
 static inline void account_irq_exit_time(struct task_struct *tsk)
 {
 	vtime_account_irq_exit(tsk);
-	irqtime_account_irq(tsk);
+	irqtime_account_exit(tsk);
 }
 
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3675452f6029..44bd774af37d 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -43,23 +43,48 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
 	u64_stats_update_end(&irqtime->sync);
 }
 
-/*
- * Called before incrementing preempt_count on {soft,}irq_enter
- * and before decrementing preempt_count on {soft,}irq_exit.
- */
-void irqtime_account_irq(struct task_struct *curr)
+static s64 irqtime_get_delta(struct irqtime *irqtime)
 {
-	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	int cpu = smp_processor_id();
 	s64 delta;
-	int cpu;
 
-	if (!sched_clock_irqtime)
-		return;
-
-	cpu = smp_processor_id();
 	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
 	irqtime->irq_start_time += delta;
 
+	return delta;
+}
+
+/* Called after incrementing preempt_count on {soft,}irq_enter */
+void irqtime_account_enter(struct task_struct *curr)
+{
+	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	u64 delta;
+
+	if (!sched_clock_irqtime)
+		return;
+
+	delta = irqtime_get_delta(irqtime);
+	/*
+	 * We do not account for softirq time from ksoftirqd here.
+	 * We want to continue accounting softirq time to ksoftirqd thread
+	 * in that case, so as not to confuse the scheduler with a special task
+	 * that does not consume any time but still wants to run.
+	 */
+	if ((irq_count() == (SOFTIRQ_OFFSET | HARDIRQ_OFFSET)) &&
+	    curr != this_cpu_ksoftirqd())
+		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+}
+
+/* Called before decrementing preempt_count on {soft,}irq_exit */
+void irqtime_account_exit(struct task_struct *curr)
+{
+	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	u64 delta;
+
+	if (!sched_clock_irqtime)
+		return;
+
+	delta = irqtime_get_delta(irqtime);
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -427,9 +452,11 @@ void vtime_task_switch(struct task_struct *prev)
  */
 void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (hardirq_count()) {
+	WARN_ON_ONCE(in_task());
+
+	if (hardirq_count() > HARDIRQ_OFFSET) {
 		vtime_account_hardirq(tsk);
-	} else if (in_serving_softirq()) {
+	} else if (hardirq_count() && in_serving_softirq()) {
 		vtime_account_softirq(tsk);
 	} else if (is_idle_task(tsk)) {
 		vtime_account_idle(tsk);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 617009ccd82c..24254c41bb7c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -315,9 +315,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	account_irq_enter_time(current);
 
 	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
+	account_irq_enter_time(current);
 	in_hardirq = lockdep_softirq_start();
 
 restart:
-- 
2.25.1

