linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] cputime: Moar cleanups / enhancements v2
@ 2012-10-25  0:51 Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 1/5] vtime: Gather vtime declarations to their own header file Frederic Weisbecker
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  0:51 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, Steven Rostedt, Paul Gortmaker, Tony Luck,
	Fenghua Yu, Benjamin Herrenschmidt, Paul Mackerras,
	Heiko Carstens, Martin Schwidefsky, Avi Kivity, Marcelo Tosatti,
	Joerg Roedel, Alexander Graf, Xiantao Zhang,
	Christian Borntraeger, Cornelia Huck

I made some changes to the series:

- Handle possible softirq interrupting idle on local_bh_enable()
- Have a dedicated vtime.h header file
- Do some clearer ifdeffery
- Use an irqsafe vtime_account_system() on kvm.

Patches 1/5 and 5/5 are something I really think we want. Patch 3/5
implicitly fixes a bug in s390. If you prefer I can just fix s390
and drop the rest of the patch that is just a micro-optimization
in the kvm guest switch path.

The rest of the patches is micro optimizations on the irq path.
If you think these are pointless over-optimizations, I can just drop
these and only keep 1/5, rebase 5/5 and extract the bug fix in s390 that
resides in 3/5.

Otherwise I'll send a pull request to Ingo in a week or so.

If you want to test, it is pullable from:

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
	cputime/cleanups-v3

Tested on powerpc and x86. Built tested on ia64. s390 doesn't build defconfig
on v3.7-rc2.

Frederic Weisbecker (5):
  vtime: Gather vtime declarations to their own header file
  vtime: Provide an irq safe version of vtime_account_system()
  kvm: Directly account vtime to system on guest switch
  cputime: Specialize irq vtime hooks
  cputime: Separate irqtime accounting from generic vtime

 arch/s390/kernel/vtime.c    |    4 +++
 include/linux/hardirq.h     |   15 ++----------
 include/linux/kernel_stat.h |    9 +-------
 include/linux/kvm_host.h    |   12 +++++++++-
 include/linux/vtime.h       |   48 +++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/cputime.c      |   13 +++++++++-
 kernel/softirq.c            |    6 ++--
 7 files changed, 80 insertions(+), 27 deletions(-)
 create mode 100644 include/linux/vtime.h

-- 
1.7.5.4


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/5] vtime: Gather vtime declarations to their own header file
  2012-10-25  0:51 [PATCH 0/5] cputime: Moar cleanups / enhancements v2 Frederic Weisbecker
@ 2012-10-25  0:51 ` Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 2/5] vtime: Provide an irq safe version of vtime_account_system() Frederic Weisbecker
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  0:51 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, Steven Rostedt, Paul Gortmaker

These APIs are scattered around and are going to expand a bit.
Let's create a dedicated header file for sanity.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/hardirq.h     |   11 +----------
 include/linux/kernel_stat.h |    9 +--------
 include/linux/vtime.h       |   22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 18 deletions(-)
 create mode 100644 include/linux/vtime.h

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index cab3da3..b083a47 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
+#include <linux/vtime.h>
 #include <asm/hardirq.h>
 
 /*
@@ -129,16 +130,6 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq)	barrier()
 #endif
 
-struct task_struct;
-
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
-#else
-extern void vtime_account(struct task_struct *tsk);
-#endif
-
 #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 
 static inline void rcu_nmi_enter(void)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 36d12f0..1865b1f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -7,6 +7,7 @@
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/vtime.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -130,12 +131,4 @@ extern void account_process_tick(struct task_struct *, int user);
 extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void vtime_task_switch(struct task_struct *prev);
-extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
-#else
-static inline void vtime_task_switch(struct task_struct *prev) { }
-#endif
-
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
new file mode 100644
index 0000000..7199c24
--- /dev/null
+++ b/include/linux/vtime.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_KERNEL_VTIME_H
+#define _LINUX_KERNEL_VTIME_H
+
+struct task_struct;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_task_switch(struct task_struct *prev);
+extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#else
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
+
+#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
+static inline void vtime_account(struct task_struct *tsk)
+{
+}
+#else
+extern void vtime_account(struct task_struct *tsk);
+#endif
+
+#endif /* _LINUX_KERNEL_VTIME_H */
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/5] vtime: Provide an irq safe version of vtime_account_system()
  2012-10-25  0:51 [PATCH 0/5] cputime: Moar cleanups / enhancements v2 Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 1/5] vtime: Gather vtime declarations to their own header file Frederic Weisbecker
@ 2012-10-25  0:51 ` Frederic Weisbecker
  2012-10-25  7:48   ` Ingo Molnar
  2012-10-25  0:51 ` [PATCH 3/5] kvm: Directly account vtime to system on guest switch Frederic Weisbecker
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  0:51 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, Steven Rostedt, Paul Gortmaker

vtime_account_system() currently has only one caller with
vtime_account() that is irq safe.

Now we are going to call it from other places like kvm, so
let's provide an irqsafe version.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 arch/s390/kernel/vtime.c |    4 ++++
 include/linux/vtime.h    |    2 ++
 kernel/sched/cputime.c   |    9 +++++++++
 3 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 7903344..80d1dbc 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -140,6 +140,10 @@ void vtime_account(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 
+void vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account")));
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
 void __kprobes vtime_stop_cpu(void)
 {
 	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 7199c24..57f290e 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -6,9 +6,11 @@ struct task_struct;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
+static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
 #endif
 
 #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 81b763b..3ccbea0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -433,6 +433,15 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	*st = cputime.stime;
 }
 
+void vtime_account_system_irqsafe(struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	vtime_account_system(tsk);
+	local_irq_restore(flags);
+}
+
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/5] kvm: Directly account vtime to system on guest switch
  2012-10-25  0:51 [PATCH 0/5] cputime: Moar cleanups / enhancements v2 Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 1/5] vtime: Gather vtime declarations to their own header file Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 2/5] vtime: Provide an irq safe version of vtime_account_system() Frederic Weisbecker
@ 2012-10-25  0:51 ` Frederic Weisbecker
  2012-10-25  7:51   ` Christian Borntraeger
  2012-10-25  0:51 ` [PATCH 4/5] cputime: Specialize irq vtime hooks Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 5/5] cputime: Separate irqtime accounting from generic vtime Frederic Weisbecker
  4 siblings, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  0:51 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Tony Luck, Fenghua Yu,
	Benjamin Herrenschmidt, Paul Mackerras, Heiko Carstens,
	Martin Schwidefsky, Avi Kivity, Marcelo Tosatti, Joerg Roedel,
	Alexander Graf, Xiantao Zhang, Christian Borntraeger,
	Cornelia Huck, Peter Zijlstra, Ingo Molnar, Thomas Gleixner,
	Steven Rostedt, Paul Gortmaker

Switching to or from guest context is done on ioctl context.
So by the time we call kvm_guest_enter() or kvm_guest_exit()
we know we are not running the idle task.

As a result, we can directly account the cputime using
vtime_account_system_irqsafe().

There are two good reasons to do this:

* We avoid some useless checks on guest switch. It optimizes
a bit this fast path.

* In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account()
checks for irq time to account. This is pointless since we know
we are not in an irq on guest switch. This is wasting cpu cycles
for no good reason. vtime_account_system() OTOH is a no-op in
this config option.

* s390 doesn't disable irqs in its implementation of vtime_account().
If vtime_account() in kvm races with an irq, the pending time might
be accounted twice. With vtime_account_system_irqsafe() we are protected.

A further optimization may consist in introducing a vtime_account_guest()
that directly calls account_guest_time().

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Xiantao Zhang <xiantao.zhang@intel.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/kvm_host.h |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 93bfc9f..f17158b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -737,7 +737,11 @@ static inline int kvm_deassign_device(struct kvm *kvm,
 static inline void kvm_guest_enter(void)
 {
 	BUG_ON(preemptible());
-	vtime_account(current);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system_irqsafe(current);
 	current->flags |= PF_VCPU;
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
@@ -751,7 +755,11 @@ static inline void kvm_guest_enter(void)
 
 static inline void kvm_guest_exit(void)
 {
-	vtime_account(current);
+	/*
+	 * This is running in ioctl context so we can avoid
+	 * the call to vtime_account() with its unnecessary idle check.
+	 */
+	vtime_account_system_irqsafe(current);
 	current->flags &= ~PF_VCPU;
 }
 
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 4/5] cputime: Specialize irq vtime hooks
  2012-10-25  0:51 [PATCH 0/5] cputime: Moar cleanups / enhancements v2 Frederic Weisbecker
                   ` (2 preceding siblings ...)
  2012-10-25  0:51 ` [PATCH 3/5] kvm: Directly account vtime to system on guest switch Frederic Weisbecker
@ 2012-10-25  0:51 ` Frederic Weisbecker
  2012-10-25  0:51 ` [PATCH 5/5] cputime: Separate irqtime accounting from generic vtime Frederic Weisbecker
  4 siblings, 0 replies; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  0:51 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, Steven Rostedt, Paul Gortmaker

With CONFIG_VIRT_CPU_ACCOUNTING, when vtime_account()
is called in irq entry/exit, we perform a check on the
context: if we are interrupting the idle task we
account the pending cputime to idle, otherwise account
to system time or its sub-areas: tsk->stime, hardirq time,
softirq time, ...

However this check for idle only concerns the hardirq entry
and softirq entry:

* Hardirq may directly interrupt the idle task, in which case
we need to flush the pending CPU time to idle.

* The idle task may be directly interrupted by a softirq if
it calls local_bh_enable(). There is probably no such call
in any idle task but we need to cover every case. Ksoftirqd
is not concerned because the idle time is flushed on context
switch and softirq in the end of hardirq have the idle time
already flushed from the hardirq entry.

In the other cases we always account to system/irq time:

* On hardirq exit we account the time to hardirq time.
* On softirq exit we account the time to softirq time.

To optimize this and avoid the indirect call to vtime_account()
and the checks it performs, specialize the vtime irq APIs and
only perform the check on irq entry. Irq exit can directly call
vtime_account_system().

CONFIG_IRQ_TIME_ACCOUNTING behaviour doesn't change and directly
maps to its own vtime_account() implementation. One may want
to take benefits from the new APIs to optimize irq time accounting
as well in the future.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/hardirq.h |    4 ++--
 include/linux/vtime.h   |   25 +++++++++++++++++++++++++
 kernel/softirq.c        |    6 +++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index b083a47..624ef3f 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void);
  */
 #define __irq_enter()					\
 	do {						\
-		vtime_account(current);		\
+		vtime_account_irq_enter(current);	\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
 	} while (0)
@@ -169,7 +169,7 @@ extern void irq_enter(void);
 #define __irq_exit()					\
 	do {						\
 		trace_hardirq_exit();			\
-		vtime_account(current);		\
+		vtime_account_irq_exit(current);	\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 57f290e..40634fb 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -21,4 +21,29 @@ static inline void vtime_account(struct task_struct *tsk)
 extern void vtime_account(struct task_struct *tsk);
 #endif
 
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+	/*
+	 * Hardirq can interrupt idle task anytime. So we need vtime_account()
+	 * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING.
+	 * Softirq can also interrupt idle task directly if it calls
+	 * local_bh_enable(). Such case probably don't exist but we never know.
+	 * Ksoftirqd is not concerned because idle time is flushed on context
+	 * switch. Softirqs in the end of hardirqs are also not a problem because
+	 * the idle time is flushed on hardirq time already.
+	 */
+	vtime_account(tsk);
+}
+
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/* On hard|softirq exit we always account to hard|softirq cputime */
+	vtime_account_system(tsk);
+#endif
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	vtime_account(tsk);
+#endif
+}
+
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index cc96bdc..ed567ba 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	vtime_account(current);
+	vtime_account_irq_enter(current);
 
 	__local_bh_disable((unsigned long)__builtin_return_address(0),
 				SOFTIRQ_OFFSET);
@@ -272,7 +272,7 @@ restart:
 
 	lockdep_softirq_exit();
 
-	vtime_account(current);
+	vtime_account_irq_exit(current);
 	__local_bh_enable(SOFTIRQ_OFFSET);
 	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
 }
@@ -341,7 +341,7 @@ static inline void invoke_softirq(void)
  */
 void irq_exit(void)
 {
-	vtime_account(current);
+	vtime_account_irq_exit(current);
 	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 5/5] cputime: Separate irqtime accounting from generic vtime
  2012-10-25  0:51 [PATCH 0/5] cputime: Moar cleanups / enhancements v2 Frederic Weisbecker
                   ` (3 preceding siblings ...)
  2012-10-25  0:51 ` [PATCH 4/5] cputime: Specialize irq vtime hooks Frederic Weisbecker
@ 2012-10-25  0:51 ` Frederic Weisbecker
  4 siblings, 0 replies; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  0:51 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, Steven Rostedt, Paul Gortmaker

vtime_account() doesn't have the same role in
CONFIG_VIRT_CPU_ACCOUNTING and CONFIG_IRQ_TIME_ACCOUNTING.

In the first case it handles time accounting in any context. In
the second case it only handles irq time accounting.

So when vtime_account() is called from outside vtime_account_irq_*()
this call is pointless to CONFIG_IRQ_TIME_ACCOUNTING.

To fix the confusion, change vtime_account() to irqtime_account_irq()
in CONFIG_IRQ_TIME_ACCOUNTING. This way we ensure future account_vtime()
calls won't waste useless cycles in the irqtime APIs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/vtime.h  |   19 +++++++++----------
 kernel/sched/cputime.c |    4 ++--
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 40634fb..f1f91ac 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -8,17 +8,18 @@ extern void vtime_task_switch(struct task_struct *prev);
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_system_irqsafe(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
+extern void vtime_account(struct task_struct *tsk);
 #else
 static inline void vtime_task_switch(struct task_struct *prev) { }
+static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account(struct task_struct *tsk) { }
 #endif
 
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+extern void irqtime_account_irq(struct task_struct *tsk);
 #else
-extern void vtime_account(struct task_struct *tsk);
+static inline void irqtime_account_irq(struct task_struct *tsk) { }
 #endif
 
 static inline void vtime_account_irq_enter(struct task_struct *tsk)
@@ -33,17 +34,15 @@ static inline void vtime_account_irq_enter(struct task_struct *tsk)
 	 * the idle time is flushed on hardirq time already.
 	 */
 	vtime_account(tsk);
+	irqtime_account_irq(tsk);
 }
 
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	/* On hard|softirq exit we always account to hard|softirq cputime */
 	vtime_account_system(tsk);
-#endif
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	vtime_account(tsk);
-#endif
+	irqtime_account_irq(tsk);
+
 }
 
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 3ccbea0..1a7e1369 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -43,7 +43,7 @@ DEFINE_PER_CPU(seqcount_t, irq_time_seq);
  * Called before incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
  */
-void vtime_account(struct task_struct *curr)
+void irqtime_account_irq(struct task_struct *curr)
 {
 	unsigned long flags;
 	s64 delta;
@@ -73,7 +73,7 @@ void vtime_account(struct task_struct *curr)
 	irq_time_write_end();
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
 static int irqtime_account_hi_update(void)
 {
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/5] vtime: Provide an irq safe version of vtime_account_system()
  2012-10-25  0:51 ` [PATCH 2/5] vtime: Provide an irq safe version of vtime_account_system() Frederic Weisbecker
@ 2012-10-25  7:48   ` Ingo Molnar
  0 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2012-10-25  7:48 UTC (permalink / raw)
  To: Frederic Weisbecker, Martin Schwidefsky, Heiko Carstens, linux-s390
  Cc: LKML, Peter Zijlstra, Thomas Gleixner, Steven Rostedt, Paul Gortmaker


I've Cc:-ed the S390 folks - patch is quoted below.

Thanks,

	Ingo

* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> vtime_account_system() currently has only one caller with
> vtime_account() that is irq safe.
> 
> Now we are going to call it from other places like kvm, so
> let's provide an irqsafe version.
> 
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
> ---
>  arch/s390/kernel/vtime.c |    4 ++++
>  include/linux/vtime.h    |    2 ++
>  kernel/sched/cputime.c   |    9 +++++++++
>  3 files changed, 15 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
> index 7903344..80d1dbc 100644
> --- a/arch/s390/kernel/vtime.c
> +++ b/arch/s390/kernel/vtime.c
> @@ -140,6 +140,10 @@ void vtime_account(struct task_struct *tsk)
>  }
>  EXPORT_SYMBOL_GPL(vtime_account);
>  
> +void vtime_account_system(struct task_struct *tsk)
> +__attribute__((alias("vtime_account")));
> +EXPORT_SYMBOL_GPL(vtime_account_system);
> +
>  void __kprobes vtime_stop_cpu(void)
>  {
>  	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
> diff --git a/include/linux/vtime.h b/include/linux/vtime.h
> index 7199c24..57f290e 100644
> --- a/include/linux/vtime.h
> +++ b/include/linux/vtime.h
> @@ -6,9 +6,11 @@ struct task_struct;
>  #ifdef CONFIG_VIRT_CPU_ACCOUNTING
>  extern void vtime_task_switch(struct task_struct *prev);
>  extern void vtime_account_system(struct task_struct *tsk);
> +extern void vtime_account_system_irqsafe(struct task_struct *tsk);
>  extern void vtime_account_idle(struct task_struct *tsk);
>  #else
>  static inline void vtime_task_switch(struct task_struct *prev) { }
> +static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
>  #endif
>  
>  #if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 81b763b..3ccbea0 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -433,6 +433,15 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
>  	*st = cputime.stime;
>  }
>  
> +void vtime_account_system_irqsafe(struct task_struct *tsk)
> +{
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	vtime_account_system(tsk);
> +	local_irq_restore(flags);
> +}
> +
>  /*
>   * Archs that account the whole time spent in the idle task
>   * (outside irq) as idle time can rely on this and just implement
> -- 
> 1.7.5.4
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/5] kvm: Directly account vtime to system on guest switch
  2012-10-25  0:51 ` [PATCH 3/5] kvm: Directly account vtime to system on guest switch Frederic Weisbecker
@ 2012-10-25  7:51   ` Christian Borntraeger
  2012-10-25  7:56     ` Frederic Weisbecker
  0 siblings, 1 reply; 11+ messages in thread
From: Christian Borntraeger @ 2012-10-25  7:51 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Tony Luck, Fenghua Yu, Benjamin Herrenschmidt,
	Paul Mackerras, Heiko Carstens, Martin Schwidefsky, Avi Kivity,
	Marcelo Tosatti, Joerg Roedel, Alexander Graf, Xiantao Zhang,
	Cornelia Huck, Peter Zijlstra, Ingo Molnar, Thomas Gleixner,
	Steven Rostedt, Paul Gortmaker

On 25/10/12 02:51, Frederic Weisbecker wrote:
> Switching to or from guest context is done on ioctl context.
> So by the time we call kvm_guest_enter() or kvm_guest_exit()
> we know we are not running the idle task.
> 
> As a result, we can directly account the cputime using
> vtime_account_system_irqsafe().
> 
> There are two good reasons to do this:
> 
> * We avoid some useless checks on guest switch. It optimizes
> a bit this fast path.
> 
> * In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account()
> checks for irq time to account. This is pointless since we know
> we are not in an irq on guest switch. This is wasting cpu cycles
> for no good reason. vtime_account_system() OTOH is a no-op in
> this config option.
> 
> * s390 doesn't disable irqs in its implementation of vtime_account().
> If vtime_account() in kvm races with an irq, the pending time might
> be accounted twice. With vtime_account_system_irqsafe() we are protected.

We disable irqs before we call kvm_guest_enter/exit, see kvm-s390.c:


[...]
	if (!kvm_is_ucontrol(vcpu->kvm))
	        kvm_s390_deliver_pending_interrupts(vcpu);
	vcpu->arch.sie_block->icptcode = 0;
	local_irq_disable();
	kvm_guest_enter();
	local_irq_enable();
[...]


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/5] kvm: Directly account vtime to system on guest switch
  2012-10-25  7:51   ` Christian Borntraeger
@ 2012-10-25  7:56     ` Frederic Weisbecker
  2012-10-25  7:59       ` Christian Borntraeger
  0 siblings, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25  7:56 UTC (permalink / raw)
  To: Christian Borntraeger
  Cc: LKML, Tony Luck, Fenghua Yu, Benjamin Herrenschmidt,
	Paul Mackerras, Heiko Carstens, Martin Schwidefsky, Avi Kivity,
	Marcelo Tosatti, Joerg Roedel, Alexander Graf, Xiantao Zhang,
	Cornelia Huck, Peter Zijlstra, Ingo Molnar, Thomas Gleixner,
	Steven Rostedt, Paul Gortmaker

2012/10/25 Christian Borntraeger <borntraeger@de.ibm.com>:
> On 25/10/12 02:51, Frederic Weisbecker wrote:
>> Switching to or from guest context is done on ioctl context.
>> So by the time we call kvm_guest_enter() or kvm_guest_exit()
>> we know we are not running the idle task.
>>
>> As a result, we can directly account the cputime using
>> vtime_account_system_irqsafe().
>>
>> There are two good reasons to do this:
>>
>> * We avoid some useless checks on guest switch. It optimizes
>> a bit this fast path.
>>
>> * In the case of CONFIG_IRQ_TIME_ACCOUNTING, calling vtime_account()
>> checks for irq time to account. This is pointless since we know
>> we are not in an irq on guest switch. This is wasting cpu cycles
>> for no good reason. vtime_account_system() OTOH is a no-op in
>> this config option.
>>
>> * s390 doesn't disable irqs in its implementation of vtime_account().
>> If vtime_account() in kvm races with an irq, the pending time might
>> be accounted twice. With vtime_account_system_irqsafe() we are protected.
>
> We disable irqs before we call kvm_guest_enter/exit, see kvm-s390.c:
>
>
> [...]
>         if (!kvm_is_ucontrol(vcpu->kvm))
>                 kvm_s390_deliver_pending_interrupts(vcpu);
>         vcpu->arch.sie_block->icptcode = 0;
>         local_irq_disable();
>         kvm_guest_enter();
>         local_irq_enable();
> [...]
>

Ah ok. Hmm I still need to keep it irqsafe for the other archs though,
as it is currently with vtime_account(). So perhaps I can remove your
local_irq_disable there and use vtime_account_system_irqsafe()
instead?

thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/5] kvm: Directly account vtime to system on guest switch
  2012-10-25  7:56     ` Frederic Weisbecker
@ 2012-10-25  7:59       ` Christian Borntraeger
  0 siblings, 0 replies; 11+ messages in thread
From: Christian Borntraeger @ 2012-10-25  7:59 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: LKML, Tony Luck, Fenghua Yu, Benjamin Herrenschmidt,
	Paul Mackerras, Heiko Carstens, Martin Schwidefsky, Avi Kivity,
	Marcelo Tosatti, Joerg Roedel, Alexander Graf, Xiantao Zhang,
	Cornelia Huck, Peter Zijlstra, Ingo Molnar, Thomas Gleixner,
	Steven Rostedt, Paul Gortmaker

On 25/10/12 09:56, Frederic Weisbecker wrote:
[...]
>>> * s390 doesn't disable irqs in its implementation of vtime_account().
>>> If vtime_account() in kvm races with an irq, the pending time might
>>> be accounted twice. With vtime_account_system_irqsafe() we are protected.
>>
>> We disable irqs before we call kvm_guest_enter/exit, see kvm-s390.c:
>>
>>
>> [...]
>>         if (!kvm_is_ucontrol(vcpu->kvm))
>>                 kvm_s390_deliver_pending_interrupts(vcpu);
>>         vcpu->arch.sie_block->icptcode = 0;
>>         local_irq_disable();
>>         kvm_guest_enter();
>>         local_irq_enable();
>> [...]
>>
> 
> Ah ok. Hmm I still need to keep it irqsafe for the other archs though,
> as it is currently with vtime_account(). So perhaps I can remove your
> local_irq_disable there and use vtime_account_system_irqsafe()
> instead?

Yes. We added the local_irq_disable code just for the vtime accounting.





^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/5] vtime: Gather vtime declarations to their own header file
  2012-10-25 17:47 [PATCH 0/5] cputime: Moar cleanups / enhancements v3 Frederic Weisbecker
@ 2012-10-25 17:47 ` Frederic Weisbecker
  0 siblings, 0 replies; 11+ messages in thread
From: Frederic Weisbecker @ 2012-10-25 17:47 UTC (permalink / raw)
  To: LKML
  Cc: Frederic Weisbecker, Peter Zijlstra, Ingo Molnar,
	Thomas Gleixner, Steven Rostedt, Paul Gortmaker

These APIs are scattered around and are going to expand a bit.
Let's create a dedicated header file for sanity.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 include/linux/hardirq.h     |   11 +----------
 include/linux/kernel_stat.h |    9 +--------
 include/linux/vtime.h       |   22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 18 deletions(-)
 create mode 100644 include/linux/vtime.h

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index cab3da3..b083a47 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -4,6 +4,7 @@
 #include <linux/preempt.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
+#include <linux/vtime.h>
 #include <asm/hardirq.h>
 
 /*
@@ -129,16 +130,6 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq)	barrier()
 #endif
 
-struct task_struct;
-
-#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void vtime_account(struct task_struct *tsk)
-{
-}
-#else
-extern void vtime_account(struct task_struct *tsk);
-#endif
-
 #if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
 
 static inline void rcu_nmi_enter(void)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 36d12f0..1865b1f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -7,6 +7,7 @@
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
+#include <linux/vtime.h>
 #include <asm/irq.h>
 #include <asm/cputime.h>
 
@@ -130,12 +131,4 @@ extern void account_process_tick(struct task_struct *, int user);
 extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void vtime_task_switch(struct task_struct *prev);
-extern void vtime_account_system(struct task_struct *tsk);
-extern void vtime_account_idle(struct task_struct *tsk);
-#else
-static inline void vtime_task_switch(struct task_struct *prev) { }
-#endif
-
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
new file mode 100644
index 0000000..7199c24
--- /dev/null
+++ b/include/linux/vtime.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_KERNEL_VTIME_H
+#define _LINUX_KERNEL_VTIME_H
+
+struct task_struct;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_task_switch(struct task_struct *prev);
+extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#else
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
+
+#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
+static inline void vtime_account(struct task_struct *tsk)
+{
+}
+#else
+extern void vtime_account(struct task_struct *tsk);
+#endif
+
+#endif /* _LINUX_KERNEL_VTIME_H */
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-10-25 17:48 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-25  0:51 [PATCH 0/5] cputime: Moar cleanups / enhancements v2 Frederic Weisbecker
2012-10-25  0:51 ` [PATCH 1/5] vtime: Gather vtime declarations to their own header file Frederic Weisbecker
2012-10-25  0:51 ` [PATCH 2/5] vtime: Provide an irq safe version of vtime_account_system() Frederic Weisbecker
2012-10-25  7:48   ` Ingo Molnar
2012-10-25  0:51 ` [PATCH 3/5] kvm: Directly account vtime to system on guest switch Frederic Weisbecker
2012-10-25  7:51   ` Christian Borntraeger
2012-10-25  7:56     ` Frederic Weisbecker
2012-10-25  7:59       ` Christian Borntraeger
2012-10-25  0:51 ` [PATCH 4/5] cputime: Specialize irq vtime hooks Frederic Weisbecker
2012-10-25  0:51 ` [PATCH 5/5] cputime: Separate irqtime accounting from generic vtime Frederic Weisbecker
2012-10-25 17:47 [PATCH 0/5] cputime: Moar cleanups / enhancements v3 Frederic Weisbecker
2012-10-25 17:47 ` [PATCH 1/5] vtime: Gather vtime declarations to their own header file Frederic Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).