* [PATCH 0/2] vtime: Remove pair of seqcount on context switch
@ 2019-10-03 16:17 Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-03 16:17 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar
Cc: LKML, Frederic Weisbecker, Wanpeng Li, Thomas Gleixner,
Yauheni Kaliuta, Rik van Riel
Extracted from a larger queue that fixes kcpustat on nohz_full, these
two patches have value on their own as they remove two write barriers
on nohz_full context switch.
Frederic Weisbecker (2):
vtime: Rename vtime_account_system() to vtime_account_kernel()
vtime: Spare a seqcount lock/unlock cycle on context switch
arch/ia64/kernel/time.c | 4 +--
arch/powerpc/kernel/time.c | 6 ++--
arch/s390/kernel/vtime.c | 4 +--
include/linux/context_tracking.h | 4 +--
include/linux/vtime.h | 38 ++++++++++++------------
kernel/sched/cputime.c | 50 ++++++++++++++++++--------------
6 files changed, 57 insertions(+), 49 deletions(-)
--
2.23.0
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel()
2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
@ 2019-10-03 16:17 ` Frederic Weisbecker
2019-10-09 12:59 ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
2 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-03 16:17 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar
Cc: LKML, Frederic Weisbecker, Wanpeng Li, Thomas Gleixner,
Yauheni Kaliuta, Rik van Riel
vtime_account_system() decides if we need to account the time to the
system (__vtime_account_system()) or to the guest (vtime_account_guest()).
So this function is a misnommer as we are on a higher level than
"system". All we know when we call that function is that we are
accounting kernel cputime. Whether it belongs to guest or system time
is a lower level detail.
Rename this function to vtime_account_kernel(). This will clarify things
and avoid too many underscored vtime_account_system() versions.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
arch/ia64/kernel/time.c | 4 ++--
arch/powerpc/kernel/time.c | 6 +++---
arch/s390/kernel/vtime.c | 4 ++--
include/linux/context_tracking.h | 4 ++--
include/linux/vtime.h | 6 +++---
kernel/sched/cputime.c | 18 +++++++++---------
6 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 1e95d32c8877..91b4024c9351 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
return delta_stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
else
ti->stime += stime;
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..84827da01d45 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
return stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
#endif
}
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca49cfc6..8df10d3c8f6c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
/*
* Sorted add to a list. List is linear searched until first bigger
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d05609ad329d..558a209c247d 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void)
* to assume that it's the stime pending cputime
* to flush.
*/
- vtime_account_system(current);
+ vtime_account_kernel(current);
current->flags |= PF_VCPU;
rcu_virt_note_context_switch(smp_processor_id());
}
@@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void)
static inline void guest_exit_irqoff(void)
{
/* Flush the guest cputime we spent on the guest */
- vtime_account_system(current);
+ vtime_account_kernel(current);
current->flags &= ~PF_VCPU;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index a26ed10a4eac..2fd247f90408 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -57,13 +57,13 @@ static inline void vtime_task_switch(struct task_struct *prev)
}
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_kernel(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -86,7 +86,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
- vtime_account_system(tsk);
+ vtime_account_kernel(tsk);
}
extern void vtime_flush(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 46ed4e1383e2..b45932e27857 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -412,7 +412,7 @@ void vtime_common_task_switch(struct task_struct *prev)
if (is_idle_task(prev))
vtime_account_idle(prev);
else
- vtime_account_system(prev);
+ vtime_account_kernel(prev);
vtime_flush(prev);
arch_vtime_task_switch(prev);
@@ -425,7 +425,7 @@ void vtime_common_task_switch(struct task_struct *prev)
/*
* Archs that account the whole time spent in the idle task
* (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * vtime_account_kernel() and vtime_account_idle(). Archs that
* have other meaning of the idle time (s390 only includes the
* time spent by the CPU when it's in low power mode) must override
* vtime_account().
@@ -436,7 +436,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
if (!in_interrupt() && is_idle_task(tsk))
vtime_account_idle(tsk);
else
- vtime_account_system(tsk);
+ vtime_account_kernel(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -711,8 +711,8 @@ static u64 get_vtime_delta(struct vtime *vtime)
return delta - other;
}
-static void __vtime_account_system(struct task_struct *tsk,
- struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+ struct vtime *vtime)
{
vtime->stime += get_vtime_delta(vtime);
if (vtime->stime >= TICK_NSEC) {
@@ -731,7 +731,7 @@ static void vtime_account_guest(struct task_struct *tsk,
}
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
struct vtime *vtime = &tsk->vtime;
@@ -743,7 +743,7 @@ void vtime_account_system(struct task_struct *tsk)
if (tsk->flags & PF_VCPU)
vtime_account_guest(tsk, vtime);
else
- __vtime_account_system(tsk, vtime);
+ vtime_account_system(tsk, vtime);
write_seqcount_end(&vtime->seqcount);
}
@@ -752,7 +752,7 @@ void vtime_user_enter(struct task_struct *tsk)
struct vtime *vtime = &tsk->vtime;
write_seqcount_begin(&vtime->seqcount);
- __vtime_account_system(tsk, vtime);
+ vtime_account_system(tsk, vtime);
vtime->state = VTIME_USER;
write_seqcount_end(&vtime->seqcount);
}
@@ -782,7 +782,7 @@ void vtime_guest_enter(struct task_struct *tsk)
* that can thus safely catch up with a tickless delta.
*/
write_seqcount_begin(&vtime->seqcount);
- __vtime_account_system(tsk, vtime);
+ vtime_account_system(tsk, vtime);
tsk->flags |= PF_VCPU;
write_seqcount_end(&vtime->seqcount);
}
--
2.23.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch
2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
@ 2019-10-03 16:17 ` Frederic Weisbecker
2019-10-09 12:59 ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
2 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-03 16:17 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar
Cc: LKML, Frederic Weisbecker, Wanpeng Li, Thomas Gleixner,
Yauheni Kaliuta, Rik van Riel
On context switch we are locking the vtime seqcount of the scheduling-out
task twice:
* On vtime_task_switch_common(), when we flush the pending vtime through
vtime_account_system()
* On arch_vtime_task_switch() to reset the vtime state.
This is pointless as these actions can be performed without the need
to unlock/lock in the middle. The reason these steps are separated is to
consolidate a very small amount of common code between
CONFIG_VIRT_CPU_ACCOUNTING_GEN and CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.
Performance in this fast path is definetly a priority over artificial
code factorization so split the task switch code between GEN and
NATIVE and mutualize the parts than can run under a single seqcount
locked block.
As a side effect, vtime_account_idle() becomes included in the seqcount
protection. This happens to be a welcome preparation in order to
properly support kcpustat under vtime in the future and fetch
CPUTIME_IDLE without race.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
include/linux/vtime.h | 32 ++++++++++++++++----------------
kernel/sched/cputime.c | 34 +++++++++++++++++++++-------------
2 files changed, 37 insertions(+), 29 deletions(-)
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 2fd247f90408..d9160ab3667a 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,8 +14,12 @@ struct task_struct;
* vtime_accounting_cpu_enabled() definitions/declarations
*/
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
+
static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+extern void vtime_task_switch(struct task_struct *prev);
+
#elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
+
/*
* Checks if vtime is enabled on some CPU. Cputime readers want to be careful
* in that case and compute the tickless cputime.
@@ -36,33 +40,29 @@ static inline bool vtime_accounting_cpu_enabled(void)
return false;
}
+
+extern void vtime_task_switch_generic(struct task_struct *prev);
+
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+ if (vtime_accounting_cpu_enabled())
+ vtime_task_switch_generic(prev);
+}
+
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
static inline bool vtime_accounting_cpu_enabled(void) { return false; }
-#endif
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
/*
* Common vtime APIs
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-
-#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
-extern void vtime_task_switch(struct task_struct *prev);
-#else
-extern void vtime_common_task_switch(struct task_struct *prev);
-static inline void vtime_task_switch(struct task_struct *prev)
-{
- if (vtime_accounting_cpu_enabled())
- vtime_common_task_switch(prev);
-}
-#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-
extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
-
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
-static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_kernel(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b45932e27857..cef23c211f41 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -405,9 +405,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
/*
* Use precise platform statistics if available:
*/
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
{
if (is_idle_task(prev))
vtime_account_idle(prev);
@@ -418,10 +419,7 @@ void vtime_common_task_switch(struct task_struct *prev)
arch_vtime_task_switch(prev);
}
# endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Archs that account the whole time spent in the idle task
* (outside irq) as idle time can rely on this and just implement
@@ -731,19 +729,25 @@ static void vtime_account_guest(struct task_struct *tsk,
}
}
-void vtime_account_kernel(struct task_struct *tsk)
+static void __vtime_account_kernel(struct task_struct *tsk,
+ struct vtime *vtime)
{
- struct vtime *vtime = &tsk->vtime;
-
- if (!vtime_delta(vtime))
- return;
-
- write_seqcount_begin(&vtime->seqcount);
/* We might have scheduled out from guest path */
if (tsk->flags & PF_VCPU)
vtime_account_guest(tsk, vtime);
else
vtime_account_system(tsk, vtime);
+}
+
+void vtime_account_kernel(struct task_struct *tsk)
+{
+ struct vtime *vtime = &tsk->vtime;
+
+ if (!vtime_delta(vtime))
+ return;
+
+ write_seqcount_begin(&vtime->seqcount);
+ __vtime_account_kernel(tsk, vtime);
write_seqcount_end(&vtime->seqcount);
}
@@ -804,11 +808,15 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(get_vtime_delta(&tsk->vtime));
}
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
{
struct vtime *vtime = &prev->vtime;
write_seqcount_begin(&vtime->seqcount);
+ if (is_idle_task(prev))
+ vtime_account_idle(prev);
+ else
+ __vtime_account_kernel(prev, vtime);
vtime->state = VTIME_INACTIVE;
write_seqcount_end(&vtime->seqcount);
--
2.23.0
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 0/2] vtime: Remove pair of seqcount on context switch
2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
@ 2019-10-07 16:20 ` Ingo Molnar
2019-10-07 16:51 ` Frederic Weisbecker
2 siblings, 1 reply; 7+ messages in thread
From: Ingo Molnar @ 2019-10-07 16:20 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Peter Zijlstra, LKML, Wanpeng Li, Thomas Gleixner,
Yauheni Kaliuta, Rik van Riel
* Frederic Weisbecker <frederic@kernel.org> wrote:
> Extracted from a larger queue that fixes kcpustat on nohz_full, these
> two patches have value on their own as they remove two write barriers
> on nohz_full context switch.
>
> Frederic Weisbecker (2):
> vtime: Rename vtime_account_system() to vtime_account_kernel()
> vtime: Spare a seqcount lock/unlock cycle on context switch
>
> arch/ia64/kernel/time.c | 4 +--
> arch/powerpc/kernel/time.c | 6 ++--
> arch/s390/kernel/vtime.c | 4 +--
> include/linux/context_tracking.h | 4 +--
> include/linux/vtime.h | 38 ++++++++++++------------
> kernel/sched/cputime.c | 50 ++++++++++++++++++--------------
> 6 files changed, 57 insertions(+), 49 deletions(-)
Which tree is this against? Doesn't apply cleanly to v5.4-rc2 nor v5.3.
Does it have any prereqs perhaps?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 0/2] vtime: Remove pair of seqcount on context switch
2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
@ 2019-10-07 16:51 ` Frederic Weisbecker
0 siblings, 0 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-07 16:51 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, LKML, Wanpeng Li, Thomas Gleixner,
Yauheni Kaliuta, Rik van Riel
On Mon, Oct 07, 2019 at 06:20:31PM +0200, Ingo Molnar wrote:
>
> * Frederic Weisbecker <frederic@kernel.org> wrote:
>
> > Extracted from a larger queue that fixes kcpustat on nohz_full, these
> > two patches have value on their own as they remove two write barriers
> > on nohz_full context switch.
> >
> > Frederic Weisbecker (2):
> > vtime: Rename vtime_account_system() to vtime_account_kernel()
> > vtime: Spare a seqcount lock/unlock cycle on context switch
> >
> > arch/ia64/kernel/time.c | 4 +--
> > arch/powerpc/kernel/time.c | 6 ++--
> > arch/s390/kernel/vtime.c | 4 +--
> > include/linux/context_tracking.h | 4 +--
> > include/linux/vtime.h | 38 ++++++++++++------------
> > kernel/sched/cputime.c | 50 ++++++++++++++++++--------------
> > 6 files changed, 57 insertions(+), 49 deletions(-)
>
> Which tree is this against? Doesn't apply cleanly to v5.4-rc2 nor v5.3.
> Does it have any prereqs perhaps?
Indeed, you need to apply this fix first: https://lore.kernel.org/lkml/20190925214242.21873-1-frederic@kernel.org/
"[PATCH] sched/vtime: Fix guest/system mis-accounting on task switch"
Thanks!
^ permalink raw reply [flat|nested] 7+ messages in thread
* [tip: sched/core] sched/cputime: Rename vtime_account_system() to vtime_account_kernel()
2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
@ 2019-10-09 12:59 ` tip-bot2 for Frederic Weisbecker
0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Frederic Weisbecker @ 2019-10-09 12:59 UTC (permalink / raw)
To: linux-tip-commits
Cc: Frederic Weisbecker, Peter Zijlstra (Intel),
Linus Torvalds, Rik van Riel, Thomas Gleixner, Wanpeng Li,
Yauheni Kaliuta, Ingo Molnar, Borislav Petkov, linux-kernel
The following commit has been merged into the sched/core branch of tip:
Commit-ID: f83eeb1a01689b2691f6f56629ac9f66de8d41c2
Gitweb: https://git.kernel.org/tip/f83eeb1a01689b2691f6f56629ac9f66de8d41c2
Author: Frederic Weisbecker <frederic@kernel.org>
AuthorDate: Thu, 03 Oct 2019 18:17:44 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 09 Oct 2019 12:39:25 +02:00
sched/cputime: Rename vtime_account_system() to vtime_account_kernel()
vtime_account_system() decides if we need to account the time to the
system (__vtime_account_system()) or to the guest (vtime_account_guest()).
So this function is a misnomer as we are on a higher level than
"system". All we know when we call that function is that we are
accounting kernel cputime. Whether it belongs to guest or system time
is a lower level detail.
Rename this function to vtime_account_kernel(). This will clarify things
and avoid too many underscored vtime_account_system() versions.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191003161745.28464-2-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/ia64/kernel/time.c | 4 ++--
arch/powerpc/kernel/time.c | 6 +++---
arch/s390/kernel/vtime.c | 4 ++--
include/linux/context_tracking.h | 4 ++--
include/linux/vtime.h | 6 +++---
kernel/sched/cputime.c | 18 +++++++++---------
6 files changed, 21 insertions(+), 21 deletions(-)
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 1e95d32..91b4024 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
return delta_stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
else
ti->stime += stime;
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6945223..84827da 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
return stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
#endif
}
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca4..8df10d3 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
/*
* Sorted add to a list. List is linear searched until first bigger
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d05609a..558a209 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void)
* to assume that it's the stime pending cputime
* to flush.
*/
- vtime_account_system(current);
+ vtime_account_kernel(current);
current->flags |= PF_VCPU;
rcu_virt_note_context_switch(smp_processor_id());
}
@@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void)
static inline void guest_exit_irqoff(void)
{
/* Flush the guest cputime we spent on the guest */
- vtime_account_system(current);
+ vtime_account_kernel(current);
current->flags &= ~PF_VCPU;
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index a26ed10..2fd247f 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -57,13 +57,13 @@ static inline void vtime_task_switch(struct task_struct *prev)
}
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_kernel(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -86,7 +86,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
static inline void vtime_account_irq_exit(struct task_struct *tsk)
{
/* On hard|softirq exit we always account to hard|softirq cputime */
- vtime_account_system(tsk);
+ vtime_account_kernel(tsk);
}
extern void vtime_flush(struct task_struct *tsk);
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 46ed4e1..b45932e 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -412,7 +412,7 @@ void vtime_common_task_switch(struct task_struct *prev)
if (is_idle_task(prev))
vtime_account_idle(prev);
else
- vtime_account_system(prev);
+ vtime_account_kernel(prev);
vtime_flush(prev);
arch_vtime_task_switch(prev);
@@ -425,7 +425,7 @@ void vtime_common_task_switch(struct task_struct *prev)
/*
* Archs that account the whole time spent in the idle task
* (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * vtime_account_kernel() and vtime_account_idle(). Archs that
* have other meaning of the idle time (s390 only includes the
* time spent by the CPU when it's in low power mode) must override
* vtime_account().
@@ -436,7 +436,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
if (!in_interrupt() && is_idle_task(tsk))
vtime_account_idle(tsk);
else
- vtime_account_system(tsk);
+ vtime_account_kernel(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -711,8 +711,8 @@ static u64 get_vtime_delta(struct vtime *vtime)
return delta - other;
}
-static void __vtime_account_system(struct task_struct *tsk,
- struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+ struct vtime *vtime)
{
vtime->stime += get_vtime_delta(vtime);
if (vtime->stime >= TICK_NSEC) {
@@ -731,7 +731,7 @@ static void vtime_account_guest(struct task_struct *tsk,
}
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
struct vtime *vtime = &tsk->vtime;
@@ -743,7 +743,7 @@ void vtime_account_system(struct task_struct *tsk)
if (tsk->flags & PF_VCPU)
vtime_account_guest(tsk, vtime);
else
- __vtime_account_system(tsk, vtime);
+ vtime_account_system(tsk, vtime);
write_seqcount_end(&vtime->seqcount);
}
@@ -752,7 +752,7 @@ void vtime_user_enter(struct task_struct *tsk)
struct vtime *vtime = &tsk->vtime;
write_seqcount_begin(&vtime->seqcount);
- __vtime_account_system(tsk, vtime);
+ vtime_account_system(tsk, vtime);
vtime->state = VTIME_USER;
write_seqcount_end(&vtime->seqcount);
}
@@ -782,7 +782,7 @@ void vtime_guest_enter(struct task_struct *tsk)
* that can thus safely catch up with a tickless delta.
*/
write_seqcount_begin(&vtime->seqcount);
- __vtime_account_system(tsk, vtime);
+ vtime_account_system(tsk, vtime);
tsk->flags |= PF_VCPU;
write_seqcount_end(&vtime->seqcount);
}
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [tip: sched/core] sched/cputime: Spare a seqcount lock/unlock cycle on context switch
2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
@ 2019-10-09 12:59 ` tip-bot2 for Frederic Weisbecker
0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Frederic Weisbecker @ 2019-10-09 12:59 UTC (permalink / raw)
To: linux-tip-commits
Cc: Frederic Weisbecker, Peter Zijlstra (Intel),
Linus Torvalds, Rik van Riel, Thomas Gleixner, Wanpeng Li,
Yauheni Kaliuta, Ingo Molnar, Borislav Petkov, linux-kernel
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 8d495477d62e4397207f22a432fcaa86d9f2bc2d
Gitweb: https://git.kernel.org/tip/8d495477d62e4397207f22a432fcaa86d9f2bc2d
Author: Frederic Weisbecker <frederic@kernel.org>
AuthorDate: Thu, 03 Oct 2019 18:17:45 +02:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 09 Oct 2019 12:39:26 +02:00
sched/cputime: Spare a seqcount lock/unlock cycle on context switch
On context switch we are locking the vtime seqcount of the scheduling-out
task twice:
* On vtime_task_switch_common(), when we flush the pending vtime through
vtime_account_system()
* On arch_vtime_task_switch() to reset the vtime state.
This is pointless as these actions can be performed without the need
to unlock/lock in the middle. The reason these steps are separated is to
consolidate a very small amount of common code between
CONFIG_VIRT_CPU_ACCOUNTING_GEN and CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.
Performance in this fast path is definitely a priority over artificial
code factorization so split the task switch code between GEN and
NATIVE and mutualize the parts than can run under a single seqcount
locked block.
As a side effect, vtime_account_idle() becomes included in the seqcount
protection. This happens to be a welcome preparation in order to
properly support kcpustat under vtime in the future and fetch
CPUTIME_IDLE without race.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191003161745.28464-3-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
include/linux/vtime.h | 32 ++++++++++++++++----------------
kernel/sched/cputime.c | 30 +++++++++++++++++++-----------
2 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 2fd247f..d9160ab 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,8 +14,12 @@ struct task_struct;
* vtime_accounting_cpu_enabled() definitions/declarations
*/
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
+
static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+extern void vtime_task_switch(struct task_struct *prev);
+
#elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
+
/*
* Checks if vtime is enabled on some CPU. Cputime readers want to be careful
* in that case and compute the tickless cputime.
@@ -36,33 +40,29 @@ static inline bool vtime_accounting_cpu_enabled(void)
return false;
}
+
+extern void vtime_task_switch_generic(struct task_struct *prev);
+
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+ if (vtime_accounting_cpu_enabled())
+ vtime_task_switch_generic(prev);
+}
+
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
static inline bool vtime_accounting_cpu_enabled(void) { return false; }
-#endif
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
/*
* Common vtime APIs
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-
-#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
-extern void vtime_task_switch(struct task_struct *prev);
-#else
-extern void vtime_common_task_switch(struct task_struct *prev);
-static inline void vtime_task_switch(struct task_struct *prev)
-{
- if (vtime_accounting_cpu_enabled())
- vtime_common_task_switch(prev);
-}
-#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-
extern void vtime_account_kernel(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
-
#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
-static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_kernel(struct task_struct *tsk) { }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b45932e..cef23c2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -405,9 +405,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
/*
* Use precise platform statistics if available:
*/
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
{
if (is_idle_task(prev))
vtime_account_idle(prev);
@@ -418,10 +419,7 @@ void vtime_common_task_switch(struct task_struct *prev)
arch_vtime_task_switch(prev);
}
# endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Archs that account the whole time spent in the idle task
* (outside irq) as idle time can rely on this and just implement
@@ -731,6 +729,16 @@ static void vtime_account_guest(struct task_struct *tsk,
}
}
+static void __vtime_account_kernel(struct task_struct *tsk,
+ struct vtime *vtime)
+{
+ /* We might have scheduled out from guest path */
+ if (tsk->flags & PF_VCPU)
+ vtime_account_guest(tsk, vtime);
+ else
+ vtime_account_system(tsk, vtime);
+}
+
void vtime_account_kernel(struct task_struct *tsk)
{
struct vtime *vtime = &tsk->vtime;
@@ -739,11 +747,7 @@ void vtime_account_kernel(struct task_struct *tsk)
return;
write_seqcount_begin(&vtime->seqcount);
- /* We might have scheduled out from guest path */
- if (tsk->flags & PF_VCPU)
- vtime_account_guest(tsk, vtime);
- else
- vtime_account_system(tsk, vtime);
+ __vtime_account_kernel(tsk, vtime);
write_seqcount_end(&vtime->seqcount);
}
@@ -804,11 +808,15 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(get_vtime_delta(&tsk->vtime));
}
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
{
struct vtime *vtime = &prev->vtime;
write_seqcount_begin(&vtime->seqcount);
+ if (is_idle_task(prev))
+ vtime_account_idle(prev);
+ else
+ __vtime_account_kernel(prev, vtime);
vtime->state = VTIME_INACTIVE;
write_seqcount_end(&vtime->seqcount);
^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2019-10-09 12:59 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
2019-10-09 12:59 ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
2019-10-09 12:59 ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
2019-10-07 16:51 ` Frederic Weisbecker
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.