All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] vtime: Remove pair of seqcount on context switch
@ 2019-10-03 16:17 Frederic Weisbecker
  2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-03 16:17 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Wanpeng Li, Thomas Gleixner,
	Yauheni Kaliuta, Rik van Riel

Extracted from a larger queue that fixes kcpustat on nohz_full, these
two patches have value on their own as they remove two write barriers
on nohz_full context switch.

Frederic Weisbecker (2):
  vtime: Rename vtime_account_system() to vtime_account_kernel()
  vtime: Spare a seqcount lock/unlock cycle on context switch

 arch/ia64/kernel/time.c          |  4 +--
 arch/powerpc/kernel/time.c       |  6 ++--
 arch/s390/kernel/vtime.c         |  4 +--
 include/linux/context_tracking.h |  4 +--
 include/linux/vtime.h            | 38 ++++++++++++------------
 kernel/sched/cputime.c           | 50 ++++++++++++++++++--------------
 6 files changed, 57 insertions(+), 49 deletions(-)

-- 
2.23.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel()
  2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
@ 2019-10-03 16:17 ` Frederic Weisbecker
  2019-10-09 12:59   ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
  2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
  2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
  2 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-03 16:17 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Wanpeng Li, Thomas Gleixner,
	Yauheni Kaliuta, Rik van Riel

vtime_account_system() decides if we need to account the time to the
system (__vtime_account_system()) or to the guest (vtime_account_guest()).

So this function is a misnommer as we are on a higher level than
"system". All we know when we call that function is that we are
accounting kernel cputime. Whether it belongs to guest or system time
is a lower level detail.

Rename this function to vtime_account_kernel(). This will clarify things
and avoid too many underscored vtime_account_system() versions.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 arch/ia64/kernel/time.c          |  4 ++--
 arch/powerpc/kernel/time.c       |  6 +++---
 arch/s390/kernel/vtime.c         |  4 ++--
 include/linux/context_tracking.h |  4 ++--
 include/linux/vtime.h            |  6 +++---
 kernel/sched/cputime.c           | 18 +++++++++---------
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 1e95d32c8877..91b4024c9351 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
 	return delta_stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
 	else
 		ti->stime += stime;
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..84827da01d45 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
 	return stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	unsigned long stime, stime_scaled, steal_time;
 	struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
 #endif
 	}
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
 /*
  * Account the whole cputime accumulated in the paca
  * Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
  * recently (i.e. since the last entry from usermode) so that
  * get_paca()->user_time_scaled is up to date.
  */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca49cfc6..8df10d3c8f6c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 __attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 /*
  * Sorted add to a list. List is linear searched until first bigger
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d05609ad329d..558a209c247d 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void)
 	 * to assume that it's the stime pending cputime
 	 * to flush.
 	 */
-	vtime_account_system(current);
+	vtime_account_kernel(current);
 	current->flags |= PF_VCPU;
 	rcu_virt_note_context_switch(smp_processor_id());
 }
@@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void)
 static inline void guest_exit_irqoff(void)
 {
 	/* Flush the guest cputime we spent on the guest */
-	vtime_account_system(current);
+	vtime_account_kernel(current);
 	current->flags &= ~PF_VCPU;
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index a26ed10a4eac..2fd247f90408 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -57,13 +57,13 @@ static inline void vtime_task_switch(struct task_struct *prev)
 }
 #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
 
-extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_kernel(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_kernel(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -86,7 +86,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
 	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_kernel(tsk);
 }
 extern void vtime_flush(struct task_struct *tsk);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 46ed4e1383e2..b45932e27857 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -412,7 +412,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
-		vtime_account_system(prev);
+		vtime_account_kernel(prev);
 
 	vtime_flush(prev);
 	arch_vtime_task_switch(prev);
@@ -425,7 +425,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * vtime_account_kernel() and vtime_account_idle(). Archs that
  * have other meaning of the idle time (s390 only includes the
  * time spent by the CPU when it's in low power mode) must override
  * vtime_account().
@@ -436,7 +436,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 	if (!in_interrupt() && is_idle_task(tsk))
 		vtime_account_idle(tsk);
 	else
-		vtime_account_system(tsk);
+		vtime_account_kernel(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -711,8 +711,8 @@ static u64 get_vtime_delta(struct vtime *vtime)
 	return delta - other;
 }
 
-static void __vtime_account_system(struct task_struct *tsk,
-				   struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+				 struct vtime *vtime)
 {
 	vtime->stime += get_vtime_delta(vtime);
 	if (vtime->stime >= TICK_NSEC) {
@@ -731,7 +731,7 @@ static void vtime_account_guest(struct task_struct *tsk,
 	}
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct vtime *vtime = &tsk->vtime;
 
@@ -743,7 +743,7 @@ void vtime_account_system(struct task_struct *tsk)
 	if (tsk->flags & PF_VCPU)
 		vtime_account_guest(tsk, vtime);
 	else
-		__vtime_account_system(tsk, vtime);
+		vtime_account_system(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -752,7 +752,7 @@ void vtime_user_enter(struct task_struct *tsk)
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	vtime->state = VTIME_USER;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -782,7 +782,7 @@ void vtime_guest_enter(struct task_struct *tsk)
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	tsk->flags |= PF_VCPU;
 	write_seqcount_end(&vtime->seqcount);
 }
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch
  2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
  2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
@ 2019-10-03 16:17 ` Frederic Weisbecker
  2019-10-09 12:59   ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
  2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
  2 siblings, 1 reply; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-03 16:17 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar
  Cc: LKML, Frederic Weisbecker, Wanpeng Li, Thomas Gleixner,
	Yauheni Kaliuta, Rik van Riel

On context switch we are locking the vtime seqcount of the scheduling-out
task twice:

 * On vtime_task_switch_common(), when we flush the pending vtime through
   vtime_account_system()

 * On arch_vtime_task_switch() to reset the vtime state.

This is pointless as these actions can be performed without the need
to unlock/lock in the middle. The reason these steps are separated is to
consolidate a very small amount of common code between
CONFIG_VIRT_CPU_ACCOUNTING_GEN and CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.

Performance in this fast path is definetly a priority over artificial
code factorization so split the task switch code between GEN and
NATIVE and mutualize the parts than can run under a single seqcount
locked block.

As a side effect, vtime_account_idle() becomes included in the seqcount
protection. This happens to be a welcome preparation in order to
properly support kcpustat under vtime in the future and fetch
CPUTIME_IDLE without race.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 include/linux/vtime.h  | 32 ++++++++++++++++----------------
 kernel/sched/cputime.c | 34 +++++++++++++++++++++-------------
 2 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 2fd247f90408..d9160ab3667a 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,8 +14,12 @@ struct task_struct;
  * vtime_accounting_cpu_enabled() definitions/declarations
  */
 #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
+
 static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+extern void vtime_task_switch(struct task_struct *prev);
+
 #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
+
 /*
  * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
  * in that case and compute the tickless cputime.
@@ -36,33 +40,29 @@ static inline bool vtime_accounting_cpu_enabled(void)
 
 	return false;
 }
+
+extern void vtime_task_switch_generic(struct task_struct *prev);
+
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+	if (vtime_accounting_cpu_enabled())
+		vtime_task_switch_generic(prev);
+}
+
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
 static inline bool vtime_accounting_cpu_enabled(void) { return false; }
-#endif
+static inline void vtime_task_switch(struct task_struct *prev) { }
 
+#endif
 
 /*
  * Common vtime APIs
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-
-#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
-extern void vtime_task_switch(struct task_struct *prev);
-#else
-extern void vtime_common_task_switch(struct task_struct *prev);
-static inline void vtime_task_switch(struct task_struct *prev)
-{
-	if (vtime_accounting_cpu_enabled())
-		vtime_common_task_switch(prev);
-}
-#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-
 extern void vtime_account_kernel(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
-
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
-static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_kernel(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b45932e27857..cef23c211f41 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -405,9 +405,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 /*
  * Use precise platform statistics if available:
  */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
 # ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
 {
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
@@ -418,10 +419,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 	arch_vtime_task_switch(prev);
 }
 # endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
@@ -731,19 +729,25 @@ static void vtime_account_guest(struct task_struct *tsk,
 	}
 }
 
-void vtime_account_kernel(struct task_struct *tsk)
+static void __vtime_account_kernel(struct task_struct *tsk,
+				   struct vtime *vtime)
 {
-	struct vtime *vtime = &tsk->vtime;
-
-	if (!vtime_delta(vtime))
-		return;
-
-	write_seqcount_begin(&vtime->seqcount);
 	/* We might have scheduled out from guest path */
 	if (tsk->flags & PF_VCPU)
 		vtime_account_guest(tsk, vtime);
 	else
 		vtime_account_system(tsk, vtime);
+}
+
+void vtime_account_kernel(struct task_struct *tsk)
+{
+	struct vtime *vtime = &tsk->vtime;
+
+	if (!vtime_delta(vtime))
+		return;
+
+	write_seqcount_begin(&vtime->seqcount);
+	__vtime_account_kernel(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -804,11 +808,15 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(get_vtime_delta(&tsk->vtime));
 }
 
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
+	if (is_idle_task(prev))
+		vtime_account_idle(prev);
+	else
+		__vtime_account_kernel(prev, vtime);
 	vtime->state = VTIME_INACTIVE;
 	write_seqcount_end(&vtime->seqcount);
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/2] vtime: Remove pair of seqcount on context switch
  2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
  2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
  2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
@ 2019-10-07 16:20 ` Ingo Molnar
  2019-10-07 16:51   ` Frederic Weisbecker
  2 siblings, 1 reply; 7+ messages in thread
From: Ingo Molnar @ 2019-10-07 16:20 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Peter Zijlstra, LKML, Wanpeng Li, Thomas Gleixner,
	Yauheni Kaliuta, Rik van Riel


* Frederic Weisbecker <frederic@kernel.org> wrote:

> Extracted from a larger queue that fixes kcpustat on nohz_full, these
> two patches have value on their own as they remove two write barriers
> on nohz_full context switch.
> 
> Frederic Weisbecker (2):
>   vtime: Rename vtime_account_system() to vtime_account_kernel()
>   vtime: Spare a seqcount lock/unlock cycle on context switch
> 
>  arch/ia64/kernel/time.c          |  4 +--
>  arch/powerpc/kernel/time.c       |  6 ++--
>  arch/s390/kernel/vtime.c         |  4 +--
>  include/linux/context_tracking.h |  4 +--
>  include/linux/vtime.h            | 38 ++++++++++++------------
>  kernel/sched/cputime.c           | 50 ++++++++++++++++++--------------
>  6 files changed, 57 insertions(+), 49 deletions(-)

Which tree is this against? Doesn't apply cleanly to v5.4-rc2 nor v5.3. 
Does it have any prereqs perhaps?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 0/2] vtime: Remove pair of seqcount on context switch
  2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
@ 2019-10-07 16:51   ` Frederic Weisbecker
  0 siblings, 0 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2019-10-07 16:51 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, LKML, Wanpeng Li, Thomas Gleixner,
	Yauheni Kaliuta, Rik van Riel

On Mon, Oct 07, 2019 at 06:20:31PM +0200, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <frederic@kernel.org> wrote:
> 
> > Extracted from a larger queue that fixes kcpustat on nohz_full, these
> > two patches have value on their own as they remove two write barriers
> > on nohz_full context switch.
> > 
> > Frederic Weisbecker (2):
> >   vtime: Rename vtime_account_system() to vtime_account_kernel()
> >   vtime: Spare a seqcount lock/unlock cycle on context switch
> > 
> >  arch/ia64/kernel/time.c          |  4 +--
> >  arch/powerpc/kernel/time.c       |  6 ++--
> >  arch/s390/kernel/vtime.c         |  4 +--
> >  include/linux/context_tracking.h |  4 +--
> >  include/linux/vtime.h            | 38 ++++++++++++------------
> >  kernel/sched/cputime.c           | 50 ++++++++++++++++++--------------
> >  6 files changed, 57 insertions(+), 49 deletions(-)
> 
> Which tree is this against? Doesn't apply cleanly to v5.4-rc2 nor v5.3. 
> Does it have any prereqs perhaps?

Indeed, you need to apply this fix first: https://lore.kernel.org/lkml/20190925214242.21873-1-frederic@kernel.org/

"[PATCH] sched/vtime: Fix guest/system mis-accounting on task switch"

Thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [tip: sched/core] sched/cputime: Rename vtime_account_system() to vtime_account_kernel()
  2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
@ 2019-10-09 12:59   ` tip-bot2 for Frederic Weisbecker
  0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Frederic Weisbecker @ 2019-10-09 12:59 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Frederic Weisbecker, Peter Zijlstra (Intel),
	Linus Torvalds, Rik van Riel, Thomas Gleixner, Wanpeng Li,
	Yauheni Kaliuta, Ingo Molnar, Borislav Petkov, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     f83eeb1a01689b2691f6f56629ac9f66de8d41c2
Gitweb:        https://git.kernel.org/tip/f83eeb1a01689b2691f6f56629ac9f66de8d41c2
Author:        Frederic Weisbecker <frederic@kernel.org>
AuthorDate:    Thu, 03 Oct 2019 18:17:44 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 09 Oct 2019 12:39:25 +02:00

sched/cputime: Rename vtime_account_system() to vtime_account_kernel()

vtime_account_system() decides if we need to account the time to the
system (__vtime_account_system()) or to the guest (vtime_account_guest()).

So this function is a misnomer as we are on a higher level than
"system". All we know when we call that function is that we are
accounting kernel cputime. Whether it belongs to guest or system time
is a lower level detail.

Rename this function to vtime_account_kernel(). This will clarify things
and avoid too many underscored vtime_account_system() versions.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191003161745.28464-2-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/ia64/kernel/time.c          |  4 ++--
 arch/powerpc/kernel/time.c       |  6 +++---
 arch/s390/kernel/vtime.c         |  4 ++--
 include/linux/context_tracking.h |  4 ++--
 include/linux/vtime.h            |  6 +++---
 kernel/sched/cputime.c           | 18 +++++++++---------
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 1e95d32..91b4024 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
 	return delta_stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct thread_info *ti = task_thread_info(tsk);
 	__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
 	else
 		ti->stime += stime;
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 6945223..84827da 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
 	return stime;
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	unsigned long stime, stime_scaled, steal_time;
 	struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
 #endif
 	}
 }
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
 /*
  * Account the whole cputime accumulated in the paca
  * Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
  * recently (i.e. since the last entry from usermode) so that
  * get_paca()->user_time_scaled is up to date.
  */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca4..8df10d3 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 __attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 /*
  * Sorted add to a list. List is linear searched until first bigger
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d05609a..558a209 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -141,7 +141,7 @@ static inline void guest_enter_irqoff(void)
 	 * to assume that it's the stime pending cputime
 	 * to flush.
 	 */
-	vtime_account_system(current);
+	vtime_account_kernel(current);
 	current->flags |= PF_VCPU;
 	rcu_virt_note_context_switch(smp_processor_id());
 }
@@ -149,7 +149,7 @@ static inline void guest_enter_irqoff(void)
 static inline void guest_exit_irqoff(void)
 {
 	/* Flush the guest cputime we spent on the guest */
-	vtime_account_system(current);
+	vtime_account_kernel(current);
 	current->flags &= ~PF_VCPU;
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index a26ed10..2fd247f 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -57,13 +57,13 @@ static inline void vtime_task_switch(struct task_struct *prev)
 }
 #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
 
-extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_kernel(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 static inline void vtime_task_switch(struct task_struct *prev) { }
-static inline void vtime_account_system(struct task_struct *tsk) { }
+static inline void vtime_account_kernel(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -86,7 +86,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
 	/* On hard|softirq exit we always account to hard|softirq cputime */
-	vtime_account_system(tsk);
+	vtime_account_kernel(tsk);
 }
 extern void vtime_flush(struct task_struct *tsk);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 46ed4e1..b45932e 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -412,7 +412,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
-		vtime_account_system(prev);
+		vtime_account_kernel(prev);
 
 	vtime_flush(prev);
 	arch_vtime_task_switch(prev);
@@ -425,7 +425,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
- * vtime_account_system() and vtime_account_idle(). Archs that
+ * vtime_account_kernel() and vtime_account_idle(). Archs that
  * have other meaning of the idle time (s390 only includes the
  * time spent by the CPU when it's in low power mode) must override
  * vtime_account().
@@ -436,7 +436,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
 	if (!in_interrupt() && is_idle_task(tsk))
 		vtime_account_idle(tsk);
 	else
-		vtime_account_system(tsk);
+		vtime_account_kernel(tsk);
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
@@ -711,8 +711,8 @@ static u64 get_vtime_delta(struct vtime *vtime)
 	return delta - other;
 }
 
-static void __vtime_account_system(struct task_struct *tsk,
-				   struct vtime *vtime)
+static void vtime_account_system(struct task_struct *tsk,
+				 struct vtime *vtime)
 {
 	vtime->stime += get_vtime_delta(vtime);
 	if (vtime->stime >= TICK_NSEC) {
@@ -731,7 +731,7 @@ static void vtime_account_guest(struct task_struct *tsk,
 	}
 }
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct vtime *vtime = &tsk->vtime;
 
@@ -743,7 +743,7 @@ void vtime_account_system(struct task_struct *tsk)
 	if (tsk->flags & PF_VCPU)
 		vtime_account_guest(tsk, vtime);
 	else
-		__vtime_account_system(tsk, vtime);
+		vtime_account_system(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -752,7 +752,7 @@ void vtime_user_enter(struct task_struct *tsk)
 	struct vtime *vtime = &tsk->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	vtime->state = VTIME_USER;
 	write_seqcount_end(&vtime->seqcount);
 }
@@ -782,7 +782,7 @@ void vtime_guest_enter(struct task_struct *tsk)
 	 * that can thus safely catch up with a tickless delta.
 	 */
 	write_seqcount_begin(&vtime->seqcount);
-	__vtime_account_system(tsk, vtime);
+	vtime_account_system(tsk, vtime);
 	tsk->flags |= PF_VCPU;
 	write_seqcount_end(&vtime->seqcount);
 }

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [tip: sched/core] sched/cputime: Spare a seqcount lock/unlock cycle on context switch
  2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
@ 2019-10-09 12:59   ` tip-bot2 for Frederic Weisbecker
  0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Frederic Weisbecker @ 2019-10-09 12:59 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: Frederic Weisbecker, Peter Zijlstra (Intel),
	Linus Torvalds, Rik van Riel, Thomas Gleixner, Wanpeng Li,
	Yauheni Kaliuta, Ingo Molnar, Borislav Petkov, linux-kernel

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     8d495477d62e4397207f22a432fcaa86d9f2bc2d
Gitweb:        https://git.kernel.org/tip/8d495477d62e4397207f22a432fcaa86d9f2bc2d
Author:        Frederic Weisbecker <frederic@kernel.org>
AuthorDate:    Thu, 03 Oct 2019 18:17:45 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Wed, 09 Oct 2019 12:39:26 +02:00

sched/cputime: Spare a seqcount lock/unlock cycle on context switch

On context switch we are locking the vtime seqcount of the scheduling-out
task twice:

 * On vtime_task_switch_common(), when we flush the pending vtime through
   vtime_account_system()

 * On arch_vtime_task_switch() to reset the vtime state.

This is pointless as these actions can be performed without the need
to unlock/lock in the middle. The reason these steps are separated is to
consolidate a very small amount of common code between
CONFIG_VIRT_CPU_ACCOUNTING_GEN and CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.

Performance in this fast path is definitely a priority over artificial
code factorization so split the task switch code between GEN and
NATIVE and mutualize the parts than can run under a single seqcount
locked block.

As a side effect, vtime_account_idle() becomes included in the seqcount
protection. This happens to be a welcome preparation in order to
properly support kcpustat under vtime in the future and fetch
CPUTIME_IDLE without race.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191003161745.28464-3-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/vtime.h  | 32 ++++++++++++++++----------------
 kernel/sched/cputime.c | 30 +++++++++++++++++++-----------
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 2fd247f..d9160ab 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -14,8 +14,12 @@ struct task_struct;
  * vtime_accounting_cpu_enabled() definitions/declarations
  */
 #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE)
+
 static inline bool vtime_accounting_cpu_enabled(void) { return true; }
+extern void vtime_task_switch(struct task_struct *prev);
+
 #elif defined(CONFIG_VIRT_CPU_ACCOUNTING_GEN)
+
 /*
  * Checks if vtime is enabled on some CPU. Cputime readers want to be careful
  * in that case and compute the tickless cputime.
@@ -36,33 +40,29 @@ static inline bool vtime_accounting_cpu_enabled(void)
 
 	return false;
 }
+
+extern void vtime_task_switch_generic(struct task_struct *prev);
+
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+	if (vtime_accounting_cpu_enabled())
+		vtime_task_switch_generic(prev);
+}
+
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
 static inline bool vtime_accounting_cpu_enabled(void) { return false; }
-#endif
+static inline void vtime_task_switch(struct task_struct *prev) { }
 
+#endif
 
 /*
  * Common vtime APIs
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-
-#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
-extern void vtime_task_switch(struct task_struct *prev);
-#else
-extern void vtime_common_task_switch(struct task_struct *prev);
-static inline void vtime_task_switch(struct task_struct *prev)
-{
-	if (vtime_accounting_cpu_enabled())
-		vtime_common_task_switch(prev);
-}
-#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
-
 extern void vtime_account_kernel(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
-
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
-
-static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_kernel(struct task_struct *tsk) { }
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b45932e..cef23c2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -405,9 +405,10 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 /*
  * Use precise platform statistics if available:
  */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
 # ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_common_task_switch(struct task_struct *prev)
+void vtime_task_switch(struct task_struct *prev)
 {
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
@@ -418,10 +419,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 	arch_vtime_task_switch(prev);
 }
 # endif
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Archs that account the whole time spent in the idle task
  * (outside irq) as idle time can rely on this and just implement
@@ -731,6 +729,16 @@ static void vtime_account_guest(struct task_struct *tsk,
 	}
 }
 
+static void __vtime_account_kernel(struct task_struct *tsk,
+				   struct vtime *vtime)
+{
+	/* We might have scheduled out from guest path */
+	if (tsk->flags & PF_VCPU)
+		vtime_account_guest(tsk, vtime);
+	else
+		vtime_account_system(tsk, vtime);
+}
+
 void vtime_account_kernel(struct task_struct *tsk)
 {
 	struct vtime *vtime = &tsk->vtime;
@@ -739,11 +747,7 @@ void vtime_account_kernel(struct task_struct *tsk)
 		return;
 
 	write_seqcount_begin(&vtime->seqcount);
-	/* We might have scheduled out from guest path */
-	if (tsk->flags & PF_VCPU)
-		vtime_account_guest(tsk, vtime);
-	else
-		vtime_account_system(tsk, vtime);
+	__vtime_account_kernel(tsk, vtime);
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -804,11 +808,15 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(get_vtime_delta(&tsk->vtime));
 }
 
-void arch_vtime_task_switch(struct task_struct *prev)
+void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
 	write_seqcount_begin(&vtime->seqcount);
+	if (is_idle_task(prev))
+		vtime_account_idle(prev);
+	else
+		__vtime_account_kernel(prev, vtime);
 	vtime->state = VTIME_INACTIVE;
 	write_seqcount_end(&vtime->seqcount);
 

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-10-09 12:59 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-03 16:17 [PATCH 0/2] vtime: Remove pair of seqcount on context switch Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 1/2] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
2019-10-09 12:59   ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
2019-10-03 16:17 ` [PATCH 2/2] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
2019-10-09 12:59   ` [tip: sched/core] sched/cputime: " tip-bot2 for Frederic Weisbecker
2019-10-07 16:20 ` [PATCH 0/2] vtime: Remove pair of seqcount " Ingo Molnar
2019-10-07 16:51   ` Frederic Weisbecker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.