linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Wanpeng Li <wanpengli@tencent.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Yauheni Kaliuta <yauheni.kaliuta@redhat.com>,
	Ingo Molnar <mingo@kernel.org>, Rik van Riel <riel@redhat.com>
Subject: [PATCH 08/25] vtime: Exit vtime before exit_notify()
Date: Wed, 14 Nov 2018 03:45:52 +0100	[thread overview]
Message-ID: <1542163569-20047-9-git-send-email-frederic@kernel.org> (raw)
In-Reply-To: <1542163569-20047-1-git-send-email-frederic@kernel.org>

In order to correctly implement kcpustat under nohz_full, we need to
track the task running on a given CPU and read its vtime state safely,
reliably and locklessly.

This leaves us with tracking and fetching that task under RCU. This will
be done in a further patch. Until then we need to prepare vtime for
handling that properly and close the accounting before we meet the earliest
opportunity for the RCU delayed put_task_struct() to be queued. That
point happens to be in exit_notify() in case of auto-reaping.

Therefore we need to finish the accounting right before exit_notify().
After that we shouldn't track the exiting task any further.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h  |  2 ++
 include/linux/vtime.h  |  2 ++
 kernel/exit.c          |  1 +
 kernel/sched/cputime.c | 56 ++++++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d458d65..27e0544 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -265,6 +265,8 @@ struct task_cputime {
 enum vtime_state {
 	/* Task is sleeping or running in a CPU with VTIME inactive: */
 	VTIME_INACTIVE = 0,
+	/* Task has passed exit_notify() */
+	VTIME_DEAD,
 	/* Task is idle */
 	VTIME_IDLE,
 	/* Task runs in kernelspace in a CPU with VTIME active: */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index d9160ab..8350a0b 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -73,12 +73,14 @@ extern void vtime_user_exit(struct task_struct *tsk);
 extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
 extern void vtime_init_idle(struct task_struct *tsk, int cpu);
+extern void vtime_exit_task(struct task_struct *tsk);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN  */
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
 static inline void vtime_guest_exit(struct task_struct *tsk) { }
 static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
+static inline void vtime_exit_task(struct task_struct *tsk) { }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/kernel/exit.c b/kernel/exit.c
index 0e21e6d..cae3fe9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -883,6 +883,7 @@ void __noreturn do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	vtime_exit_task(tsk);
 	exit_tasks_rcu_start();
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f64afd7..a0c3a82 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -702,7 +702,7 @@ static u64 get_vtime_delta(struct vtime *vtime)
 	 * errors from causing elapsed vtime to go negative.
 	 */
 	other = account_other_time(delta);
-	WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
+	WARN_ON_ONCE(vtime->state < VTIME_IDLE);
 	vtime->starttime += delta;
 
 	return delta - other;
@@ -813,17 +813,31 @@ void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
-	write_seqcount_begin(&vtime->seqcount);
-	if (vtime->state == VTIME_IDLE)
-		vtime_account_idle(prev);
-	else
-		__vtime_account_kernel(prev, vtime);
-	vtime->state = VTIME_INACTIVE;
-	vtime->cpu = -1;
-	write_seqcount_end(&vtime->seqcount);
+	/*
+	 * Flush the prev task vtime, unless it has passed
+	 * vtime_exit_task(), in which case there is nothing
+	 * left to account.
+	 */
+	if (vtime->state != VTIME_DEAD) {
+		write_seqcount_begin(&vtime->seqcount);
+		if (vtime->state == VTIME_IDLE)
+			vtime_account_idle(prev);
+		else
+			__vtime_account_kernel(prev, vtime);
+		vtime->state = VTIME_INACTIVE;
+		vtime->cpu = -1;
+		write_seqcount_end(&vtime->seqcount);
+	}
 
 	vtime = &current->vtime;
 
+	/*
+	 * Ignore the next task if it has been preempted after
+	 * vtime_exit_task().
+	 */
+	if (vtime->state == VTIME_DEAD)
+		return;
+
 	write_seqcount_begin(&vtime->seqcount);
 	if (is_idle_task(current))
 		vtime->state = VTIME_IDLE;
@@ -850,6 +864,30 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 	local_irq_restore(flags);
 }
 
+/*
+ * This is the final settlement point after which we don't account
+ * anymore vtime for this task.
+ */
+void vtime_exit_task(struct task_struct *t)
+{
+	struct vtime *vtime = &t->vtime;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	write_seqcount_begin(&vtime->seqcount);
+	/*
+	 * A task that has never run on a nohz_full CPU hasn't
+	 * been tracked by vtime. Thus it's in VTIME_INACTIVE
+	 * state. Nothing to account for it.
+	 */
+	if (vtime->state != VTIME_INACTIVE)
+		vtime_account_system(t, vtime);
+	vtime->state = VTIME_DEAD;
+	vtime->cpu = -1;
+	write_seqcount_end(&vtime->seqcount);
+	local_irq_restore(flags);
+}
+
 u64 task_gtime(struct task_struct *t)
 {
 	struct vtime *vtime = &t->vtime;
-- 
2.7.4


  parent reply	other threads:[~2018-11-14  2:46 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-14  2:45 [PATCH 00/25] sched/nohz: Make kcpustat vtime aware (Fix kcpustat on nohz_full) Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 01/25] sched/vtime: Fix guest/system mis-accounting on task switch Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 02/25] sched/vtime: Protect idle accounting under vtime seqcount Frederic Weisbecker
2018-11-20 13:19   ` Peter Zijlstra
2018-11-14  2:45 ` [PATCH 03/25] vtime: Rename vtime_account_system() to vtime_account_kernel() Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 04/25] vtime: Spare a seqcount lock/unlock cycle on context switch Frederic Weisbecker
2018-11-20 13:25   ` Peter Zijlstra
2019-09-25 14:42     ` Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 05/25] sched/vtime: Record CPU under seqcount for kcpustat needs Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 06/25] sched/cputime: Add vtime idle task state Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 07/25] sched/cputime: Add vtime guest " Frederic Weisbecker
2018-11-14  2:45 ` Frederic Weisbecker [this message]
2018-11-20 13:54   ` [PATCH 08/25] vtime: Exit vtime before exit_notify() Peter Zijlstra
2018-11-14  2:45 ` [PATCH 09/25] kcpustat: Track running task following vtime sequences Frederic Weisbecker
2018-11-20 13:58   ` Peter Zijlstra
2018-11-14  2:45 ` [PATCH 10/25] context_tracking: Remove context_tracking_active() Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 11/25] context_tracking: s/context_tracking_is_enabled/context_tracking_enabled() Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 12/25] context_tracking: Rename context_tracking_is_cpu_enabled() to context_tracking_enabled_this_cpu() Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 13/25] context_tracking: Introduce context_tracking_enabled_cpu() Frederic Weisbecker
2018-11-20 14:02   ` Peter Zijlstra
2018-11-14  2:45 ` [PATCH 14/25] sched/vtime: Rename vtime_accounting_cpu_enabled() to vtime_accounting_enabled_this_cpu() Frederic Weisbecker
2018-11-14  2:45 ` [PATCH 15/25] sched/vtime: Introduce vtime_accounting_enabled_cpu() Frederic Weisbecker
2018-11-20 14:04   ` Peter Zijlstra
2018-11-14  2:46 ` [PATCH 16/25] sched/cputime: Allow to pass cputime index on user/guest accounting Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 17/25] sched/cputime: Standardize the kcpustat index based accounting functions Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 18/25] vtime: Track nice-ness on top of context switch Frederic Weisbecker
2018-11-20 14:09   ` Peter Zijlstra
2018-11-14  2:46 ` [PATCH 19/25] sched/vite: Handle nice updates under vtime Frederic Weisbecker
2018-11-20 14:17   ` Peter Zijlstra
2018-11-26 15:53     ` Frederic Weisbecker
2018-11-26 16:11       ` Peter Zijlstra
2018-11-26 18:41         ` Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 20/25] sched/kcpustat: Introduce vtime-aware kcpustat accessor Frederic Weisbecker
2018-11-20 14:23   ` Peter Zijlstra
2018-11-20 22:40     ` Frederic Weisbecker
2018-11-21  8:18       ` Peter Zijlstra
2018-11-21  8:35         ` Peter Zijlstra
2018-11-21 16:33         ` Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 21/25] procfs: Use vtime aware " Frederic Weisbecker
2018-11-20 14:24   ` Peter Zijlstra
2018-11-20 22:31     ` Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 22/25] cpufreq: " Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 23/25] leds: Use vtime aware kcpustat accessors Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 24/25] rackmeter: " Frederic Weisbecker
2018-11-14  2:46 ` [PATCH 25/25] sched/vtime: Clarify vtime_task_switch() argument layout Frederic Weisbecker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1542163569-20047-9-git-send-email-frederic@kernel.org \
    --to=frederic@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=wanpengli@tencent.com \
    --cc=yauheni.kaliuta@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).