All of lore.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <frederic@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Jacek Anaszewski <jacek.anaszewski@gmail.com>,
	Wanpeng Li <wanpengli@tencent.com>, Pavel Machek <pavel@ucw.cz>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Yauheni Kaliuta <yauheni.kaliuta@redhat.com>,
	"Rafael J . Wysocki" <rjw@rjwysocki.net>,
	Viresh Kumar <viresh.kumar@linaro.org>,
	Rik van Riel <riel@surriel.com>
Subject: [PATCH 5/9] sched/vtime: Bring all-in-one kcpustat accessor for vtime fields
Date: Wed,  6 Nov 2019 04:08:03 +0100	[thread overview]
Message-ID: <20191106030807.31091-6-frederic@kernel.org> (raw)
In-Reply-To: <20191106030807.31091-1-frederic@kernel.org>

Many callsites want to fetch the values of system, user, user_nice, guest
or guest_nice kcpustat fields altogether or at least a pair of these.

In that case calling kcpustat_field() for each requested field brings
unecessary overhead when we could fetch all of them in a row.

So provide kcpustat_cputime() that fetches all vtime sensitive fields
under the same RCU and seqcount block.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel_stat.h |  23 ++++++
 kernel/sched/cputime.c      | 138 ++++++++++++++++++++++++++++++------
 2 files changed, 141 insertions(+), 20 deletions(-)

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 1b9b97f6946e..c76daad2d8e2 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,15 +78,38 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
+
+static inline void kcpustat_cputime_raw(u64 *cpustat, u64 *user, u64 *nice,
+					u64 *system, u64 *guest, u64 *guest_nice)
+{
+	*user = cpustat[CPUTIME_USER];
+	*nice = cpustat[CPUTIME_NICE];
+	*system = cpustat[CPUTIME_SYSTEM];
+	*guest = cpustat[CPUTIME_GUEST];
+	*guest_nice = cpustat[CPUTIME_GUEST_NICE];
+}
+
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 			  enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+			     u64 *user, u64 *nice, u64 *system,
+			     u64 *guest, u64 *guest_nice);
 #else
 static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 				 enum cpu_usage_stat usage, int cpu)
 {
 	return kcpustat->cpustat[usage];
 }
+
+static inline void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+				    u64 *user, u64 *nice, u64 *system,
+				    u64 *guest, u64 *guest_nice)
+{
+	kcpustat_cputime_raw(kcpustat->cpustat, user, nice,
+			     system, guest, guest_nice);
+}
+
 #endif
 
 extern void account_user_time(struct task_struct *, u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index bf4b61f71194..0006dfccbeb7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -1042,6 +1042,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+	/*
+	 * We raced against context switch, fetch the
+	 * kcpustat task again.
+	 */
+	if (vtime->cpu != cpu && vtime->cpu != -1)
+		return -EAGAIN;
+
+	/*
+	 * Two possible things here:
+	 * 1) We are seeing the scheduling out task (prev) or any past one.
+	 * 2) We are seeing the scheduling in task (next) but it hasn't
+	 *    passed though vtime_task_switch() yet so the pending
+	 *    cputime of the prev task may not be flushed yet.
+	 *
+	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+	 */
+	if (vtime->state == VTIME_INACTIVE)
+		return -EAGAIN;
+
+	return 0;
+}
+
 static u64 kcpustat_user_vtime(struct vtime *vtime)
 {
 	if (vtime->state == VTIME_USER)
@@ -1062,26 +1086,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
 	do {
 		seq = read_seqcount_begin(&vtime->seqcount);
 
-		/*
-		 * We raced against context switch, fetch the
-		 * kcpustat task again.
-		 */
-		if (vtime->cpu != cpu && vtime->cpu != -1)
-			return -EAGAIN;
-
-		/*
-		 * Two possible things here:
-		 * 1) We are seeing the scheduling out task (prev) or any past one.
-		 * 2) We are seeing the scheduling in task (next) but it hasn't
-		 *    passed though vtime_task_switch() yet so the pending
-		 *    cputime of the prev task may not be flushed yet.
-		 *
-		 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
-		 */
-		if (vtime->state == VTIME_INACTIVE)
-			return -EAGAIN;
-
-		err = 0;
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
 
 		*val = cpustat[usage];
 
@@ -1149,4 +1156,95 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
 	}
 }
 EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cputime_vtime(u64 *cpustat, struct vtime *vtime,
+				  int cpu, u64 *user, u64 *nice,
+				  u64 *system, u64 *guest, u64 *guest_nice)
+{
+	unsigned int seq;
+	u64 delta;
+	int err;
+
+	do {
+		seq = read_seqcount_begin(&vtime->seqcount);
+
+		err = vtime_state_check(vtime, cpu);
+		if (err < 0)
+			return err;
+
+		kcpustat_cputime_raw(cpustat, user, nice,
+				     system, guest, guest_nice);
+
+		/* Task is sleeping, dead or idle, nothing to add */
+		if (vtime->state < VTIME_SYS)
+			continue;
+
+		delta = vtime_delta(vtime);
+
+		/*
+		 * Task runs either in user (including guest) or kernel space,
+		 * add pending nohz time to the right place.
+		 */
+		if (vtime->state == VTIME_SYS) {
+			*system += vtime->stime + delta;
+		} else if (vtime->state == VTIME_USER) {
+			if (vtime->nice)
+				*nice += vtime->utime + delta;
+			else
+				*user += vtime->utime + delta;
+		} else {
+			WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+			if (vtime->nice) {
+				*guest_nice += vtime->gtime + delta;
+				*nice += vtime->gtime + delta;
+			} else {
+				*guest += vtime->gtime + delta;
+				*user += vtime->gtime + delta;
+			}
+		}
+	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return err;
+}
+
+void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+		      u64 *user, u64 *nice, u64 *system,
+		      u64 *guest, u64 *guest_nice)
+{
+	u64 *cpustat = kcpustat->cpustat;
+	struct rq *rq;
+	int err;
+
+	if (!vtime_accounting_enabled_cpu(cpu)) {
+		kcpustat_cputime_raw(cpustat, user, nice,
+				     system, guest, guest_nice);
+		return;
+	}
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		struct task_struct *curr;
+
+		rcu_read_lock();
+		curr = rcu_dereference(rq->curr);
+		if (WARN_ON_ONCE(!curr)) {
+			rcu_read_unlock();
+			kcpustat_cputime_raw(cpustat, user, nice,
+					     system, guest, guest_nice);
+			return;
+		}
+
+		err = kcpustat_cputime_vtime(cpustat, &curr->vtime, cpu, user,
+					     nice, system, guest, guest_nice);
+		rcu_read_unlock();
+
+		if (!err)
+			return;
+
+		cpu_relax();
+	}
+}
+EXPORT_SYMBOL_GPL(kcpustat_cputime);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
-- 
2.23.0


  parent reply	other threads:[~2019-11-06  3:08 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-06  3:07 [PATCH 0/9] sched/nohz: Make the rest of kcpustat vtime aware Frederic Weisbecker
2019-11-06  3:07 ` [PATCH 1/9] sched/cputime: Allow to pass cputime index on user/guest accounting Frederic Weisbecker
2019-11-06  3:08 ` [PATCH 2/9] sched/cputime: Standardize the kcpustat index based accounting functions Frederic Weisbecker
2019-11-06  3:08 ` [PATCH 3/9] sched/vtime: Handle nice updates under vtime Frederic Weisbecker
2019-11-15 10:14   ` Peter Zijlstra
2019-11-15 10:16   ` Peter Zijlstra
2019-11-15 10:18     ` Peter Zijlstra
2019-11-15 15:27       ` Frederic Weisbecker
2019-11-18 13:04         ` Peter Zijlstra
2019-11-06  3:08 ` [PATCH 4/9] sched/cputime: Support other fields on kcpustat_field() Frederic Weisbecker
2019-11-06  3:08 ` Frederic Weisbecker [this message]
2019-11-06  3:08 ` [PATCH 6/9] procfs: Use all-in-one vtime aware kcpustat accessor Frederic Weisbecker
2019-11-06  3:08 ` [PATCH 7/9] cpufreq: Use vtime aware kcpustat accessors for user time Frederic Weisbecker
2019-11-06  3:08 ` [PATCH 8/9] leds: Use all-in-one vtime aware kcpustat accessor Frederic Weisbecker
2019-11-06  3:08 ` [PATCH 9/9] rackmeter: Use " Frederic Weisbecker
2019-11-14 15:25 ` [PATCH 0/9] sched/nohz: Make the rest of kcpustat vtime aware Frederic Weisbecker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191106030807.31091-6-frederic@kernel.org \
    --to=frederic@kernel.org \
    --cc=benh@kernel.crashing.org \
    --cc=jacek.anaszewski@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pavel@ucw.cz \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=rjw@rjwysocki.net \
    --cc=tglx@linutronix.de \
    --cc=viresh.kumar@linaro.org \
    --cc=wanpengli@tencent.com \
    --cc=yauheni.kaliuta@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.