From: Frederic Weisbecker <frederic@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
Frederic Weisbecker <frederic@kernel.org>,
Jacek Anaszewski <jacek.anaszewski@gmail.com>,
Wanpeng Li <wanpengli@tencent.com>,
"Rafael J . Wysocki" <rjw@rjwysocki.net>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
Rik van Riel <riel@surriel.com>,
Thomas Gleixner <tglx@linutronix.de>,
Yauheni Kaliuta <yauheni.kaliuta@redhat.com>,
Viresh Kumar <viresh.kumar@linaro.org>,
Pavel Machek <pavel@ucw.cz>
Subject: [PATCH 2/6] sched/vtime: Bring all-in-one kcpustat accessor for vtime fields
Date: Wed, 20 Nov 2019 00:22:14 +0100 [thread overview]
Message-ID: <20191119232218.4206-3-frederic@kernel.org> (raw)
In-Reply-To: <20191119232218.4206-1-frederic@kernel.org>
Many callsites want to fetch the values of system, user, user_nice, guest
or guest_nice kcpustat fields altogether or at least a pair of these.
In that case calling kcpustat_field() for each requested field brings
unecessary overhead when we could fetch all of them in a row.
So provide kcpustat_cputime() that fetches all vtime sensitive fields
under the same RCU and seqcount block.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
include/linux/kernel_stat.h | 23 ++++++
kernel/sched/cputime.c | 139 ++++++++++++++++++++++++++++++------
2 files changed, 142 insertions(+), 20 deletions(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 79781196eb25..6bd70e464c61 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,15 +78,38 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
return kstat_cpu(cpu).irqs_sum;
}
+
+static inline void kcpustat_cputime_raw(u64 *cpustat, u64 *user, u64 *nice,
+ u64 *system, u64 *guest, u64 *guest_nice)
+{
+ *user = cpustat[CPUTIME_USER];
+ *nice = cpustat[CPUTIME_NICE];
+ *system = cpustat[CPUTIME_SYSTEM];
+ *guest = cpustat[CPUTIME_GUEST];
+ *guest_nice = cpustat[CPUTIME_GUEST_NICE];
+}
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+ u64 *user, u64 *nice, u64 *system,
+ u64 *guest, u64 *guest_nice);
#else
static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu)
{
return kcpustat->cpustat[usage];
}
+
+static inline void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+ u64 *user, u64 *nice, u64 *system,
+ u64 *guest, u64 *guest_nice)
+{
+ kcpustat_cputime_raw(kcpustat->cpustat, user, nice,
+ system, guest, guest_nice);
+}
+
#endif
extern void account_user_time(struct task_struct *, u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b2cf544e2109..f576bbb1f4ee 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -912,6 +912,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
} while (read_seqcount_retry(&vtime->seqcount, seq));
}
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+ /*
+ * We raced against context switch, fetch the
+ * kcpustat task again.
+ */
+ if (vtime->cpu != cpu && vtime->cpu != -1)
+ return -EAGAIN;
+
+ /*
+ * Two possible things here:
+ * 1) We are seeing the scheduling out task (prev) or any past one.
+ * 2) We are seeing the scheduling in task (next) but it hasn't
+ * passed though vtime_task_switch() yet so the pending
+ * cputime of the prev task may not be flushed yet.
+ *
+ * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+ */
+ if (vtime->state == VTIME_INACTIVE)
+ return -EAGAIN;
+
+ return 0;
+}
+
static u64 kcpustat_user_vtime(struct vtime *vtime)
{
if (vtime->state == VTIME_USER)
@@ -933,26 +957,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
do {
seq = read_seqcount_begin(&vtime->seqcount);
- /*
- * We raced against context switch, fetch the
- * kcpustat task again.
- */
- if (vtime->cpu != cpu && vtime->cpu != -1)
- return -EAGAIN;
-
- /*
- * Two possible things here:
- * 1) We are seeing the scheduling out task (prev) or any past one.
- * 2) We are seeing the scheduling in task (next) but it hasn't
- * passed though vtime_task_switch() yet so the pending
- * cputime of the prev task may not be flushed yet.
- *
- * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
- */
- if (vtime->state == VTIME_INACTIVE)
- return -EAGAIN;
-
- err = 0;
+ err = vtime_state_check(vtime, cpu);
+ if (err < 0)
+ return err;
*val = cpustat[usage];
@@ -1024,4 +1031,96 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
}
}
EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cputime_vtime(u64 *cpustat, struct task_struct *tsk,
+ int cpu, u64 *user, u64 *nice,
+ u64 *system, u64 *guest, u64 *guest_nice)
+{
+ struct vtime *vtime = &tsk->vtime;
+ unsigned int seq;
+ u64 delta;
+ int err;
+
+ do {
+ seq = read_seqcount_begin(&vtime->seqcount);
+
+ err = vtime_state_check(vtime, cpu);
+ if (err < 0)
+ return err;
+
+ kcpustat_cputime_raw(cpustat, user, nice,
+ system, guest, guest_nice);
+
+ /* Task is sleeping, dead or idle, nothing to add */
+ if (vtime->state < VTIME_SYS)
+ continue;
+
+ delta = vtime_delta(vtime);
+
+ /*
+ * Task runs either in user (including guest) or kernel space,
+ * add pending nohz time to the right place.
+ */
+ if (vtime->state == VTIME_SYS) {
+ *system += vtime->stime + delta;
+ } else if (vtime->state == VTIME_USER) {
+ if (task_nice(tsk) > 0)
+ *nice += vtime->utime + delta;
+ else
+ *user += vtime->utime + delta;
+ } else {
+ WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+ if (task_nice(tsk) > 0) {
+ *guest_nice += vtime->gtime + delta;
+ *nice += vtime->gtime + delta;
+ } else {
+ *guest += vtime->gtime + delta;
+ *user += vtime->gtime + delta;
+ }
+ }
+ } while (read_seqcount_retry(&vtime->seqcount, seq));
+
+ return err;
+}
+
+void kcpustat_cputime(struct kernel_cpustat *kcpustat, int cpu,
+ u64 *user, u64 *nice, u64 *system,
+ u64 *guest, u64 *guest_nice)
+{
+ u64 *cpustat = kcpustat->cpustat;
+ struct rq *rq;
+ int err;
+
+ if (!vtime_accounting_enabled_cpu(cpu)) {
+ kcpustat_cputime_raw(cpustat, user, nice,
+ system, guest, guest_nice);
+ return;
+ }
+
+ rq = cpu_rq(cpu);
+
+ for (;;) {
+ struct task_struct *curr;
+
+ rcu_read_lock();
+ curr = rcu_dereference(rq->curr);
+ if (WARN_ON_ONCE(!curr)) {
+ rcu_read_unlock();
+ kcpustat_cputime_raw(cpustat, user, nice,
+ system, guest, guest_nice);
+ return;
+ }
+
+ err = kcpustat_cputime_vtime(cpustat, curr, cpu, user,
+ nice, system, guest, guest_nice);
+ rcu_read_unlock();
+
+ if (!err)
+ return;
+
+ cpu_relax();
+ }
+}
+EXPORT_SYMBOL_GPL(kcpustat_cputime);
+
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
--
2.23.0
next prev parent reply other threads:[~2019-11-19 23:22 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-19 23:22 [PATCH 0/6] sched/nohz: Make the rest of kcpustat vtime aware v2 Frederic Weisbecker
2019-11-19 23:22 ` [PATCH 1/6] sched/cputime: Support other fields on kcpustat_field() Frederic Weisbecker
2019-11-20 11:51 ` Ingo Molnar
2019-11-20 21:04 ` Peter Zijlstra
2019-11-19 23:22 ` Frederic Weisbecker [this message]
2019-11-20 12:04 ` [PATCH 2/6] sched/vtime: Bring all-in-one kcpustat accessor for vtime fields Ingo Molnar
2019-11-20 15:00 ` Frederic Weisbecker
2019-11-19 23:22 ` [PATCH 3/6] procfs: Use all-in-one vtime aware kcpustat accessor Frederic Weisbecker
2019-11-19 23:22 ` [PATCH 4/6] cpufreq: Use vtime aware kcpustat accessors for user time Frederic Weisbecker
2019-11-19 23:22 ` [PATCH 5/6] leds: Use all-in-one vtime aware kcpustat accessor Frederic Weisbecker
2019-11-19 23:22 ` [PATCH 6/6] rackmeter: Use " Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191119232218.4206-3-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=benh@kernel.crashing.org \
--cc=jacek.anaszewski@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=pavel@ucw.cz \
--cc=peterz@infradead.org \
--cc=riel@surriel.com \
--cc=rjw@rjwysocki.net \
--cc=tglx@linutronix.de \
--cc=viresh.kumar@linaro.org \
--cc=wanpengli@tencent.com \
--cc=yauheni.kaliuta@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).