From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <dima@arista.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Hannes Frederic Sowa <hannes@stressinduktion.org>,
	Ingo Molnar <mingo@kernel.org>,
	"Levin, Alexander (Sasha Levin)" <alexander.levin@verizon.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Mauro Carvalho Chehab <mchehab@s-opensource.com>,
	Mike Galbraith <efault@gmx.de>, Paolo Abeni <pabeni@redhat.com>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Radu Rendec <rrendec@arista.com>, Rik van Riel <riel@redhat.com>,
	Stanislaw Gruszka <sgruszka@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Wanpeng Li <wanpeng.li@hotmail.com>
Subject: [RFC 6/6] softirq/sched: Account si cpu time to ksoftirqd(s)
Date: Thu, 18 Jan 2018 16:12:38 +0000
Message-ID: <20180118161238.13792-7-dima@arista.com>
In-Reply-To: <20180118161238.13792-1-dima@arista.com>

Warning: not merge-ready in any sense

Under CONFIG_FAIR_SOFTIRQ_SCHEDULE, each sched tick accounts the cpu
time spent processing softirqs to the ksoftirqd of the softirq's
group, updating ksoftirqd->se.sum_exec_runtime and recalculating
ksoftirqd->se.vruntime accordingly.

Use CFS's vruntime to decide whether pending softirqs should be
served right away or deferred to ksoftirqd; the balance can be tuned
through the ksoftirqd threads' nice values.
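
The resulting serve-or-defer rule, distilled from softirqs_to_serve()
in the diff below (serve_inline() is an illustrative name; the signed
subtraction keeps the comparison correct across u64 wraparound, as
vruntime comparisons in CFS must be):

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Serve a group's pending softirqs inline only when current has
	 * accrued at least as much vruntime as that group's ksoftirqd;
	 * otherwise it is fairer to wake the thread and defer to it.
	 */
	static int serve_inline(uint64_t curr_vr, uint64_t ksoftirqd_vr)
	{
		return (int64_t)(curr_vr - ksoftirqd_vr) >= 0;
	}

	int main(void)
	{
		printf("%d\n", serve_inline(2000, 1000));	/* 1: serve */
		printf("%d\n", serve_inline(1000, 2000));	/* 0: defer */
		/* still correct after vruntime wraps around */
		printf("%d\n", serve_inline(5, UINT64_MAX - 5));	/* 1 */
		return 0;
	}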

Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 include/linux/interrupt.h |  1 +
 kernel/sched/fair.c       | 38 ++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h      | 19 +++++++++++++++++++
 kernel/softirq.c          | 45 +++++++++++++++++++++++++++++++++++++--------
 4 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 17e1a04445fa..a0b5c24c088a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -512,6 +512,7 @@ extern struct task_struct *__percpu **ksoftirqd;
 extern unsigned nr_softirq_groups;
 
 extern bool servicing_softirq(unsigned nr);
+extern unsigned group_softirqs(unsigned nr);
 static inline bool current_is_ksoftirqd(void)
 {
 	unsigned i;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2fe3aa853e4d..d0105739551f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -813,6 +813,42 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 }
 #endif /* CONFIG_SMP */
 
+static void update_ksoftirqd(struct cfs_rq *cfs_rq)
+{
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	int rq_cpu = cpu_of(rq_of(cfs_rq));
+	u64 si_times[NR_SOFTIRQS], delta[NR_SOFTIRQS];
+	unsigned i;
+
+	if (unlikely(!ksoftirqd))
+		return;
+
+	softirq_time_read(rq_cpu, si_times);
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		delta[i] = si_times[i] - cfs_rq->prev_si_time[i];
+		cfs_rq->prev_si_time[i] = si_times[i];
+		if (unlikely((s64)delta[i] < 0))
+			delta[i] = 0;
+	}
+
+	for (i = 0; i < nr_softirq_groups; i++) {
+		unsigned j, softirq = 0, group_mask = group_softirqs(i);
+		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
+		u64 sum_delta = 0;
+
+		while ((j = ffs(group_mask))) {
+			softirq += j;
+			group_mask >>= j;
+			sum_delta += delta[softirq - 1];
+		}
+
+		tsk->se.sum_exec_runtime += sum_delta;
+		tsk->se.vruntime += calc_delta_fair(sum_delta, &tsk->se);
+	}
+#endif
+}
+
 /*
  * Update the current task's runtime statistics.
  */
@@ -822,6 +858,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	u64 now = rq_clock_task(rq_of(cfs_rq));
 	u64 delta_exec;
 
+	update_ksoftirqd(cfs_rq);
+
 	if (unlikely(!curr))
 		return;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14e154c86dc5..e95d8d4f9146 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -487,6 +487,10 @@ struct cfs_rq {
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	u64 prev_si_time[NR_SOFTIRQS];
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 	int runtime_enabled;
 	u64 runtime_expires;
@@ -2081,6 +2085,21 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+static inline void softirq_time_read(int cpu, u64 si_times[NR_SOFTIRQS])
+{
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+	unsigned int seq, i;
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		do {
+			seq = __u64_stats_fetch_begin(&irqtime->sync);
+			si_times[i] = irqtime->total_si[i];
+		} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+	}
+#endif
+}
+
 #ifdef CONFIG_CPU_FREQ
 DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 516e31d3d5b4..a123bafa11c2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -82,6 +82,11 @@ bool servicing_softirq(unsigned nr)
 	return false;
 }
 
+unsigned group_softirqs(unsigned nr)
+{
+	return group_to_softirqs[nr];
+}
+
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
  * but we also don't want to introduce a worst case 1/HZ latency
@@ -112,15 +117,10 @@ static void wakeup_softirqd(u32 softirq_mask)
  * If ksoftirqd is scheduled, we do not want to process pending softirqs
  * right now. Let ksoftirqd handle this at its own rate, to get fairness.
  */
-static bool ksoftirqd_running(void)
+static bool ksoftirqd_running(__u32 pending)
 {
-	/* We rely that there are pending softirqs */
-	__u32 pending = local_softirq_pending();
 	unsigned i;
 
-	if (!ksoftirqd)
-		return false;
-
 	for (i = 0; i < nr_softirq_groups && pending; i++) {
 		/* Interrupts are disabled: no need to stop preemption */
 		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
@@ -137,6 +137,33 @@ static bool ksoftirqd_running(void)
 	return !pending;
 }
 
+static __u32 softirqs_to_serve(__u32 pending)
+{
+	unsigned i;
+	__u32 unserve = pending;
+
+	if (!ksoftirqd || !current || is_idle_task(current))
+		return pending;
+
+	if (!IS_ENABLED(CONFIG_FAIR_SOFTIRQ_SCHEDULE))
+		return ksoftirqd_running(pending) ? 0 : pending;
+
+	for (i = 0; i < nr_softirq_groups && unserve; i++) {
+		/* Interrupts are disabled: no need to stop preemption */
+		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
+
+		if (tsk && (s64)(current->se.vruntime - tsk->se.vruntime) < 0) {
+			if (tsk->state != TASK_RUNNING)
+				wake_up_process(tsk);
+			continue;
+		}
+
+		unserve &= ~group_to_softirqs[i];
+	}
+
+	return pending & ~unserve;
+}
+
 /*
  * preempt_count and SOFTIRQ_OFFSET usage:
  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
@@ -385,7 +412,8 @@ asmlinkage __visible void do_softirq(void)
 
 	local_irq_save(flags);
 
-	if (!ksoftirqd_running())
+	pending = softirqs_to_serve(pending);
+	if (pending)
 		do_softirq_own_stack(pending);
 
 	local_irq_restore(flags);
@@ -414,7 +442,8 @@ static inline void invoke_softirq(void)
 {
 	__u32 pending = local_softirq_pending();
 
-	if (!pending || !ksoftirqd_running())
+	pending = softirqs_to_serve(pending);
+	if (!pending)
 		return;
 
 	if (!force_irqthreads) {
-- 
2.13.6
