All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <dima@arista.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Miller <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Hannes Frederic Sowa <hannes@stressinduktion.org>,
	Ingo Molnar <mingo@kernel.org>,
	"Levin, Alexander (Sasha Levin)" <alexander.levin@verizon.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Mauro Carvalho Chehab <mchehab@s-opensource.com>,
	Mike Galbraith <efault@gmx.de>, Paolo Abeni <pabeni@redhat.com>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Radu Rendec <rrendec@arista.com>, Rik van Riel <riel@redhat.com>,
	Stanislaw Gruszka <sgruszka@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Wanpeng Li <wanpeng.li@hotmail.com>
Subject: [RFC 1/6] softirq: Add softirq_groups boot parameter
Date: Thu, 18 Jan 2018 16:12:33 +0000	[thread overview]
Message-ID: <20180118161238.13792-2-dima@arista.com> (raw)
In-Reply-To: <20180118161238.13792-1-dima@arista.com>

ksoftirqd thread allows to defer softirqs if the system is under storm.
While it prevents userspace from cpu-time starving, it increases
latencies for other softirqs (that are not raised under storm).

As creation of one ksoftirqd thread per-each-softirq-per-cpu will be
insane on a huge machines, separate softirqs by groups.
It will allow to defer softirqs of one group and continue servicing
from other. That means that under a storm of one group's softirqs,
softirqs from the other group will be serviced as they come and will
not have latency issues.
For each softirq group will be created a per-cpu kthread which
will process deferred softirqs of the group.

The parameter will allow an admin define how many ksoftirqd threads
will be created on each cpu and which softirqs have the same
deferring group.

Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 Documentation/admin-guide/kernel-parameters.txt | 16 +++++
 include/linux/interrupt.h                       |  1 +
 kernel/softirq.c                                | 87 +++++++++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 46b26bfee27b..d5c44703a299 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3940,6 +3940,22 @@
 			Format: <integer>
 			Default: -1 (no limit)
 
+	softirq_groups=
+			[KNL] The count and contents of softirq groups.
+			Format:[group1],[group2],[groupN]
+			where group is <softirq1>/<softirq2>/<softirqM>
+			E.g: softirq_groups=HI/TIMER/HRTIMER,NET_TX/NET_RX,BLOCK
+
+			Defines how many ksoftirqd threads create *per-cpu*.
+			For each group one ksoftirqd thread is created.
+			The total number of threads created is
+			(NR_CPUS * NR_SOFTIRQ_GROUPS).
+			Admin can define one softirq in different softirq
+			groups. Softirqs those have no group defined will
+			be put in default softirq_group. If all softirqs
+			have been placed into groups default group is not
+			created.
+
 	softlockup_panic=
 			[KNL] Should the soft-lockup detector generate panics.
 			Format: <integer>
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 69c238210325..5bb6b435f0bb 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -486,6 +486,7 @@ extern const char * const softirq_to_name[NR_SOFTIRQS];
 struct softirq_action
 {
 	void	(*action)(struct softirq_action *);
+	u32	group_mask;
 };
 
 asmlinkage void do_softirq(void);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2f5e87f1bae2..c9aecdd57107 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,6 +54,7 @@ EXPORT_SYMBOL(irq_stat);
 #endif
 
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
+static unsigned __initdata nr_softirq_groups = 0;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
@@ -635,10 +636,25 @@ void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
 }
 EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
 
+static void __init setup_default_softirq_group(unsigned nr)
+{
+	unsigned i;
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		u32 *gr_mask = &softirq_vec[i].group_mask;
+
+		if (!*gr_mask)
+			*gr_mask |= (1 << nr);
+		pr_debug("softirq-%s: %#x\n", softirq_to_name[i], *gr_mask);
+	}
+}
+
 void __init softirq_init(void)
 {
 	int cpu;
 
+	setup_default_softirq_group(nr_softirq_groups++);
+
 	for_each_possible_cpu(cpu) {
 		per_cpu(tasklet_vec, cpu).tail =
 			&per_cpu(tasklet_vec, cpu).head;
@@ -750,6 +766,77 @@ static __init int spawn_ksoftirqd(void)
 }
 early_initcall(spawn_ksoftirqd);
 
+static __init __u32 parse_softirq_name(char *name, size_t len)
+{
+	__u32 i;
+
+	for (i = 0; i < NR_SOFTIRQS; i++)
+		if (strncmp(name, softirq_to_name[i], len) == 0)
+			return i;
+
+	pr_warn("softirq: Ignored `%.*s' in softirq group", (int)len, name);
+
+	return NR_SOFTIRQS;
+}
+
+static bool __init parse_softirq_group(char *start, char *end, u32 group)
+{
+	char *next_softirq = strchrnul(start, '/');
+	bool is_empty = true;
+	u32 softirq_nr;
+
+	if (next_softirq == start)
+		return !is_empty;
+
+	do {
+		next_softirq = min(next_softirq, end);
+
+		softirq_nr = parse_softirq_name(start, next_softirq - start);
+		if (softirq_nr < NR_SOFTIRQS) {
+			softirq_vec[softirq_nr].group_mask |= (1 << group);
+			is_empty = false;
+		}
+
+		if (next_softirq == end)
+			break;
+
+		start = next_softirq + 1;
+		next_softirq = strchrnul(start, '/');
+	} while (1);
+
+	return !is_empty;
+}
+
+/*
+ * Format e.g.:
+ * softirq_groups=HI/TIMER/HRTIMER,NET_TX/NET_RX,BLOCK,TASKLET
+ * Admin *can* define one softirq in different groups.
+ * Softirqs those have no group defined will be put in default softirq_group.
+ * If all softirqs have been placed into groups, default group is not created.
+ */
+static int __init setup_softirq_groups(char *s)
+{
+	char *next_group = strchrnul(s, ',');
+	unsigned i = 0;
+
+	do {
+		/* Skip empty softirq groups. */
+		if (parse_softirq_group(s, next_group, i))
+			i++;
+
+		if (*next_group == '\0')
+			break;
+
+		s = next_group + 1;
+		next_group = strchrnul(s, ',');
+	} while(i < 31); /* if there is default softirq group it's nr 31 */
+
+	nr_softirq_groups = i;
+
+	return 0;
+}
+early_param("softirq_groups", setup_softirq_groups);
+
 /*
  * [ These __weak aliases are kept in a separate compilation unit, so that
  *   GCC does not inline them incorrectly. ]
-- 
2.13.6

  reply	other threads:[~2018-01-18 16:14 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-18 16:12 [RFC 0/6] Multi-thread per-cpu ksoftirqd Dmitry Safonov
2018-01-18 16:12 ` Dmitry Safonov [this message]
2018-01-18 16:12 ` [RFC 2/6] softirq: Introduce mask for __do_softirq() Dmitry Safonov
2018-01-18 16:12 ` [RFC 3/6] softirq: Add reverse group-to-softirq map Dmitry Safonov
2018-01-18 16:12 ` [RFC 4/6] softirq: Run per-group per-cpu ksoftirqd thread Dmitry Safonov
2018-01-18 17:00   ` Mike Galbraith
2018-01-18 17:53     ` Dmitry Safonov
2018-01-18 18:28       ` Mike Galbraith
2018-01-18 16:12 ` [RFC 5/6] softirq: Add time accounting per-softirq type Dmitry Safonov
2018-01-18 16:12 ` [RFC 6/6] softirq/sched: Account si cpu time to ksoftirqd(s) Dmitry Safonov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180118161238.13792-2-dima@arista.com \
    --to=dima@arista.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.levin@verizon.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=efault@gmx.de \
    --cc=fweisbec@gmail.com \
    --cc=hannes@stressinduktion.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@s-opensource.com \
    --cc=mingo@kernel.org \
    --cc=pabeni@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=rrendec@arista.com \
    --cc=sgruszka@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=wanpeng.li@hotmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.