All of lore.kernel.org
 help / color / mirror / Atom feed
From: qianjun.kernel@gmail.com
To: tglx@linutronix.de, peterz@infradead.org, will@kernel.org,
	luto@kernel.org, linux-kernel@vger.kernel.org
Cc: laoar.shao@gmail.com, urezki@gmail.com,
	jun qian <qianjun.kernel@gmail.com>
Subject: [PATCH V6 1/1] Softirq:avoid large sched delay from the pending softirqs
Date: Wed,  9 Sep 2020 17:09:31 +0800	[thread overview]
Message-ID: <20200909090931.8836-1-qianjun.kernel@gmail.com> (raw)

From: jun qian <qianjun.kernel@gmail.com>

When the pending softirqs are fetched, all of them are processed in
the while loop. If each pending softirq needs more than 2 msec to be
processed in this loop, or if one of the softirqs runs for a long
time, the original code still processes all the pending softirqs
without waking up ksoftirqd. This causes a relatively large
scheduling delay on the corresponding CPU, which we do not wish to
see. This patch checks the total time spent processing the pending
softirqs; if that time exceeds 2 ms, ksoftirqd is woken up to avoid
a large scheduling delay.

Signed-off-by: jun qian <qianjun.kernel@gmail.com>
---
 kernel/softirq.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 69 insertions(+), 14 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index c4201b7f..1f696c8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,7 @@
 #include <linux/smpboot.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#include <linux/sched/clock.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
@@ -199,18 +200,17 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 
 /*
  * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
- * but break the loop if need_resched() is set or after 2 ms.
- * The MAX_SOFTIRQ_TIME provides a nice upper bound in most cases, but in
- * certain cases, such as stop_machine(), jiffies may cease to
- * increment and so we need the MAX_SOFTIRQ_RESTART limit as
- * well to make sure we eventually return from this method.
+ * but break the loop if need_resched() is set or after MAX_SOFTIRQ_TIME_NS
+ * ns. In the loop, if the processing time of the softirq has exceeded
+ * MAX_SOFTIRQ_TIME_NS ns, we also need to break the loop to wake up
+ * ksoftirqd.
  *
  * These limits have been established via experimentation.
  * The two things to balance is latency against fairness -
  * we want to handle softirqs as soon as possible, but they
  * should not be able to lock up the box.
  */
-#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
+#define MAX_SOFTIRQ_TIME_NS 2000000
 #define MAX_SOFTIRQ_RESTART 10
 
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -246,15 +246,20 @@ static inline void lockdep_softirq_end(bool in_hardirq)
 static inline void lockdep_softirq_end(bool in_hardirq) { }
 #endif
 
+DEFINE_PER_CPU(__u32, pending_new_flag);
+DEFINE_PER_CPU(__u32, pending_next_bit);
+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
 asmlinkage __visible void __softirq_entry __do_softirq(void)
 {
-	unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
+	u64 end = sched_clock() + MAX_SOFTIRQ_TIME_NS;
 	unsigned long old_flags = current->flags;
 	int max_restart = MAX_SOFTIRQ_RESTART;
 	struct softirq_action *h;
 	bool in_hardirq;
-	__u32 pending;
-	int softirq_bit;
+	__u32 pending, pending_left, pending_new;
+	int softirq_bit, next_bit;
+	unsigned long flags;
 
 	/*
 	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -277,10 +282,33 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
 	h = softirq_vec;
 
-	while ((softirq_bit = ffs(pending))) {
-		unsigned int vec_nr;
+	next_bit = per_cpu(pending_next_bit, smp_processor_id());
+	per_cpu(pending_new_flag, smp_processor_id()) = 0;
+
+	pending_left = pending &
+		(SOFTIRQ_PENDING_MASK << next_bit);
+	pending_new = pending &
+		(SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+	/*
+	 * In order to be fair, we should process the pending bits following
+	 * the order of the last processing run.
+	 */
+	while ((softirq_bit = ffs(pending_left)) ||
+		(softirq_bit = ffs(pending_new))) {
 		int prev_count;
+		unsigned int vec_nr = 0;
 
+		/*
+		 * When the bits in pending_left have all been handled, we
+		 * need to reset h to softirq_vec.
+		 */
+		if (!ffs(pending_left)) {
+			if (per_cpu(pending_new_flag, smp_processor_id()) == 0) {
+				h = softirq_vec;
+				per_cpu(pending_new_flag, smp_processor_id()) = 1;
+			}
+		}
 		h += softirq_bit - 1;
 
 		vec_nr = h - softirq_vec;
@@ -298,17 +326,44 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 			preempt_count_set(prev_count);
 		}
 		h++;
-		pending >>= softirq_bit;
+
+		if (ffs(pending_left))
+			pending_left >>= softirq_bit;
+		else
+			pending_new >>= softirq_bit;
+
+		/*
+		 * The softirq actions have been running for too long,
+		 * so we may need to wake up ksoftirqd.
+		 */
+		if (need_resched() && sched_clock() > end) {
+			/*
+			 * Ensure that the remaining pending bits will be
+			 * handled.
+			 */
+			local_irq_save(flags);
+			if (ffs(pending_left))
+				or_softirq_pending((pending_left << (vec_nr + 1)) |
+							pending_new);
+			else
+				or_softirq_pending(pending_new << (vec_nr + 1));
+			local_irq_restore(flags);
+			per_cpu(pending_next_bit, smp_processor_id()) = vec_nr + 1;
+			break;
+		}
 	}
 
+	/* reset the pending_next_bit */
+	per_cpu(pending_next_bit, smp_processor_id()) = 0;
+
 	if (__this_cpu_read(ksoftirqd) == current)
 		rcu_softirq_qs();
 	local_irq_disable();
 
 	pending = local_softirq_pending();
 	if (pending) {
-		if (time_before(jiffies, end) && !need_resched() &&
-		    --max_restart)
+		if (!need_resched() && --max_restart &&
+		    sched_clock() <= end)
 			goto restart;
 
 		wakeup_softirqd();
-- 
1.8.3.1


             reply	other threads:[~2020-09-09  9:09 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-09  9:09 qianjun.kernel [this message]
2020-09-11 15:55 ` [PATCH V6 1/1] Softirq:avoid large sched delay from the pending softirqs peterz
2020-09-12  7:17   ` jun qian
2020-09-11 16:46 ` Qais Yousef
2020-09-11 18:28   ` peterz
2020-09-14 11:27     ` Qais Yousef
2020-09-14 14:14       ` peterz
2020-09-14 15:28         ` Qais Yousef
     [not found]     ` <CA+njcd3HFV5Gqtt9qzTAzpnA4-4ngPBQ7T0gwgc0Fm9_VoJLcQ@mail.gmail.com>
2020-09-14 11:41       ` Qais Yousef

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200909090931.8836-1-qianjun.kernel@gmail.com \
    --to=qianjun.kernel@gmail.com \
    --cc=laoar.shao@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=urezki@gmail.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.