linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: qianjun.kernel@gmail.com
To: tglx@linutronix.de, peterz@infradead.org, will@kernel.org,
	luto@kernel.org, linux-kernel@vger.kernel.org
Cc: laoar.shao@gmail.com, qais.yousef@arm.com, urezki@gmail.com,
	jun qian <qianjun.kernel@gmail.com>
Subject: [PATCH V7 4/4] softirq: Allow early break the softirq processing loop
Date: Tue, 15 Sep 2020 19:56:09 +0800	[thread overview]
Message-ID: <20200915115609.85106-5-qianjun.kernel@gmail.com> (raw)
In-Reply-To: <20200915115609.85106-1-qianjun.kernel@gmail.com>

From: jun qian <qianjun.kernel@gmail.com>

Allow terminating the softirq processing loop without finishing the vectors.

Signed-off-by: jun qian <qianjun.kernel@gmail.com>
---
 kernel/softirq.c | 113 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 91 insertions(+), 22 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index cbb59b5..29cf079 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -254,6 +254,22 @@ static inline bool __softirq_needs_break(u64 start)
 	return false;
 }
 
+#define SOFTIRQ_PENDING_MASK ((1UL << NR_SOFTIRQS) - 1)
+
+/*
+ * pending_next_bit records the bit at which processing must resume
+ * after the loop has been broken early. This per-CPU variable solves
+ * the following scenario:
+ * Assume bits 0 and 1 are pending when the processing starts. The loop
+ * breaks out after bit 0 has been handled and stores back bit 1 as
+ * pending. Before ksoftirqd runs, bit 0 gets raised again. ksoftirqd
+ * runs and handles bit 0, which takes more than the timeout. As a
+ * result, the bit 0 processing can starve all other softirqs.
+ *
+ * So pending_next_bit is needed to remember where to resume processing.
+ */
+DEFINE_PER_CPU(u32, pending_next_bit);
+
 asmlinkage __visible void __softirq_entry __do_softirq(void)
 {
 	u64 start = sched_clock();
@@ -261,8 +277,11 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	unsigned int max_restart = MAX_SOFTIRQ_RESTART;
 	struct softirq_action *h;
 	unsigned long pending;
+	unsigned long pending_left, pending_again;
 	unsigned int vec_nr;
 	bool in_hardirq;
+	int next_bit;
+	unsigned long flags;
 
 	/*
 	 * Mask out PF_MEMALLOC as the current task context is borrowed for the
@@ -283,25 +302,66 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
 	local_irq_enable();
 
-	for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
-		int prev_count;
-
-		__clear_bit(vec_nr, &pending);
-
-		h = softirq_vec + vec_nr;
-
-		prev_count = preempt_count();
-
-		kstat_incr_softirqs_this_cpu(vec_nr);
+	/*
+	 * pending_left holds the bits that were left unhandled when the
+	 * loop was broken before finishing all vectors. These bits are
+	 * handled first the next time. pending_again holds the bits below
+	 * pending_next_bit, i.e. bits that were raised in the meantime.
+	 * They are handled after the pending_left bits have been handled.
+	 *
+	 * For example:
+	 * If the pending bits are 1101010110 and the loop is broken after
+	 * bit 4 is handled, then pending_next_bit will be 5, pending_left
+	 * is 1101000000 and pending_again is 0000000110.
+	 */
+	next_bit = __this_cpu_read(pending_next_bit);
+	pending_left = pending &
+		(SOFTIRQ_PENDING_MASK << next_bit);
+	pending_again = pending &
+		(SOFTIRQ_PENDING_MASK >> (NR_SOFTIRQS - next_bit));
+
+	while (pending_left || pending_again) {
+		if  (pending_left) {
+			pending = pending_left;
+			pending_left = 0;
+		} else if (pending_again) {
+			pending = pending_again;
+			pending_again = 0;
+		} else
+			break;
+		for_each_set_bit(vec_nr, &pending, NR_SOFTIRQS) {
+			int prev_count;
+
+			__clear_bit(vec_nr, &pending);
+
+			h = softirq_vec + vec_nr;
+
+			prev_count = preempt_count();
+
+			kstat_incr_softirqs_this_cpu(vec_nr);
+
+			trace_softirq_entry(vec_nr);
+			h->action(h);
+			trace_softirq_exit(vec_nr);
+			if (unlikely(prev_count != preempt_count())) {
+				pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
+				       vec_nr, softirq_to_name[vec_nr], h->action,
+				       prev_count, preempt_count());
+				preempt_count_set(prev_count);
+			}
 
-		trace_softirq_entry(vec_nr);
-		h->action(h);
-		trace_softirq_exit(vec_nr);
-		if (unlikely(prev_count != preempt_count())) {
-			pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
-			       vec_nr, softirq_to_name[vec_nr], h->action,
-			       prev_count, preempt_count());
-			preempt_count_set(prev_count);
+			/* Allow early break to avoid big sched delay */
+			if (pending && __softirq_needs_break(start)) {
+				__this_cpu_write(pending_next_bit, vec_nr + 1);
+				/*
+				 * Ensure that the remaining pending bits will be
+				 * handled the next time.
+				 */
+				local_irq_save(flags);
+				or_softirq_pending(pending | pending_again);
+				local_irq_restore(flags);
+				break;
+			}
 		}
 	}
 
@@ -309,12 +369,21 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 		rcu_softirq_qs();
 	local_irq_disable();
 
-	pending = local_softirq_pending();
-	if (pending) {
-		if (!__softirq_needs_break(start) && --max_restart)
-			goto restart;
+	/* get the unhandled bits */
+	pending |= pending_again;
+	if (!pending)
+		/*
+		 * If all of the pending bits have been handled,
+		 * reset the pending_next_bit to 0.
+		 */
+		__this_cpu_write(pending_next_bit, 0);
 
+	if (pending)
 		wakeup_softirqd();
+	else if (!__softirq_needs_break(start) && --max_restart) {
+		pending = local_softirq_pending();
+		if (pending)
+			goto restart;
 	}
 
 	lockdep_softirq_end(in_hardirq);
-- 
1.8.3.1


  parent reply	other threads:[~2020-09-15 12:13 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-15 11:56 [PATCH V7 0/4] Softirq:avoid large sched delay from the pending softirqs qianjun.kernel
2020-09-15 11:56 ` [PATCH V7 1/4] softirq: Use sched_clock() based timeout qianjun.kernel
2020-09-24  8:34   ` Thomas Gleixner
2020-09-15 11:56 ` [PATCH V7 2/4] softirq: Factor loop termination condition qianjun.kernel
2020-09-24  8:36   ` Thomas Gleixner
2020-09-24 12:31     ` Thomas Gleixner
2020-09-15 11:56 ` [PATCH V7 3/4] softirq: Rewrite softirq processing loop qianjun.kernel
2020-09-15 11:56 ` qianjun.kernel [this message]
2020-09-24 15:37   ` [PATCH V7 4/4] softirq: Allow early break the " Thomas Gleixner
2020-09-24 23:08     ` Frederic Weisbecker
2020-09-24 23:10       ` Frederic Weisbecker
2020-09-25 22:37       ` Thomas Gleixner
2020-09-25  0:42     ` Frederic Weisbecker
2020-09-25 22:42       ` Thomas Gleixner
2020-09-26 12:22         ` Frederic Weisbecker
2020-09-28 10:51       ` jun qian
2020-09-29 11:44         ` Frederic Weisbecker
2020-10-09 15:01           ` Qais Yousef
2020-10-13 10:43             ` Frederic Weisbecker
2020-10-13 12:40               ` Qais Yousef
2020-09-26  2:00     ` jun qian
2020-09-27  1:05   ` [softirq] 56c21abbe6: will-it-scale.per_process_ops -9.1% regression kernel test robot
2020-09-28  9:20   ` [PATCH V7 4/4] softirq: Allow early break the softirq processing loop Peter Zijlstra
2020-09-28 11:15     ` jun qian
2020-09-28  9:22   ` Peter Zijlstra
2020-09-28 10:09     ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200915115609.85106-5-qianjun.kernel@gmail.com \
    --to=qianjun.kernel@gmail.com \
    --cc=laoar.shao@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=peterz@infradead.org \
    --cc=qais.yousef@arm.com \
    --cc=tglx@linutronix.de \
    --cc=urezki@gmail.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).