linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
@ 2020-10-07 12:04 Michal Hocko
  2020-10-07 12:19 ` Peter Zijlstra
                   ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-07 12:04 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar, Michal Hocko

From: Michal Hocko <mhocko@suse.com>

Many people are still relying on pre built distribution kernels and so
distributions have to provide multiple kernel flavors to offer different
preemption models. Most of them are providing PREEMPT_NONE for typical
server deployments and PREEMPT_VOLUNTARY for desktop users.

Having two different kernel binaries differing only by the preemption
mode seems rather wasteful and inflexible. Especially when the difference
between PREEMPT_NONE and PREEMPT_VOLUNTARY is really minimal. Both only
allow explicit scheduling points while running in the kernel and it is
only might_sleep which adds additional preemption points for
PREEMPT_VOLUNTARY.

Add a kernel command line parameter preempt_mode=[none, voluntary] which
allows to override the default compile time preemption mode
(CONFIG_PREEMPT_NONE resp. CONFIG_PREEMPT_VOLUNTARY). Voluntary preempt
mode is guarded by a jump label to prevent any potential runtime overhead.

Signed-off-by: Michal Hocko <mhocko@suse.com>
---

Hi all,
this is sent as an RFC because it still needs some work. E.g. move jump
label changes into their own patch. They are needed mostly to get rid of
header files dependencies (seen for CONFIG_JUMP_LABEL=n via atomic.h ->
bug.h).

I wanted to make sure that the idea is sound for maintainers first. The
next step would be extending the command line to support full preemption
as well but there is much more work in that area. Frederic has promised
to look into that.

Thoughts?

 .../admin-guide/kernel-parameters.txt         |  5 ++
 include/linux/gpio/consumer.h                 |  1 +
 include/linux/jump_label.h                    | 44 +----------------
 include/linux/jump_label_type.h               | 49 +++++++++++++++++++
 include/linux/kernel.h                        | 12 +++--
 kernel/sched/core.c                           | 30 ++++++++++++
 6 files changed, 95 insertions(+), 46 deletions(-)
 create mode 100644 include/linux/jump_label_type.h

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..96bb74faeb50 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3873,6 +3873,11 @@
 			Format: {"off"}
 			Disable Hardware Transactional Memory
 
+	preempt_mode={none,voluntary}
+			Set the preemption mode.
+			none - equivalent to CONFIG_PREEMPT_NONE
+			voluntary - equivalent to CONFIG_PREEMPT_VOLUNTARY
+
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 901aab89d025..d64e6dda5755 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -3,6 +3,7 @@
 #define __LINUX_GPIO_CONSUMER_H
 
 #include <linux/bits.h>
+#include <linux/jump_label.h>
 #include <linux/bug.h>
 #include <linux/compiler_types.h>
 #include <linux/err.h>
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 32809624d422..bd0d846d0bde 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -75,6 +75,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/jump_label_type.h>
 
 extern bool static_key_initialized;
 
@@ -82,35 +83,6 @@ extern bool static_key_initialized;
 				    "%s(): static key '%pS' used before call to jump_label_init()", \
 				    __func__, (key))
 
-#ifdef CONFIG_JUMP_LABEL
-
-struct static_key {
-	atomic_t enabled;
-/*
- * Note:
- *   To make anonymous unions work with old compilers, the static
- *   initialization of them requires brackets. This creates a dependency
- *   on the order of the struct with the initializers. If any fields
- *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
- *   to be modified.
- *
- * bit 0 => 1 if key is initially true
- *	    0 if initially false
- * bit 1 => 1 if points to struct static_key_mod
- *	    0 if points to struct jump_entry
- */
-	union {
-		unsigned long type;
-		struct jump_entry *entries;
-		struct static_key_mod *next;
-	};
-};
-
-#else
-struct static_key {
-	atomic_t enabled;
-};
-#endif	/* CONFIG_JUMP_LABEL */
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_JUMP_LABEL
@@ -343,14 +315,6 @@ static inline void static_key_disable(struct static_key *key)
  * All the below code is macros in order to play type games.
  */
 
-struct static_key_true {
-	struct static_key key;
-};
-
-struct static_key_false {
-	struct static_key key;
-};
-
 #define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
 #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
 
@@ -360,18 +324,12 @@ struct static_key_false {
 #define DEFINE_STATIC_KEY_TRUE_RO(name)	\
 	struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT
 
-#define DECLARE_STATIC_KEY_TRUE(name)	\
-	extern struct static_key_true name
-
 #define DEFINE_STATIC_KEY_FALSE(name)	\
 	struct static_key_false name = STATIC_KEY_FALSE_INIT
 
 #define DEFINE_STATIC_KEY_FALSE_RO(name)	\
 	struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT
 
-#define DECLARE_STATIC_KEY_FALSE(name)	\
-	extern struct static_key_false name
-
 #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count)		\
 	struct static_key_true name[count] = {			\
 		[0 ... (count) - 1] = STATIC_KEY_TRUE_INIT,	\
diff --git a/include/linux/jump_label_type.h b/include/linux/jump_label_type.h
new file mode 100644
index 000000000000..37cb02c12f35
--- /dev/null
+++ b/include/linux/jump_label_type.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_JUMP_LABEL_TYPE_H
+#define _LINUX_JUMP_LABEL_TYPE_H
+
+#ifdef CONFIG_JUMP_LABEL
+
+struct static_key {
+	atomic_t enabled;
+/*
+ * Note:
+ *   To make anonymous unions work with old compilers, the static
+ *   initialization of them requires brackets. This creates a dependency
+ *   on the order of the struct with the initializers. If any fields
+ *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
+ *   to be modified.
+ *
+ * bit 0 => 1 if key is initially true
+ *	    0 if initially false
+ * bit 1 => 1 if points to struct static_key_mod
+ *	    0 if points to struct jump_entry
+ */
+	union {
+		unsigned long type;
+		struct jump_entry *entries;
+		struct static_key_mod *next;
+	};
+};
+
+#else
+struct static_key {
+	atomic_t enabled;
+};
+#endif	/* CONFIG_JUMP_LABEL */
+
+struct static_key_true {
+	struct static_key key;
+};
+
+struct static_key_false {
+	struct static_key key;
+};
+
+#define DECLARE_STATIC_KEY_TRUE(name)	\
+	extern struct static_key_true name
+
+#define DECLARE_STATIC_KEY_FALSE(name)	\
+	extern struct static_key_false name
+
+#endif /* _LINUX_JUMP_LABEL_TYPE_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c25b8e41c0ea..94238df64afc 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/typecheck.h>
 #include <linux/printk.h>
 #include <linux/build_bug.h>
+#include <linux/jump_label_type.h>
 #include <asm/byteorder.h>
 #include <asm/div64.h>
 #include <uapi/linux/kernel.h>
@@ -192,11 +193,16 @@ struct completion;
 struct pt_regs;
 struct user;
 
+#ifndef CONFIG_PREEMPTION
 #ifdef CONFIG_PREEMPT_VOLUNTARY
-extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+DECLARE_STATIC_KEY_TRUE(preempt_voluntary_key);
 #else
-# define might_resched() do { } while (0)
+DECLARE_STATIC_KEY_FALSE(preempt_voluntary_key);
+#endif
+
+extern int _cond_resched(void);
+# define might_resched() \
+	do { if (static_branch_likely(&preempt_voluntary_key)) _cond_resched(); } while (0)
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d95dc3f4644..07d37d862637 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+DEFINE_STATIC_KEY_TRUE(preempt_voluntary_key);
+#else
+/* PREEMPT_NONE vs PREEMPT_VOLUNTARY */
+DEFINE_STATIC_KEY_FALSE(preempt_voluntary_key);
+#endif
+EXPORT_SYMBOL(preempt_voluntary_key);
+
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
 /*
  * Debugging: various feature bits
@@ -8482,3 +8490,25 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
 {
         trace_sched_update_nr_running_tp(rq, count);
 }
+
+#ifndef CONFIG_PREEMPTION
+static int __init setup_non_preempt_mode(char *str)
+{
+	if (!strcmp(str, "none")) {
+		if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+			static_branch_disable(&preempt_voluntary_key);
+			pr_info("Switching to PREEMPT_NONE mode.");
+		}
+	} else if (!strcmp(str, "voluntary")) {
+		if (!IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+			static_branch_enable(&preempt_voluntary_key);
+			pr_info("Switching to PREEMPT_VOLUNTARY mode.");
+		}
+	} else {
+		pr_warn("Unsupported preempt mode %s\n", str);
+		return 1;
+	}
+	return 0;
+}
+__setup("preempt_mode=", setup_non_preempt_mode);
+#endif
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:04 [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
@ 2020-10-07 12:19 ` Peter Zijlstra
  2020-10-07 12:29   ` Michal Hocko
  2020-10-07 12:21 ` Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-07 12:19 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman,
	Ingo Molnar, Michal Hocko

On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
> 
> Many people are still relying on pre built distribution kernels and so
> distributions have to provide multiple kernel flavors to offer different
> preemption models. Most of them are providing PREEMPT_NONE for typical
> server deployments and PREEMPT_VOLUNTARY for desktop users.

Is there actually a benefit to NONE? We were recently talking about
removing it.

The much more interesting (runtime) switch (IMO) would be between
VOLUNTARY and PREEMPT.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:04 [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
  2020-10-07 12:19 ` Peter Zijlstra
@ 2020-10-07 12:21 ` Peter Zijlstra
  2020-10-07 12:35   ` Michal Hocko
  2020-10-09  9:12 ` Michal Hocko
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
  3 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-07 12:21 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman,
	Ingo Molnar, Michal Hocko

On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> I wanted to make sure that the idea is sound for maintainers first. The
> next step would be extending the command line to support full preemption
> as well but there is much more work in that area. Frederic has promised
> to look into that.

The sanest way there is to static_call() __preempt_schedule() I think.
Alternatively you could use alternatives but that makes the whole thing
arch specific again.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:19 ` Peter Zijlstra
@ 2020-10-07 12:29   ` Michal Hocko
  2020-10-07 13:01     ` Mel Gorman
  0 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-07 12:29 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Wed 07-10-20 14:19:39, Peter Zijlstra wrote:
> On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> > From: Michal Hocko <mhocko@suse.com>
> > 
> > Many people are still relying on pre built distribution kernels and so
> > distributions have to provide multiple kernel flavors to offer different
> > preemption models. Most of them are providing PREEMPT_NONE for typical
> > server deployments and PREEMPT_VOLUNTARY for desktop users.
> 
> Is there actually a benefit to NONE? We were recently talking about
> removing it.

I believe Mel can provide much better insight. We have been historically using
PREEMPT_NONE for our enterprise customers mostly for nice throughput
numbers. Many users are really targeting throughput much more than
latencies. My understanding is that even though VOLUNTARY preemption model
doesn't add too many preemption points on top of NONE it is still
something that is observable (IIRC 2-3% on hackbench).
 
> The much more interesting (runtime) switch (IMO) would be between
> VOLUNTARY and PREEMPT.

Absolutely and as said we are looking into that. This is meant to be a
first baby step in that direction. Still very useful in our current
situation when we want to provide both NONE and VOLUNTARY.

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:21 ` Peter Zijlstra
@ 2020-10-07 12:35   ` Michal Hocko
  2020-10-09  9:47     ` Peter Zijlstra
  0 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-07 12:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Wed 07-10-20 14:21:44, Peter Zijlstra wrote:
> On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> > I wanted to make sure that the idea is sound for maintainers first. The
> > next step would be extending the command line to support full preemption
> > as well but there is much more work in that area. Frederic has promised
> > to look into that.
> 
> The sanest way there is to static_call() __preempt_schedule() I think.

Yes, I have checked the code and identified few other places like
irqentry_exit_cond_resched. We also need unconditional
CONFIG_PREEMPT_COUNT IIUC and there are quite some places guarded by
CONFIG_PREEMPTION that would need to be examined. Some of them are
likely pretending to be more clever than they really are/should be -
e.g. mm/slub.c. So there is likely a lot of leg work.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:29   ` Michal Hocko
@ 2020-10-07 13:01     ` Mel Gorman
  0 siblings, 0 replies; 31+ messages in thread
From: Mel Gorman @ 2020-10-07 13:01 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Peter Zijlstra, Thomas Gleixner, Frederic Weisbecker, LKML, Ingo Molnar

On Wed, Oct 07, 2020 at 02:29:23PM +0200, Michal Hocko wrote:
> On Wed 07-10-20 14:19:39, Peter Zijlstra wrote:
> > On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> > > From: Michal Hocko <mhocko@suse.com>
> > > 
> > > Many people are still relying on pre built distribution kernels and so
> > > distributions have to provide mutliple kernel flavors to offer different
> > > preemption models. Most of them are providing PREEMPT_NONE for typical
> > > server deployments and PREEMPT_VOLUNTARY for desktop users.
> > 
> > Is there actually a benefit to NONE? We were recently talking about
> > removing it.
> 
> I believe Mel can provide much better insight. We have been historically using
> PREEMPT_NONE for our enterprise customers mostly for nice throughput
> numbers. Many users are really targeting throughput much more than
> latencies. My understanding is that even though VOLUNTARY preemption model
> doesn't add too many preemption points on top of NONE it is still
> something that is observable (IIRC 2-3% on hackbench).
>  

It's marginal from the tests I ran but that was based on 5.3. At worst,
it looked like roughly a hit but a lot of loads simply didn't notice.
However, it might vary between architectures that I cannot cover or
workloads that I didn't consider.  As the impact of PREEMPT_VOLUNTARY
depends on where cond_resched and might_sleep is used, it's also something
that can vary over time. The intent was that by having the command-line
switch, a user could test the switch if there was a suspicion that a
regression was related to PREEMPT_VOLUNTARY as opposed to telling them
"tough, that's the reality now".

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:04 [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
  2020-10-07 12:19 ` Peter Zijlstra
  2020-10-07 12:21 ` Peter Zijlstra
@ 2020-10-09  9:12 ` Michal Hocko
  2020-10-09  9:42   ` Peter Zijlstra
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
  3 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-09  9:12 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Wed 07-10-20 14:04:01, Michal Hocko wrote:
> From: Michal Hocko <mhocko@suse.com>
> 
> Many people are still relying on pre built distribution kernels and so
> distributions have to provide multiple kernel flavors to offer different
> preemption models. Most of them are providing PREEMPT_NONE for typical
> server deployments and PREEMPT_VOLUNTARY for desktop users.
> 
> Having two different kernel binaries differing only by the preemption
> mode seems rather wasteful and inflexible. Especially when the difference
> between PREEMPT_NONE and PREEMPT_VOLUNTARY is really minimal. Both only
> allow explicit scheduling points while running in the kernel and it is
> only might_sleep which adds additional preemption points for
> PREEMPT_VOLUNTARY.
> 
> Add a kernel command line parameter preempt_mode=[none, voluntary] which
> allows to override the default compile time preemption mode
> (CONFIG_PREEMPT_NONE resp. CONFIG_PREEMPT_VOLUNTARY). Voluntary preempt
> mode is guarded by a jump label to prevent any potential runtime overhead.
> 
> Signed-off-by: Michal Hocko <mhocko@suse.com>
> ---
> 
> Hi all,
> this is sent as an RFC because it still needs some work. E.g. move jump
> label changes into their own patch. They are needed mostly to get rid of
> header files dependencies (seen for CONFIG_JUMP_LABEL=n via atomic.h ->
> bug.h).
> 
> I wanted to make sure that the idea is sound for maintainers first. The
> next step would be extending the command line to support full preemption
> as well but there is much more work in that area. Frederic has promised
> to look into that.
> 
> Thoughts?

Is there any additional feedback? Should I split up the patch and repost
for inclusion?
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09  9:12 ` Michal Hocko
@ 2020-10-09  9:42   ` Peter Zijlstra
  2020-10-09 10:10     ` Michal Hocko
  0 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09  9:42 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri, Oct 09, 2020 at 11:12:18AM +0200, Michal Hocko wrote:
> Is there any additional feedback? Should I split up the patch and repost
> for inclusion?

Maybe remove PREEMPT_NONE after that? Since that's then equivalent to
building with VOLUNTARY and booting with preempt=none.


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-07 12:35   ` Michal Hocko
@ 2020-10-09  9:47     ` Peter Zijlstra
  2020-10-09 10:14       ` Michal Hocko
  0 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09  9:47 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Wed, Oct 07, 2020 at 02:35:53PM +0200, Michal Hocko wrote:
> On Wed 07-10-20 14:21:44, Peter Zijlstra wrote:
> > On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> > > I wanted to make sure that the idea is sound for maintainers first. The
> > > next step would be extending the command line to support full preemption
> > > as well but there is much more work in that area. Frederic has promised
> > > to look into that.
> > 
> > The sanest way there is to static_call() __preempt_schedule() I think.
> 
> Yes, I have checked the code and identified few other places like
> irqentry_exit_cond_resched. We also need unconditional
> CONFIG_PREEMPT_COUNT IIUC and there are quite some places guarded by
> CONFIG_PREEMPTION that would need to be examined. Some of them are
> likely pretending to be more clever than they really are/should be -
> e.g. mm/slub.c. So there is likely a lot of leg work.

The easiest way might be to introduce PREEMPT_DYNAMIC that
depends/selects PREEMPT. That way you're basically running a PREEMPT=y
kernel.

Then have PREEMPT_DYNAMIC allow disabling the __preempt_schedule /
preempt_schedule_irq() callsites using static_call/static_branch
respectively.

That is, work backwards (from PREEMPT back to VOLUNTARY) instead of the
other way around.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09  9:42   ` Peter Zijlstra
@ 2020-10-09 10:10     ` Michal Hocko
  2020-10-09 10:14       ` Peter Zijlstra
  0 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 10:10 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri 09-10-20 11:42:45, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 11:12:18AM +0200, Michal Hocko wrote:
> > Is there any additional feedback? Should I split up the patch and repost
> > for inclusion?
> 
> Maybe remove PREEMPT_NONE after that?  Since that's then equivalent to
> building with VOLUNTARY and booting with preempt=none.

So do you mean that I should post an additional patch which does this on
top? With a justification that there is one option less to choose from?

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf82259cff96..103a5f9e4bcf 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -2,20 +2,7 @@
 
 choice
 	prompt "Preemption Model"
-	default PREEMPT_NONE
-
-config PREEMPT_NONE
-	bool "No Forced Preemption (Server)"
-	help
-	  This is the traditional Linux preemption model, geared towards
-	  throughput. It will still provide good latencies most of the
-	  time, but there are no guarantees and occasional longer delays
-	  are possible.
-
-	  Select this option if you are building a kernel for a server or
-	  scientific/computation system, or if you want to maximize the
-	  raw processing power of the kernel, irrespective of scheduling
-	  latencies.
+	default PREEMPT_VOLUNTARY
 
 config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09  9:47     ` Peter Zijlstra
@ 2020-10-09 10:14       ` Michal Hocko
  2020-10-09 10:20         ` Peter Zijlstra
  0 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 10:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri 09-10-20 11:47:41, Peter Zijlstra wrote:
> On Wed, Oct 07, 2020 at 02:35:53PM +0200, Michal Hocko wrote:
> > On Wed 07-10-20 14:21:44, Peter Zijlstra wrote:
> > > On Wed, Oct 07, 2020 at 02:04:01PM +0200, Michal Hocko wrote:
> > > > I wanted to make sure that the idea is sound for maintainers first. The
> > > > next step would be extending the command line to support full preemption
> > > > as well but there is much more work in that area. Frederic has promised
> > > > to look into that.
> > > 
> > > The sanest way there is to static_call() __preempt_schedule() I think.
> > 
> > Yes, I have checked the code and identified few other places like
> > irqentry_exit_cond_resched. We also need unconditional
> > CONFIG_PREEMPT_COUNT IIUC and there are quite some places guarded by
> > CONFIG_PREEMPTION that would need to be examined. Some of them are
> > likely pretending to be more clever than they really are/should be -
> > e.g. mm/slub.c. So there is likely a lot of leg work.
> 
> The easiest way might be to introduce PREEMPT_DYNAMIC that
> depends/selects PREEMPT. That way you're basically running a PREEMPT=y
> kernel.
> 
> Then have PREEMPT_DYNAMIC allow disabling the __preempt_schedule /
> preempt_schedule_irq() callsites using static_call/static_branch
> respectively.
> 
> That is, work backwards (from PREEMPT back to VOLUNTARY) instead of the
> other way around.

My original idea was that the config would only define the default
preemption mode. preempt_none parameter would then just act as an
override. That would mean that CONFIG_PREEMPTION would be effectively
gone from the kernel. The reason being that any code outside of the
scheduler shouldn't really care about the preemption mode. I suspect
this will prevent from dubious hacks and provide a more robust code in
the end.

Does that sound reasonable?

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:10     ` Michal Hocko
@ 2020-10-09 10:14       ` Peter Zijlstra
  2020-10-09 10:37         ` Michal Hocko
  0 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 10:14 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri, Oct 09, 2020 at 12:10:44PM +0200, Michal Hocko wrote:
> On Fri 09-10-20 11:42:45, Peter Zijlstra wrote:
> > On Fri, Oct 09, 2020 at 11:12:18AM +0200, Michal Hocko wrote:
> > > Is there any additional feedback? Should I split up the patch and repost
> > > for inclusion?
> > 
> > Maybe remove PREEMPT_NONE after that?  Since that's then equivalent to
> > building with VOLUNTARY and booting with preempt=none.
> 
> So do you mean that I should post an additional patch which does this on
> top? With a justification that there is one option less to choose from?

Exactly!

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:14       ` Michal Hocko
@ 2020-10-09 10:20         ` Peter Zijlstra
  2020-10-09 10:48           ` Michal Hocko
  0 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 10:20 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri, Oct 09, 2020 at 12:14:05PM +0200, Michal Hocko wrote:
> On Fri 09-10-20 11:47:41, Peter Zijlstra wrote:

> > That is, work backwards (from PREEMPT back to VOLUNTARY) instead of the
> > other way around.
> 
> My original idea was that the config would only define the default
> preemption mode. preempt_none parameter would then just act as an
> override. That would mean that CONFIG_PREEMPTION would be effectively
> gone from the kernel. The reason being that any code outside of the
> scheduler shouldn't really care about the preemption mode. I suspect
> this will prevent from dubious hacks and provide a more robust code in
> the end.

Sure; but the way of arriving at that destination might be easier if
you work backwards from PREEMPT=y, because while there _should_ not be
dependencies outside of the scheduler, we both know there are.

This also makes your patches independent of the series that makes
CONFIG_PREEMPTION unconditional.

It also gives Kconfig space to limit the dynamic thing to archs that
have sufficient support (we'll be relying on static_call/static_branch,
and not everybody has that implemented in a way that makes it the
dynamic change worth-while).

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:14       ` Peter Zijlstra
@ 2020-10-09 10:37         ` Michal Hocko
  2020-10-09 11:42           ` Peter Zijlstra
  0 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 10:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman,
	Ingo Molnar, Richard Henderson, Ivan Kokshaysky, Matt Turner,
	linux-alpha, Brian Cain, linux-hexagon, Geert Uytterhoeven,
	linux-m68k, Jeff Dike, Richard Weinberger, Anton Ivanov,
	linux-um

On Fri 09-10-20 12:14:31, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 12:10:44PM +0200, Michal Hocko wrote:
> > On Fri 09-10-20 11:42:45, Peter Zijlstra wrote:
> > > On Fri, Oct 09, 2020 at 11:12:18AM +0200, Michal Hocko wrote:
> > > > Is there any additional feedback? Should I split up the patch and repost
> > > > for inclusion?
> > > 
> > > Maybe remove PREEMPT_NONE after that?  Since that's then equivalent to
> > > building with VOLUNTARY and booting with preempt=none.
> > 
> > So do you mean that I should post an additional patch which does this on
> > top? With a justification that there is one option less to chose from?
> 
> Exactly!

It seems we have to get rid of CONFIG_NO_PREEMPT first
$ git grep ARCH_NO_PREEMPT
arch/Kconfig:config ARCH_NO_PREEMPT
arch/alpha/Kconfig:     select ARCH_NO_PREEMPT
arch/hexagon/Kconfig:   select ARCH_NO_PREEMPT
arch/m68k/Kconfig:      select ARCH_NO_PREEMPT if !COLDFIRE
arch/um/Kconfig:        select ARCH_NO_PREEMPT
kernel/Kconfig.preempt: depends on !ARCH_NO_PREEMPT
kernel/Kconfig.preempt: depends on !ARCH_NO_PREEMPT
lib/Kconfig.debug:      select PREEMPT_COUNT if !ARCH_NO_PREEMPT
lib/Kconfig.debug:      depends on !ARCH_NO_PREEMPT

Is there anybody working on that. Is this even possible? I can see it
has been added by 87a4c375995e ("kconfig: include kernel/Kconfig.preempt
from init/Kconfig") but this looks more like a mechanical change and it
has defined ARCH_NO_PREEMPT all arches which haven't included
Kconfig.preempt. But is there any reason why those cannot support
preemption for some reason? Cc respective maintainer (the email thread
starts http://lkml.kernel.org/r/20201007120401.11200-1-mhocko@kernel.org)
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:20         ` Peter Zijlstra
@ 2020-10-09 10:48           ` Michal Hocko
  2020-10-09 11:17             ` Michal Hocko
  2020-10-09 11:39             ` Peter Zijlstra
  0 siblings, 2 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 10:48 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri 09-10-20 12:20:09, Peter Zijlstra wrote:
> On Fri, Oct 09, 2020 at 12:14:05PM +0200, Michal Hocko wrote:
> > On Fri 09-10-20 11:47:41, Peter Zijlstra wrote:
> 
> > > That is, work backwards (from PREEMPT back to VOLUNTARY) instead of the
> > > other way around.
> > 
> > My original idea was that the config would only define the default
> > preemption mode. preempt_none parameter would then just act as an
> > override. That would mean that CONFIG_PREEMPTION would be effectively
> > gone from the kernel. The reason being that any code outside of the
> > scheduler shouldn't really care about the preemption mode. I suspect
> > this will prevent from dubious hacks and provide a more robust code in
> > the end.
> 
> Sure; but the way of arriving at that destination might be easier if
> you work backwards from PREEMPT=y, because while there _should_ not be
> dependencies outside of the scheduler, we both know there are.

Wouldn't we need to examine each of the CONFIG_PREEMPTION code anyway?
And wouldn't that be even more tricky? The boot time option would result
in a more restrictive preemption mode while the code is actually
assuming a less restrictive one.
 
> This also makes your patches independent of the series that makes
> CONFIG_PREEMPTION unconditional.
>
> It also gives Kconfig space to limit the dynamic thing to archs that
> have sufficient support (we'll be relying on static_call/static_branch,
> and not everybody has that implemented in a way that makes it the
> dynamic change worth-while).

Hmm, this is actually a good argument. I can imagine that kernels
without CONFIG_JUMP_LABEL might increase a runtime overhead for
something that users of that kernel might not really be interested in.
This would make CONFIG_PREEMPT_DYNAMIC be selected by CONFIG_JUMP_LABEL.

I will add the CONFIG_PREEMPT_DYNAMIC in the next version. I just have
to think whether flipping the direction is really safe and easier in the
end. For our particular usecase we are more interested in
NONE<->VOLUNTARY at this moment and having full preemption in the mix
later is just fine. If you insist on the other direction then we can
work on that.

Thanks!
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:48           ` Michal Hocko
@ 2020-10-09 11:17             ` Michal Hocko
  2020-10-09 11:26               ` Michal Hocko
  2020-10-09 11:39             ` Peter Zijlstra
  1 sibling, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 11:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri 09-10-20 12:48:09, Michal Hocko wrote:
[...]
> I will add the CONFIG_PREEMPT_DYNAMIC in the next version. I just have
> to think whether flipping the direction is really safe and easier in the
> end. For our particular usecase we are more interested in
> NONE<->VOLUNTARY at this moment and having full preemption in the mix
> later is just fine. If you insist on the other direction then we can
> work on that.

This is a quick implementation of PREEMPT_DYNAMIC (still with the
original approach to start from non preemptive kernels). It is a bit
hairy but I suspect that starting with CONFIG_PREEMPTION wouldn't make
it very much easier but I haven't tried that yet. Advantage of this
approach is that it is clear that it is safe as NONE/VOLUNTARY are
trivial to examine.

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d2d37bd5ecd5..b61ab02dba84 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -193,20 +193,36 @@ struct completion;
 struct pt_regs;
 struct user;
 
+/*
+ * cond_resched() and cond_resched_lock(): latency reduction via
+ * explicit rescheduling in places that are safe. The return
+ * value indicates whether a reschedule was done in fact.
+ * cond_resched_lock() will drop the spinlock before scheduling,
+ */
 #ifndef CONFIG_PREEMPTION
+extern int _cond_resched(void);
+#else
+static inline int _cond_resched(void) { return 0; }
+#endif
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 DECLARE_STATIC_KEY_TRUE(preempt_voluntary_key);
 #else
 DECLARE_STATIC_KEY_FALSE(preempt_voluntary_key);
 #endif
 
-extern int _cond_resched(void);
 # define might_resched() \
 	do { if (static_branch_likely(&preempt_voluntary_key)) _cond_resched(); } while (0)
 #else
+
+#ifdef CONFIG_PREEMPT_VOLUNTARY
 # define might_resched() \
-	do { } while (0)
+	do { _cond_resched(); } while (0)
+#else
+# define might_resched() do { } while (0)
 #endif
+#endif /* CONFIG_PREEMPT_DYNAMIC */
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 extern void ___might_sleep(const char *file, int line, int preempt_offset);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..184b5e162184 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1812,18 +1812,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
-/*
- * cond_resched() and cond_resched_lock(): latency reduction via
- * explicit rescheduling in places that are safe. The return
- * value indicates whether a reschedule was done in fact.
- * cond_resched_lock() will drop the spinlock before scheduling,
- */
-#ifndef CONFIG_PREEMPTION
-extern int _cond_resched(void);
-#else
-static inline int _cond_resched(void) { return 0; }
-#endif
-
 #define cond_resched() ({			\
 	___might_sleep(__FILE__, __LINE__, 0);	\
 	_cond_resched();			\
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf82259cff96..a73b5564cc51 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -74,6 +74,25 @@ config PREEMPT_RT
 
 endchoice
 
+config PREEMPT_DYNAMIC
+	bool "Allow boot time preemption model selection"
+	depends on PREEMPT_NONE || PREEMPT_VOLUNTARY
+	help
+	  This option allows the preemption model to be selected on the
+	  kernel command line, overriding the default preemption model
+	  chosen at compile time.
+
+	  The feature is primarily interesting for Linux distributions which
+	  provide a pre-built kernel binary to reduce the number of kernel
+	  flavors they offer while still covering different usecases.
+
+	  The runtime overhead is negligible with JUMP_LABEL enabled but if
+	  runtime patching is not available for the specific architecture then
+	  the potential overhead should be considered.
+
+	  Select this if the same pre-built kernel should be used for both
+	  server and desktop workloads.
+
 config PREEMPT_COUNT
        bool
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07d37d862637..fe22b2fca864 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -43,6 +43,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 DEFINE_STATIC_KEY_TRUE(preempt_voluntary_key);
 #else
@@ -51,6 +53,8 @@ DEFINE_STATIC_KEY_FALSE(preempt_voluntary_key);
 #endif
 EXPORT_SYMBOL(preempt_voluntary_key);
 
+#endif
+
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
 /*
  * Debugging: various feature bits
@@ -8491,7 +8495,7 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
         trace_sched_update_nr_running_tp(rq, count);
 }
 
-#ifndef CONFIG_PREEMPTION
+#ifdef CONFIG_PREEMPT_DYNAMIC
 static int __init setup_non_preempt_mode(char *str)
 {
 	if (!strcmp(str, "none")) {
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 11:17             ` Michal Hocko
@ 2020-10-09 11:26               ` Michal Hocko
  0 siblings, 0 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 11:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri 09-10-20 13:17:04, Michal Hocko wrote:
[...]
> +config PREEMPT_DYNAMIC
> +	bool "Allow boot time preemption model selection"

	depends on !ARCH_NO_PREEMPT

> +	depends on PREEMPT_NONE || PREEMPT_VOLUNTARY
> +	help
> +	  This option allows to define the preemption model on the kernel
> +	  command line parameter and thus override the default preemption
> +	  model defined during compile time.
> +
> +	  The feature is primarily interesting for Linux distributions which
> +	  provide a pre-built kernel binary to reduce the number of kernel
> +	  flavors they offer while still offering different usecases.
> +
> +	  The runtime overhead is negligible with JUMP_LABELS enabled but if
> +	  runtime patching is not available for the specific architecture then
> +	  the potential overhead should be considered.
> +
> +	  Select if you the same pre-built kernel should be used for both Server
> +	  and Desktop workloads.
> +
>  config PREEMPT_COUNT
>         bool
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:48           ` Michal Hocko
  2020-10-09 11:17             ` Michal Hocko
@ 2020-10-09 11:39             ` Peter Zijlstra
  1 sibling, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 11:39 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman, Ingo Molnar

On Fri, Oct 09, 2020 at 12:48:08PM +0200, Michal Hocko wrote:
> On Fri 09-10-20 12:20:09, Peter Zijlstra wrote:
> > On Fri, Oct 09, 2020 at 12:14:05PM +0200, Michal Hocko wrote:
> > > On Fri 09-10-20 11:47:41, Peter Zijlstra wrote:
> > 
> > > > That is, work backwards (from PREEMPT back to VOLUNTARY) instead of the
> > > > other way around.
> > > 
> > > My original idea was that the config would only define the default
> > > preemption mode. preempt_none parameter would then just act as an
> > > override. That would mean that CONFIG_PREEMPTION would be effectively
> > > gone from the kernel. The reason being that any code outside of the
> > > scheduler shouldn't really care about the preemption mode. I suspect
> > > this will prevent from dubious hacks and provide a more robust code in
> > > the end.
> > 
> > Sure; but the way of arriving at that destination might be easier if
> > you work backwards from PREEMPT=y, because while there _should_ not be
> > dependencies outside of the scheduler, we both know there are.
> 
> Wouldn't we need to examine each of the CONFIG_PREEMPTION code anyway?
> And wouldn't that be even more tricky? The boot time option would result
> in a more restrictive preemption mode while the code is actually
> assuming a less restrictive one.

Sure, in the end we'll have to look at all of that.

> > This also makes your patches independent of the series that makes
> > CONFIG_PREEMPTION unconditional.
> >
> > It also gives Kconfig space to limit the dynamic thing to archs that
> > have sufficient support (we'll be relying on static_call/static_branch,
> > and not everybody has that implemented in a way that makes it the
> > dynamic change worth-while).
> 
> Hmm, this is actually a good argument. I can imagine that kernels
> without CONFIG_JUMP_LABEL might increase a runtime overhead for
> something that users of that kernel might be not really interested in.
> This would make CONFIG_PREEMPT_DYNAMIC be selected by CONFIG_JUMP_LABEL.
> 
> I will add the CONFIG_PREEMPT_DYNAMIC in the next version. I just have
> to think whether flipping the direction is really safe and easier in the
> end. For our particular usecase we are more interested in
> NONE<->VOLUNTARY at this moment and having full preemption in the mix
> later is just fine. If you insist on the other direction then we can
> work on that.

Ah, I was purely thinking of the FULL preempt case. For the
NONE/VOLATILE case you can probably keep it simpler.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 10:37         ` Michal Hocko
@ 2020-10-09 11:42           ` Peter Zijlstra
  0 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 11:42 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Frederic Weisbecker, LKML, Mel Gorman,
	Ingo Molnar, Richard Henderson, Ivan Kokshaysky, Matt Turner,
	linux-alpha, Brian Cain, linux-hexagon, Geert Uytterhoeven,
	linux-m68k, Jeff Dike, Richard Weinberger, Anton Ivanov,
	linux-um

On Fri, Oct 09, 2020 at 12:37:30PM +0200, Michal Hocko wrote:
> On Fri 09-10-20 12:14:31, Peter Zijlstra wrote:
> > On Fri, Oct 09, 2020 at 12:10:44PM +0200, Michal Hocko wrote:
> > > On Fri 09-10-20 11:42:45, Peter Zijlstra wrote:
> > > > On Fri, Oct 09, 2020 at 11:12:18AM +0200, Michal Hocko wrote:
> > > > > Is there any additional feedback? Should I split up the patch and repost
> > > > > for inclusion?
> > > > 
> > > > Maybe remove PREEMPT_NONE after that?  Since that's then equivalent to
> > > > building with VOLUNTARY and booting with preempt=none.
> > > 
> > > So do you mean that I should post an additional patch which does this on
> > > top? With a justification that there is one option less to chose from?
> > 
> > Exactly!
> 
> It seems we have to get rid of CONFIG_NO_PREEMPT first
> $ git grep ARCH_NO_PREEMPT
> arch/Kconfig:config ARCH_NO_PREEMPT
> arch/alpha/Kconfig:     select ARCH_NO_PREEMPT
> arch/hexagon/Kconfig:   select ARCH_NO_PREEMPT
> arch/m68k/Kconfig:      select ARCH_NO_PREEMPT if !COLDFIRE
> arch/um/Kconfig:        select ARCH_NO_PREEMPT
> kernel/Kconfig.preempt: depends on !ARCH_NO_PREEMPT
> kernel/Kconfig.preempt: depends on !ARCH_NO_PREEMPT
> lib/Kconfig.debug:      select PREEMPT_COUNT if !ARCH_NO_PREEMPT
> lib/Kconfig.debug:      depends on !ARCH_NO_PREEMPT
> 
> Is there anybody working on that. Is this even possible? I can see it
> has been added by 87a4c375995e ("kconfig: include kernel/Kconfig.preempt
> from init/Kconfig") but this looks more like a mechanical change and it
> has defined ARCH_NO_PREEMPT all arches which haven't included
> Kconfig.preempt. But is there any reason why those cannot support
> preemption for some reason? Cc respective maintainer (the email thread
> starts http://lkml.kernel.org/r/20201007120401.11200-1-mhocko@kernel.org

I suspect we can drop ARCH_NO_PREEMPT from VOLUNTARY, IIRC there's no
arch dependency there. PREEMPT itself obviously needs arch help.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-07 12:04 [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
                   ` (2 preceding siblings ...)
  2020-10-09  9:12 ` Michal Hocko
@ 2020-10-09 12:29 ` Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 1/5] jump_label: split out declaration parts into its own headers Michal Hocko
                     ` (6 more replies)
  3 siblings, 7 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 12:29 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML

Hi,
let me repost the pile that has grown from the initial patch based on
the review feedback I have collected from Peter. I do realize that he
also suggested to go from the other direction and implement this for the
full preemption mode first. As I've said I believe this would require to
examine all CONFIG_PREEMPTION code paths and preempt_mode=[none,voluntary]
is already quite useful for distributions which are targeting high
throughput and desktop environments. Adding full preemption on top is
definitely desirable and something we will be looking into. This is the
first step in that direction.

If maintainers really believe that starting from the full preemption
mode first then we are willing to explore that path of course.

I am still posting that as an RFC because I didn't get to push this
through a complete compile time testing and rely on 0-day bot in that
aspect.

Thanks and I am looking forward for a further feedback.



^ permalink raw reply	[flat|nested] 31+ messages in thread

* [RFC PATCH v2 1/5] jump_label: split out declaration parts into its own headers
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
@ 2020-10-09 12:29   ` Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 2/5] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 12:29 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML, Michal Hocko

From: Michal Hocko <mhocko@suse.com>

Follow up patch would like to add a static key into kernel.h and that
requires a declaration of the key in the same file. Including
jump_label.h into kernel.h is not possible due to headers dependencies.

Separate parts needed for declaration into its own header which doesn't
depend on any other header file. kernel.h can include jump_label_type.h.

Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 include/linux/jump_label.h      | 44 +----------------------------
 include/linux/jump_label_type.h | 49 +++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 43 deletions(-)
 create mode 100644 include/linux/jump_label_type.h

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 32809624d422..bd0d846d0bde 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -75,6 +75,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/jump_label_type.h>
 
 extern bool static_key_initialized;
 
@@ -82,35 +83,6 @@ extern bool static_key_initialized;
 				    "%s(): static key '%pS' used before call to jump_label_init()", \
 				    __func__, (key))
 
-#ifdef CONFIG_JUMP_LABEL
-
-struct static_key {
-	atomic_t enabled;
-/*
- * Note:
- *   To make anonymous unions work with old compilers, the static
- *   initialization of them requires brackets. This creates a dependency
- *   on the order of the struct with the initializers. If any fields
- *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
- *   to be modified.
- *
- * bit 0 => 1 if key is initially true
- *	    0 if initially false
- * bit 1 => 1 if points to struct static_key_mod
- *	    0 if points to struct jump_entry
- */
-	union {
-		unsigned long type;
-		struct jump_entry *entries;
-		struct static_key_mod *next;
-	};
-};
-
-#else
-struct static_key {
-	atomic_t enabled;
-};
-#endif	/* CONFIG_JUMP_LABEL */
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_JUMP_LABEL
@@ -343,14 +315,6 @@ static inline void static_key_disable(struct static_key *key)
  * All the below code is macros in order to play type games.
  */
 
-struct static_key_true {
-	struct static_key key;
-};
-
-struct static_key_false {
-	struct static_key key;
-};
-
 #define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
 #define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
 
@@ -360,18 +324,12 @@ struct static_key_false {
 #define DEFINE_STATIC_KEY_TRUE_RO(name)	\
 	struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT
 
-#define DECLARE_STATIC_KEY_TRUE(name)	\
-	extern struct static_key_true name
-
 #define DEFINE_STATIC_KEY_FALSE(name)	\
 	struct static_key_false name = STATIC_KEY_FALSE_INIT
 
 #define DEFINE_STATIC_KEY_FALSE_RO(name)	\
 	struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT
 
-#define DECLARE_STATIC_KEY_FALSE(name)	\
-	extern struct static_key_false name
-
 #define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count)		\
 	struct static_key_true name[count] = {			\
 		[0 ... (count) - 1] = STATIC_KEY_TRUE_INIT,	\
diff --git a/include/linux/jump_label_type.h b/include/linux/jump_label_type.h
new file mode 100644
index 000000000000..37cb02c12f35
--- /dev/null
+++ b/include/linux/jump_label_type.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_JUMP_LABEL_TYPE_H
+#define _LINUX_JUMP_LABEL_TYPE_H
+
+#ifdef CONFIG_JUMP_LABEL
+
+struct static_key {
+	atomic_t enabled;
+/*
+ * Note:
+ *   To make anonymous unions work with old compilers, the static
+ *   initialization of them requires brackets. This creates a dependency
+ *   on the order of the struct with the initializers. If any fields
+ *   are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need
+ *   to be modified.
+ *
+ * bit 0 => 1 if key is initially true
+ *	    0 if initially false
+ * bit 1 => 1 if points to struct static_key_mod
+ *	    0 if points to struct jump_entry
+ */
+	union {
+		unsigned long type;
+		struct jump_entry *entries;
+		struct static_key_mod *next;
+	};
+};
+
+#else
+struct static_key {
+	atomic_t enabled;
+};
+#endif	/* CONFIG_JUMP_LABEL */
+
+struct static_key_true {
+	struct static_key key;
+};
+
+struct static_key_false {
+	struct static_key key;
+};
+
+#define DECLARE_STATIC_KEY_TRUE(name)	\
+	extern struct static_key_true name
+
+#define DECLARE_STATIC_KEY_FALSE(name)	\
+	extern struct static_key_false name
+
+#endif /* _LINUX_JUMP_LABEL_TYPE_H */
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [RFC PATCH v2 2/5] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 1/5] jump_label: split out declaration parts into its own headers Michal Hocko
@ 2020-10-09 12:29   ` Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 3/5] kernel: ARCH_NO_PREEMPT shouldn't exclude PREEMPT_VOLUNTARY Michal Hocko
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 12:29 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML, Michal Hocko

From: Michal Hocko <mhocko@suse.com>

Many people are still relying on pre built distribution kernels and so
distributions have to provide multiple kernel flavors to offer different
preemption models. Most of them are providing PREEMPT_NONE for typical
server deployments and PREEMPT_VOLUNTARY for desktop users.

Having two different kernel binaries differing only by the preemption
mode seems rather wasteful and inflexible. Especially when the difference
between PREEMPT_NONE and PREEMPT_VOLUNTARY is really minimal. Both only
allow explicit scheduling points while running in the kernel and it is
only might_sleep which adds additional preemption points for
PREEMPT_VOLUNTARY.

Add a kernel command line parameter preempt_mode=[none, voluntary] which
allows to override the default compile time preemption mode
(CONFIG_PREEMPT_NONE resp. CONFIG_PREEMPT_VOLUNTARY). Voluntary preempt
mode is guarded by a jump label to prevent any potential runtime overhead.

Add an explicit include of jump_label to gpio/consumer.h to make sure
all its consumers will get static_branch_likely() as kernel.h cannot
include it directly.

Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 .../admin-guide/kernel-parameters.txt         |  5 ++++
 include/linux/gpio/consumer.h                 |  1 +
 include/linux/kernel.h                        | 13 ++++++--
 kernel/sched/core.c                           | 30 +++++++++++++++++++
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..96bb74faeb50 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3873,6 +3873,11 @@
 			Format: {"off"}
 			Disable Hardware Transactional Memory
 
+	preempt_mode={none,voluntary}
+			Set the preemption mode.
+			none - equivalent to CONFIG_PREEMPT_NONE
+			voluntary - equivalent to CONFIG_PREEMPT_VOLUNTARY
+
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 901aab89d025..d64e6dda5755 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -3,6 +3,7 @@
 #define __LINUX_GPIO_CONSUMER_H
 
 #include <linux/bits.h>
+#include <linux/jump_label.h>
 #include <linux/bug.h>
 #include <linux/compiler_types.h>
 #include <linux/err.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c25b8e41c0ea..d2d37bd5ecd5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/typecheck.h>
 #include <linux/printk.h>
 #include <linux/build_bug.h>
+#include <linux/jump_label_type.h>
 #include <asm/byteorder.h>
 #include <asm/div64.h>
 #include <uapi/linux/kernel.h>
@@ -192,11 +193,19 @@ struct completion;
 struct pt_regs;
 struct user;
 
+#ifndef CONFIG_PREEMPTION
 #ifdef CONFIG_PREEMPT_VOLUNTARY
+DECLARE_STATIC_KEY_TRUE(preempt_voluntary_key);
+#else
+DECLARE_STATIC_KEY_FALSE(preempt_voluntary_key);
+#endif
+
 extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+# define might_resched() \
+	do { if (static_branch_likely(&preempt_voluntary_key)) _cond_resched(); } while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() \
+	do { } while (0)
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d95dc3f4644..07d37d862637 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+DEFINE_STATIC_KEY_TRUE(preempt_voluntary_key);
+#else
+/* PREEMPT_NONE vs PREEMPT_VOLUNTARY */
+DEFINE_STATIC_KEY_FALSE(preempt_voluntary_key);
+#endif
+EXPORT_SYMBOL(preempt_voluntary_key);
+
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
 /*
  * Debugging: various feature bits
@@ -8482,3 +8490,25 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
 {
         trace_sched_update_nr_running_tp(rq, count);
 }
+
+#ifndef CONFIG_PREEMPTION
+static int __init setup_non_preempt_mode(char *str)
+{
+	if (!strcmp(str, "none")) {
+		if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+			static_branch_disable(&preempt_voluntary_key);
+			pr_info("Switching to PREEMPT_NONE mode.");
+		}
+	} else if (!strcmp(str, "voluntary")) {
+		if (!IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+			static_branch_enable(&preempt_voluntary_key);
+			pr_info("Switching to PREEMPT_VOLUNTARY mode.");
+		}
+	} else {
+		pr_warn("Unsupported preempt mode %s\n", str);
+		return 1;
+	}
+	return 0;
+}
+__setup("preempt_mode=", setup_non_preempt_mode);
+#endif
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [RFC PATCH v2 3/5] kernel: ARCH_NO_PREEMPT shouldn't exclude PREEMPT_VOLUNTARY
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 1/5] jump_label: split out declaration parts into its own headers Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 2/5] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
@ 2020-10-09 12:29   ` Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 4/5] kernel: introduce CONFIG_PREEMPT_DYNAMIC Michal Hocko
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 12:29 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML, Michal Hocko

From: Michal Hocko <mhocko@suse.com>

PREEMPT_VOLUNTARY is fully arch agnostic so there shouldn't be any
reason to restrict this preemption mode by ARCH_NO_PREEMPT.

Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 kernel/Kconfig.preempt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index bf82259cff96..c460a9a2373b 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -19,7 +19,6 @@ config PREEMPT_NONE
 
 config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"
-	depends on !ARCH_NO_PREEMPT
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [RFC PATCH v2 4/5] kernel: introduce CONFIG_PREEMPT_DYNAMIC
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
                     ` (2 preceding siblings ...)
  2020-10-09 12:29   ` [RFC PATCH v2 3/5] kernel: ARCH_NO_PREEMPT shouldn't exclude PREEMPT_VOLUNTARY Michal Hocko
@ 2020-10-09 12:29   ` Michal Hocko
  2020-10-09 12:29   ` [RFC PATCH v2 5/5] kernel: drop PREEMPT_NONE compile time option Michal Hocko
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 12:29 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML, Michal Hocko

From: Michal Hocko <mhocko@suse.com>

Boot time preemption mode selection is currently hardcoded for
!CONFIG_PREEMPTION. Peter has suggested introducing a dedicated
option for the functionality because not every architecture implements
static branches (jump labels) effectively and therefore
the additional overhead might be prohibitive or undesirable.

Introduce CONFIG_PREEMPT_DYNAMIC that allows boot time preemption mode
override. The functionality is currently implemented for PREEMPT_NONE
and PREEMPT_VOLUNTARY preemption modes.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 include/linux/kernel.h | 20 ++++++++++++++++++--
 include/linux/sched.h  | 12 ------------
 kernel/Kconfig.preempt | 19 +++++++++++++++++++
 kernel/sched/core.c    |  6 +++++-
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d2d37bd5ecd5..b61ab02dba84 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -193,20 +193,36 @@ struct completion;
 struct pt_regs;
 struct user;
 
+/*
+ * cond_resched() and cond_resched_lock(): latency reduction via
+ * explicit rescheduling in places that are safe. The return
+ * value indicates whether a reschedule was done in fact.
+ * cond_resched_lock() will drop the spinlock before scheduling,
+ */
 #ifndef CONFIG_PREEMPTION
+extern int _cond_resched(void);
+#else
+static inline int _cond_resched(void) { return 0; }
+#endif
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 DECLARE_STATIC_KEY_TRUE(preempt_voluntary_key);
 #else
 DECLARE_STATIC_KEY_FALSE(preempt_voluntary_key);
 #endif
 
-extern int _cond_resched(void);
 # define might_resched() \
 	do { if (static_branch_likely(&preempt_voluntary_key)) _cond_resched(); } while (0)
 #else
+
+#ifdef CONFIG_PREEMPT_VOLUNTARY
 # define might_resched() \
-	do { } while (0)
+	do { _cond_resched(); } while (0)
+#else
+# define might_resched() do { } while (0)
 #endif
+#endif /* CONFIG_PREEMPT_DYNAMIC */
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 extern void ___might_sleep(const char *file, int line, int preempt_offset);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index afe01e232935..184b5e162184 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1812,18 +1812,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
-/*
- * cond_resched() and cond_resched_lock(): latency reduction via
- * explicit rescheduling in places that are safe. The return
- * value indicates whether a reschedule was done in fact.
- * cond_resched_lock() will drop the spinlock before scheduling,
- */
-#ifndef CONFIG_PREEMPTION
-extern int _cond_resched(void);
-#else
-static inline int _cond_resched(void) { return 0; }
-#endif
-
 #define cond_resched() ({			\
 	___might_sleep(__FILE__, __LINE__, 0);	\
 	_cond_resched();			\
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c460a9a2373b..e142f36dd429 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -73,6 +73,25 @@ config PREEMPT_RT
 
 endchoice
 
+config PREEMPT_DYNAMIC
+	bool "Allow boot time preemption model selection"
+	depends on PREEMPT_NONE || PREEMPT_VOLUNTARY
+	help
+	  This option allows defining the preemption model via a kernel
+	  command line parameter, thus overriding the default preemption
+	  model selected at compile time.
+
+	  The feature is primarily interesting for Linux distributions which
+	  provide a pre-built kernel binary and want to reduce the number of
+	  kernel flavors they offer while still covering different use cases.
+
+	  The runtime overhead is negligible with JUMP_LABEL enabled but if
+	  runtime patching is not available for the specific architecture then
+	  the potential overhead should be considered.
+
+	  Select this if the same pre-built kernel should be used for both
+	  Server and Desktop workloads.
+
 config PREEMPT_COUNT
        bool
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07d37d862637..fe22b2fca864 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -43,6 +43,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 DEFINE_STATIC_KEY_TRUE(preempt_voluntary_key);
 #else
@@ -51,6 +53,8 @@ DEFINE_STATIC_KEY_FALSE(preempt_voluntary_key);
 #endif
 EXPORT_SYMBOL(preempt_voluntary_key);
 
+#endif
+
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
 /*
  * Debugging: various feature bits
@@ -8491,7 +8495,7 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
         trace_sched_update_nr_running_tp(rq, count);
 }
 
-#ifndef CONFIG_PREEMPTION
+#ifdef CONFIG_PREEMPT_DYNAMIC
 static int __init setup_non_preempt_mode(char *str)
 {
 	if (!strcmp(str, "none")) {
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [RFC PATCH v2 5/5] kernel: drop PREEMPT_NONE compile time option
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
                     ` (3 preceding siblings ...)
  2020-10-09 12:29   ` [RFC PATCH v2 4/5] kernel: introduce CONFIG_PREEMPT_DYNAMIC Michal Hocko
@ 2020-10-09 12:29   ` Michal Hocko
  2020-10-09 12:50   ` [RFC PATCH v2 0/5] allow overriding default preempt mode from command line Peter Zijlstra
  2020-10-09 17:45   ` Peter Zijlstra
  6 siblings, 0 replies; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 12:29 UTC (permalink / raw)
  To: Peter Zijlstra, Thomas Gleixner
  Cc: Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML, Michal Hocko

From: Michal Hocko <mhocko@suse.com>

Now that the preempt_mode command line parameter supports both preempt_none
and preempt_voluntary we no longer necessarily need a config option for
this preemption mode, which reduces the overall config space a bit.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Michal Hocko <mhocko@suse.com>
---
 kernel/Kconfig.preempt | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index e142f36dd429..e19b7d3a8d3c 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -2,23 +2,10 @@
 
 choice
 	prompt "Preemption Model"
-	default PREEMPT_NONE
-
-config PREEMPT_NONE
-	bool "No Forced Preemption (Server)"
-	help
-	  This is the traditional Linux preemption model, geared towards
-	  throughput. It will still provide good latencies most of the
-	  time, but there are no guarantees and occasional longer delays
-	  are possible.
-
-	  Select this option if you are building a kernel for a server or
-	  scientific/computation system, or if you want to maximize the
-	  raw processing power of the kernel, irrespective of scheduling
-	  latencies.
+	default PREEMPT_VOLUNTARY
 
 config PREEMPT_VOLUNTARY
-	bool "Voluntary Kernel Preemption (Desktop)"
+	bool "Voluntary Kernel Preemption (Throughput oriented workloads)"
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
@@ -35,7 +22,7 @@ config PREEMPT_VOLUNTARY
 	  Select this if you are building a kernel for a desktop system.
 
 config PREEMPT
-	bool "Preemptible Kernel (Low-Latency Desktop)"
+	bool "Preemptible Kernel (Low-Latency oriented workloads)"
 	depends on !ARCH_NO_PREEMPT
 	select PREEMPTION
 	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
@@ -75,7 +62,7 @@ endchoice
 
 config PREEMPT_DYNAMIC
 	bool "Allow boot time preemption model selection"
-	depends on PREEMPT_NONE || PREEMPT_VOLUNTARY
+	depends on PREEMPT_VOLUNTARY
 	help
 	  This option allows to define the preemption model on the kernel
 	  command line parameter and thus override the default preemption
-- 
2.28.0


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
                     ` (4 preceding siblings ...)
  2020-10-09 12:29   ` [RFC PATCH v2 5/5] kernel: drop PREEMPT_NONE compile time option Michal Hocko
@ 2020-10-09 12:50   ` Peter Zijlstra
  2020-10-09 13:03     ` Michal Hocko
  2020-10-09 17:45   ` Peter Zijlstra
  6 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 12:50 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML


Can you please not thread the new series onto the old one? That's some
seriously annoying behaviour that I see more and more... It makes me
loose whole patch-sets.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-09 12:50   ` [RFC PATCH v2 0/5] allow overriding default preempt mode from command line Peter Zijlstra
@ 2020-10-09 13:03     ` Michal Hocko
  2020-10-09 13:22       ` Peter Zijlstra
  0 siblings, 1 reply; 31+ messages in thread
From: Michal Hocko @ 2020-10-09 13:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Thomas Gleixner, Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML

On Fri 09-10-20 14:50:56, Peter Zijlstra wrote:
> 
> Can you please not thread the new series onto the old one? That's some
> seriously annoying behaviour that I see more and more... It makes me
> lose whole patch-sets.

Sure, no problem. This is not really unusual in mm and I personally
prefer to have discussion in a single thread rather than separated in
two or more. But I definitely do not insist of course. It is surprising
that you are losing the whole patchset as the threading seems to be done
properly. Mutt doesn't seem to have problems with that.

Anyway, let me know if I should repost.

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-09 13:03     ` Michal Hocko
@ 2020-10-09 13:22       ` Peter Zijlstra
  0 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 13:22 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Mel Gorman, Frederic Weisbecker, Ingo Molnar, LKML

On Fri, Oct 09, 2020 at 03:03:44PM +0200, Michal Hocko wrote:
> On Fri 09-10-20 14:50:56, Peter Zijlstra wrote:
> > 
> > Can you please not thread the new series onto the old one? That's some
> > seriously annoying behaviour that I see more and more... It makes me
> > lose whole patch-sets.
> 
> Sure, no problem. This is not really unusual in mm and I personally
> prefer to have discussion in a single thread rather than separated in
> two or more. But I definitely do not insist of course. It is surprising
> that you are losing the whole patchset as the threading seems to be done
> properly. Mutt doesn't seem to have problems with that.

I often sort by thread and date and look at the recent most threads.
Mutt sorts threads by first post, not most recent post, hence the thread
is 'lost'.

> Anyway, let me know if I should repost.

I've already broken the thread (#), no need to repost.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
                     ` (5 preceding siblings ...)
  2020-10-09 12:50   ` [RFC PATCH v2 0/5] allow overriding default preempt mode from command line Peter Zijlstra
@ 2020-10-09 17:45   ` Peter Zijlstra
  2020-10-27 12:22     ` Frederic Weisbecker
  6 siblings, 1 reply; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-09 17:45 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Thomas Gleixner, Mel Gorman, Frederic Weisbecker, Ingo Molnar,
	LKML, x86, Linus Torvalds, Josh Poimboeuf

On Fri, Oct 09, 2020 at 02:29:21PM +0200, Michal Hocko wrote:
> Hi,
> let me repost the pile that has grown from the initial patch based on
> the review feedback I have collected from Peter. I do realize that he
> also suggested to go from the other direction and implement this for the
> full preemption mode first. As I've said I believe this would require to
> examine all CONFIG_PREEMPTION code paths and preempt_mode=[none,voluntary]
> is already quite useful for distributions which are targeting high
> throuhput and desktop environments. Adding full preemption on top is
> definitely desirable and something we will be looking into. This is the
> first step in that direction.

Just for giggles, configure a x86_64-PREEMPT kernel and apply the below.
It builds and boots here, except for "preempt=zero", which hangs
somewhere early.

It should be able to switch preemption mode at runtime too, but I've not
bothered to wire that up.

Also, some of it is utterly yuck ;-)

---
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 3bc9b3bb178c..0941dc919d35 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3894,6 +3894,11 @@
 			Format: {"off"}
 			Disable Hardware Transactional Memory
 
+	preempt={none,voluntary}
+			Set the preemption mode.
+			none - equivalent to CONFIG_PREEMPT_NONE
+			voluntary - equivalent to CONFIG_PREEMPT_VOLUNTARY
+
 	print-fatal-signals=
 			[KNL] debug: print fatal signals
 
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 69485ca13665..ca4f64229da9 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -5,6 +5,7 @@
 #include <asm/rmwcc.h>
 #include <asm/percpu.h>
 #include <linux/thread_info.h>
+#include <linux/static_call_types.h>
 
 DECLARE_PER_CPU(int, __preempt_count);
 
@@ -103,16 +104,27 @@ static __always_inline bool should_resched(int preempt_offset)
 }
 
 #ifdef CONFIG_PREEMPTION
-  extern asmlinkage void preempt_schedule_thunk(void);
-# define __preempt_schedule() \
-	asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT)
 
-  extern asmlinkage void preempt_schedule(void);
-  extern asmlinkage void preempt_schedule_notrace_thunk(void);
-# define __preempt_schedule_notrace() \
-	asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT)
+extern asmlinkage void preempt_schedule(void);
+extern asmlinkage void preempt_schedule_thunk(void);
+DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_thunk);
+
+#define __preempt_schedule() \
+do { \
+	__ADDRESSABLE(STATIC_CALL_KEY(preempt_schedule)); \
+	asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \
+} while (0)
+
+extern asmlinkage void preempt_schedule_notrace(void);
+extern asmlinkage void preempt_schedule_notrace_thunk(void);
+DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_thunk);
+
+#define __preempt_schedule_notrace() \
+do { \
+	__ADDRESSABLE(STATIC_CALL_KEY(preempt_schedule_notrace)); \
+	asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \
+} while (0)
 
-  extern asmlinkage void preempt_schedule_notrace(void);
 #endif
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ca9a380d9c0b..0f19f4fefcf6 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -16,7 +16,12 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
 	int size = CALL_INSN_SIZE;
 	const void *code;
 
-	switch (type) {
+	if (type < 2 && func == &__static_call_return0) {
+
+		static const u8 ret0[5] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
+		code = ret0;
+
+	} else switch (type) {
 	case CALL:
 		code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
 		break;
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c25b8e41c0ea..88baab97a910 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/typecheck.h>
 #include <linux/printk.h>
 #include <linux/build_bug.h>
+#include <linux/static_call_types.h>
 #include <asm/byteorder.h>
 #include <asm/div64.h>
 #include <uapi/linux/kernel.h>
@@ -192,12 +193,21 @@ struct completion;
 struct pt_regs;
 struct user;
 
-#ifdef CONFIG_PREEMPT_VOLUNTARY
-extern int _cond_resched(void);
-# define might_resched() _cond_resched()
-#else
-# define might_resched() do { } while (0)
-#endif
+extern int __cond_resched(void);
+DECLARE_STATIC_CALL(cond_resched, __cond_resched);
+
+static __always_inline int _cond_resched(void)
+{
+	return static_call(cond_resched)();
+}
+
+extern void __might_resched(void);
+DECLARE_STATIC_CALL(might_resched, __might_resched);
+
+static __always_inline void might_resched(void)
+{
+	static_call(might_resched)();
+}
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 extern void ___might_sleep(const char *file, int line, int preempt_offset);
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 59eb5cd567d7..483b015ac32f 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -64,9 +64,7 @@ extern int rcu_scheduler_active __read_mostly;
 void rcu_end_inkernel_boot(void);
 bool rcu_inkernel_boot_has_ended(void);
 bool rcu_is_watching(void);
-#ifndef CONFIG_PREEMPTION
 void rcu_all_qs(void);
-#endif
 
 /* RCUtree hotplug events */
 int rcutree_prepare_cpu(unsigned int cpu);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d383cf09e78f..0443da7cca0c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1820,11 +1820,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  * value indicates whether a reschedule was done in fact.
  * cond_resched_lock() will drop the spinlock before scheduling,
  */
-#ifndef CONFIG_PREEMPTION
-extern int _cond_resched(void);
-#else
-static inline int _cond_resched(void) { return 0; }
-#endif
 
 #define cond_resched() ({			\
 	___might_sleep(__FILE__, __LINE__, 0);	\
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 695da4c9b338..f47b8f5ffa69 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -107,26 +107,11 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
 
 #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)
 
-/*
- * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
- * the symbol table so that objtool can reference it when it generates the
- * .static_call_sites section.
- */
-#define __static_call(name)						\
-({									\
-	__ADDRESSABLE(STATIC_CALL_KEY(name));				\
-	&STATIC_CALL_TRAMP(name);					\
-})
-
 #else
 #define STATIC_CALL_TRAMP_ADDR(name) NULL
 #endif
 
 
-#define DECLARE_STATIC_CALL(name, func)					\
-	extern struct static_call_key STATIC_CALL_KEY(name);		\
-	extern typeof(func) STATIC_CALL_TRAMP(name);
-
 #define static_call_update(name, func)					\
 ({									\
 	BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name)));	\
@@ -136,6 +121,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
 
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 
+extern int __static_call_return0(void);
+
 extern int __init static_call_init(void);
 
 struct static_call_mod {
@@ -174,7 +161,6 @@ extern int static_call_text_reserved(void *start, void *end);
 	};								\
 	ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
-#define static_call(name)	__static_call(name)
 #define static_call_cond(name)	(void)__static_call(name)
 
 #define EXPORT_STATIC_CALL(name)					\
@@ -187,6 +173,8 @@ extern int static_call_text_reserved(void *start, void *end);
 
 #elif defined(CONFIG_HAVE_STATIC_CALL)
 
+static inline int __static_call_return0(void) { return 0; }
+
 static inline int static_call_init(void) { return 0; }
 
 struct static_call_key {
@@ -207,7 +195,6 @@ struct static_call_key {
 	};								\
 	ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)
 
-#define static_call(name)	__static_call(name)
 #define static_call_cond(name)	(void)__static_call(name)
 
 static inline
@@ -234,6 +221,8 @@ static inline int static_call_text_reserved(void *start, void *end)
 
 #else /* Generic implementation */
 
+static inline int __static_call_return0(void) { return 0; }
+
 static inline int static_call_init(void) { return 0; }
 
 struct static_call_key {
@@ -252,9 +241,6 @@ struct static_call_key {
 		.func = NULL,						\
 	}
 
-#define static_call(name)						\
-	((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
-
 static inline void __static_call_nop(void) { }
 
 /*
diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h
index 89135bb35bf7..08f78b1b88b4 100644
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/stringify.h>
+#include <linux/compiler.h>
 
 #define STATIC_CALL_KEY_PREFIX		__SCK__
 #define STATIC_CALL_KEY_PREFIX_STR	__stringify(STATIC_CALL_KEY_PREFIX)
@@ -32,4 +33,30 @@ struct static_call_site {
 	s32 key;
 };
 
+#define DECLARE_STATIC_CALL(name, func)					\
+	extern struct static_call_key STATIC_CALL_KEY(name);		\
+	extern typeof(func) STATIC_CALL_TRAMP(name);
+
+#ifdef CONFIG_HAVE_STATIC_CALL
+
+/*
+ * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
+ * the symbol table so that objtool can reference it when it generates the
+ * .static_call_sites section.
+ */
+#define __static_call(name)						\
+({									\
+	__ADDRESSABLE(STATIC_CALL_KEY(name));				\
+	&STATIC_CALL_TRAMP(name);					\
+})
+
+#define static_call(name)	__static_call(name)
+
+#else
+
+#define static_call(name)						\
+	((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
+
+#endif /* CONFIG_HAVE_STATIC_CALL */
+
 #endif /* _STATIC_CALL_TYPES_H */
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 0a1e20f8d4e8..de0665b1890d 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -359,6 +359,8 @@ void irqentry_exit_cond_resched(void)
 	}
 }
 
+DECLARE_STATIC_KEY_TRUE(irq_preemption_key);
+
 noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
 {
 	lockdep_assert_irqs_disabled();
@@ -384,7 +386,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
 		}
 
 		instrumentation_begin();
-		if (IS_ENABLED(CONFIG_PREEMPTION))
+		if (static_branch_likely(&irq_preemption_key))
 			irqentry_exit_cond_resched();
 		/* Covers both tracing and lockdep */
 		trace_hardirqs_on();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8160ab5263f8..f872fb0ac560 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4709,6 +4709,9 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 
+DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule);
+EXPORT_STATIC_CALL(preempt_schedule);
+
 /**
  * preempt_schedule_notrace - preempt_schedule called by tracing
  *
@@ -4761,6 +4764,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 }
 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 
+DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace);
+EXPORT_STATIC_CALL(preempt_schedule_notrace);
+
 #endif /* CONFIG_PREEMPTION */
 
 /*
@@ -6111,18 +6117,29 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-#ifndef CONFIG_PREEMPTION
-int __sched _cond_resched(void)
+int __sched __cond_resched(void)
 {
 	if (should_resched(0)) {
 		preempt_schedule_common();
 		return 1;
 	}
+#ifndef CONFIG_PREEMPT_RCU
 	rcu_all_qs();
+#endif
 	return 0;
 }
-EXPORT_SYMBOL(_cond_resched);
-#endif
+EXPORT_SYMBOL(__cond_resched);
+
+DEFINE_STATIC_CALL(cond_resched, __cond_resched);
+EXPORT_STATIC_CALL(cond_resched);
+
+void __sched __might_resched(void)
+{
+	__cond_resched();
+}
+
+DEFINE_STATIC_CALL(might_resched, __might_resched);
+EXPORT_STATIC_CALL(might_resched);
 
 /*
  * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
@@ -8481,3 +8498,86 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
 {
         trace_sched_update_nr_running_tp(rq, count);
 }
+
+DEFINE_STATIC_KEY_TRUE(irq_preemption_key);
+
+/*
+ * SC:cond_resched
+ * SC:might_resched
+ * SC:preempt_schedule
+ * SC:preempt_schedule_notrace
+ * SB:irq_preemption_key
+ *
+ *
+ * ZERO
+ *   cond_resched             <- RET0
+ *   might_resched            <- NOP
+ *   preempt_schedule         <- NOP
+ *   preempt_schedule_notrace <- NOP
+ *   irq_preemption_key       <- false
+ *
+ * NONE:
+ *   cond_resched             <- __cond_resched
+ *   might_resched            <- NOP
+ *   preempt_schedule         <- NOP
+ *   preempt_schedule_notrace <- NOP
+ *   irq_preemption_key       <- false
+ *
+ * VOLUNTARY:
+ *   cond_resched             <- __cond_resched
+ *   might_resched            <- __might_resched
+ *   preempt_schedule         <- NOP
+ *   preempt_schedule_notrace <- NOP
+ *   irq_preemption_key       <- false
+ *
+ * FULL:
+ *   cond_resched             <- RET0
+ *   might_resched            <- NOP
+ *   preempt_schedule         <- preempt_schedule
+ *   preempt_schedule_notrace <- preempt_schedule_notrace
+ *   irq_preemption_key       <- true
+ */
+static int __init setup_preempt_mode(char *str)
+{
+	if (!strcmp(str, "zero")) {
+		static_call_update(cond_resched, __static_call_return0);
+		static_call_update(might_resched, (void (*)(void))NULL);
+		static_call_update(preempt_schedule, (void (*)(void))NULL);
+		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
+		static_branch_disable(&irq_preemption_key);
+		printk("XXX PREEMPT: %s\n", str);
+	} else if (!strcmp(str, "none")) {
+		static_call_update(cond_resched, __cond_resched);
+		static_call_update(might_resched, (void (*)(void))NULL);
+		static_call_update(preempt_schedule, (void (*)(void))NULL);
+		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
+		static_branch_disable(&irq_preemption_key);
+		printk("XXX PREEMPT: %s\n", str);
+	} else if (!strcmp(str, "voluntary")) {
+		static_call_update(cond_resched, __cond_resched);
+		static_call_update(might_resched, __might_resched);
+		static_call_update(preempt_schedule, (void (*)(void))NULL);
+		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
+		static_branch_disable(&irq_preemption_key);
+		printk("XXX PREEMPT: %s\n", str);
+	} else if (!strcmp(str, "ponies")) {
+		static_call_update(cond_resched, __cond_resched);
+		static_call_update(might_resched, (void (*)(void))NULL);
+		static_call_update(preempt_schedule, preempt_schedule_thunk);
+		static_call_update(preempt_schedule_notrace, preempt_schedule_notrace_thunk);
+		static_branch_enable(&irq_preemption_key);
+		printk("XXX PREEMPT: %s\n", str);
+	} else if (!strcmp(str, "full")) {
+		static_call_update(cond_resched, __static_call_return0);
+		static_call_update(might_resched, (void (*)(void))NULL);
+		static_call_update(preempt_schedule, preempt_schedule_thunk);
+		static_call_update(preempt_schedule_notrace, preempt_schedule_notrace_thunk);
+		static_branch_enable(&irq_preemption_key);
+		printk("XXX PREEMPT: %s\n", str);
+	} else {
+		pr_warn("Unsupported preempt mode %s\n", str);
+		return 1;
+	}
+	return 0;
+}
+__setup("preempt=", setup_preempt_mode);
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 84565c2a41b8..1aa17e399448 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -438,6 +438,11 @@ int __init static_call_init(void)
 }
 early_initcall(static_call_init);
 
+int __static_call_return0(void)
+{
+	return 0;
+}
+
 #ifdef CONFIG_STATIC_CALL_SELFTEST
 
 static int func_a(int x)

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-09 17:45   ` Peter Zijlstra
@ 2020-10-27 12:22     ` Frederic Weisbecker
  2020-10-27 12:28       ` Peter Zijlstra
  0 siblings, 1 reply; 31+ messages in thread
From: Frederic Weisbecker @ 2020-10-27 12:22 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Michal Hocko, Thomas Gleixner, Mel Gorman, Frederic Weisbecker,
	Ingo Molnar, LKML, x86, Linus Torvalds, Josh Poimboeuf

On Fri, Oct 09, 2020 at 07:45:54PM +0200, Peter Zijlstra wrote:
> +DEFINE_STATIC_KEY_TRUE(irq_preemption_key);
> +
> +/*
> + * SC:cond_resched
> + * SC:might_resched
> + * SC:preempt_schedule
> + * SC:preempt_schedule_notrace
> + * SB:irq_preemption_key
> + *
> + *
> + * ZERO
> + *   cond_resched             <- RET0
> + *   might_resched            <- NOP
> + *   preempt_schedule         <- NOP
> + *   preempt_schedule_notrace <- NOP
> + *   irq_preemption_key       <- false
> + *
> + * NONE:
> + *   cond_resched             <- __cond_resched
> + *   might_resched            <- NOP
> + *   preempt_schedule         <- NOP
> + *   preempt_schedule_notrace <- NOP
> + *   irq_preemption_key       <- false
> + *
> + * VOLUNTARY:
> + *   cond_resched             <- __cond_resched
> + *   might_resched            <- __might_resched
> + *   preempt_schedule         <- NOP
> + *   preempt_schedule_notrace <- NOP
> + *   irq_preemption_key       <- false
> + *
> + * FULL:
> + *   cond_resched             <- RET0
> + *   might_resched            <- NOP
> + *   preempt_schedule         <- preempt_schedule
> + *   preempt_schedule_notrace <- preempt_schedule_notrace
> + *   irq_preemption_key       <- true
> + */

That's cute! I'll try to end up with that result.

> +static int __init setup_preempt_mode(char *str)
> +{
> +	if (!strcmp(str, "zero")) {
> +		static_call_update(cond_resched, __static_call_return0);
> +		static_call_update(might_resched, (void (*)(void))NULL);
> +		static_call_update(preempt_schedule, (void (*)(void))NULL);
> +		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
> +		static_branch_disable(&irq_preemption_key);
> +		printk("XXX PREEMPT: %s\n", str);
> +	} else if (!strcmp(str, "none")) {
> +		static_call_update(cond_resched, __cond_resched);
> +		static_call_update(might_resched, (void (*)(void))NULL);
> +		static_call_update(preempt_schedule, (void (*)(void))NULL);
> +		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
> +		static_branch_disable(&irq_preemption_key);
> +		printk("XXX PREEMPT: %s\n", str);
> +	} else if (!strcmp(str, "voluntary")) {
> +		static_call_update(cond_resched, __cond_resched);
> +		static_call_update(might_resched, __might_resched);
> +		static_call_update(preempt_schedule, (void (*)(void))NULL);
> +		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
> +		static_branch_disable(&irq_preemption_key);
> +		printk("XXX PREEMPT: %s\n", str);
> +	} else if (!strcmp(str, "ponies")) {
> +		static_call_update(cond_resched, __cond_resched);
> +		static_call_update(might_resched, (void (*)(void))NULL);
> +		static_call_update(preempt_schedule, preempt_schedule_thunk);
> +		static_call_update(preempt_schedule_notrace, preempt_schedule_notrace_thunk);
> +		static_branch_enable(&irq_preemption_key);
> +		printk("XXX PREEMPT: %s\n", str);

Why would we need that ponies version?

Thanks!

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [RFC PATCH v2 0/5] allow overriding default preempt mode from command line
  2020-10-27 12:22     ` Frederic Weisbecker
@ 2020-10-27 12:28       ` Peter Zijlstra
  0 siblings, 0 replies; 31+ messages in thread
From: Peter Zijlstra @ 2020-10-27 12:28 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Michal Hocko, Thomas Gleixner, Mel Gorman, Frederic Weisbecker,
	Ingo Molnar, LKML, x86, Linus Torvalds, Josh Poimboeuf

On Tue, Oct 27, 2020 at 01:22:41PM +0100, Frederic Weisbecker wrote:
> On Fri, Oct 09, 2020 at 07:45:54PM +0200, Peter Zijlstra wrote:
> > +DEFINE_STATIC_KEY_TRUE(irq_preemption_key);
> > +
> > +/*
> > + * SC:cond_resched
> > + * SC:might_resched
> > + * SC:preempt_schedule
> > + * SC:preempt_schedule_notrace
> > + * SB:irq_preemption_key
> > + *
> > + *
> > + * ZERO
> > + *   cond_resched             <- RET0
> > + *   might_resched            <- NOP
> > + *   preempt_schedule         <- NOP
> > + *   preempt_schedule_notrace <- NOP
> > + *   irq_preemption_key       <- false
> > + *
> > + * NONE:
> > + *   cond_resched             <- __cond_resched
> > + *   might_resched            <- NOP
> > + *   preempt_schedule         <- NOP
> > + *   preempt_schedule_notrace <- NOP
> > + *   irq_preemption_key       <- false
> > + *
> > + * VOLUNTARY:
> > + *   cond_resched             <- __cond_resched
> > + *   might_resched            <- __might_resched
> > + *   preempt_schedule         <- NOP
> > + *   preempt_schedule_notrace <- NOP
> > + *   irq_preemption_key       <- false
> > + *
> > + * FULL:
> > + *   cond_resched             <- RET0
> > + *   might_resched            <- NOP
> > + *   preempt_schedule         <- preempt_schedule
> > + *   preempt_schedule_notrace <- preempt_schedule_notrace
> > + *   irq_preemption_key       <- true
> > + */
> 
> That's cute! I'll try to end up with that result.

Beware, ZERO is 'broken'; I included it because I was curious what, if
anything, would happen :-)

> > +static int __init setup_preempt_mode(char *str)
> > +{
> > +	if (!strcmp(str, "zero")) {
> > +		static_call_update(cond_resched, __static_call_return0);
> > +		static_call_update(might_resched, (void (*)(void))NULL);
> > +		static_call_update(preempt_schedule, (void (*)(void))NULL);
> > +		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
> > +		static_branch_disable(&irq_preemption_key);
> > +		printk("XXX PREEMPT: %s\n", str);
> > +	} else if (!strcmp(str, "none")) {
> > +		static_call_update(cond_resched, __cond_resched);
> > +		static_call_update(might_resched, (void (*)(void))NULL);
> > +		static_call_update(preempt_schedule, (void (*)(void))NULL);
> > +		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
> > +		static_branch_disable(&irq_preemption_key);
> > +		printk("XXX PREEMPT: %s\n", str);
> > +	} else if (!strcmp(str, "voluntary")) {
> > +		static_call_update(cond_resched, __cond_resched);
> > +		static_call_update(might_resched, __might_resched);
> > +		static_call_update(preempt_schedule, (void (*)(void))NULL);
> > +		static_call_update(preempt_schedule_notrace, (void (*)(void))NULL);
> > +		static_branch_disable(&irq_preemption_key);
> > +		printk("XXX PREEMPT: %s\n", str);
> > +	} else if (!strcmp(str, "ponies")) {
> > +		static_call_update(cond_resched, __cond_resched);
> > +		static_call_update(might_resched, (void (*)(void))NULL);
> > +		static_call_update(preempt_schedule, preempt_schedule_thunk);
> > +		static_call_update(preempt_schedule_notrace, preempt_schedule_notrace_thunk);
> > +		static_branch_enable(&irq_preemption_key);
> > +		printk("XXX PREEMPT: %s\n", str);
> 
> Why would we need that ponies version?

We don't, but it was a missing combination (like ZERO), and I wanted to
test it worked (as expected, it does).

We'll only encounter it as an intermediate state when flipping states at
runtime, but unlike zero, it should work just fine.

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2020-10-27 12:28 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-07 12:04 [RFC PATCH] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
2020-10-07 12:19 ` Peter Zijlstra
2020-10-07 12:29   ` Michal Hocko
2020-10-07 13:01     ` Mel Gorman
2020-10-07 12:21 ` Peter Zijlstra
2020-10-07 12:35   ` Michal Hocko
2020-10-09  9:47     ` Peter Zijlstra
2020-10-09 10:14       ` Michal Hocko
2020-10-09 10:20         ` Peter Zijlstra
2020-10-09 10:48           ` Michal Hocko
2020-10-09 11:17             ` Michal Hocko
2020-10-09 11:26               ` Michal Hocko
2020-10-09 11:39             ` Peter Zijlstra
2020-10-09  9:12 ` Michal Hocko
2020-10-09  9:42   ` Peter Zijlstra
2020-10-09 10:10     ` Michal Hocko
2020-10-09 10:14       ` Peter Zijlstra
2020-10-09 10:37         ` Michal Hocko
2020-10-09 11:42           ` Peter Zijlstra
2020-10-09 12:29 ` [RFC PATCH v2 0/5] allow overriding default preempt mode from " Michal Hocko
2020-10-09 12:29   ` [RFC PATCH v2 1/5] jump_label: split out declaration parts into its own headers Michal Hocko
2020-10-09 12:29   ` [RFC PATCH v2 2/5] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line Michal Hocko
2020-10-09 12:29   ` [RFC PATCH v2 3/5] kernel: ARCH_NO_PREEMPT shouldn't exclude PREEMPT_VOLUNTARY Michal Hocko
2020-10-09 12:29   ` [RFC PATCH v2 4/5] kernel: introduce CONFIG_PREEMPT_DYNAMIC Michal Hocko
2020-10-09 12:29   ` [RFC PATCH v2 5/5] kernel: drop PREEMPT_NONE compile time option Michal Hocko
2020-10-09 12:50   ` [RFC PATCH v2 0/5] allow overriding default preempt mode from command line Peter Zijlstra
2020-10-09 13:03     ` Michal Hocko
2020-10-09 13:22       ` Peter Zijlstra
2020-10-09 17:45   ` Peter Zijlstra
2020-10-27 12:22     ` Frederic Weisbecker
2020-10-27 12:28       ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).