linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Artem Bityutskiy <dedekind1@gmail.com>
To: x86@kernel.org, Linux PM Mailing List <linux-pm@vger.kernel.org>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Subject: [PATCH 3/3] intel_idle: add C0.2 state for Sapphire Rapids Xeon
Date: Mon,  6 Mar 2023 14:34:18 +0200	[thread overview]
Message-ID: <20230306123418.720679-4-dedekind1@gmail.com> (raw)
In-Reply-To: <20230306123418.720679-1-dedekind1@gmail.com>

From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>

Add Sapphire Rapids Xeon C0.2 state support. This state has a lower exit
latency compared with C1, and saves energy compared with POLL (in the range
of 5-20%).

This patch also improves performance (e.g., as measured by 'hackbench'),
because idle CPU power savings in C0.2 increase the busy CPU power budget
and therefore improve the turbo boost of the busy CPU.

Suggested-by: Len Brown <len.brown@intel.com>
Suggested-by: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
---
 drivers/idle/intel_idle.c | 59 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 938c17f25d94..f7705a64d0e6 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -51,11 +51,13 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/moduleparam.h>
+#include <linux/units.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/nospec-branch.h>
 #include <asm/mwait.h>
 #include <asm/msr.h>
+#include <asm/tsc.h>
 #include <asm/fpu/api.h>
 
 #define INTEL_IDLE_VERSION "0.5.1"
@@ -73,6 +75,8 @@ static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
 
 static unsigned long auto_demotion_disable_flags;
 
+static u64 umwait_limit;
+
 static enum {
 	C1E_PROMOTION_PRESERVE,
 	C1E_PROMOTION_ENABLE,
@@ -225,6 +229,27 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
 	return 0;
 }
 
+/**
+ * intel_idle_umwait_irq - Request C0.x using the 'umwait' instruction.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index, returned unchanged.
+ *
+ * Request C0.1 or C0.2 via 'umwait' with interrupts enabled during the wait.
+ */
+static __cpuidle int intel_idle_umwait_irq(struct cpuidle_device *dev,
+					   struct cpuidle_driver *drv,
+					   int index)
+{
+	u32 state = flg2MWAIT(drv->states[index].flags);
+
+	raw_local_irq_enable();
+	umwait_idle(rdtsc() + umwait_limit, state);
+	local_irq_disable();
+
+	return index;
+}
+
 /*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
@@ -968,6 +993,13 @@ static struct cpuidle_state adl_n_cstates[] __initdata = {
 };
 
 static struct cpuidle_state spr_cstates[] __initdata = {
+	{
+		.name = "C0.2",
+		.desc = "UMWAIT C0.2",
+		.flags = MWAIT2flg(TPAUSE_C02_STATE) | CPUIDLE_FLAG_IRQ_ENABLE,
+		.exit_latency_ns = 100,
+		.target_residency_ns = 100,
+		.enter = &intel_idle_umwait_irq, },
 	{
 		.name = "C1",
 		.desc = "MWAIT 0x00",
@@ -1894,7 +1926,8 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 		/* Structure copy. */
 		drv->states[drv->state_count] = cpuidle_state_table[cstate];
 
-		if ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on) {
+		if (cpuidle_state_table[cstate].enter == intel_idle &&
+		    ((cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) || force_irq_on)) {
 			printk("intel_idle: forced intel_idle_irq for state %d\n", cstate);
 			drv->states[drv->state_count].enter = intel_idle_irq;
 		}
@@ -1926,6 +1959,29 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	}
 }
 
+/**
+ * umwait_limit_init - initialize time limit value for 'umwait'.
+ *
+ * C0.1 and C0.2 (later C0.x) idle states are requested via the 'umwait'
+ * instruction. The 'umwait' instruction requires the "deadline" - the TSC
+ * counter value to break out of C0.x (unless it broke out because of an
+ * interrupt or some other event).
+ *
+ * The deadline is specified as an absolute TSC value, and it is calculated as
+ * current TSC value + 'umwait_limit'. This function takes no arguments and
+ * initializes the 'umwait_limit' variable to the count of TSC cycles per tick.
+ * The motivation is:
+ *   * the tick is not disabled for shallow states like C0.x, so idle will
+ *     not last longer than a tick anyway
+ *   * limit idle time to give cpuidle a chance to re-evaluate its C-state
+ *     selection decision and possibly select a deeper C-state.
+ */
+static void __init umwait_limit_init(void)
+{
+	umwait_limit = (u64)TICK_NSEC * tsc_khz;
+	do_div(umwait_limit, MICRO);
+}
+
 /**
  * intel_idle_cpuidle_driver_init - Create the list of available idle states.
  * @drv: cpuidle driver structure to initialize.
@@ -1933,6 +1989,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
 {
 	cpuidle_poll_state_init(drv);
+	umwait_limit_init();
 
 	if (disabled_states_mask & BIT(0))
 		drv->states[0].flags |= CPUIDLE_FLAG_OFF;
-- 
2.38.1


  parent reply	other threads:[~2023-03-06 12:34 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-06 12:34 [PATCH 0/3] Sapphire Rapids C0.x idle states support Artem Bityutskiy
2023-03-06 12:34 ` [PATCH 1/3] x86/mwait: Add support for idle via umwait Artem Bityutskiy
2023-03-06 14:55   ` Peter Zijlstra
2023-03-07 11:55     ` Rafael J. Wysocki
2023-03-08 12:35       ` Peter Zijlstra
2023-03-06 12:34 ` [PATCH 2/3] x86/umwait: Increase tpause and umwait quanta Artem Bityutskiy
2023-03-06 12:34 ` Artem Bityutskiy [this message]
2023-03-06 15:32   ` [PATCH 3/3] intel_idle: add C0.2 state for Sapphire Rapids Xeon Peter Zijlstra
2023-03-07 12:39     ` Rafael J. Wysocki
2023-03-08 12:32       ` Peter Zijlstra
2023-03-09  8:01         ` Artem Bityutskiy
2023-03-14 12:24           ` Peter Zijlstra
2023-03-17  8:42             ` Artem Bityutskiy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230306123418.720679-4-dedekind1@gmail.com \
    --to=dedekind1@gmail.com \
    --cc=linux-pm@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).