* [PATCH v4 1/3] x86/delay: Preparatory code cleanup
2020-04-24 19:37 [PATCH v4 0/3] x86/delay: Introduce TPAUSE instruction Kyung Min Park
@ 2020-04-24 19:37 ` Kyung Min Park
2020-05-07 14:13 ` [tip: x86/timers] " tip-bot2 for Thomas Gleixner
2020-04-24 19:37 ` [PATCH v4 2/3] x86/delay: Refactor delay_mwaitx() for TPAUSE support Kyung Min Park
2020-04-24 19:37 ` [PATCH v4 3/3] x86/delay: Introduce TPAUSE delay Kyung Min Park
2 siblings, 1 reply; 7+ messages in thread
From: Kyung Min Park @ 2020-04-24 19:37 UTC (permalink / raw)
To: x86, linux-kernel
Cc: tglx, mingo, hpa, gregkh, ak, tony.luck, ashok.raj,
ravi.v.shankar, fenghua.yu, kyung.min.park
From: Thomas Gleixner <tglx@linutronix.de>
The naming conventions in the delay code are confusing at best.
All delay variants use a loops argument and/or variable which originates
from the original delay_loop() implementation. But all variants except
delay_loop() are based on TSC cycles.
Rename the argument to cycles and make it type u64 to avoid these weird
expansions to u64 in the functions.
Rename MWAITX_MAX_LOOPS to MWAITX_MAX_WAIT_CYCLES for the same reason
and fixup the comment of delay_mwaitx() as well.
Mark the delay_fn function pointer __ro_after_init and fixup the comment
for it.
No functional change and preparation for the upcoming TPAUSE based delay
variant.
[Kyung Min Park: Added __init to use_tsc_delay()]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
---
arch/x86/include/asm/delay.h | 3 ++-
arch/x86/include/asm/mwait.h | 2 +-
arch/x86/lib/delay.c | 45 +++++++++++++++++++++++---------------------
3 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index de9e784..9aa38de 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -3,8 +3,9 @@
#define _ASM_X86_DELAY_H
#include <asm-generic/delay.h>
+#include <linux/init.h>
-void use_tsc_delay(void);
+void __init use_tsc_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index b809f11..a43b35b 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -20,7 +20,7 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
-#define MWAITX_MAX_LOOPS ((u32)-1)
+#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
u32 get_umwait_control_msr(void);
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571..887d52d 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,19 @@
# include <asm/smp.h>
#endif
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+
/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
{
+ unsigned long loops = (unsigned long)__loops;
+
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
@@ -49,9 +59,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
{
- u64 bclock, now, loops = __loops;
+ u64 bclock, now;
int cpu;
preempt_disable();
@@ -59,7 +69,7 @@ static void delay_tsc(unsigned long __loops)
bclock = rdtsc_ordered();
for (;;) {
now = rdtsc_ordered();
- if ((now - bclock) >= loops)
+ if ((now - bclock) >= cycles)
break;
/* Allow RT tasks to run */
@@ -77,7 +87,7 @@ static void delay_tsc(unsigned long __loops)
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
- loops -= (now - bclock);
+ cycles -= (now - bclock);
cpu = smp_processor_id();
bclock = rdtsc_ordered();
}
@@ -87,24 +97,24 @@ static void delay_tsc(unsigned long __loops)
/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
*/
-static void delay_mwaitx(unsigned long __loops)
+static void delay_mwaitx(u64 cycles)
{
- u64 start, end, delay, loops = __loops;
+ u64 start, end, delay;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
* is a store on the memory monitored by MONITORX.
*/
- if (loops == 0)
+ if (!cycles)
return;
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
/*
* Use cpu_tss_rw as a cacheline-aligned, seldomly
@@ -121,22 +131,15 @@ static void delay_mwaitx(unsigned long __loops)
end = rdtsc_ordered();
- if (loops <= end - start)
+ if (cycles <= end - start)
break;
- loops -= end - start;
-
+ cycles -= end - start;
start = end;
}
}
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
+void __init use_tsc_delay(void)
{
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
--
2.7.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [tip: x86/timers] x86/delay: Preparatory code cleanup
2020-04-24 19:37 ` [PATCH v4 1/3] x86/delay: Preparatory code cleanup Kyung Min Park
@ 2020-05-07 14:13 ` tip-bot2 for Thomas Gleixner
0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Thomas Gleixner @ 2020-05-07 14:13 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Thomas Gleixner, Kyung Min Park, x86, LKML
The following commit has been merged into the x86/timers branch of tip:
Commit-ID: e8824890249355656968d8846908a313fe231f11
Gitweb: https://git.kernel.org/tip/e8824890249355656968d8846908a313fe231f11
Author: Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 24 Apr 2020 12:37:54 -07:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 07 May 2020 16:06:19 +02:00
x86/delay: Preparatory code cleanup
The naming conventions in the delay code are confusing at best.
All delay variants use a loops argument and/or variable which originates
from the original delay_loop() implementation. But all variants except
delay_loop() are based on TSC cycles.
Rename the argument to cycles and make it type u64 to avoid these weird
expansions to u64 in the functions.
Rename MWAITX_MAX_LOOPS to MWAITX_MAX_WAIT_CYCLES for the same reason
and fixup the comment of delay_mwaitx() as well.
Mark the delay_fn function pointer __ro_after_init and fixup the comment
for it.
No functional change and preparation for the upcoming TPAUSE based delay
variant.
[ Kyung Min Park: Added __init to use_tsc_delay() ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/1587757076-30337-2-git-send-email-kyung.min.park@intel.com
---
arch/x86/include/asm/delay.h | 3 +-
arch/x86/include/asm/mwait.h | 2 +-
arch/x86/lib/delay.c | 45 ++++++++++++++++++-----------------
3 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index de9e784..9aa38de 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -3,8 +3,9 @@
#define _ASM_X86_DELAY_H
#include <asm-generic/delay.h>
+#include <linux/init.h>
-void use_tsc_delay(void);
+void __init use_tsc_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index b809f11..a43b35b 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -20,7 +20,7 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
-#define MWAITX_MAX_LOOPS ((u32)-1)
+#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
u32 get_umwait_control_msr(void);
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571..887d52d 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,19 @@
# include <asm/smp.h>
#endif
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+
/* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
{
+ unsigned long loops = (unsigned long)__loops;
+
asm volatile(
" test %0,%0 \n"
" jz 3f \n"
@@ -49,9 +59,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
{
- u64 bclock, now, loops = __loops;
+ u64 bclock, now;
int cpu;
preempt_disable();
@@ -59,7 +69,7 @@ static void delay_tsc(unsigned long __loops)
bclock = rdtsc_ordered();
for (;;) {
now = rdtsc_ordered();
- if ((now - bclock) >= loops)
+ if ((now - bclock) >= cycles)
break;
/* Allow RT tasks to run */
@@ -77,7 +87,7 @@ static void delay_tsc(unsigned long __loops)
* counter for this CPU.
*/
if (unlikely(cpu != smp_processor_id())) {
- loops -= (now - bclock);
+ cycles -= (now - bclock);
cpu = smp_processor_id();
bclock = rdtsc_ordered();
}
@@ -87,24 +97,24 @@ static void delay_tsc(unsigned long __loops)
/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
*/
-static void delay_mwaitx(unsigned long __loops)
+static void delay_mwaitx(u64 cycles)
{
- u64 start, end, delay, loops = __loops;
+ u64 start, end, delay;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
* is a store on the memory monitored by MONITORX.
*/
- if (loops == 0)
+ if (!cycles)
return;
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
/*
* Use cpu_tss_rw as a cacheline-aligned, seldomly
@@ -121,22 +131,15 @@ static void delay_mwaitx(unsigned long __loops)
end = rdtsc_ordered();
- if (loops <= end - start)
+ if (cycles <= end - start)
break;
- loops -= end - start;
-
+ cycles -= end - start;
start = end;
}
}
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
-void use_tsc_delay(void)
+void __init use_tsc_delay(void)
{
if (delay_fn == delay_loop)
delay_fn = delay_tsc;
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 2/3] x86/delay: Refactor delay_mwaitx() for TPAUSE support
2020-04-24 19:37 [PATCH v4 0/3] x86/delay: Introduce TPAUSE instruction Kyung Min Park
2020-04-24 19:37 ` [PATCH v4 1/3] x86/delay: Preparatory code cleanup Kyung Min Park
@ 2020-04-24 19:37 ` Kyung Min Park
2020-05-07 14:13 ` [tip: x86/timers] " tip-bot2 for Kyung Min Park
2020-04-24 19:37 ` [PATCH v4 3/3] x86/delay: Introduce TPAUSE delay Kyung Min Park
2 siblings, 1 reply; 7+ messages in thread
From: Kyung Min Park @ 2020-04-24 19:37 UTC (permalink / raw)
To: x86, linux-kernel
Cc: tglx, mingo, hpa, gregkh, ak, tony.luck, ashok.raj,
ravi.v.shankar, fenghua.yu, kyung.min.park
Refactor code to make it easier to add a new model specific function to
delay for a number of cycles.
No functional change.
Reviewed-by: Tony Luck <tony.luck@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
---
arch/x86/lib/delay.c | 48 ++++++++++++++++++++++++++++++------------------
1 file changed, 30 insertions(+), 18 deletions(-)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 887d52d..fe91dc1 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -34,6 +34,7 @@ static void delay_loop(u64 __loops);
* during boot.
*/
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
/* simple loop based delay: */
static void delay_loop(u64 __loops)
@@ -100,9 +101,33 @@ static void delay_tsc(u64 cycles)
* counts with TSC frequency. The input value is the number of TSC cycles
* to wait. MWAITX will also exit when the timer expires.
*/
-static void delay_mwaitx(u64 cycles)
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
- u64 start, end, delay;
+ u64 delay;
+
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+ /*
+ * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
+ * variable as the monitor target.
+ */
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+ /*
+ * AMD, like Intel, supports the EAX hint and EAX=0xf0 means, do not
+ * enter any deep C-state and we use it here in delay() to minimize
+ * wakeup latency.
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
+ */
+static void delay_halt(u64 __cycles)
+{
+ u64 start, end, cycles = __cycles;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
@@ -114,21 +139,7 @@ static void delay_mwaitx(u64 cycles)
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
-
- /*
- * Use cpu_tss_rw as a cacheline-aligned, seldomly
- * accessed per-cpu variable as the monitor target.
- */
- __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
-
- /*
- * AMD, like Intel's MWAIT version, supports the EAX hint and
- * EAX=0xf0 means, do not enter any deep C-state and we use it
- * here in delay() to minimize wakeup latency.
- */
- __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
-
+ delay_halt_fn(start, cycles);
end = rdtsc_ordered();
if (cycles <= end - start)
@@ -147,7 +158,8 @@ void __init use_tsc_delay(void)
void use_mwaitx_delay(void)
{
- delay_fn = delay_mwaitx;
+ delay_halt_fn = delay_halt_mwaitx;
+ delay_fn = delay_halt;
}
int read_current_timer(unsigned long *timer_val)
--
2.7.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [tip: x86/timers] x86/delay: Refactor delay_mwaitx() for TPAUSE support
2020-04-24 19:37 ` [PATCH v4 2/3] x86/delay: Refactor delay_mwaitx() for TPAUSE support Kyung Min Park
@ 2020-05-07 14:13 ` tip-bot2 for Kyung Min Park
0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Kyung Min Park @ 2020-05-07 14:13 UTC (permalink / raw)
To: linux-tip-commits
Cc: Fenghua Yu, Kyung Min Park, Thomas Gleixner, Tony Luck, x86, LKML
The following commit has been merged into the x86/timers branch of tip:
Commit-ID: 46f90c7aad62be1af76588108c730d826308a801
Gitweb: https://git.kernel.org/tip/46f90c7aad62be1af76588108c730d826308a801
Author: Kyung Min Park <kyung.min.park@intel.com>
AuthorDate: Fri, 24 Apr 2020 12:37:55 -07:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 07 May 2020 16:06:19 +02:00
x86/delay: Refactor delay_mwaitx() for TPAUSE support
Refactor code to make it easier to add a new model specific function to
delay for a number of cycles.
No functional change.
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1587757076-30337-3-git-send-email-kyung.min.park@intel.com
---
arch/x86/lib/delay.c | 48 ++++++++++++++++++++++++++-----------------
1 file changed, 30 insertions(+), 18 deletions(-)
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 887d52d..fe91dc1 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -34,6 +34,7 @@ static void delay_loop(u64 __loops);
* during boot.
*/
static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;
/* simple loop based delay: */
static void delay_loop(u64 __loops)
@@ -100,9 +101,33 @@ static void delay_tsc(u64 cycles)
* counts with TSC frequency. The input value is the number of TSC cycles
* to wait. MWAITX will also exit when the timer expires.
*/
-static void delay_mwaitx(u64 cycles)
+static void delay_halt_mwaitx(u64 unused, u64 cycles)
{
- u64 start, end, delay;
+ u64 delay;
+
+ delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
+ /*
+ * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu
+ * variable as the monitor target.
+ */
+ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+ /*
+ * AMD, like Intel, supports the EAX hint and EAX=0xf0 means, do not
+ * enter any deep C-state and we use it here in delay() to minimize
+ * wakeup latency.
+ */
+ __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+/*
+ * Call a vendor specific function to delay for a given amount of time. Because
+ * these functions may return earlier than requested, check for actual elapsed
+ * time and call again until done.
+ */
+static void delay_halt(u64 __cycles)
+{
+ u64 start, end, cycles = __cycles;
/*
* Timer value of 0 causes MWAITX to wait indefinitely, unless there
@@ -114,21 +139,7 @@ static void delay_mwaitx(u64 cycles)
start = rdtsc_ordered();
for (;;) {
- delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
-
- /*
- * Use cpu_tss_rw as a cacheline-aligned, seldomly
- * accessed per-cpu variable as the monitor target.
- */
- __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
-
- /*
- * AMD, like Intel's MWAIT version, supports the EAX hint and
- * EAX=0xf0 means, do not enter any deep C-state and we use it
- * here in delay() to minimize wakeup latency.
- */
- __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
-
+ delay_halt_fn(start, cycles);
end = rdtsc_ordered();
if (cycles <= end - start)
@@ -147,7 +158,8 @@ void __init use_tsc_delay(void)
void use_mwaitx_delay(void)
{
- delay_fn = delay_mwaitx;
+ delay_halt_fn = delay_halt_mwaitx;
+ delay_fn = delay_halt;
}
int read_current_timer(unsigned long *timer_val)
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v4 3/3] x86/delay: Introduce TPAUSE delay
2020-04-24 19:37 [PATCH v4 0/3] x86/delay: Introduce TPAUSE instruction Kyung Min Park
2020-04-24 19:37 ` [PATCH v4 1/3] x86/delay: Preparatory code cleanup Kyung Min Park
2020-04-24 19:37 ` [PATCH v4 2/3] x86/delay: Refactor delay_mwaitx() for TPAUSE support Kyung Min Park
@ 2020-04-24 19:37 ` Kyung Min Park
2020-05-07 14:13 ` [tip: x86/timers] " tip-bot2 for Kyung Min Park
2 siblings, 1 reply; 7+ messages in thread
From: Kyung Min Park @ 2020-04-24 19:37 UTC (permalink / raw)
To: x86, linux-kernel
Cc: tglx, mingo, hpa, gregkh, ak, tony.luck, ashok.raj,
ravi.v.shankar, fenghua.yu, kyung.min.park
TPAUSE instructs the processor to enter an implementation-dependent
optimized state. The instruction execution wakes up when the time-stamp
counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
The instruction execution also wakes up due to the expiration of
the operating system time-limit or by an external interrupt
or exceptions such as a debug exception or a machine check exception.
TPAUSE offers a choice of two lower power states:
1. Light-weight power/performance optimized state C0.1
2. Improved power/performance optimized state C0.2
This way, it can save power with low wake-up latency in comparison to
spinloop based delay. The selection between the two is governed by the
input register.
TPAUSE is available on processors with X86_FEATURE_WAITPKG.
Reviewed-by: Tony Luck <tony.luck@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
---
arch/x86/Kconfig.assembler | 4 ++++
arch/x86/include/asm/delay.h | 1 +
arch/x86/include/asm/mwait.h | 22 ++++++++++++++++++++++
arch/x86/kernel/time.c | 3 +++
arch/x86/lib/delay.c | 27 +++++++++++++++++++++++++++
5 files changed, 57 insertions(+)
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 13de0db..26b8c08 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -15,3 +15,7 @@ config AS_SHA256_NI
def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
help
Supported by binutils >= 2.24 and LLVM integrated assembler
+config AS_TPAUSE
+ def_bool $(as-instr,tpause %ecx)
+ help
+ Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index 9aa38de..630891d 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -6,6 +6,7 @@
#include <linux/init.h>
void __init use_tsc_delay(void);
+void __init use_tpause_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a43b35b..73d997a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -22,6 +22,8 @@
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
+#define TPAUSE_C01_STATE 1
+#define TPAUSE_C02_STATE 0
u32 get_umwait_control_msr(void);
@@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
current_clr_polling();
}
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(u32 ecx, u32 edx, u32 eax)
+{
+ /* "tpause %ecx, %edx, %eax;" */
+ #ifdef CONFIG_AS_TPAUSE
+ asm volatile("tpause %%ecx\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #else
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #endif
+}
+
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 106e7f8..371a6b3 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
*/
x86_init.irqs.intr_mode_init();
tsc_init();
+
+ if (static_cpu_has(X86_FEATURE_WAITPKG))
+ use_tpause_delay();
}
/*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fe91dc1..65d15df 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -97,6 +97,27 @@ static void delay_tsc(u64 cycles)
}
/*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that udelay() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
* counts with TSC frequency. The input value is the number of TSC cycles
* to wait. MWAITX will also exit when the timer expires.
@@ -156,6 +177,12 @@ void __init use_tsc_delay(void)
delay_fn = delay_tsc;
}
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
+
void use_mwaitx_delay(void)
{
delay_halt_fn = delay_halt_mwaitx;
--
2.7.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [tip: x86/timers] x86/delay: Introduce TPAUSE delay
2020-04-24 19:37 ` [PATCH v4 3/3] x86/delay: Introduce TPAUSE delay Kyung Min Park
@ 2020-05-07 14:13 ` tip-bot2 for Kyung Min Park
0 siblings, 0 replies; 7+ messages in thread
From: tip-bot2 for Kyung Min Park @ 2020-05-07 14:13 UTC (permalink / raw)
To: linux-tip-commits
Cc: Fenghua Yu, Kyung Min Park, Thomas Gleixner, Tony Luck, x86, LKML
The following commit has been merged into the x86/timers branch of tip:
Commit-ID: cec5f268cd02d25d2d74807843d8ae0292fe0fb7
Gitweb: https://git.kernel.org/tip/cec5f268cd02d25d2d74807843d8ae0292fe0fb7
Author: Kyung Min Park <kyung.min.park@intel.com>
AuthorDate: Fri, 24 Apr 2020 12:37:56 -07:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 07 May 2020 16:06:20 +02:00
x86/delay: Introduce TPAUSE delay
TPAUSE instructs the processor to enter an implementation-dependent
optimized state. The instruction execution wakes up when the time-stamp
counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
The instruction execution also wakes up due to the expiration of
the operating system time-limit or by an external interrupt
or exceptions such as a debug exception or a machine check exception.
TPAUSE offers a choice of two lower power states:
1. Light-weight power/performance optimized state C0.1
2. Improved power/performance optimized state C0.2
This way, it can save power with low wake-up latency in comparison to
spinloop based delay. The selection between the two is governed by the
input register.
TPAUSE is available on processors with X86_FEATURE_WAITPKG.
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1587757076-30337-4-git-send-email-kyung.min.park@intel.com
---
arch/x86/Kconfig.assembler | 4 ++++
arch/x86/include/asm/delay.h | 1 +
arch/x86/include/asm/mwait.h | 22 ++++++++++++++++++++++
arch/x86/kernel/time.c | 3 +++
arch/x86/lib/delay.c | 27 +++++++++++++++++++++++++++
5 files changed, 57 insertions(+)
diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler
index 13de0db..26b8c08 100644
--- a/arch/x86/Kconfig.assembler
+++ b/arch/x86/Kconfig.assembler
@@ -15,3 +15,7 @@ config AS_SHA256_NI
def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
help
Supported by binutils >= 2.24 and LLVM integrated assembler
+config AS_TPAUSE
+ def_bool $(as-instr,tpause %ecx)
+ help
+ Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7
diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h
index 9aa38de..630891d 100644
--- a/arch/x86/include/asm/delay.h
+++ b/arch/x86/include/asm/delay.h
@@ -6,6 +6,7 @@
#include <linux/init.h>
void __init use_tsc_delay(void);
+void __init use_tpause_delay(void);
void use_mwaitx_delay(void);
#endif /* _ASM_X86_DELAY_H */
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a43b35b..73d997a 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -22,6 +22,8 @@
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
#define MWAITX_DISABLE_CSTATES 0xf0
+#define TPAUSE_C01_STATE 1
+#define TPAUSE_C02_STATE 0
u32 get_umwait_control_msr(void);
@@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
current_clr_polling();
}
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(u32 ecx, u32 edx, u32 eax)
+{
+ /* "tpause %ecx, %edx, %eax;" */
+ #ifdef CONFIG_AS_TPAUSE
+ asm volatile("tpause %%ecx\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #else
+ asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+ :
+ : "c"(ecx), "d"(edx), "a"(eax));
+ #endif
+}
+
#endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 106e7f8..371a6b3 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
*/
x86_init.irqs.intr_mode_init();
tsc_init();
+
+ if (static_cpu_has(X86_FEATURE_WAITPKG))
+ use_tpause_delay();
}
/*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fe91dc1..65d15df 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -97,6 +97,27 @@ static void delay_tsc(u64 cycles)
}
/*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EDX:EAX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void delay_halt_tpause(u64 start, u64 cycles)
+{
+ u64 until = start + cycles;
+ u32 eax, edx;
+
+ eax = lower_32_bits(until);
+ edx = upper_32_bits(until);
+
+ /*
+ * Hard code the deeper (C0.2) sleep state because exit latency is
+ * small compared to the "microseconds" that udelay() will delay.
+ */
+ __tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
* counts with TSC frequency. The input value is the number of TSC cycles
* to wait. MWAITX will also exit when the timer expires.
@@ -156,6 +177,12 @@ void __init use_tsc_delay(void)
delay_fn = delay_tsc;
}
+void __init use_tpause_delay(void)
+{
+ delay_halt_fn = delay_halt_tpause;
+ delay_fn = delay_halt;
+}
+
void use_mwaitx_delay(void)
{
delay_halt_fn = delay_halt_mwaitx;
^ permalink raw reply related [flat|nested] 7+ messages in thread