linux-kernel.vger.kernel.org archive mirror
* [PATCH 0/2] x86/delay: Introduce TPAUSE instruction
@ 2020-02-26 19:10 Kyung Min Park
  2020-02-26 19:10 ` [PATCH 1/2] x86/asm: Define a few helpers in delay_waitx() Kyung Min Park
  2020-02-26 19:10 ` [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay Kyung Min Park
  0 siblings, 2 replies; 8+ messages in thread
From: Kyung Min Park @ 2020-02-26 19:10 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: tglx, mingo, hpa, gregkh, ak, tony.luck, ashok.raj,
	ravi.v.shankar, fenghua.yu

Intel processors that support the WAITPKG feature implement
the TPAUSE instruction that suspends execution in a lower power
state until the TSC (Time Stamp Counter) exceeds a certain value.

Update the udelay() function to use TPAUSE on systems where it
is available. Note that we hard code the deeper (C0.2) sleep
state because exit latency is small compared to the "microseconds"
that usleep() will delay.
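
For illustration, the core of the TPAUSE based delay boils down to the
following (rough sketch only; the helper names are the ones added in
patch 2/2, "cycles" is the requested delay in TSC cycles, and the real
code loops to handle early wakeups from interrupts):

	u64 until = rdtsc_ordered() + cycles;

	/* Deadline is passed implicitly in EDX:EAX, power state in ECX */
	__tpause(TPAUSE_C02_STATE, upper_32_bits(until), lower_32_bits(until));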

Kyung Min Park (2):
  x86/asm: Define a few helpers in delay_waitx()
  x86/asm/delay: Introduce TPAUSE delay

 arch/x86/include/asm/mwait.h | 17 +++++++++
 arch/x86/lib/delay.c         | 82 ++++++++++++++++++++++++++++++++------------
 2 files changed, 78 insertions(+), 21 deletions(-)

-- 
2.7.4



* [PATCH 1/2] x86/asm: Define a few helpers in delay_waitx()
  2020-02-26 19:10 [PATCH 0/2] x86/delay: Introduce TPAUSE instruction Kyung Min Park
@ 2020-02-26 19:10 ` Kyung Min Park
  2020-03-18 20:19   ` Thomas Gleixner
  2020-02-26 19:10 ` [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay Kyung Min Park
  1 sibling, 1 reply; 8+ messages in thread
From: Kyung Min Park @ 2020-02-26 19:10 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: tglx, mingo, hpa, gregkh, ak, tony.luck, ashok.raj,
	ravi.v.shankar, fenghua.yu

Refactor code to make it easier to add a new model specific function to
delay for a number of cycles.

No functional change.

Reviewed-by: Tony Luck <tony.luck@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
---
 arch/x86/lib/delay.c | 58 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index c126571..6be29cf 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -90,9 +90,36 @@ static void delay_tsc(unsigned long __loops)
  * counts with TSC frequency. The input value is the loop of the
  * counter, it will exit when the timer expires.
  */
-static void delay_mwaitx(unsigned long __loops)
+static void mwaitx(u64 unused, u64 loops)
 {
-	u64 start, end, delay, loops = __loops;
+	u64 delay;
+
+	delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+	/*
+	 * Use cpu_tss_rw as a cacheline-aligned, seldomly
+	 * accessed per-cpu variable as the monitor target.
+	 */
+	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
+
+	/*
+	 * AMD, like Intel, supports the EAX hint and EAX=0xf0
+	 * means, do not enter any deep C-state and we use it
+	 * here in delay() to minimize wakeup latency.
+	 */
+	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+}
+
+static void (*wait_func)(u64, u64);
+
+/*
+ * Call a vendor specific function to delay for a given
+ * amount of time. Because these functions may return
+ * earlier than requested, check for actual elapsed time
+ * and call again until done.
+ */
+static void delay_iterate(unsigned long __loops)
+{
+	u64 start, end, loops = __loops;
 
 	/*
 	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
@@ -104,20 +131,8 @@ static void delay_mwaitx(unsigned long __loops)
 	start = rdtsc_ordered();
 
 	for (;;) {
-		delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
-
-		/*
-		 * Use cpu_tss_rw as a cacheline-aligned, seldomly
-		 * accessed per-cpu variable as the monitor target.
-		 */
-		__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
-		/*
-		 * AMD, like Intel's MWAIT version, supports the EAX hint and
-		 * EAX=0xf0 means, do not enter any deep C-state and we use it
-		 * here in delay() to minimize wakeup latency.
-		 */
-		__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
+		wait_func(start, loops);
 
 		end = rdtsc_ordered();
 
@@ -134,22 +149,23 @@ static void delay_mwaitx(unsigned long __loops)
  * Since we calibrate only once at boot, this
  * function should be set once at boot and not changed
  */
-static void (*delay_fn)(unsigned long) = delay_loop;
+static void (*delay_platform)(unsigned long) = delay_loop;
 
 void use_tsc_delay(void)
 {
-	if (delay_fn == delay_loop)
-		delay_fn = delay_tsc;
+	if (delay_platform == delay_loop)
+		delay_platform = delay_tsc;
 }
 
 void use_mwaitx_delay(void)
 {
-	delay_fn = delay_mwaitx;
+	wait_func = mwaitx;
+	delay_platform = delay_iterate;
 }
 
 int read_current_timer(unsigned long *timer_val)
 {
-	if (delay_fn == delay_tsc) {
+	if (delay_platform == delay_tsc) {
 		*timer_val = rdtsc();
 		return 0;
 	}
@@ -158,7 +174,7 @@ int read_current_timer(unsigned long *timer_val)
 
 void __delay(unsigned long loops)
 {
-	delay_fn(loops);
+	delay_platform(loops);
 }
 EXPORT_SYMBOL(__delay);
 
-- 
2.7.4



* [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay
  2020-02-26 19:10 [PATCH 0/2] x86/delay: Introduce TPAUSE instruction Kyung Min Park
  2020-02-26 19:10 ` [PATCH 1/2] x86/asm: Define a few helpers in delay_waitx() Kyung Min Park
@ 2020-02-26 19:10 ` Kyung Min Park
  2020-02-26 21:10   ` Andi Kleen
  1 sibling, 1 reply; 8+ messages in thread
From: Kyung Min Park @ 2020-02-26 19:10 UTC (permalink / raw)
  To: x86, linux-kernel
  Cc: tglx, mingo, hpa, gregkh, ak, tony.luck, ashok.raj,
	ravi.v.shankar, fenghua.yu

TPAUSE instructs the processor to enter an implementation-dependent
optimized state. The instruction execution wakes up when the time-stamp
counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
The instruction execution also wakes up due to the expiration of
the operating system time-limit or by an external interrupt
or exceptions such as a debug exception or a machine check exception.

TPAUSE offers a choice of two lower power states:
 1. Light-weight power/performance optimized state C0.1
 2. Improved power/performance optimized state C0.2
This way, it can save power with low wake-up latency in comparison to
spinloop based delay. The selection between the two is governed by the
input register.

TPAUSE is available on processors with X86_FEATURE_WAITPKG.

Reviewed-by: Tony Luck <tony.luck@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
---
 arch/x86/include/asm/mwait.h | 17 +++++++++++++++++
 arch/x86/lib/delay.c         | 26 +++++++++++++++++++++++++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 9d5252c..2067501 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -22,6 +22,8 @@
 #define MWAITX_ECX_TIMER_ENABLE		BIT(1)
 #define MWAITX_MAX_LOOPS		((u32)-1)
 #define MWAITX_DISABLE_CSTATES		0xf0
+#define TPAUSE_C01_STATE		1
+#define TPAUSE_C02_STATE		0
 
 static inline void __monitor(const void *eax, unsigned long ecx,
 			     unsigned long edx)
@@ -120,4 +122,19 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 	current_clr_polling();
 }
 
+/*
+ * Caller can specify whether to enter C0.1 (low latency, less
+ * power saving) or C0.2 state (saves more power, but longer wakeup
+ * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
+ * which can force requests for C0.2 to be downgraded to C0.1.
+ */
+static inline void __tpause(unsigned int ecx, unsigned int edx,
+			    unsigned int eax)
+{
+	/* "tpause %ecx, %edx, %eax;" */
+	asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
+		     :
+		     : "c"(ecx), "d"(edx), "a"(eax));
+}
+
 #endif /* _ASM_X86_MWAIT_H */
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index 6be29cf..3553150 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -86,6 +86,26 @@ static void delay_tsc(unsigned long __loops)
 }
 
 /*
+ * On Intel the TPAUSE instruction waits until any of:
+ * 1) the TSC counter exceeds the value provided in EAX:EDX
+ * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
+ * 3) an external interrupt occurs
+ */
+static void tpause(u64 start, u64 cycles)
+{
+	u64 until = start + cycles;
+	unsigned int eax, edx;
+
+	eax = (unsigned int)(until & 0xffffffff);
+	edx = (unsigned int)(until >> 32);
+
+	/* Hard code the deeper (C0.2) sleep state because exit latency is
+	 * small compared to the "microseconds" that usleep() will delay.
+	 */
+	__tpause(TPAUSE_C02_STATE, edx, eax);
+}
+
+/*
  * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
  * counts with TSC frequency. The input value is the loop of the
  * counter, it will exit when the timer expires.
@@ -153,8 +173,12 @@ static void (*delay_platform)(unsigned long) = delay_loop;
 
 void use_tsc_delay(void)
 {
-	if (delay_platform == delay_loop)
+	if (static_cpu_has(X86_FEATURE_WAITPKG)) {
+		wait_func = tpause;
+		delay_platform = delay_iterate;
+	} else if (delay_platform == delay_loop) {
 		delay_platform = delay_tsc;
+	}
 }
 
 void use_mwaitx_delay(void)
-- 
2.7.4



* Re: [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay
  2020-02-26 19:10 ` [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay Kyung Min Park
@ 2020-02-26 21:10   ` Andi Kleen
  2020-02-26 21:20     ` Luck, Tony
  2020-02-26 21:31     ` Fenghua Yu
  0 siblings, 2 replies; 8+ messages in thread
From: Andi Kleen @ 2020-02-26 21:10 UTC (permalink / raw)
  To: Kyung Min Park
  Cc: x86, linux-kernel, tglx, mingo, hpa, gregkh, tony.luck,
	ashok.raj, ravi.v.shankar, fenghua.yu

On Wed, Feb 26, 2020 at 11:10:58AM -0800, Kyung Min Park wrote:
> TPAUSE instructs the processor to enter an implementation-dependent
> optimized state. The instruction execution wakes up when the time-stamp
> counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
> The instruction execution also wakes up due to the expiration of
> the operating system time-limit or by an external interrupt

This is actually a behavior change. Today's udelay() will continue
after processing the interrupt. Your patches don't

I don't think it's a problem though. The interrupt will cause
a long enough delay that exceeds any reasonable udelay() requirements.

There would be a difference if someone did really long udelay()s, much
longer than typical interrupts; in that case you might end up
with a truncated udelay, but such long udelays are not something that we
would encourage.

I don't think you need to change anything in the code, but you should
probably document this behavior.

-Andi


* Re: [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay
  2020-02-26 21:10   ` Andi Kleen
@ 2020-02-26 21:20     ` Luck, Tony
  2020-02-26 21:59       ` Andi Kleen
  2020-02-26 21:31     ` Fenghua Yu
  1 sibling, 1 reply; 8+ messages in thread
From: Luck, Tony @ 2020-02-26 21:20 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Kyung Min Park, x86, linux-kernel, tglx, mingo, hpa, gregkh,
	ashok.raj, ravi.v.shankar, fenghua.yu

On Wed, Feb 26, 2020 at 01:10:40PM -0800, Andi Kleen wrote:
> On Wed, Feb 26, 2020 at 11:10:58AM -0800, Kyung Min Park wrote:
> > TPAUSE instructs the processor to enter an implementation-dependent
> > optimized state. The instruction execution wakes up when the time-stamp
> > counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
> > The instruction execution also wakes up due to the expiration of
> > the operating system time-limit or by an external interrupt
> 
> This is actually a behavior change. Today's udelay() will continue
> after processing the interrupt. Your patches don't

The instruction-level TPAUSE is called inside delay_iterate(),
which checks to see if we were interrupted early and loops to issue
another TPAUSE if needed.

-Tony


* Re: [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay
  2020-02-26 21:10   ` Andi Kleen
  2020-02-26 21:20     ` Luck, Tony
@ 2020-02-26 21:31     ` Fenghua Yu
  1 sibling, 0 replies; 8+ messages in thread
From: Fenghua Yu @ 2020-02-26 21:31 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Kyung Min Park, x86, linux-kernel, tglx, mingo, hpa, gregkh,
	tony.luck, ashok.raj, ravi.v.shankar

On Wed, Feb 26, 2020 at 01:10:40PM -0800, Andi Kleen wrote:
> On Wed, Feb 26, 2020 at 11:10:58AM -0800, Kyung Min Park wrote:
> > TPAUSE instructs the processor to enter an implementation-dependent
> > optimized state. The instruction execution wakes up when the time-stamp
> > counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
> > The instruction execution also wakes up due to the expiration of
> > the operating system time-limit or by an external interrupt
> 
> This is actually a behavior change. Today's udelay() will continue
> after processing the interrupt. Your patches don't
> 
> I don't think it's a problem though. The interrupt will cause
> a long enough delay that exceeds any reasonable udelay() requirements.
> 
> There would be a difference if someone did really long udelay()s, much
> longer than typical interrupts; in that case you might end up
> with a truncated udelay, but such long udelays are not something that we
> would encourage.

TPAUSE is in a loop which checks whether this udelay() has exceeded its
deadline. Coming back from an interrupt, the loop checks the deadline and,
if there is still time left to delay, udelay() goes back to TPAUSE.
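
Roughly (sketch of the loop added in patch 1/2, where loops is a number
of TSC cycles):

	start = rdtsc_ordered();
	for (;;) {
		wait_func(start, loops);	/* TPAUSE or MWAITX */
		end = rdtsc_ordered();

		if (loops <= end - start)
			break;

		loops -= end - start;
		start = end;
	}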

Thanks.

-Fenghua


* Re: [PATCH 2/2] x86/asm/delay: Introduce TPAUSE delay
  2020-02-26 21:20     ` Luck, Tony
@ 2020-02-26 21:59       ` Andi Kleen
  0 siblings, 0 replies; 8+ messages in thread
From: Andi Kleen @ 2020-02-26 21:59 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Kyung Min Park, x86, linux-kernel, tglx, mingo, hpa, gregkh,
	ashok.raj, ravi.v.shankar, fenghua.yu

On Wed, Feb 26, 2020 at 01:20:34PM -0800, Luck, Tony wrote:
> On Wed, Feb 26, 2020 at 01:10:40PM -0800, Andi Kleen wrote:
> > On Wed, Feb 26, 2020 at 11:10:58AM -0800, Kyung Min Park wrote:
> > > TPAUSE instructs the processor to enter an implementation-dependent
> > > optimized state. The instruction execution wakes up when the time-stamp
> > > counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
> > > The instruction execution also wakes up due to the expiration of
> > > the operating system time-limit or by an external interrupt
> > 
> > This is actually a behavior change. Today's udelay() will continue
> > after processing the interrupt. Your patches don't
> 
> The instruction-level TPAUSE is called inside delay_iterate(),
> which checks to see if we were interrupted early and loops to issue
> another TPAUSE if needed.

Ah right. It was already solved for mwaitx. Great.

-Andi


* Re: [PATCH 1/2] x86/asm: Define a few helpers in delay_waitx()
  2020-02-26 19:10 ` [PATCH 1/2] x86/asm: Define a few helpers in delay_waitx() Kyung Min Park
@ 2020-03-18 20:19   ` Thomas Gleixner
  0 siblings, 0 replies; 8+ messages in thread
From: Thomas Gleixner @ 2020-03-18 20:19 UTC (permalink / raw)
  To: Kyung Min Park, x86, linux-kernel
  Cc: mingo, hpa, gregkh, ak, tony.luck, ashok.raj, ravi.v.shankar, fenghua.yu

Hi!

Kyung Min Park <kyung.min.park@intel.com> writes:

The subsystem prefix wants to be 'x86/delay:' please. This has nothing to
do with asm. Applies to both patches.

"Define a few helpers in delay_waitx()" misses 'm' before 'wait' and is
not really a precise short description of the change. Something like:

  x86/delay: Refactor delay_mwaitx() for TPAUSE support

makes it pretty clear what this is about, right?

> -static void delay_mwaitx(unsigned long __loops)
> +static void mwaitx(u64 unused, u64 loops)

This is just wrong. mwaitx() is not what this function does. mwaitx is
associated to the actual instruction name. The original name was pretty
clear what this function does. Ditto for the tpause() implementation.
See below.

>  {

Also the loops argument/variable names are really misleading. It is TSC
cycles here. I know you just moved code around, but there is nothing
wrong with cleaning it up while at it.

> -	u64 start, end, delay, loops = __loops;
> +	u64 delay;
> +
> +	delay = min_t(u64, MWAITX_MAX_LOOPS, loops);

MAX_LOOPS is equally bogus. Yes I know it's not your fault ...

> +	/*
> +	 * Use cpu_tss_rw as a cacheline-aligned, seldomly
> +	 * accessed per-cpu variable as the monitor target.
> +	 */
> +	__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
> +
> +	/*
> +	 * AMD, like Intel, supports the EAX hint and EAX=0xf
> +	 * means, do not enter any deep C-state and we use it
> +	 * here in delay() to minimize wakeup latency.

You moved the comments one indentation level up. So the comments can use
the full width now, right?

> +	 */
> +	__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
> +}
> +
> +static void (*wait_func)(u64, u64);

This really wants to be named so it's obvious what this is for. And the
arguments should have names as well.  Aside of that it wants to be
marked __ro_after_init and both function pointers moved to one place and
not randomly sprinkled into the code.

> +
> +/*
> + * Call a vendor specific function to delay for a given
> + * amount of time. Because these functions may return
> + * earlier than requested, check for actual elapsed time
> + * and call again until done.

Comments can use the full width.

> + */
> +static void delay_iterate(unsigned long __loops)

delay_iterate() is weird. Granted, it iterates, but it's still a delay
variant.

Contrary to delay_loop() and delay_tsc() it halts the CPU. The iteration
is an implementation detail required to deal with early return from the
halting instruction.

As this is based on halting the CPU the obvious function name is
delay_halt(). Then the new function pointer becomes delay_halt_fn. And
the actual functions are delay_halt_mwaitx() and then
delay_halt_tpause(). So it's all consistent.

Hmm?
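
Something like this perhaps (completely untested, just to spell out the
naming; the loop body is your delay_iterate() unchanged, and mwaitx() /
tpause() would become delay_halt_mwaitx() / delay_halt_tpause()):

	static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init;

	static void delay_halt(u64 __cycles)
	{
		u64 start, end, cycles = __cycles;

		if (!cycles)
			return;

		start = rdtsc_ordered();
		for (;;) {
			delay_halt_fn(start, cycles);
			end = rdtsc_ordered();

			if (cycles <= end - start)
				break;

			cycles -= end - start;
			start = end;
		}
	}

	void use_mwaitx_delay(void)
	{
		delay_halt_fn = delay_halt_mwaitx;
		delay_fn = delay_halt;
	}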


> +{
> +	u64 start, end, loops = __loops;

Same comment vs. loops / cycles

>  
> @@ -134,22 +149,23 @@ static void delay_mwaitx(unsigned long __loops)
>   * Since we calibrate only once at boot, this
>   * function should be set once at boot and not changed
>   */
> -static void (*delay_fn)(unsigned long) = delay_loop;
> +static void (*delay_platform)(unsigned long) = delay_loop;

What's wrong with delay_fn?

delay_platform does not make any sense to me. How is delay_loop or
delay_tsc a platform? Naming really wants to be self explaining.

Also that variable wants to be __ro_after_init and the comment above it
wants to be cleaned up as well. 'should be set' is just wrong. It is
only set once at boot time.

In patch 2/2:

> +	/* Hard code the deeper (C0.2) sleep state because exit latency is
> +	 * small compared to the "microseconds" that usleep() will delay.
> +	 */

Please use proper comment style. This is not networking code.

Find below a cleanup patch for the file which wants to go before these
changes.

Thanks,

        tglx
----
Subject: x86/delay: Preparatory code cleanup
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Mar 2020 17:01:31 +0100

The naming conventions in the delay code are confusing at best.

All delay variants use a loops argument and or variable which originates
from the original delay_loop() implementation. But all variants except
delay_loop() are based on TSC cycles.

Rename the argument to cycles and make it type u64 to avoid these weird
expansions to u64 in the functions.

Rename MWAITX_MAX_LOOPS to MWAITX_MAX_WAIT_CYCLES for the same reason and
fixup the comment of delay_mwaitx() as well.

Mark the delay_fn function pointer __ro_after_init and fixup the comment
for it.

No functional change and preparation for the upcoming TPAUSE based delay
variant.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/mwait.h |    2 +-
 arch/x86/lib/delay.c         |   43 +++++++++++++++++++++++--------------------
 2 files changed, 24 insertions(+), 21 deletions(-)

--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -20,7 +20,7 @@
 
 #define MWAIT_ECX_INTERRUPT_BREAK	0x1
 #define MWAITX_ECX_TIMER_ENABLE		BIT(1)
-#define MWAITX_MAX_LOOPS		((u32)-1)
+#define MWAITX_MAX_WAIT_CYCLES		UINT_MAX
 #define MWAITX_DISABLE_CSTATES		0xf0
 
 static inline void __monitor(const void *eax, unsigned long ecx,
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -27,9 +27,19 @@
 # include <asm/smp.h>
 #endif
 
+static void delay_loop(u64 __loops);
+
+/*
+ * Calibration and selection of the delay mechanism happens only once
+ * during boot.
+ */
+static void (*delay_fn)(u64) __ro_after_init = delay_loop;
+
 /* simple loop based delay: */
-static void delay_loop(unsigned long loops)
+static void delay_loop(u64 __loops)
 {
+	unsigned long loops = (unsigned long) __loops;
+
 	asm volatile(
 		"	test %0,%0	\n"
 		"	jz 3f		\n"
@@ -49,9 +59,9 @@ static void delay_loop(unsigned long loo
 }
 
 /* TSC based delay: */
-static void delay_tsc(unsigned long __loops)
+static void delay_tsc(u64 cycles)
 {
-	u64 bclock, now, loops = __loops;
+	u64 bclock, now;
 	int cpu;
 
 	preempt_disable();
@@ -59,7 +69,7 @@ static void delay_tsc(unsigned long __lo
 	bclock = rdtsc_ordered();
 	for (;;) {
 		now = rdtsc_ordered();
-		if ((now - bclock) >= loops)
+		if ((now - bclock) >= cycles)
 			break;
 
 		/* Allow RT tasks to run */
@@ -77,7 +87,7 @@ static void delay_tsc(unsigned long __lo
 		 * counter for this CPU.
 		 */
 		if (unlikely(cpu != smp_processor_id())) {
-			loops -= (now - bclock);
+			cycles -= (now - bclock);
 			cpu = smp_processor_id();
 			bclock = rdtsc_ordered();
 		}
@@ -87,24 +97,24 @@ static void delay_tsc(unsigned long __lo
 
 /*
  * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
- * counts with TSC frequency. The input value is the loop of the
- * counter, it will exit when the timer expires.
+ * counts with TSC frequency. The input value is the number of TSC cycles
+ * to wait. MWAITX will also exit when the timer expires.
  */
-static void delay_mwaitx(unsigned long __loops)
+static void delay_mwaitx(u64 cycles)
 {
-	u64 start, end, delay, loops = __loops;
+	u64 start, end, delay;
 
 	/*
 	 * Timer value of 0 causes MWAITX to wait indefinitely, unless there
 	 * is a store on the memory monitored by MONITORX.
 	 */
-	if (loops == 0)
+	if (!cycles)
 		return;
 
 	start = rdtsc_ordered();
 
 	for (;;) {
-		delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
+		delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles);
 
 		/*
 		 * Use cpu_tss_rw as a cacheline-aligned, seldomly
@@ -121,21 +131,14 @@ static void delay_mwaitx(unsigned long _
 
 		end = rdtsc_ordered();
 
-		if (loops <= end - start)
+		if (cycles <= end - start)
 			break;
 
-		loops -= end - start;
-
+		cycles -= end - start;
 		start = end;
 	}
 }
 
-/*
- * Since we calibrate only once at boot, this
- * function should be set once at boot and not changed
- */
-static void (*delay_fn)(unsigned long) = delay_loop;
-
 void use_tsc_delay(void)
 {
 	if (delay_fn == delay_loop)

