linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode
@ 2019-10-23  7:49 Yin Fengwei
  2019-10-23  8:45 ` David Laight
  0 siblings, 1 reply; 5+ messages in thread
From: Yin Fengwei @ 2019-10-23  7:49 UTC (permalink / raw)
  To: linux-kernel, linux-acpi, rjw, lenb, David.Laight; +Cc: fengwei.yin

In function acpi_idle_do_entry(), an I/O port access is used as a dummy
wait to guarantee hardware behavior. But it can trigger an unnecessary
VM exit if the kernel is running as a guest in a virtualized
environment.

If the kernel is running in a virtualized environment, the operation
that enters the deeper C state (inb()) will trap to the hypervisor.
There is no need for a dummy wait after the inb() call, so we remove
the dummy I/O port access to avoid an unnecessary VM exit.

We keep the dummy I/O port access to maintain timing in the native
environment.

Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
ChangeLog:
v2 -> v3:
 - Remove dummy io port access totally for virtualization env.

v1 -> v2:
 - Use ndelay instead of dead loop for dummy delay.

 drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..0c4a97dd6917 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
 static
 DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
 
+static void (*dummy_wait)(u64 address);
+
+static void default_dummy_wait(u64 address)
+{
+	inl(address);
+}
+
+static void default_noop_wait(u64 address)
+{
+}
+
 static int disabled_by_idle_boot_param(void)
 {
 	return boot_option_idle_override == IDLE_POLL ||
@@ -660,8 +671,13 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
 		inb(cx->address);
 		/* Dummy wait op - must do something useless after P_LVL2 read
 		   because chipsets cannot guarantee that STPCLK# signal
-		   gets asserted in time to freeze execution properly. */
-		inl(acpi_gbl_FADT.xpm_timer_block.address);
+		   gets asserted in time to freeze execution properly.
+
+		   This dummy wait is only needed for native env. If we are running
+		   as guest of a hypervisor, we don't need wait op here. We have
+		   different implementation for dummy_wait on native/virtual env. */
+
+		dummy_wait(acpi_gbl_FADT.xpm_timer_block.address);
 	}
 }
 
@@ -683,7 +699,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 			inb(cx->address);
 			/* See comment in acpi_idle_do_entry() */
-			inl(acpi_gbl_FADT.xpm_timer_block.address);
+			dummy_wait(acpi_gbl_FADT.xpm_timer_block.address);
 		} else
 			return -ENODEV;
 	}
@@ -912,6 +928,20 @@ static inline void acpi_processor_cstate_first_run_checks(void)
 			  max_cstate);
 	first_run++;
 
+	dummy_wait = default_dummy_wait;
+
+#ifdef	CONFIG_X86
+	/* For x86, if we are running in guest, we don't need extra
+	 * access ioport as dummy wait.
+	 */
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		pr_err("We are in virtual env");
+		dummy_wait = default_noop_wait;
+	} else {
+		pr_err("We are not in virtual env");
+	}
+#endif
+
 	if (acpi_gbl_FADT.cst_control && !nocst) {
 		status = acpi_os_write_port(acpi_gbl_FADT.smi_command,
 					    acpi_gbl_FADT.cst_control, 8);
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* RE: [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode
  2019-10-23  7:49 [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode Yin Fengwei
@ 2019-10-23  8:45 ` David Laight
  2019-10-23  9:03   ` Rafael J. Wysocki
  2019-10-24  1:22   ` Yin Fengwei
  0 siblings, 2 replies; 5+ messages in thread
From: David Laight @ 2019-10-23  8:45 UTC (permalink / raw)
  To: 'Yin Fengwei', linux-kernel, linux-acpi, rjw, lenb

From: Yin Fengwei
> Sent: 23 October 2019 08:50


> In function acpi_idle_do_entry(), an ioport access is used for dummy
> wait to guarantee hardware behavior. But it could trigger unnecessary
> vmexit if kernel is running as guest in virtualization environtment.
> 
> If it's in virtualization environment, the deeper C state enter
> operation (inb()) will trap to hyervisor. It's not needed to do
> dummy wait after the inb() call. So we remove the dummy io port
> access to avoid unnecessary VMexit.
> 
> We keep dummy io port access to maintain timing for native environment.
> 
> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
> ---
> ChangeLog:
> v2 -> v3:
>  - Remove dummy io port access totally for virtualization env.
> 
> v1 -> v2:
>  - Use ndelay instead of dead loop for dummy delay.
> 
>  drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
>  1 file changed, 33 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> index ed56c6d20b08..0c4a97dd6917 100644
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
>  static
>  DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
> 
> +static void (*dummy_wait)(u64 address);
> +
> +static void default_dummy_wait(u64 address)
> +{
> +	inl(address);
> +}
> +
> +static void default_noop_wait(u64 address)
> +{
> +}
> +

Overengineered...
Just add:

static void wait_for_freeze(void)
{
#ifdef	CONFIG_X86
	/* No delay is needed if we are a guest */
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;
#endif
	/* Dummy wait op - must do something useless after P_LVL2 read
	   because chipsets cannot guarantee that STPCLK# signal
	   gets asserted in time to freeze execution properly. */
	inl(acpi_gbl_FADT.xpm_timer_block.address);
}

and use it to replace the inl().

...
> +#ifdef	CONFIG_X86
> +	/* For x86, if we are running in guest, we don't need extra
> +	 * access ioport as dummy wait.
> +	 */
> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
> +		pr_err("We are in virtual env");
> +		dummy_wait = default_noop_wait;
> +	} else {
> +		pr_err("We are not in virtual env");
> +	}
> +#endif

WTF are the pr_err() for???

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode
  2019-10-23  8:45 ` David Laight
@ 2019-10-23  9:03   ` Rafael J. Wysocki
  2019-10-24  1:22     ` Yin Fengwei
  2019-10-24  1:22   ` Yin Fengwei
  1 sibling, 1 reply; 5+ messages in thread
From: Rafael J. Wysocki @ 2019-10-23  9:03 UTC (permalink / raw)
  To: David Laight; +Cc: Yin Fengwei, linux-kernel, linux-acpi, rjw, lenb

On Wed, Oct 23, 2019 at 10:45 AM David Laight <David.Laight@aculab.com> wrote:
>
> From: Yin Fengwei
> > Sent: 23 October 2019 08:50
>
>
> > In function acpi_idle_do_entry(), an ioport access is used for dummy
> > wait to guarantee hardware behavior. But it could trigger unnecessary
> > vmexit if kernel is running as guest in virtualization environtment.
> >
> > If it's in virtualization environment, the deeper C state enter
> > operation (inb()) will trap to hyervisor. It's not needed to do
> > dummy wait after the inb() call. So we remove the dummy io port
> > access to avoid unnecessary VMexit.
> >
> > We keep dummy io port access to maintain timing for native environment.
> >
> > Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
> > ---
> > ChangeLog:
> > v2 -> v3:
> >  - Remove dummy io port access totally for virtualization env.
> >
> > v1 -> v2:
> >  - Use ndelay instead of dead loop for dummy delay.
> >
> >  drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
> >  1 file changed, 33 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> > index ed56c6d20b08..0c4a97dd6917 100644
> > --- a/drivers/acpi/processor_idle.c
> > +++ b/drivers/acpi/processor_idle.c
> > @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
> >  static
> >  DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
> >
> > +static void (*dummy_wait)(u64 address);
> > +
> > +static void default_dummy_wait(u64 address)
> > +{
> > +     inl(address);
> > +}
> > +
> > +static void default_noop_wait(u64 address)
> > +{
> > +}
> > +
>
> Overengineered...
> Just add:
>
> static void wait_for_freeze(void)
> {
> #ifdef  CONFIG_X86
>         /* No delay is needed if we are a guest */
>         if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
>                 return;
> #endif
>
>         /* Dummy wait op - must do something useless after P_LVL2 read
>            because chipsets cannot guarantee that STPCLK# signal
>            gets asserted in time to freeze execution properly. */
>         inl(acpi_gbl_FADT.xpm_timer_block.address);
> }
>
> and use it to replace the inl().

I was about to make a similar comment.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode
  2019-10-23  8:45 ` David Laight
  2019-10-23  9:03   ` Rafael J. Wysocki
@ 2019-10-24  1:22   ` Yin Fengwei
  1 sibling, 0 replies; 5+ messages in thread
From: Yin Fengwei @ 2019-10-24  1:22 UTC (permalink / raw)
  To: David Laight, linux-kernel, linux-acpi, rjw, lenb



On 2019/10/23 下午4:45, David Laight wrote:
> From: Yin Fengwei
>> Sent: 23 October 2019 08:50
> 
> 
>> In function acpi_idle_do_entry(), an ioport access is used for dummy
>> wait to guarantee hardware behavior. But it could trigger unnecessary
>> vmexit if kernel is running as guest in virtualization environtment.
>>
>> If it's in virtualization environment, the deeper C state enter
>> operation (inb()) will trap to hyervisor. It's not needed to do
>> dummy wait after the inb() call. So we remove the dummy io port
>> access to avoid unnecessary VMexit.
>>
>> We keep dummy io port access to maintain timing for native environment.
>>
>> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
>> ---
>> ChangeLog:
>> v2 -> v3:
>>   - Remove dummy io port access totally for virtualization env.
>>
>> v1 -> v2:
>>   - Use ndelay instead of dead loop for dummy delay.
>>
>>   drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
>>   1 file changed, 33 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
>> index ed56c6d20b08..0c4a97dd6917 100644
>> --- a/drivers/acpi/processor_idle.c
>> +++ b/drivers/acpi/processor_idle.c
>> @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
>>   static
>>   DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
>>
>> +static void (*dummy_wait)(u64 address);
>> +
>> +static void default_dummy_wait(u64 address)
>> +{
>> +	inl(address);
>> +}
>> +
>> +static void default_noop_wait(u64 address)
>> +{
>> +}
>> +
> 
> Overengineered...
> Just add:
> 
> static void wait_for_freeze(void)
> {
> #ifdef	CONFIG_X86
> 	/* No delay is needed if we are a guest */
> 	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> 		return;
> #endif
> 	/* Dummy wait op - must do something useless after P_LVL2 read
> 	   because chipsets cannot guarantee that STPCLK# signal
> 	   gets asserted in time to freeze execution properly. */
> 	inl(acpi_gbl_FADT.xpm_timer_block.address);
> }
> 
> and use it to replace the inl().
OK. I was trying to avoid any impact on the native case.

> 
> ...
>> +#ifdef	CONFIG_X86
>> +	/* For x86, if we are running in guest, we don't need extra
>> +	 * access ioport as dummy wait.
>> +	 */
>> +	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
>> +		pr_err("We are in virtual env");
>> +		dummy_wait = default_noop_wait;
>> +	} else {
>> +		pr_err("We are not in virtual env");
>> +	}
>> +#endif
> 
> WTF are the pr_err() for???
Sorry. Didn't remove my debug code...

Regards
Yin, Fengwei

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode
  2019-10-23  9:03   ` Rafael J. Wysocki
@ 2019-10-24  1:22     ` Yin Fengwei
  0 siblings, 0 replies; 5+ messages in thread
From: Yin Fengwei @ 2019-10-24  1:22 UTC (permalink / raw)
  To: Rafael J. Wysocki, David Laight; +Cc: linux-kernel, linux-acpi, rjw, lenb



On 2019/10/23 下午5:03, Rafael J. Wysocki wrote:
> On Wed, Oct 23, 2019 at 10:45 AM David Laight <David.Laight@aculab.com> wrote:
>>
>> From: Yin Fengwei
>>> Sent: 23 October 2019 08:50
>>
>>
>>> In function acpi_idle_do_entry(), an ioport access is used for dummy
>>> wait to guarantee hardware behavior. But it could trigger unnecessary
>>> vmexit if kernel is running as guest in virtualization environtment.
>>>
>>> If it's in virtualization environment, the deeper C state enter
>>> operation (inb()) will trap to hyervisor. It's not needed to do
>>> dummy wait after the inb() call. So we remove the dummy io port
>>> access to avoid unnecessary VMexit.
>>>
>>> We keep dummy io port access to maintain timing for native environment.
>>>
>>> Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
>>> ---
>>> ChangeLog:
>>> v2 -> v3:
>>>   - Remove dummy io port access totally for virtualization env.
>>>
>>> v1 -> v2:
>>>   - Use ndelay instead of dead loop for dummy delay.
>>>
>>>   drivers/acpi/processor_idle.c | 36 ++++++++++++++++++++++++++++++++---
>>>   1 file changed, 33 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
>>> index ed56c6d20b08..0c4a97dd6917 100644
>>> --- a/drivers/acpi/processor_idle.c
>>> +++ b/drivers/acpi/processor_idle.c
>>> @@ -58,6 +58,17 @@ struct cpuidle_driver acpi_idle_driver = {
>>>   static
>>>   DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate);
>>>
>>> +static void (*dummy_wait)(u64 address);
>>> +
>>> +static void default_dummy_wait(u64 address)
>>> +{
>>> +     inl(address);
>>> +}
>>> +
>>> +static void default_noop_wait(u64 address)
>>> +{
>>> +}
>>> +
>>
>> Overengineered...
>> Just add:
>>
>> static void wait_for_freeze(void)
>> {
>> #ifdef  CONFIG_X86
>>          /* No delay is needed if we are a guest */
>>          if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
>>                  return;
>> #endif
>>
>>          /* Dummy wait op - must do something useless after P_LVL2 read
>>             because chipsets cannot guarantee that STPCLK# signal
>>             gets asserted in time to freeze execution properly. */
>>          inl(acpi_gbl_FADT.xpm_timer_block.address);
>> }
>>
>> and use it to replace the inl().
> 
> I was about to make a similar comment.
OK. Will send v4 soon.

Regards
Yin, Fengwei

> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-10-24  1:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-23  7:49 [PATCH v3] ACPI/processor_idle: Remove dummy wait if kernel is in guest mode Yin Fengwei
2019-10-23  8:45 ` David Laight
2019-10-23  9:03   ` Rafael J. Wysocki
2019-10-24  1:22     ` Yin Fengwei
2019-10-24  1:22   ` Yin Fengwei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).