linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: "Konrad Rzeszutek Wilk" <konrad.wilk@oracle.com>
Cc: <ke.yu@intel.com>, <kevin.tian@intel.com>, <lenb@kernel.org>,
	<xen-devel@lists.xensource.com>, <linux-acpi@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 2/3] xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
Date: Fri, 24 Feb 2012 10:32:17 +0000	[thread overview]
Message-ID: <4F4775410200007800074992@nat28.tlf.novell.com> (raw)
In-Reply-To: <1330036270-20015-3-git-send-email-konrad.wilk@oracle.com>

>>> On 23.02.12 at 23:31, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> For the hypervisor to take advantage of the MWAIT support it needs
> to extract from the ACPI _CST the register address. But the
> hypervisor does not have the support to parse DSDT so it relies on
> the initial domain (dom0) to parse the ACPI Power Management information
> and push it up to the hypervisor. The pushing of the data is done
> by the processor_harveset_xen module which parses the information that
> the ACPI parser has graciously exposed in 'struct acpi_processor'.
> 
> For the ACPI parser to also expose the Cx states for MWAIT, we need
> to expose the MWAIT capability (leaf 1). Furthermore we also need to
> expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly
> function.
> 
> The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX
> operations, but it can't do it since it needs to be backwards compatible.
> Instead we choose to use the native CPUID to figure out if the MWAIT
> capability exists and use the XEN_SET_PDC query hypercall to figure out
> if the hypervisor wants us to expose the MWAIT_LEAF capability or not.
> 
> Note: The XEN_SET_PDC query was implemented in c/s 23783:
> "ACPI: add _PDC input override mechanism".
> 
> With this in place, instead of
>  C3 ACPI IOPORT 415
> we get now
>  C3:ACPI FFH INTEL MWAIT 0x20
> 
> Note: The cpu_idle which would be calling the mwait variants for idling
> never gets set b/c we set the default pm_idle to be the hypercall variant.
> 
> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> ---
>  arch/x86/xen/enlighten.c         |   92 
> +++++++++++++++++++++++++++++++++++++-
>  include/xen/interface/platform.h |    4 +-
>  2 files changed, 94 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 12eb07b..4c82936 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -62,6 +62,14 @@
>  #include <asm/reboot.h>
>  #include <asm/stackprotector.h>
>  #include <asm/hypervisor.h>
> +#include <asm/mwait.h>
> +
> +#ifdef CONFIG_ACPI
> +#include <asm/acpi.h>
> +#include <acpi/pdc_intel.h>
> +#include <acpi/processor.h>
> +#include <xen/interface/platform.h>
> +#endif
>  
>  #include "xen-ops.h"
>  #include "mmu.h"
> @@ -200,13 +208,17 @@ static void __init xen_banner(void)
>  static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
>  static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
>  
> +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
> +static __read_mostly unsigned int cpuid_leaf5_ecx_val;
> +static __read_mostly unsigned int cpuid_leaf5_edx_val;
> +
>  static void xen_cpuid(unsigned int *ax, unsigned int *bx,
>  		      unsigned int *cx, unsigned int *dx)
>  {
>  	unsigned maskebx = ~0;
>  	unsigned maskecx = ~0;
>  	unsigned maskedx = ~0;
> -
> +	unsigned setecx = 0;
>  	/*
>  	 * Mask out inconvenient features, to try and disable as many
>  	 * unsupported kernel subsystems as possible.
> @@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int 
> *bx,
>  	switch (*ax) {
>  	case 1:
>  		maskecx = cpuid_leaf1_ecx_mask;
> +		setecx = cpuid_leaf1_ecx_set_mask;
>  		maskedx = cpuid_leaf1_edx_mask;
>  		break;
>  
> +	case CPUID_MWAIT_LEAF:
> +		/* Synthesize the values.. */
> +		*ax = 0;
> +		*bx = 0;
> +		*cx = cpuid_leaf5_ecx_val;
> +		*dx = cpuid_leaf5_edx_val;
> +		return;
> +
>  	case 0xb:
>  		/* Suppress extended topology stuff */
>  		maskebx = 0;
> @@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int 
> *bx,
>  
>  	*bx &= maskebx;
>  	*cx &= maskecx;
> +	*cx |= setecx;
>  	*dx &= maskedx;
> +
>  }
>  
> +static bool __init xen_check_mwait(void)
> +{
> +#if CONFIG_ACPI

#ifdef

> +	struct xen_platform_op op = {
> +		.cmd			= XENPF_set_processor_pminfo,
> +		.u.set_pminfo.id	= -1,
> +		.u.set_pminfo.type	= XEN_PM_PDC,
> +	};
> +	uint32_t buf[3];
> +	unsigned int ax, bx, cx, dx;
> +	unsigned int mwait_mask;
> +
> +	/* We need to determine whether it is OK to expose the MWAIT
> +	 * capability to the kernel to harvest deeper than C3 states from ACPI
> +	 * _CST using the processor_harvest_xen.c module. For this to work, we
> +	 * need to gather the MWAIT_LEAF values (which the cstate.c code
> +	 * checks against). The hypervisor won't expose the MWAIT flag because
> +	 * it would break backwards compatibility; so we will find out directly
> +	 * from the hardware and hypercall.
> +	 */
> +	if (!xen_initial_domain())
> +		return false;
> +
> +	ax = 1;
> +	cx = 0;
> +
> +	native_cpuid(&ax, &bx, &cx, &dx);
> +
> +	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
> +		     (1 << (X86_FEATURE_MWAIT % 32));
> +
> +	if ((cx & mwait_mask) != mwait_mask)
> +		return false;
> +
> +	/* We need to emulate the MWAIT_LEAF and for that we need both
> +	 * ecx and edx. The hypercall provides only partial information.
> +	 */
> +
> +	ax = CPUID_MWAIT_LEAF;
> +	bx = 0;
> +	cx = 0;
> +	dx = 0;
> +
> +	native_cpuid(&ax, &bx, &cx, &dx);
> +
> +	/* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
> +	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
> +	 */
> +	buf[0] = ACPI_PDC_REVISION_ID;
> +	buf[1] = 1;
> +	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
> +
> +	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
> +
> +	if ((HYPERVISOR_dom0_op(&op) == 0) &&
> +	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
> +		cpuid_leaf5_ecx_val = cx;
> +		cpuid_leaf5_edx_val = dx;
> +	}
> +	return true;
> +#else
> +	return false;
> +#endif
> +}
>  static void __init xen_init_cpuid_mask(void)
>  {
>  	unsigned int ax, bx, cx, dx;
> @@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void)
>  	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
>  	if ((cx & xsave_mask) != xsave_mask)
>  		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
> +
> +	if (xen_check_mwait())
> +		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
>  }
>  
>  static void xen_set_debugreg(int reg, unsigned long val)
> diff --git a/include/xen/interface/platform.h 
> b/include/xen/interface/platform.h
> index c168468..6220b98 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
>  #define XEN_PM_CX   0
>  #define XEN_PM_PX   1
>  #define XEN_PM_TX   2
> -
> +#define XEN_PM_PDC  3
>  /* Px sub info type */
>  #define XEN_PX_PCT   1
>  #define XEN_PX_PSS   2
> @@ -286,6 +286,7 @@ struct xen_processor_performance {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
>  
> +DEFINE_GUEST_HANDLE(uint32_t);

Do you really need to introduce (step by step) handles for all those
uintNN_t types in a way different from Xen's
(__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t))?

Looks good to me otherwise.

Jan

>  struct xenpf_set_processor_pminfo {
>  	/* IN variables */
>  	uint32_t id;    /* ACPI CPU ID */
> @@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo {
>  	union {
>  		struct xen_processor_power          power;/* Cx: _CST/_CSD */
>  		struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
> +		GUEST_HANDLE(uint32_t)              pdc;
>  	};
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
> -- 
> 1.7.9.48.g85da4d



  reply	other threads:[~2012-02-24 10:32 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-23 22:31 [PATCH] processor passthru - upload _Cx and _Pxx data to hypervisor (v5) Konrad Rzeszutek Wilk
2012-02-23 22:31 ` [PATCH 1/3] xen/setup/pm/acpi: Remove the call to boot_option_idle_override Konrad Rzeszutek Wilk
2012-02-23 22:31 ` [PATCH 2/3] xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it Konrad Rzeszutek Wilk
2012-02-24 10:32   ` Jan Beulich [this message]
2012-02-24 23:52     ` Konrad Rzeszutek Wilk
2012-02-27  8:14       ` Jan Beulich
2012-02-23 22:31 ` [PATCH 3/3] xen/processor-passthru: Provide an driver that passes struct acpi_processor data to the hypervisor Konrad Rzeszutek Wilk
2012-02-24 10:23 ` [PATCH] processor passthru - upload _Cx and _Pxx data to hypervisor (v5) Jan Beulich
2012-02-24 15:08   ` Konrad Rzeszutek Wilk
2012-02-25  0:21   ` Konrad Rzeszutek Wilk
2012-02-27  8:19     ` Jan Beulich
  -- strict thread matches above, loose matches on Subject: below --
2012-02-14  5:06 [RFC] acpi processor and cpufreq harester - aka pipe all of that up to the hypervisor (v3) Konrad Rzeszutek Wilk
2012-02-14  5:06 ` [PATCH 2/3] xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F4775410200007800074992@nat28.tlf.novell.com \
    --to=jbeulich@suse.com \
    --cc=ke.yu@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=konrad.wilk@oracle.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).