linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: ke.yu@intel.com, kevin.tian@intel.com, JBeulich@novell.com,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org
Cc: xen-devel@lists.xensource.com,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Subject: [PATCH 2/3] xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
Date: Tue, 14 Feb 2012 00:06:48 -0500	[thread overview]
Message-ID: <1329196009-25268-3-git-send-email-konrad.wilk@oracle.com> (raw)
In-Reply-To: <1329196009-25268-1-git-send-email-konrad.wilk@oracle.com>

For the hypervisor to take advantage of the MWAIT support it needs
to extract from the ACPI _CST the register address. But the
hypervisor does not have the support to parse DSDT so it relies on
the initial domain (dom0) to parse the ACPI Power Management information
and push it up to the hypervisor. The pushing of the data is done
by the processor_harveset module which parses the information that
the ACPI parser has graciously exposed in 'struct acpi_processor'.

For the ACPI parser to also expose the Cx states for MWAIT, we need
to expose the MWAIT capability (leaf 1). Furthermore we also need to
expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly
function.

The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX
operations, but it can't do it since it needs to be backwards compatible.
Instead we choose to use the native CPUID to figure out if the MWAIT
capability exists and use the XEN_SET_PDC query hypercall to figure out
if the hypervisor wants us to expose the MWAIT_LEAF capability or not.

Note: The XEN_SET_PDC query was implemented in c/s 23783:
"ACPI: add _PDC input override mechanism".

With this in place, instead of
 C3 ACPI IOPORT 415
we get now
 C3:ACPI FFH INTEL MWAIT 0x20

Note: The cpu_idle which would be calling the mwait variants for idling
never gets set b/c we set the default pm_idle to be the hypercall variant.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c         |   92 +++++++++++++++++++++++++++++++++++++-
 include/xen/interface/platform.h |    4 +-
 2 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 12eb07b..4c82936 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -62,6 +62,14 @@
 #include <asm/reboot.h>
 #include <asm/stackprotector.h>
 #include <asm/hypervisor.h>
+#include <asm/mwait.h>
+
+#ifdef CONFIG_ACPI
+#include <asm/acpi.h>
+#include <acpi/pdc_intel.h>
+#include <acpi/processor.h>
+#include <xen/interface/platform.h>
+#endif
 
 #include "xen-ops.h"
 #include "mmu.h"
@@ -200,13 +208,17 @@ static void __init xen_banner(void)
 static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
 static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
 
+static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
+static __read_mostly unsigned int cpuid_leaf5_ecx_val;
+static __read_mostly unsigned int cpuid_leaf5_edx_val;
+
 static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 		      unsigned int *cx, unsigned int *dx)
 {
 	unsigned maskebx = ~0;
 	unsigned maskecx = ~0;
 	unsigned maskedx = ~0;
-
+	unsigned setecx = 0;
 	/*
 	 * Mask out inconvenient features, to try and disable as many
 	 * unsupported kernel subsystems as possible.
@@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	switch (*ax) {
 	case 1:
 		maskecx = cpuid_leaf1_ecx_mask;
+		setecx = cpuid_leaf1_ecx_set_mask;
 		maskedx = cpuid_leaf1_edx_mask;
 		break;
 
+	case CPUID_MWAIT_LEAF:
+		/* Synthesize the values.. */
+		*ax = 0;
+		*bx = 0;
+		*cx = cpuid_leaf5_ecx_val;
+		*dx = cpuid_leaf5_edx_val;
+		return;
+
 	case 0xb:
 		/* Suppress extended topology stuff */
 		maskebx = 0;
@@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 
 	*bx &= maskebx;
 	*cx &= maskecx;
+	*cx |= setecx;
 	*dx &= maskedx;
+
 }
 
+static bool __init xen_check_mwait(void)
+{
+#if CONFIG_ACPI
+	struct xen_platform_op op = {
+		.cmd			= XENPF_set_processor_pminfo,
+		.u.set_pminfo.id	= -1,
+		.u.set_pminfo.type	= XEN_PM_PDC,
+	};
+	uint32_t buf[3];
+	unsigned int ax, bx, cx, dx;
+	unsigned int mwait_mask;
+
+	/* We need to determine whether it is OK to expose the MWAIT
+	 * capability to the kernel to harvest deeper than C3 states from ACPI
+	 * _CST using the processor_harvest.c module. For this to work, we
+	 * need to gather the MWAIT_LEAF values (which the cstate.c code
+	 * checks against). The hypervisor won't expose the MWAIT flag because
+	 * it would break backwards compatibility; so we will find out directly
+	 * from the hardware and hypercall.
+	 */
+	if (!xen_initial_domain())
+		return false;
+
+	ax = 1;
+	cx = 0;
+
+	native_cpuid(&ax, &bx, &cx, &dx);
+
+	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
+		     (1 << (X86_FEATURE_MWAIT % 32));
+
+	if ((cx & mwait_mask) != mwait_mask)
+		return false;
+
+	/* We need to emulate the MWAIT_LEAF and for that we need both
+	 * ecx and edx. The hypercall provides only partial information.
+	 */
+
+	ax = CPUID_MWAIT_LEAF;
+	bx = 0;
+	cx = 0;
+	dx = 0;
+
+	native_cpuid(&ax, &bx, &cx, &dx);
+
+	/* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
+	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
+	 */
+	buf[0] = ACPI_PDC_REVISION_ID;
+	buf[1] = 1;
+	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
+
+	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+
+	if ((HYPERVISOR_dom0_op(&op) == 0) &&
+	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
+		cpuid_leaf5_ecx_val = cx;
+		cpuid_leaf5_edx_val = dx;
+	}
+	return true;
+#else
+	return false;
+#endif
+}
 static void __init xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
@@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void)
 	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
 	if ((cx & xsave_mask) != xsave_mask)
 		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
+
+	if (xen_check_mwait())
+		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
 }
 
 static void xen_set_debugreg(int reg, unsigned long val)
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index c168468..6220b98 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
 #define XEN_PM_CX   0
 #define XEN_PM_PX   1
 #define XEN_PM_TX   2
-
+#define XEN_PM_PDC  3
 /* Px sub info type */
 #define XEN_PX_PCT   1
 #define XEN_PX_PSS   2
@@ -286,6 +286,7 @@ struct xen_processor_performance {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
 
+DEFINE_GUEST_HANDLE(uint32_t);
 struct xenpf_set_processor_pminfo {
 	/* IN variables */
 	uint32_t id;    /* ACPI CPU ID */
@@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo {
 	union {
 		struct xen_processor_power          power;/* Cx: _CST/_CSD */
 		struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+		GUEST_HANDLE(uint32_t)              pdc;
 	};
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
-- 
1.7.9.48.g85da4d


  parent reply	other threads:[~2012-02-14  5:10 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-14  5:06 [RFC] acpi processor and cpufreq harester - aka pipe all of that up to the hypervisor (v3) Konrad Rzeszutek Wilk
2012-02-14  5:06 ` [PATCH 1/3] xen/setup/pm/acpi: Remove the call to boot_option_idle_override Konrad Rzeszutek Wilk
2012-02-14  5:06 ` Konrad Rzeszutek Wilk [this message]
2012-02-14  5:06 ` [PATCH 3/3] xen/acpi/cpufreq: Provide an driver that passes struct acpi_processor data to the hypervisor Konrad Rzeszutek Wilk
2012-02-14 18:30 ` [Xen-devel] [RFC] acpi processor and cpufreq harester - aka pipe all of that up to the hypervisor (v3) Pasi Kärkkäinen
2012-02-15 16:33   ` Konrad Rzeszutek Wilk
2012-02-21  0:07   ` [RFC] follow-on patches to acpi processor and cpufreq harvester^H^H^Hpassthru (v4) Konrad Rzeszutek Wilk
2012-02-21  0:07     ` [PATCH 1/3] xen/processor-passthru: Change the name to processor-passthru Konrad Rzeszutek Wilk
2012-02-21  0:07     ` [PATCH 2/3] xen/processor-passthru: Support vCPU != pCPU - aka dom0_max_vcpus Konrad Rzeszutek Wilk
2012-02-21  0:07     ` [PATCH 3/3] xen/processor-passthru: Remove the print_hex_dump - as it is difficult to decipher it Konrad Rzeszutek Wilk
2012-02-23 22:31 [PATCH] processor passthru - upload _Cx and _Pxx data to hypervisor (v5) Konrad Rzeszutek Wilk
2012-02-23 22:31 ` [PATCH 2/3] xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it Konrad Rzeszutek Wilk
2012-02-24 10:32   ` Jan Beulich
2012-02-24 23:52     ` Konrad Rzeszutek Wilk
2012-02-27  8:14       ` Jan Beulich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1329196009-25268-3-git-send-email-konrad.wilk@oracle.com \
    --to=konrad.wilk@oracle.com \
    --cc=JBeulich@novell.com \
    --cc=ke.yu@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).