linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] Using per_cpu(processor) information to pipe it up the hypervisor. v1
@ 2012-01-31 15:32 Konrad Rzeszutek Wilk
  2012-01-31 15:32 ` [PATCH] xen/acpi: Provide a ACPI driver that sends "processor" data to the hypervisor Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 2+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-01-31 15:32 UTC (permalink / raw)
  To: linux-kernel, lenb, len.brown, linux-acpi; +Cc: xen-devel

I was wondering if you could advise me on two questions:
 - How to "get" ACPI PM information for processors that are down (either b/c of
   maxcpus=X or they are hot-plugged). The per_cpu(processor) is a great location
   to get all of this - but sadly it is not filled with information for dead CPUs.
   Is there another way to get this data?

 - Where should such a driver (see the patch please) live? I was thinking drivers/acpi/
   but since the "processor" is an exported symbol it could live in drivers/xen as well?
   What is your feeling about it?

Short description of the patch: It simply parses the "per_cpu(processor)" data and
pipes it up to the hypervisor and then unloads itself. It is a much shorter version
of https://lkml.org/lkml/2011/11/30/245, but it does have the caveat that it does
not work with 'maxcpus' or with hot-plugged CPUs or with limiting the amount of
CPUs a guest can see.

P.S.
Also the name of it stinks. 'Sink' or 'plumber' initially came to my mind. Any
thoughts of a better name?

Thanks!

 drivers/xen/Kconfig         |    5 +
 drivers/xen/Makefile        |    2 +-
 drivers/xen/acpi_xen_sink.c |  265 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 271 insertions(+), 1 deletions(-)


^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH] xen/acpi: Provide a ACPI driver that sends "processor" data to the hypervisor.
  2012-01-31 15:32 [RFC] Using per_cpu(processor) information to pipe it up the hypervisor. v1 Konrad Rzeszutek Wilk
@ 2012-01-31 15:32 ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 2+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-01-31 15:32 UTC (permalink / raw)
  To: linux-kernel, lenb, len.brown, linux-acpi
  Cc: xen-devel, Konrad Rzeszutek Wilk

The ACPI processor processes the _Pxx and the _Cx state information
which are populated in the 'processor' per-cpu structure. We read
the contents of that structure and pipe it up the Xen hypervisor.

We assume that the ACPI processor is smart and did all the filtering
work so that the contents are correct. After we are done parsing
the information, we unload ourselves and let the hypervisor deal
with cpufreq, cpuidle states and such.

Note: This only works right now under Intel CPUs, b/c the Xen hypervisor
does not properly process the AMD MSR_PSTATE_CUR_LIMIT under AMD.

For Intel the hypervisor needs this patch
http://old-list-archives.xen.org/archives/html/xen-devel/2011-08/msg00511.html

which passes in the MWAIT CPU attribute.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/xen/Kconfig         |    5 +
 drivers/xen/Makefile        |    2 +-
 drivers/xen/acpi_xen_sink.c |  265 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 271 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/acpi_xen_sink.c

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a1ced52..747ef17 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -178,4 +178,9 @@ config XEN_PRIVCMD
 	depends on XEN
 	default m
 
+config XEN_ACPI_SINK
+	tristate
+	depends on XEN && ACPI_PROCESSOR && CPU_FREQ
+	default m
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index aa31337..1585b35 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
-
+obj-$(CONFIG_XEN_ACPI_SINK)		+= acpi_xen_sink.o
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
diff --git a/drivers/xen/acpi_xen_sink.c b/drivers/xen/acpi_xen_sink.c
new file mode 100644
index 0000000..78771ca
--- /dev/null
+++ b/drivers/xen/acpi_xen_sink.c
@@ -0,0 +1,265 @@
+
+#define DEBUG 1
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/processor.h>
+#include <linux/cpumask.h>
+
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+
+#define DRV_NAME	"ACPI_Xen_Sink"
+MODULE_AUTHOR("Konrad Rzeszutek Wilk");
+MODULE_DESCRIPTION("ACPI Power Management driver to send data to Xen hypervisor");
+MODULE_LICENSE("GPL");
+
+/*
+ * Pass the valid ACPI C-states of @_pr to Xen. Fails locally with -ENODEV,
+ * -ENOMEM or -EINVAL; else returns the hypercall result (0 if none is made).
+ */
+static int __init push_cxx_to_hypervisor(struct acpi_processor *_pr)
+{
+	struct xen_platform_op op = {
+		.cmd			= XENPF_set_processor_pminfo,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.set_pminfo.id	= _pr->acpi_id,
+		.u.set_pminfo.type	= XEN_PM_CX,
+	};
+	struct xen_processor_cx *states, *dst;
+	struct acpi_processor_cx *cx;
+	int n, nr_valid = 0, ret = 0;
+
+	if (!_pr->flags.power_setup_done)
+		return -ENODEV;
+
+	states = kcalloc(_pr->power.count, sizeof(*states), GFP_KERNEL);
+	if (!states)
+		return -ENOMEM;
+
+	for (n = 1; n <= _pr->power.count; n++) {
+		cx = &_pr->power.states[n];
+		if (!cx->valid)
+			continue;
+
+		dst = &states[nr_valid++];
+		switch (cx->entry_method) {
+		case ACPI_CSTATE_SYSTEMIO:
+			dst->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
+			/* TODO: double check whether anybody cares about it */
+			dst->reg.bit_width = 8;
+			dst->reg.bit_offset = 0;
+			break;
+		case ACPI_CSTATE_FFH:
+			dst->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
+			dst->reg.bit_width = 1; /* VENDOR_INTEL */
+			/* NATIVE_CSTATE_BEYOND_HALT */
+			dst->reg.bit_offset = 2;
+			break;
+		default:
+			dst->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
+			break;
+		}
+		dst->reg.access_size = 0;
+		dst->reg.address = cx->address;
+		dst->type = cx->type;
+		dst->latency = cx->latency;
+		dst->power = cx->power;
+		dst->dpcnt = 0;
+		set_xen_guest_handle(dst->dp, NULL);
+		pr_debug("\t_CX: ID:%d [C%d:%s]\n", _pr->acpi_id, n, cx->desc);
+	}
+	if (!nr_valid) {
+		pr_err("No available Cx info for cpu %d\n", _pr->acpi_id);
+		ret = -EINVAL;
+		goto out_free;
+	}
+	op.u.set_pminfo.power.count = nr_valid;
+	op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control;
+	op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check;
+	op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst;
+	op.u.set_pminfo.power.flags.power_setup_done =
+		_pr->flags.power_setup_done;
+	set_xen_guest_handle(op.u.set_pminfo.power.states, states);
+	if (xen_initial_domain())
+		ret = HYPERVISOR_dom0_op(&op);
+out_free:
+	kfree(states);
+	return ret;
+}
+
+
+
+/*
+ * Build a Xen-format copy of the _PSS states found in @_pr->performance and
+ * store their count in @xen_perf. Returns the kcalloc()ed array, or
+ * ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+static struct xen_processor_px *
+__init xen_copy_pss_data(struct acpi_processor *_pr,
+			 struct xen_processor_performance *xen_perf)
+{
+	struct acpi_processor_performance *perf = _pr->performance;
+	struct xen_processor_px *copy;
+	int n;
+
+	/* The per-state memcpy() below relies on both structs' sizes matching. */
+	BUILD_BUG_ON(sizeof(struct xen_processor_px) !=
+		     sizeof(struct acpi_processor_px));
+
+	copy = kcalloc(perf->state_count, sizeof(*copy), GFP_KERNEL);
+	if (!copy)
+		return ERR_PTR(-ENOMEM);
+
+	xen_perf->state_count = perf->state_count;
+	for (n = 0; n < perf->state_count; n++) {
+		memcpy(&copy[n], &perf->states[n],
+		       sizeof(struct acpi_processor_px));
+#ifdef DEBUG
+		pr_debug("\t_PSS: [%2d]: %d, %d, %d, %d, %d, %d\n", n,
+			(u32)copy[n].core_frequency, (u32)copy[n].power,
+			(u32)copy[n].transition_latency,
+			(u32)copy[n].bus_master_latency, (u32)copy[n].control,
+			(u32)copy[n].status);
+#endif
+	}
+	return copy;
+}
+/* Copy the _PSD information of @_pr into @xen_perf; always returns 0. */
+static int __init xen_copy_psd_data(struct acpi_processor *_pr,
+				    struct xen_processor_performance *xen_perf)
+{
+	xen_perf->shared_type = _pr->performance->shared_type;
+	BUILD_BUG_ON(sizeof(struct xen_psd_package) !=
+		     sizeof(struct acpi_psd_package));
+	memcpy(&(xen_perf->domain_info), &(_pr->performance->domain_info),
+	       sizeof(struct acpi_psd_package));
+
+	/* #ifdef like the rest of the file; "#if" breaks on an empty DEBUG. */
+#ifdef DEBUG
+	{
+		struct xen_psd_package *_psd = &(xen_perf->domain_info);
+		pr_debug("\t_PSD: num_entries:%d rev=%d domain=%d coord_type=%d, "
+			 "num_processers=%d\n", (u32)_psd->num_entries,
+			 (u32)_psd->revision, (u32)_psd->domain,
+			 (u32)_psd->coord_type, (u32)_psd->num_processors);
+	}
+#endif
+	return 0;
+}
+/*
+ * Copy ACPI's @pct into Xen's @_pct. Done field by field, not by memcpy(),
+ * as the Xen struct lacks padding: its descriptor is two bytes, not one.
+ */
+static int __init xen_copy_pct_data(struct acpi_pct_register *pct,
+				    struct xen_pct_register *_pct)
+{
+	_pct->descriptor = pct->descriptor;
+	_pct->length = pct->length;
+	_pct->space_id = pct->space_id;
+	_pct->bit_width = pct->bit_width;
+	_pct->bit_offset = pct->bit_offset;
+	_pct->reserved = pct->reserved;
+	_pct->address = pct->address;
+#ifdef DEBUG
+	pr_debug("\t_PCT: descriptor=%d, length=%d, space_id=%d, "
+		 "bit_width=%d, bit_offset=%d, reserved=%d, address=0x%llx\n",
+		 _pct->descriptor, _pct->length, _pct->space_id,
+		 _pct->bit_width, _pct->bit_offset, _pct->reserved,
+		 (unsigned long long)_pct->address); /* no (u32) truncation */
+#endif
+	return 0;	/* nothing here can fail */
+}
+/*
+ * Gather the P-state data (_PPC, _PCT, _PSS, _PSD) of @_pr and pass it to
+ * Xen. Returns -ENODEV without performance data, -EINVAL when not in the
+ * initial domain, else the hypercall result.
+ */
+static int __init push_pxx_to_hypervisor(struct acpi_processor *_pr)
+{
+	struct xen_platform_op op = {
+		.cmd			= XENPF_set_processor_pminfo,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.set_pminfo.id	= _pr->acpi_id,
+		.u.set_pminfo.type	= XEN_PM_PX,
+	};
+	struct xen_processor_performance *dst = &op.u.set_pminfo.perf;
+	struct xen_processor_px *pss;
+	int rc;
+
+	if (!_pr->performance)
+		return -ENODEV;
+
+	dst->platform_limit = _pr->performance_platform_limit;
+	xen_copy_pct_data(&_pr->performance->control_register,
+			  &dst->control_register);
+	xen_copy_pct_data(&_pr->performance->status_register,
+			  &dst->status_register);
+	dst->flags |= XEN_PX_PPC | XEN_PX_PCT;
+
+	/* _PSS is only advertised when the state table could be copied. */
+	pss = xen_copy_pss_data(_pr, dst);
+	if (!IS_ERR_OR_NULL(pss)) {
+		set_xen_guest_handle(dst->states, pss);
+		dst->flags |= XEN_PX_PSS;
+	}
+
+	if (xen_copy_psd_data(_pr, dst) == 0)
+		dst->flags |= XEN_PX_PSD;
+
+	rc = xen_initial_domain() ? HYPERVISOR_dom0_op(&op) : -EINVAL;
+
+	if (!IS_ERR_OR_NULL(pss))
+		kfree(pss);
+	return rc;
+}
+
+/*
+ * Push the C-state and P-state data of every populated per-cpu 'processors'
+ * entry to Xen. Always returns -ENODEV so that the module gets unloaded.
+ */
+static int __init acpi_xen_sink_init(void)
+{
+	int cpu, cx_err, px_err;
+	struct acpi_processor *_pr;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+
+	/* TODO: Under AMD, the information is populated
+	 * using the powernow-k8 driver which does an MSR_PSTATE_CUR_LIMIT
+	 * MSR which returns the wrong value (under Xen) so the population
+	 * of 'processors' has bogus data. So only run this under
+	 * Intel for right now. */
+	if (!cpu_has(c, X86_FEATURE_EST)) {
+		pr_err("AMD platform is not supported (yet)\n");
+		return -ENODEV;
+	}
+	/* It is imperative that we get called _after_ acpi_processor has
+	 * loaded. Otherwise the _pr might be bogus. */
+	if (request_module("processor")) {
+		pr_err("Unable to load ACPI processor module!\n");
+		return -ENODEV;
+	}
+	for_each_possible_cpu(cpu) {
+		_pr = per_cpu(processors, cpu);
+		if (!_pr)
+			continue;
+		/* Fresh status per CPU; ->performance may legitimately be NULL. */
+		cx_err = _pr->flags.power ? push_cxx_to_hypervisor(_pr) : 0;
+		px_err = 0;
+		if (_pr->performance && _pr->performance->states)
+			px_err = push_pxx_to_hypervisor(_pr);
+		if (cx_err || px_err)
+			break;
+	}
+	return -ENODEV; /* force it to unload */
+}
+static void __exit acpi_xen_sink_exit(void)
+{	/* Intentionally empty: this function performs no cleanup. */
+}
+module_init(acpi_xen_sink_init);	/* registers the init routine */
+module_exit(acpi_xen_sink_exit);	/* registers the (empty) exit routine */
-- 
1.7.7.5


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-01-31 15:35 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-31 15:32 [RFC] Using per_cpu(processor) information to pipe it up the hypervisor. v1 Konrad Rzeszutek Wilk
2012-01-31 15:32 ` [PATCH] xen/acpi: Provide a ACPI driver that sends "processor" data to the hypervisor Konrad Rzeszutek Wilk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).