linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 3/3] Xen physical cpus interface
@ 2012-04-19 13:33 Liu, Jinsong
  2012-04-20 19:24 ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 11+ messages in thread
From: Liu, Jinsong @ 2012-04-19 13:33 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, xen-devel, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 12816 bytes --]

>From afdd30a7769e706e9be64f80d64136e2e267ecb9 Mon Sep 17 00:00:00 2001
From: Liu, Jinsong <jinsong.liu@intel.com>
Date: Fri, 20 Apr 2012 05:11:58 +0800
Subject: [PATCH 3/3] Xen physical cpus interface

Manage physical cpus in dom0, get physical cpus info and provide sys interface.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
---
 drivers/xen/Makefile             |    1 +
 drivers/xen/pcpu.c               |  393 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/platform.h |   10 +-
 include/xen/interface/xen.h      |    1 +
 4 files changed, 404 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/pcpu.c

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 1d3e763..d12d14d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
+obj-$(CONFIG_XEN_DOM0)			+= pcpu.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
 obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
new file mode 100644
index 0000000..9295def
--- /dev/null
+++ b/drivers/xen/pcpu.c
@@ -0,0 +1,393 @@
+/******************************************************************************
+ * pcpu.c
+ * Management physical cpu in dom0, get pcpu info and provide sys interface
+ *
+ * Copyright (c) 2012 Intel Corporation
+ * Author: Liu, Jinsong <jinsong.liu@intel.com>
+ * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/stat.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+struct pcpu {
+	struct list_head pcpu_list;
+	struct device dev;
+	uint32_t xen_id;
+	uint32_t apic_id;
+	uint32_t acpi_id;
+	uint32_t flags;
+};
+
+static struct bus_type xen_pcpu_subsys = {
+	.name = "xen_cpu",
+	.dev_name = "xen_cpu",
+};
+
+static DEFINE_MUTEX(xen_pcpu_lock);
+#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);
+#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
+
+#define XEN_PCPU "xen_cpu: "
+
+struct xen_pcpus {
+	struct list_head list;
+};
+static struct xen_pcpus xen_pcpus;
+
+static int xen_pcpu_down(uint32_t xen_id)
+{
+	int ret;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_cpu_offline,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.cpu_ol.cpuid		= xen_id,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	return ret;
+}
+
+static int xen_pcpu_up(uint32_t xen_id)
+{
+	int ret;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_cpu_online,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.cpu_ol.cpuid		= xen_id,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	return ret;
+}
+
+static ssize_t show_online(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
+}
+
+static ssize_t __ref store_online(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+	ssize_t ret;
+
+	switch (buf[0]) {
+	case '0':
+		ret = xen_pcpu_down(cpu->xen_id);
+		break;
+	case '1':
+		ret = xen_pcpu_up(cpu->xen_id);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret >= 0)
+		ret = count;
+	return ret;
+}
+static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
+
+static ssize_t show_apicid(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", cpu->apic_id);
+}
+
+static ssize_t show_acpiid(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", cpu->acpi_id);
+}
+static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
+static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);
+
+static bool xen_pcpu_online(uint32_t flags)
+{
+	return !!(flags & XEN_PCPU_FLAGS_ONLINE);
+}
+
+static void pcpu_online_status(struct xenpf_pcpuinfo *info,
+			       struct pcpu *pcpu)
+{
+	if (xen_pcpu_online(info->flags) &&
+	   !xen_pcpu_online(pcpu->flags)) {
+		/* the pcpu is onlined */
+		pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+		kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
+	} else if (!xen_pcpu_online(info->flags) &&
+		    xen_pcpu_online(pcpu->flags)) {
+		/* The pcpu is offlined */
+		pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+		kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
+	}
+}
+
+static struct pcpu *get_pcpu(uint32_t xen_id)
+{
+	struct pcpu *pcpu = NULL;
+
+	list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
+		if (pcpu->xen_id == xen_id)
+			return pcpu;
+	}
+	return NULL;
+}
+
+static void pcpu_sys_remove(struct pcpu *pcpu)
+{
+	struct device *dev;
+
+	if (!pcpu)
+		return;
+
+	dev = &pcpu->dev;
+	if (dev->id)
+		device_remove_file(dev, &dev_attr_online);
+	device_remove_file(dev, &dev_attr_acpi_id);
+	device_remove_file(dev, &dev_attr_apic_id);
+	device_unregister(dev);
+}
+
+static void free_pcpu(struct pcpu *pcpu)
+{
+	if (!pcpu)
+		return;
+
+	pcpu_sys_remove(pcpu);
+	list_del(&pcpu->pcpu_list);
+	kfree(pcpu);
+}
+
+static int pcpu_sys_create(struct pcpu *pcpu)
+{
+	struct device *dev;
+	int err = -EINVAL;
+
+	if (!pcpu)
+		return err;
+
+	dev = &pcpu->dev;
+	dev->bus = &xen_pcpu_subsys;
+	dev->id = pcpu->xen_id;
+
+	err = device_register(dev);
+	if (err)
+		goto err1;
+
+	err = device_create_file(dev, &dev_attr_apic_id);
+	if (err)
+		goto err2;
+	err = device_create_file(dev, &dev_attr_acpi_id);
+	if (err)
+		goto err3;
+	/* Not open pcpu0 online access to user */
+	if (dev->id) {
+		err = device_create_file(dev, &dev_attr_online);
+		if (err)
+			goto err4;
+	}
+
+	return 0;
+
+err4:
+	device_remove_file(dev, &dev_attr_acpi_id);
+err3:
+	device_remove_file(dev, &dev_attr_apic_id);
+err2:
+	device_unregister(dev);
+err1:
+	return err;
+}
+
+static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
+{
+	struct pcpu *pcpu;
+	int err;
+
+	if (info->flags & XEN_PCPU_FLAGS_INVALID)
+		return ERR_PTR(-ENODEV);
+
+	pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
+	if (!pcpu)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pcpu->pcpu_list);
+	pcpu->xen_id = info->xen_cpuid;
+	pcpu->apic_id = info->apic_id;
+	pcpu->acpi_id = info->acpi_id;
+	pcpu->flags = info->flags;
+
+	err = pcpu_sys_create(pcpu);
+	if (err) {
+		pr_warning(XEN_PCPU "Failed to register pcpu%d\n",
+			   info->xen_cpuid);
+		kfree(pcpu);
+		return ERR_PTR(-ENOENT);
+	}
+
+	list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
+	return pcpu;
+}
+
+/*
+ * Caller should hold the pcpu lock
+ */
+static int _sync_pcpu(uint32_t cpu_num, uint32_t *ptr_max_cpu_num)
+{
+	int ret;
+	struct pcpu *pcpu = NULL;
+	struct xenpf_pcpuinfo *info;
+	struct xen_platform_op op = {
+		.cmd                   = XENPF_get_cpuinfo,
+		.interface_version     = XENPF_INTERFACE_VERSION,
+		.u.pcpu_info.xen_cpuid = cpu_num,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	if (ret)
+		return ret;
+
+	info = &op.u.pcpu_info;
+	if (ptr_max_cpu_num)
+		*ptr_max_cpu_num = info->max_present;
+
+	pcpu = get_pcpu(cpu_num);
+
+	if (info->flags & XEN_PCPU_FLAGS_INVALID) {
+		/* The pcpu has been removed */
+		if (pcpu)
+			free_pcpu(pcpu);
+		return 0;
+	}
+
+	if (!pcpu) {
+		pcpu = init_pcpu(info);
+		if (IS_ERR_OR_NULL(pcpu))
+			return -ENODEV;
+	} else
+		pcpu_online_status(info, pcpu);
+
+	return 0;
+}
+
+/*
+ * Sync dom0's pcpu information with xen hypervisor's
+ */
+static int xen_sync_pcpus(void)
+{
+	/*
+	 * Boot cpu always have cpu_id 0 in xen
+	 */
+	uint32_t cpu_num = 0, max_cpu_num = 0;
+	int err = 0;
+	struct list_head *elem, *tmp;
+	struct pcpu *pcpu;
+
+	get_pcpu_lock();
+
+	while (!err && (cpu_num <= max_cpu_num)) {
+		err = _sync_pcpu(cpu_num, &max_cpu_num);
+		cpu_num++;
+	}
+
+	if (err) {
+		list_for_each_safe(elem, tmp, &xen_pcpus.list) {
+			pcpu = list_entry(elem, struct pcpu, pcpu_list);
+			free_pcpu(pcpu);
+		}
+	}
+
+	put_pcpu_lock();
+
+	return err;
+}
+
+static void xen_pcpu_dpc(struct work_struct *work)
+{
+	xen_sync_pcpus();
+}
+static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
+
+static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
+{
+	schedule_work(&xen_pcpu_work);
+	return IRQ_HANDLED;
+}
+
+static int __init xen_pcpu_init(void)
+{
+	int ret;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	ret = subsys_system_register(&xen_pcpu_subsys, NULL);
+	if (ret) {
+		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&xen_pcpus.list);
+
+	ret = xen_sync_pcpus();
+	if (ret) {
+		pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
+		return ret;
+	}
+
+	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
+				      xen_pcpu_interrupt, 0,
+				      "pcpu", NULL);
+	if (ret < 0) {
+		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
+		return ret;
+	}
+
+	return 0;
+}
+arch_initcall(xen_pcpu_init);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 486653f..2c56d4f 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -298,7 +298,7 @@ struct xenpf_set_processor_pminfo {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
 
-#define XENPF_get_cpuinfo 55
+#define XENPF_get_cpuinfo	55
 struct xenpf_pcpuinfo {
 	/* IN */
 	uint32_t xen_cpuid;
@@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
 
+#define XENPF_cpu_online	56
+#define XENPF_cpu_offline	57
+struct xenpf_cpu_ol {
+	uint32_t cpuid;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
+
 struct xen_platform_op {
 	uint32_t cmd;
 	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -330,6 +337,7 @@ struct xen_platform_op {
 		struct xenpf_getidletime       getidletime;
 		struct xenpf_set_processor_pminfo set_pminfo;
 		struct xenpf_pcpuinfo          pcpu_info;
+		struct xenpf_cpu_ol            cpu_ol;
 		uint8_t                        pad[128];
 	} u;
 };
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804..0801468 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -80,6 +80,7 @@
 #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
+#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
-- 
1.7.1

[-- Attachment #2: 0003-Xen-physical-cpus-interface.patch --]
[-- Type: application/octet-stream, Size: 12339 bytes --]

From afdd30a7769e706e9be64f80d64136e2e267ecb9 Mon Sep 17 00:00:00 2001
From: Liu, Jinsong <jinsong.liu@intel.com>
Date: Fri, 20 Apr 2012 05:11:58 +0800
Subject: [PATCH 3/3] Xen physical cpus interface

Manage physical cpus in dom0, get physical cpus info and provide sys interface.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
---
 drivers/xen/Makefile             |    1 +
 drivers/xen/pcpu.c               |  393 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/platform.h |   10 +-
 include/xen/interface/xen.h      |    1 +
 4 files changed, 404 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/pcpu.c

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 1d3e763..d12d14d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
+obj-$(CONFIG_XEN_DOM0)			+= pcpu.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
 obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
new file mode 100644
index 0000000..9295def
--- /dev/null
+++ b/drivers/xen/pcpu.c
@@ -0,0 +1,393 @@
+/******************************************************************************
+ * pcpu.c
+ * Management physical cpu in dom0, get pcpu info and provide sys interface
+ *
+ * Copyright (c) 2012 Intel Corporation
+ * Author: Liu, Jinsong <jinsong.liu@intel.com>
+ * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/stat.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+struct pcpu {
+	struct list_head pcpu_list;
+	struct device dev;
+	uint32_t xen_id;
+	uint32_t apic_id;
+	uint32_t acpi_id;
+	uint32_t flags;
+};
+
+static struct bus_type xen_pcpu_subsys = {
+	.name = "xen_cpu",
+	.dev_name = "xen_cpu",
+};
+
+static DEFINE_MUTEX(xen_pcpu_lock);
+#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);
+#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
+
+#define XEN_PCPU "xen_cpu: "
+
+struct xen_pcpus {
+	struct list_head list;
+};
+static struct xen_pcpus xen_pcpus;
+
+static int xen_pcpu_down(uint32_t xen_id)
+{
+	int ret;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_cpu_offline,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.cpu_ol.cpuid		= xen_id,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	return ret;
+}
+
+static int xen_pcpu_up(uint32_t xen_id)
+{
+	int ret;
+	struct xen_platform_op op = {
+		.cmd			= XENPF_cpu_online,
+		.interface_version	= XENPF_INTERFACE_VERSION,
+		.u.cpu_ol.cpuid		= xen_id,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	return ret;
+}
+
+static ssize_t show_online(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
+}
+
+static ssize_t __ref store_online(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+	ssize_t ret;
+
+	switch (buf[0]) {
+	case '0':
+		ret = xen_pcpu_down(cpu->xen_id);
+		break;
+	case '1':
+		ret = xen_pcpu_up(cpu->xen_id);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret >= 0)
+		ret = count;
+	return ret;
+}
+static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);
+
+static ssize_t show_apicid(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", cpu->apic_id);
+}
+
+static ssize_t show_acpiid(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
+
+	return sprintf(buf, "%u\n", cpu->acpi_id);
+}
+static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
+static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);
+
+static bool xen_pcpu_online(uint32_t flags)
+{
+	return !!(flags & XEN_PCPU_FLAGS_ONLINE);
+}
+
+static void pcpu_online_status(struct xenpf_pcpuinfo *info,
+			       struct pcpu *pcpu)
+{
+	if (xen_pcpu_online(info->flags) &&
+	   !xen_pcpu_online(pcpu->flags)) {
+		/* the pcpu is onlined */
+		pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+		kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
+	} else if (!xen_pcpu_online(info->flags) &&
+		    xen_pcpu_online(pcpu->flags)) {
+		/* The pcpu is offlined */
+		pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+		kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
+	}
+}
+
+static struct pcpu *get_pcpu(uint32_t xen_id)
+{
+	struct pcpu *pcpu = NULL;
+
+	list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
+		if (pcpu->xen_id == xen_id)
+			return pcpu;
+	}
+	return NULL;
+}
+
+static void pcpu_sys_remove(struct pcpu *pcpu)
+{
+	struct device *dev;
+
+	if (!pcpu)
+		return;
+
+	dev = &pcpu->dev;
+	if (dev->id)
+		device_remove_file(dev, &dev_attr_online);
+	device_remove_file(dev, &dev_attr_acpi_id);
+	device_remove_file(dev, &dev_attr_apic_id);
+	device_unregister(dev);
+}
+
+static void free_pcpu(struct pcpu *pcpu)
+{
+	if (!pcpu)
+		return;
+
+	pcpu_sys_remove(pcpu);
+	list_del(&pcpu->pcpu_list);
+	kfree(pcpu);
+}
+
+static int pcpu_sys_create(struct pcpu *pcpu)
+{
+	struct device *dev;
+	int err = -EINVAL;
+
+	if (!pcpu)
+		return err;
+
+	dev = &pcpu->dev;
+	dev->bus = &xen_pcpu_subsys;
+	dev->id = pcpu->xen_id;
+
+	err = device_register(dev);
+	if (err)
+		goto err1;
+
+	err = device_create_file(dev, &dev_attr_apic_id);
+	if (err)
+		goto err2;
+	err = device_create_file(dev, &dev_attr_acpi_id);
+	if (err)
+		goto err3;
+	/* Not open pcpu0 online access to user */
+	if (dev->id) {
+		err = device_create_file(dev, &dev_attr_online);
+		if (err)
+			goto err4;
+	}
+
+	return 0;
+
+err4:
+	device_remove_file(dev, &dev_attr_acpi_id);
+err3:
+	device_remove_file(dev, &dev_attr_apic_id);
+err2:
+	device_unregister(dev);
+err1:
+	return err;
+}
+
+static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
+{
+	struct pcpu *pcpu;
+	int err;
+
+	if (info->flags & XEN_PCPU_FLAGS_INVALID)
+		return ERR_PTR(-ENODEV);
+
+	pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
+	if (!pcpu)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&pcpu->pcpu_list);
+	pcpu->xen_id = info->xen_cpuid;
+	pcpu->apic_id = info->apic_id;
+	pcpu->acpi_id = info->acpi_id;
+	pcpu->flags = info->flags;
+
+	err = pcpu_sys_create(pcpu);
+	if (err) {
+		pr_warning(XEN_PCPU "Failed to register pcpu%d\n",
+			   info->xen_cpuid);
+		kfree(pcpu);
+		return ERR_PTR(-ENOENT);
+	}
+
+	list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
+	return pcpu;
+}
+
+/*
+ * Caller should hold the pcpu lock
+ */
+static int _sync_pcpu(uint32_t cpu_num, uint32_t *ptr_max_cpu_num)
+{
+	int ret;
+	struct pcpu *pcpu = NULL;
+	struct xenpf_pcpuinfo *info;
+	struct xen_platform_op op = {
+		.cmd                   = XENPF_get_cpuinfo,
+		.interface_version     = XENPF_INTERFACE_VERSION,
+		.u.pcpu_info.xen_cpuid = cpu_num,
+	};
+
+	ret = HYPERVISOR_dom0_op(&op);
+	if (ret)
+		return ret;
+
+	info = &op.u.pcpu_info;
+	if (ptr_max_cpu_num)
+		*ptr_max_cpu_num = info->max_present;
+
+	pcpu = get_pcpu(cpu_num);
+
+	if (info->flags & XEN_PCPU_FLAGS_INVALID) {
+		/* The pcpu has been removed */
+		if (pcpu)
+			free_pcpu(pcpu);
+		return 0;
+	}
+
+	if (!pcpu) {
+		pcpu = init_pcpu(info);
+		if (IS_ERR_OR_NULL(pcpu))
+			return -ENODEV;
+	} else
+		pcpu_online_status(info, pcpu);
+
+	return 0;
+}
+
+/*
+ * Sync dom0's pcpu information with xen hypervisor's
+ */
+static int xen_sync_pcpus(void)
+{
+	/*
+	 * Boot cpu always have cpu_id 0 in xen
+	 */
+	uint32_t cpu_num = 0, max_cpu_num = 0;
+	int err = 0;
+	struct list_head *elem, *tmp;
+	struct pcpu *pcpu;
+
+	get_pcpu_lock();
+
+	while (!err && (cpu_num <= max_cpu_num)) {
+		err = _sync_pcpu(cpu_num, &max_cpu_num);
+		cpu_num++;
+	}
+
+	if (err) {
+		list_for_each_safe(elem, tmp, &xen_pcpus.list) {
+			pcpu = list_entry(elem, struct pcpu, pcpu_list);
+			free_pcpu(pcpu);
+		}
+	}
+
+	put_pcpu_lock();
+
+	return err;
+}
+
+static void xen_pcpu_dpc(struct work_struct *work)
+{
+	xen_sync_pcpus();
+}
+static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
+
+static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
+{
+	schedule_work(&xen_pcpu_work);
+	return IRQ_HANDLED;
+}
+
+static int __init xen_pcpu_init(void)
+{
+	int ret;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	ret = subsys_system_register(&xen_pcpu_subsys, NULL);
+	if (ret) {
+		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&xen_pcpus.list);
+
+	ret = xen_sync_pcpus();
+	if (ret) {
+		pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
+		return ret;
+	}
+
+	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
+				      xen_pcpu_interrupt, 0,
+				      "pcpu", NULL);
+	if (ret < 0) {
+		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
+		return ret;
+	}
+
+	return 0;
+}
+arch_initcall(xen_pcpu_init);
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 486653f..2c56d4f 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -298,7 +298,7 @@ struct xenpf_set_processor_pminfo {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
 
-#define XENPF_get_cpuinfo 55
+#define XENPF_get_cpuinfo	55
 struct xenpf_pcpuinfo {
 	/* IN */
 	uint32_t xen_cpuid;
@@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
 
+#define XENPF_cpu_online	56
+#define XENPF_cpu_offline	57
+struct xenpf_cpu_ol {
+	uint32_t cpuid;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
+
 struct xen_platform_op {
 	uint32_t cmd;
 	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -330,6 +337,7 @@ struct xen_platform_op {
 		struct xenpf_getidletime       getidletime;
 		struct xenpf_set_processor_pminfo set_pminfo;
 		struct xenpf_pcpuinfo          pcpu_info;
+		struct xenpf_cpu_ol            cpu_ol;
 		uint8_t                        pad[128];
 	} u;
 };
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804..0801468 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -80,6 +80,7 @@
 #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
+#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] Xen physical cpus interface
  2012-04-19 13:33 [PATCH 3/3] Xen physical cpus interface Liu, Jinsong
@ 2012-04-20 19:24 ` Konrad Rzeszutek Wilk
  2012-05-10 14:54   ` Liu, Jinsong
  0 siblings, 1 reply; 11+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-04-20 19:24 UTC (permalink / raw)
  To: Liu, Jinsong; +Cc: xen-devel, linux-kernel

On Thu, Apr 19, 2012 at 01:33:03PM +0000, Liu, Jinsong wrote:
> >From afdd30a7769e706e9be64f80d64136e2e267ecb9 Mon Sep 17 00:00:00 2001
> From: Liu, Jinsong <jinsong.liu@intel.com>
> Date: Fri, 20 Apr 2012 05:11:58 +0800
> Subject: [PATCH 3/3] Xen physical cpus interface
> 
> Manage physical cpus in dom0, get physical cpus info and provide sys interface.

Anything that exposes SysFS attributes needs documentation in 
Documentation/ABI

Can you explain what this solves? And if there are any
userland applications that use this?


> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
> ---
>  drivers/xen/Makefile             |    1 +
>  drivers/xen/pcpu.c               |  393 ++++++++++++++++++++++++++++++++++++++
>  include/xen/interface/platform.h |   10 +-
>  include/xen/interface/xen.h      |    1 +
>  4 files changed, 404 insertions(+), 1 deletions(-)
>  create mode 100644 drivers/xen/pcpu.c
> 
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index 1d3e763..d12d14d 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
>  obj-$(CONFIG_XEN_TMEM)			+= tmem.o
>  obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
>  obj-$(CONFIG_XEN_DOM0)			+= pci.o
> +obj-$(CONFIG_XEN_DOM0)			+= pcpu.o
>  obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
>  obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
>  obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
> diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c
> new file mode 100644
> index 0000000..9295def
> --- /dev/null
> +++ b/drivers/xen/pcpu.c
> @@ -0,0 +1,393 @@
> +/******************************************************************************
> + * pcpu.c
> + * Management physical cpu in dom0, get pcpu info and provide sys interface
> + *
> + * Copyright (c) 2012 Intel Corporation
> + * Author: Liu, Jinsong <jinsong.liu@intel.com>
> + * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this source file (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy, modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
> + * and to permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <linux/interrupt.h>
> +#include <linux/spinlock.h>
> +#include <linux/cpu.h>
> +#include <linux/stat.h>
> +#include <xen/xen.h>
> +#include <xen/xenbus.h>
> +#include <xen/events.h>
> +#include <xen/interface/platform.h>
> +#include <asm/xen/hypervisor.h>
> +#include <asm/xen/hypercall.h>
> +
> +struct pcpu {
> +	struct list_head pcpu_list;
> +	struct device dev;
> +	uint32_t xen_id;

What is a 'xen_id'? Can you put a provide a comment
explaining the relationship between that and apic_id acpi_id
and flags?

Or better yet, just at the start of the structure have:

/*
 * @xen_id The ...
 * @apic_id ... so on.


> +	uint32_t apic_id;
> +	uint32_t acpi_id;
> +	uint32_t flags;
> +};
> +
> +static struct bus_type xen_pcpu_subsys = {

> +	.name = "xen_cpu",
> +	.dev_name = "xen_cpu",
> +};
> +
> +static DEFINE_MUTEX(xen_pcpu_lock);
> +#define get_pcpu_lock() mutex_lock(&xen_pcpu_lock);

Just use the mutex - no point of using the #define


> +#define put_pcpu_lock() mutex_unlock(&xen_pcpu_lock);
> +
> +#define XEN_PCPU "xen_cpu: "
> +
> +struct xen_pcpus {
> +	struct list_head list;

Uh, so it is just a list? 
> +};
> +static struct xen_pcpus xen_pcpus;

Can't you just do

static struct pcpu * xen_pcpus;

And then use that to add/remove items?

> +
> +static int xen_pcpu_down(uint32_t xen_id)
> +{
> +	int ret;
> +	struct xen_platform_op op = {
> +		.cmd			= XENPF_cpu_offline,
> +		.interface_version	= XENPF_INTERFACE_VERSION,
> +		.u.cpu_ol.cpuid		= xen_id,
> +	};
> +
> +	ret = HYPERVISOR_dom0_op(&op);
> +	return ret;

Just collapse it in:
	return HYPERVISOR_dom0_op..

> +}
> +
> +static int xen_pcpu_up(uint32_t xen_id)
> +{
> +	int ret;
> +	struct xen_platform_op op = {
> +		.cmd			= XENPF_cpu_online,
> +		.interface_version	= XENPF_INTERFACE_VERSION,
> +		.u.cpu_ol.cpuid		= xen_id,
> +	};
> +
> +	ret = HYPERVISOR_dom0_op(&op);
> +	return ret;

Ditto.

> +}
> +
> +static ssize_t show_online(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +	return sprintf(buf, "%u\n", !!(cpu->flags & XEN_PCPU_FLAGS_ONLINE));
> +}
> +
> +static ssize_t __ref store_online(struct device *dev,
> +				  struct device_attribute *attr,
> +				  const char *buf, size_t count)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +	ssize_t ret;
> +

Add:
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;


> +	switch (buf[0]) {

Use strict_strtoull pls.

> +	case '0':
> +		ret = xen_pcpu_down(cpu->xen_id);
> +		break;
> +	case '1':
> +		ret = xen_pcpu_up(cpu->xen_id);
> +		break;
> +	default:
> +		ret = -EINVAL;
> +	}
> +
> +	if (ret >= 0)
> +		ret = count;
> +	return ret;
> +}
> +static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online, store_online);

> +
> +static ssize_t show_apicid(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +	return sprintf(buf, "%u\n", cpu->apic_id);
> +}
> +
> +static ssize_t show_acpiid(struct device *dev,
> +			   struct device_attribute *attr,
> +			   char *buf)
> +{
> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev);
> +
> +	return sprintf(buf, "%u\n", cpu->acpi_id);
> +}
> +static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
> +static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);

What benefit is there in exposing these values to the user?

> +
> +static bool xen_pcpu_online(uint32_t flags)
> +{
> +	return !!(flags & XEN_PCPU_FLAGS_ONLINE);
> +}
> +
> +static void pcpu_online_status(struct xenpf_pcpuinfo *info,
> +			       struct pcpu *pcpu)
> +{
> +	if (xen_pcpu_online(info->flags) &&
> +	   !xen_pcpu_online(pcpu->flags)) {
> +		/* the pcpu is onlined */
> +		pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
> +		kobject_uevent(&pcpu->dev.kobj, KOBJ_ONLINE);
> +	} else if (!xen_pcpu_online(info->flags) &&
> +		    xen_pcpu_online(pcpu->flags)) {
> +		/* The pcpu is offlined */
> +		pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
> +		kobject_uevent(&pcpu->dev.kobj, KOBJ_OFFLINE);
> +	}
> +}
> +
> +static struct pcpu *get_pcpu(uint32_t xen_id)
> +{
> +	struct pcpu *pcpu = NULL;
> +
> +	list_for_each_entry(pcpu, &xen_pcpus.list, pcpu_list) {
> +		if (pcpu->xen_id == xen_id)
> +			return pcpu;
> +	}
> +	return NULL;
> +}
> +
> +static void pcpu_sys_remove(struct pcpu *pcpu)
> +{
> +	struct device *dev;
> +
> +	if (!pcpu)
> +		return;
> +
> +	dev = &pcpu->dev;
> +	if (dev->id)
> +		device_remove_file(dev, &dev_attr_online);
> +	device_remove_file(dev, &dev_attr_acpi_id);
> +	device_remove_file(dev, &dev_attr_apic_id);
> +	device_unregister(dev);

So .. if you are using that, can't you use the .release
and define something like:

static void pcpu_release(struct device *dev)
{
	struct pcpu *pcpu = container_of(dev, struct pcpu, dev);

	list_del(&pcpu->pcpu_list);
	kfree(pcpu);
}
> +}
> +
> +static void free_pcpu(struct pcpu *pcpu)
> +{
> +	if (!pcpu)
> +		return;
> +
> +	pcpu_sys_remove(pcpu);

> +	list_del(&pcpu->pcpu_list);
> +	kfree(pcpu);

Those two shouldn't be there. They sould be part of the
release function.
> +}
> +
> +static int pcpu_sys_create(struct pcpu *pcpu)
> +{
> +	struct device *dev;
> +	int err = -EINVAL;
> +
> +	if (!pcpu)
> +		return err;
> +
> +	dev = &pcpu->dev;
> +	dev->bus = &xen_pcpu_subsys;
> +	dev->id = pcpu->xen_id;

And then here dev->release = &pcpu_release;

> +
> +	err = device_register(dev);
> +	if (err)
> +		goto err1;
> +
> +	err = device_create_file(dev, &dev_attr_apic_id);
> +	if (err)
> +		goto err2;
> +	err = device_create_file(dev, &dev_attr_acpi_id);
> +	if (err)
> +		goto err3;

Usually this is done with an array, like the drivers/xen/xen-balloon.c
..
But I am still not sure why these two parameters are needed?

> +	/* Not open pcpu0 online access to user */

Huh? You mean "Nobody can touch PCPU0" ?

Why? Why can they touch the other ones? And better yet,
what happens if one boots without "dom0_max_vcpus=X"
and powers of some of the CPUs?

> +	if (dev->id) {
> +		err = device_create_file(dev, &dev_attr_online);
> +		if (err)
> +			goto err4;
> +	}
> +
> +	return 0;
> +
> +err4:
> +	device_remove_file(dev, &dev_attr_acpi_id);
> +err3:
> +	device_remove_file(dev, &dev_attr_apic_id);
> +err2:
> +	device_unregister(dev);
> +err1:
> +	return err;
> +}
> +
> +static struct pcpu *init_pcpu(struct xenpf_pcpuinfo *info)
> +{
> +	struct pcpu *pcpu;
> +	int err;
> +
> +	if (info->flags & XEN_PCPU_FLAGS_INVALID)
> +		return ERR_PTR(-ENODEV);
> +
> +	pcpu = kzalloc(sizeof(struct pcpu), GFP_KERNEL);
> +	if (!pcpu)
> +		return ERR_PTR(-ENOMEM);
> +
> +	INIT_LIST_HEAD(&pcpu->pcpu_list);
> +	pcpu->xen_id = info->xen_cpuid;
> +	pcpu->apic_id = info->apic_id;
> +	pcpu->acpi_id = info->acpi_id;
> +	pcpu->flags = info->flags;
> +
> +	err = pcpu_sys_create(pcpu);
> +	if (err) {
> +		pr_warning(XEN_PCPU "Failed to register pcpu%d\n",

Not %u?
> +			   info->xen_cpuid);
> +		kfree(pcpu);
> +		return ERR_PTR(-ENOENT);
> +	}
> +

Should this be protected by the mutex? [edit: I see now
that the calleer is suppose to hold on the lock.
Mention that please right before you do any list manipulations]


> +	list_add_tail(&pcpu->pcpu_list, &xen_pcpus.list);
> +	return pcpu;
> +}
> +
> +/*
> + * Caller should hold the pcpu lock

Provide full name of the mutex lock.
> + */
> +static int _sync_pcpu(uint32_t cpu_num, uint32_t *ptr_max_cpu_num)

Why the '_' in front of the name?

For parameters do:

uint32 cpu, uint32 *max_cpu

> +{
> +	int ret;
> +	struct pcpu *pcpu = NULL;
> +	struct xenpf_pcpuinfo *info;
> +	struct xen_platform_op op = {
> +		.cmd                   = XENPF_get_cpuinfo,
> +		.interface_version     = XENPF_INTERFACE_VERSION,
> +		.u.pcpu_info.xen_cpuid = cpu_num,
> +	};
> +
> +	ret = HYPERVISOR_dom0_op(&op);
> +	if (ret)
> +		return ret;
> +
> +	info = &op.u.pcpu_info;
> +	if (ptr_max_cpu_num)
> +		*ptr_max_cpu_num = info->max_present;
> +
> +	pcpu = get_pcpu(cpu_num);
> +
> +	if (info->flags & XEN_PCPU_FLAGS_INVALID) {
> +		/* The pcpu has been removed */
> +		if (pcpu)
> +			free_pcpu(pcpu);
> +		return 0;
> +	}
> +
> +	if (!pcpu) {
> +		pcpu = init_pcpu(info);
> +		if (IS_ERR_OR_NULL(pcpu))
> +			return -ENODEV;
> +	} else
> +		pcpu_online_status(info, pcpu);
> +
> +	return 0;
> +}
> +
> +/*
> + * Sync dom0's pcpu information with xen hypervisor's
> + */
> +static int xen_sync_pcpus(void)
> +{
> +	/*
> +	 * Boot cpu always have cpu_id 0 in xen
> +	 */
> +	uint32_t cpu_num = 0, max_cpu_num = 0;

Use 'cpu' and 'max_cpu'

> +	int err = 0;
> +	struct list_head *elem, *tmp;
> +	struct pcpu *pcpu;
> +
> +	get_pcpu_lock();
> +
> +	while (!err && (cpu_num <= max_cpu_num)) {
> +		err = _sync_pcpu(cpu_num, &max_cpu_num);
> +		cpu_num++;
> +	}
> +
> +	if (err) {
> +		list_for_each_safe(elem, tmp, &xen_pcpus.list) {

s/elem/pcpu/

> +			pcpu = list_entry(elem, struct pcpu, pcpu_list);

That you can remove once you make the xen_pcpus structure as I mentioned
above.

> +			free_pcpu(pcpu);
> +		}
> +	}
> +
> +	put_pcpu_lock();

Ah, so you are locking around here.
> +
> +	return err;
> +}
> +
> +static void xen_pcpu_dpc(struct work_struct *work)

s/dpc/workqueue/

> +{
> +	xen_sync_pcpus();
> +}
> +static DECLARE_WORK(xen_pcpu_work, xen_pcpu_dpc);
> +
> +static irqreturn_t xen_pcpu_interrupt(int irq, void *dev_id)
> +{
> +	schedule_work(&xen_pcpu_work);
> +	return IRQ_HANDLED;
> +}
> +
> +static int __init xen_pcpu_init(void)
> +{
> +	int ret;
> +
> +	if (!xen_initial_domain())
> +		return -ENODEV;
> +
> +	ret = subsys_system_register(&xen_pcpu_subsys, NULL);
> +	if (ret) {
> +		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
> +		return ret;
> +	}
> +
> +	INIT_LIST_HEAD(&xen_pcpus.list);
> +
> +	ret = xen_sync_pcpus();
> +	if (ret) {
> +		pr_warning(XEN_PCPU "Failed to sync pcpu info\n");
> +		return ret;
> +	}
> +
> +	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
> +				      xen_pcpu_interrupt, 0,
> +				      "pcpu", NULL);

"xen-pcpu"

> +	if (ret < 0) {
> +		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");

Shouldn't you delete what 'xen_sync_pcpus' did?
Or is it OK to still work without the interrupts? What is the purpose
of that interrupt? How does it actually work - the hypervisor
decides when/where to turn off CPUs?

> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +arch_initcall(xen_pcpu_init);
> diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
> index 486653f..2c56d4f 100644
> --- a/include/xen/interface/platform.h
> +++ b/include/xen/interface/platform.h
> @@ -298,7 +298,7 @@ struct xenpf_set_processor_pminfo {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
>  
> -#define XENPF_get_cpuinfo 55
> +#define XENPF_get_cpuinfo	55

Why?

>  struct xenpf_pcpuinfo {
>  	/* IN */
>  	uint32_t xen_cpuid;
> @@ -314,6 +314,13 @@ struct xenpf_pcpuinfo {
>  };
>  DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo);
>  
> +#define XENPF_cpu_online	56
> +#define XENPF_cpu_offline	57
> +struct xenpf_cpu_ol {
> +	uint32_t cpuid;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol);
> +
>  struct xen_platform_op {
>  	uint32_t cmd;
>  	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
> @@ -330,6 +337,7 @@ struct xen_platform_op {
>  		struct xenpf_getidletime       getidletime;
>  		struct xenpf_set_processor_pminfo set_pminfo;
>  		struct xenpf_pcpuinfo          pcpu_info;
> +		struct xenpf_cpu_ol            cpu_ol;
>  		uint8_t                        pad[128];
>  	} u;
>  };
> diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> index a890804..0801468 100644
> --- a/include/xen/interface/xen.h
> +++ b/include/xen/interface/xen.h
> @@ -80,6 +80,7 @@
>  #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
>  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
>  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
> +#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
>  
>  /* Architecture-specific VIRQ definitions. */
>  #define VIRQ_ARCH_0    16
> -- 
> 1.7.1



^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 3/3] Xen physical cpus interface
  2012-04-20 19:24 ` Konrad Rzeszutek Wilk
@ 2012-05-10 14:54   ` Liu, Jinsong
  2012-05-10 14:57     ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 11+ messages in thread
From: Liu, Jinsong @ 2012-05-10 14:54 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel, linux-kernel

Konrad,

Thanks for help me review!
Update according to your suggestion. 
Add some comments below.

>> 
>> Manage physical cpus in dom0, get physical cpus info and provide sys
>> interface. 
> 
> Anything that exposes SysFS attributes needs documentation in
> Documentation/ABI

Yes, added.

> 
> Can you explain what this solves? And if there are any
> userland applications that use this?
> 

It provide cpu online/offline interface to user. User can use it for their own purpose, like power saving - by offlining some cpus when light workload it save power greatly.

> 
> 
>> +	switch (buf[0]) {
> 
> Use strict_strtoull pls.

kernel suggest:
WARNING: strict_strtoull is obsolete, use kstrtoull instead :)

> 
>> +	case '0':
>> +		ret = xen_pcpu_down(cpu->xen_id);
>> +		break;
>> +	case '1':
>> +		ret = xen_pcpu_up(cpu->xen_id);
>> +		break;
>> +	default:
>> +		ret = -EINVAL;
>> +	}
>> +
>> +	if (ret >= 0)
>> +		ret = count;
>> +	return ret;
>> +}
>> +static DEVICE_ATTR(online, S_IRUGO | S_IWUSR, show_online,
>> store_online); 
> 
>> +
>> +static ssize_t show_apicid(struct device *dev,
>> +			   struct device_attribute *attr,
>> +			   char *buf)
>> +{
>> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev); +
>> +	return sprintf(buf, "%u\n", cpu->apic_id);
>> +}
>> +
>> +static ssize_t show_acpiid(struct device *dev,
>> +			   struct device_attribute *attr,
>> +			   char *buf)
>> +{
>> +	struct pcpu *cpu = container_of(dev, struct pcpu, dev); +
>> +	return sprintf(buf, "%u\n", cpu->acpi_id);
>> +}
>> +static DEVICE_ATTR(apic_id, S_IRUGO, show_apicid, NULL);
>> +static DEVICE_ATTR(acpi_id, S_IRUGO, show_acpiid, NULL);
> 
> What benefit is there in exposing these values to the user?

Yes, no benefit so let's cancel exposing acpi_id and apic_id.

>> +
>> +static void pcpu_sys_remove(struct pcpu *pcpu)
>> +{
>> +	struct device *dev;
>> +
>> +	if (!pcpu)
>> +		return;
>> +
>> +	dev = &pcpu->dev;
>> +	if (dev->id)
>> +		device_remove_file(dev, &dev_attr_online);
>> +	device_remove_file(dev, &dev_attr_acpi_id);
>> +	device_remove_file(dev, &dev_attr_apic_id);
>> +	device_unregister(dev);
> 
> So .. if you are using that, can't you use the .release
> and define something like:
> 
> static void pcpu_release(struct device *dev)
> {
> 	struct pcpu *pcpu = container_of(dev, struct pcpu, dev);
> 
> 	list_del(&pcpu->pcpu_list);
> 	kfree(pcpu);
> }
>> +}
>> +
>> +static void free_pcpu(struct pcpu *pcpu)
>> +{
>> +	if (!pcpu)
>> +		return;
>> +
>> +	pcpu_sys_remove(pcpu);
> 
>> +	list_del(&pcpu->pcpu_list);
>> +	kfree(pcpu);
> 
> Those two shouldn't be there. They sould be part of the
> release function.
>> +}
>> +
>> +static int pcpu_sys_create(struct pcpu *pcpu)
>> +{
>> +	struct device *dev;
>> +	int err = -EINVAL;
>> +
>> +	if (!pcpu)
>> +		return err;
>> +
>> +	dev = &pcpu->dev;
>> +	dev->bus = &xen_pcpu_subsys;
>> +	dev->id = pcpu->xen_id;
> 
> And then here dev->release = &pcpu_release;
> 

Hmm, it's good if it's convenient to do it automatically via dev->release.
However, dev container (pcpu) would be free at some other error cases, so I prefer do it 'manually'.

> 
>> +	/* Not open pcpu0 online access to user */
> 
> Huh? You mean "Nobody can touch PCPU0" ?

Add comments:
        /*
         * Xen never offline cpu0 due to several restrictions
         * and assumptions. This basically doesn't add a sys control
         * to user, one cannot attempt to offline BSP.
         */

> 
> Why? Why can they touch the other ones? And better yet,
> what happens if one boots without "dom0_max_vcpus=X"
> and powers of some of the CPUs?
> 

Only those at cpu present map has its sys interface.

>> +static int __init xen_pcpu_init(void)
>> +{
>> +	int ret;
>> +
>> +	if (!xen_initial_domain())
>> +		return -ENODEV;
>> +
>> +	ret = subsys_system_register(&xen_pcpu_subsys, NULL); +	if (ret) {
>> +		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
>> +		return ret; +	}
>> +
>> +	INIT_LIST_HEAD(&xen_pcpus.list);
>> +
>> +	ret = xen_sync_pcpus();
>> +	if (ret) {
>> +		pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); +		return ret;
>> +	}
>> +
>> +	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
>> +				      xen_pcpu_interrupt, 0,
>> +				      "pcpu", NULL);
> 
> "xen-pcpu"
> 
>> +	if (ret < 0) {
>> +		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
> 
> Shouldn't you delete what 'xen_sync_pcpus' did?

yes, add error handling.

> Or is it OK to still work without the interrupts? What is the purpose
> of that interrupt? How does it actually work - the hypervisor
> decides when/where to turn off CPUs?
> 

user online/offline cpu via sys interface --> xen implement --> inject virq back to dom0 --> sync cpu status.

Thanks,
Jinsong

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] Xen physical cpus interface
  2012-05-10 14:54   ` Liu, Jinsong
@ 2012-05-10 14:57     ` Konrad Rzeszutek Wilk
  2012-05-10 15:20       ` Liu, Jinsong
       [not found]       ` <DE8DF0795D48FD4CA783C40EC82923351B5A2E@SHSMSX101.ccr.corp.intel.com>
  0 siblings, 2 replies; 11+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-05-10 14:57 UTC (permalink / raw)
  To: Liu, Jinsong; +Cc: xen-devel, linux-kernel

On Thu, May 10, 2012 at 02:54:07PM +0000, Liu, Jinsong wrote:
> Konrad,
> 
> Thanks for help me review!

Sure thing.
> Update according to your suggestion. 
> Add some comments below.
> 
> >> 
> >> Manage physical cpus in dom0, get physical cpus info and provide sys
> >> interface. 
> > 
> > Anything that exposes SysFS attributes needs documentation in
> > Documentation/ABI
> 
> Yes, added.
> 
> > 
> > Can you explain what this solves? And if there are any
> > userland applications that use this?
> > 
> 
> It provide cpu online/offline interface to user. User can use it for their own purpose, like power saving - by offlining some cpus when light workload it save power greatly.

OK, please include that in the descritpion.

> 
> > 
> > 
> >> +	switch (buf[0]) {
> > 
> > Use strict_strtoull pls.
> 
> kernel suggest:
> WARNING: strict_strtoull is obsolete, use kstrtoull instead :)

Ah yes.
.. snip..
> > And then here dev->release = &pcpu_release;
> > 
> 
> Hmm, it's good if it's convenient to do it automatically via dev->release.
> However, dev container (pcpu) would be free at some other error cases, so I prefer do it 'manually'.

You could also call pcpu_release(..) to do it manually.

> 
> > 
> >> +	/* Not open pcpu0 online access to user */
> > 
> > Huh? You mean "Nobody can touch PCPU0" ?
> 
> Add comments:
>         /*
>          * Xen never offline cpu0 due to several restrictions
>          * and assumptions. This basically doesn't add a sys control
>          * to user, one cannot attempt to offline BSP.
>          */
> 
> > 
> > Why? Why can they touch the other ones? And better yet,
> > what happens if one boots without "dom0_max_vcpus=X"
> > and powers of some of the CPUs?
> > 
> 
> Only those at cpu present map has its sys interface.

OK, put that in the file so folks are aware of the limitations.

> 
> >> +static int __init xen_pcpu_init(void)
> >> +{
> >> +	int ret;
> >> +
> >> +	if (!xen_initial_domain())
> >> +		return -ENODEV;
> >> +
> >> +	ret = subsys_system_register(&xen_pcpu_subsys, NULL); +	if (ret) {
> >> +		pr_warning(XEN_PCPU "Failed to register pcpu subsys\n");
> >> +		return ret; +	}
> >> +
> >> +	INIT_LIST_HEAD(&xen_pcpus.list);
> >> +
> >> +	ret = xen_sync_pcpus();
> >> +	if (ret) {
> >> +		pr_warning(XEN_PCPU "Failed to sync pcpu info\n"); +		return ret;
> >> +	}
> >> +
> >> +	ret = bind_virq_to_irqhandler(VIRQ_PCPU_STATE, 0,
> >> +				      xen_pcpu_interrupt, 0,
> >> +				      "pcpu", NULL);
> > 
> > "xen-pcpu"
> > 
> >> +	if (ret < 0) {
> >> +		pr_warning(XEN_PCPU "Failed to bind pcpu virq\n");
> > 
> > Shouldn't you delete what 'xen_sync_pcpus' did?
> 
> yes, add error handling.
> 
> > Or is it OK to still work without the interrupts? What is the purpose
> > of that interrupt? How does it actually work - the hypervisor
> > decides when/where to turn off CPUs?
> > 
> 
> user online/offline cpu via sys interface --> xen implement --> inject virq back to dom0 --> sync cpu status.

Add that in the file so the workflow is explained.
> 
> Thanks,
> Jinsong

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 3/3] Xen physical cpus interface
  2012-05-10 14:57     ` Konrad Rzeszutek Wilk
@ 2012-05-10 15:20       ` Liu, Jinsong
       [not found]       ` <DE8DF0795D48FD4CA783C40EC82923351B5A2E@SHSMSX101.ccr.corp.intel.com>
  1 sibling, 0 replies; 11+ messages in thread
From: Liu, Jinsong @ 2012-05-10 15:20 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: xen-devel, linux-kernel

Just notice your reply (so quick :)

Agree and will update later, except 1 concern below.

Konrad Rzeszutek Wilk wrote:
>> 
>> Hmm, it's good if it's convenient to do it automatically via
>> dev->release. 
>> However, dev container (pcpu) would be free at some other error
>> cases, so I prefer do it 'manually'. 
> 
> You could also call pcpu_release(..) to do it manually.
> 

that means kfree(pcpu) would be done twice at some error cases, do you think it really good?

Thanks,
Jinsong

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 3/3] Xen physical cpus interface
       [not found]       ` <DE8DF0795D48FD4CA783C40EC82923351B5A2E@SHSMSX101.ccr.corp.intel.com>
@ 2012-05-11 13:12         ` Liu, Jinsong
  2012-05-11 14:27           ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 11+ messages in thread
From: Liu, Jinsong @ 2012-05-11 13:12 UTC (permalink / raw)
  To: 'Konrad Rzeszutek Wilk'
  Cc: 'xen-devel@lists.xensource.com',
	'linux-kernel@vger.kernel.org'

Liu, Jinsong wrote:
> Just notice your reply (so quick :)
> 
> Agree and will update later, except 1 concern below.
> 
> Konrad Rzeszutek Wilk wrote:
>>> 
>>> Hmm, it's good if it's convenient to do it automatically via
>>> dev->release. However, dev container (pcpu) would be free at some
>>> other error cases, so I prefer do it 'manually'.
>> 
>> You could also call pcpu_release(..) to do it manually.
>> 
> 
> that means kfree(pcpu) would be done twice at some error cases, do
> you think it really good? 
> 

Ping.

I think error recovery should be kept inside error logic level itself, if try to recover upper level error would bring trouble.

In our example, there are 2 logic levels:
pcpu level (as container), and dev level (subfield used for sys)
dev->release should only recover error occurred at dev/sys level, and the pcpu error should be recovered at pcpu level.

If dev->release try to recover its container pcpu level error, like list_del/kfree(pcpu), it would make confusing. i.e., considering pcpu_sys_create(), 2 error cases:
device_register fail, and device_create_file fail --> how can the caller decide kfree(pcpu) or not?

So how about recover pcpu error manually and explicitly?

Thanks,
Jinsong

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] Xen physical cpus interface
  2012-05-11 13:12         ` Liu, Jinsong
@ 2012-05-11 14:27           ` Konrad Rzeszutek Wilk
  2012-05-11 18:04             ` Liu, Jinsong
  0 siblings, 1 reply; 11+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-05-11 14:27 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: 'xen-devel@lists.xensource.com',
	'linux-kernel@vger.kernel.org'

On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
> Liu, Jinsong wrote:
> > Just notice your reply (so quick :)
> > 
> > Agree and will update later, except 1 concern below.
> > 
> > Konrad Rzeszutek Wilk wrote:
> >>> 
> >>> Hmm, it's good if it's convenient to do it automatically via
> >>> dev->release. However, dev container (pcpu) would be free at some
> >>> other error cases, so I prefer do it 'manually'.
> >> 
> >> You could also call pcpu_release(..) to do it manually.
> >> 
> > 
> > that means kfree(pcpu) would be done twice at some error cases, do
> > you think it really good? 
> > 
> 
> Ping.
> 
> I think error recovery should be kept inside error logic level itself, if try to recover upper level error would bring trouble.
> 
> In our example, there are 2 logic levels:
> pcpu level (as container), and dev level (subfield used for sys)

So you need to untangle free_pcpu from doing both. Meaning one does the
SysFS and the other deals with free-ing the structure and removing itself
from the list.


> dev->release should only recover error occurred at dev/sys level, and the pcpu error should be recovered at pcpu level.
> 
> If dev->release try to recover its container pcpu level error, like list_del/kfree(pcpu), it would make confusing. i.e., considering pcpu_sys_create(), 2 error cases:
> device_register fail, and device_create_file fail --> how can the caller decide kfree(pcpu) or not?

Then you should free it manually. But you can do this by a wrapper
function:

__pcpu_release(..) {
	..
	/* Does the removing itself from the list and kfree the pcpu */
}
pcpu_release(..) {
	struct pcpcu *p= container_of(..)
	__pcpu_release(p);
}

dev->release = &pcpu_release;

> 
> So how about recover pcpu error manually and explicitly?
> 
> Thanks,
> Jinsong

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 3/3] Xen physical cpus interface
  2012-05-11 14:27           ` Konrad Rzeszutek Wilk
@ 2012-05-11 18:04             ` Liu, Jinsong
  2012-05-11 19:00               ` [Xen-devel] " Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 11+ messages in thread
From: Liu, Jinsong @ 2012-05-11 18:04 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: 'xen-devel@lists.xensource.com',
	'linux-kernel@vger.kernel.org'

Konrad Rzeszutek Wilk wrote:
> On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
>> Liu, Jinsong wrote:
>>> Just notice your reply (so quick :)
>>> 
>>> Agree and will update later, except 1 concern below.
>>> 
>>> Konrad Rzeszutek Wilk wrote:
>>>>> 
>>>>> Hmm, it's good if it's convenient to do it automatically via
>>>>> dev->release. However, dev container (pcpu) would be free at some
>>>>> other error cases, so I prefer do it 'manually'.
>>>> 
>>>> You could also call pcpu_release(..) to do it manually.
>>>> 
>>> 
>>> that means kfree(pcpu) would be done twice at some error cases, do
>>> you think it really good? 
>>> 
>> 
>> Ping.
>> 
>> I think error recovery should be kept inside error logic level
>> itself, if try to recover upper level error would bring trouble. 
>> 
>> In our example, there are 2 logic levels:
>> pcpu level (as container), and dev level (subfield used for sys)
> 
> So you need to untangle free_pcpu from doing both. Meaning one does
> the SysFS and the other deals with free-ing the structure and
> removing itself from the list.
> 

free_cpu is very samll, just consist of the 2 parts your said:
* pcpu_sys_remove() deal with sysfs
* list_del/kfree(pcpu) deal with pcpu

> 
>> dev->release should only recover error occurred at dev/sys level,
>> and the pcpu error should be recovered at pcpu level. 
>> 
>> If dev->release try to recover its container pcpu level error, like
>> list_del/kfree(pcpu), it would make confusing. i.e., considering
>> pcpu_sys_create(), 2 error cases: device_register fail, and
>> device_create_file fail --> how can the caller decide kfree(pcpu) or
>> not?   
> 
> Then you should free it manually. But you can do this by a wrapper
> function:
> 
> __pcpu_release(..) {
> 	..
> 	/* Does the removing itself from the list and kfree the pcpu */
> }
> pcpu_release(..) {
> 	struct pcpcu *p= container_of(..)
> 	__pcpu_release(p);
> }
> 
> dev->release = &pcpu_release;
> 

Too weird way. If we want to release dev itself it's good to use dev->release, but for pcpu I doubt it.
(consider the example I gave --> why we create issue (it maybe solved in weird method I agree), just for using dev->release?)

In kernel many dev->release keep NULL.
An example of using dev->release is cpu/mcheck/mce.c --> mce_device_release(), it *just* deal dev itself.

Thanks,
Jinsong


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface
  2012-05-11 18:04             ` Liu, Jinsong
@ 2012-05-11 19:00               ` Konrad Rzeszutek Wilk
  2012-05-11 20:31                 ` Liu, Jinsong
  0 siblings, 1 reply; 11+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-05-11 19:00 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: 'xen-devel@lists.xensource.com',
	'linux-kernel@vger.kernel.org'

On Fri, May 11, 2012 at 06:04:21PM +0000, Liu, Jinsong wrote:
> Konrad Rzeszutek Wilk wrote:
> > On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
> >> Liu, Jinsong wrote:
> >>> Just notice your reply (so quick :)
> >>> 
> >>> Agree and will update later, except 1 concern below.
> >>> 
> >>> Konrad Rzeszutek Wilk wrote:
> >>>>> 
> >>>>> Hmm, it's good if it's convenient to do it automatically via
> >>>>> dev->release. However, dev container (pcpu) would be free at some
> >>>>> other error cases, so I prefer do it 'manually'.
> >>>> 
> >>>> You could also call pcpu_release(..) to do it manually.
> >>>> 
> >>> 
> >>> that means kfree(pcpu) would be done twice at some error cases, do
> >>> you think it really good? 
> >>> 
> >> 
> >> Ping.
> >> 
> >> I think error recovery should be kept inside error logic level
> >> itself, if try to recover upper level error would bring trouble. 
> >> 
> >> In our example, there are 2 logic levels:
> >> pcpu level (as container), and dev level (subfield used for sys)
> > 
> > So you need to untangle free_pcpu from doing both. Meaning one does
> > the SysFS and the other deals with free-ing the structure and
> > removing itself from the list.
> > 
> 
> free_cpu is very samll, just consist of the 2 parts your said:
> * pcpu_sys_remove() deal with sysfs
> * list_del/kfree(pcpu) deal with pcpu
> 
> > 
> >> dev->release should only recover error occurred at dev/sys level,
> >> and the pcpu error should be recovered at pcpu level. 
> >> 
> >> If dev->release try to recover its container pcpu level error, like
> >> list_del/kfree(pcpu), it would make confusing. i.e., considering
> >> pcpu_sys_create(), 2 error cases: device_register fail, and
> >> device_create_file fail --> how can the caller decide kfree(pcpu) or
> >> not?   
> > 
> > Then you should free it manually. But you can do this by a wrapper
> > function:
> > 
> > __pcpu_release(..) {
> > 	..
> > 	/* Does the removing itself from the list and kfree the pcpu */
> > }
> > pcpu_release(..) {
> > 	struct pcpcu *p= container_of(..)
> > 	__pcpu_release(p);
> > }
> > 
> > dev->release = &pcpu_release;
> > 
> 
> Too weird way. If we want to release dev itself it's good to use dev->release, but for pcpu I doubt it.
> (consider the example I gave --> why we create issue (it maybe solved in weird method I agree), just for using dev->release?)
> 
> In kernel many dev->release keep NULL.
> An example of using dev->release is cpu/mcheck/mce.c --> mce_device_release(), it *just* deal dev itself.

OK? I am not sure what are we arguing here anymore?
I think using 'kfree(pcpu)' on the error paths (as long as it is
done before device_register) is OK. I think that seperating
the SysFS deletion from the pcpu deletion should be done to
avoid races. Perhaps the SysFS deletion function should also
remove the pcpu from the list.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [Xen-devel] [PATCH 3/3] Xen physical cpus interface
  2012-05-11 19:00               ` [Xen-devel] " Konrad Rzeszutek Wilk
@ 2012-05-11 20:31                 ` Liu, Jinsong
  2012-05-11 20:48                   ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 11+ messages in thread
From: Liu, Jinsong @ 2012-05-11 20:31 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: 'xen-devel@lists.xensource.com',
	'linux-kernel@vger.kernel.org'

Konrad Rzeszutek Wilk wrote:
> On Fri, May 11, 2012 at 06:04:21PM +0000, Liu, Jinsong wrote:
>> Konrad Rzeszutek Wilk wrote:
>>> On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
>>>> Liu, Jinsong wrote:
>>>>> Just notice your reply (so quick :)
>>>>> 
>>>>> Agree and will update later, except 1 concern below.
>>>>> 
>>>>> Konrad Rzeszutek Wilk wrote:
>>>>>>> 
>>>>>>> Hmm, it's good if it's convenient to do it automatically via
>>>>>>> dev->release. However, dev container (pcpu) would be free at
>>>>>>> some other error cases, so I prefer do it 'manually'.
>>>>>> 
>>>>>> You could also call pcpu_release(..) to do it manually.
>>>>>> 
>>>>> 
>>>>> that means kfree(pcpu) would be done twice at some error cases,
>>>>> do you think it really good? 
>>>>> 
>>>> 
>>>> Ping.
>>>> 
>>>> I think error recovery should be kept inside error logic level
>>>> itself, if try to recover upper level error would bring trouble.
>>>> 
>>>> In our example, there are 2 logic levels:
>>>> pcpu level (as container), and dev level (subfield used for sys)
>>> 
>>> So you need to untangle free_pcpu from doing both. Meaning one does
>>> the SysFS and the other deals with free-ing the structure and
>>> removing itself from the list.
>>> 
>> 
>> free_cpu is very samll, just consist of the 2 parts your said:
>> * pcpu_sys_remove() deal with sysfs
>> * list_del/kfree(pcpu) deal with pcpu
>> 
>>> 
>>>> dev->release should only recover error occurred at dev/sys level,
>>>> and the pcpu error should be recovered at pcpu level.
>>>> 
>>>> If dev->release try to recover its container pcpu level error, like
>>>> list_del/kfree(pcpu), it would make confusing. i.e., considering
>>>> pcpu_sys_create(), 2 error cases: device_register fail, and
>>>> device_create_file fail --> how can the caller decide kfree(pcpu)
>>>> or not?
>>> 
>>> Then you should free it manually. But you can do this by a wrapper
>>> function: 
>>> 
>>> __pcpu_release(..) {
>>> 	..
>>> 	/* Does the removing itself from the list and kfree the pcpu */ }
>>> pcpu_release(..) {
>>> 	struct pcpcu *p= container_of(..)
>>> 	__pcpu_release(p);
>>> }
>>> 
>>> dev->release = &pcpu_release;
>>> 
>> 
>> Too weird way. If we want to release dev itself it's good to use
>> dev->release, but for pcpu I doubt it. (consider the example I gave
>> --> why we create issue (it maybe solved in weird method I agree),
>> just for using dev->release?)  
>> 
>> In kernel many dev->release keep NULL.
>> An example of using dev->release is cpu/mcheck/mce.c -->
>> mce_device_release(), it *just* deal dev itself. 
> 
> OK? I am not sure what are we arguing here anymore?
> I think using 'kfree(pcpu)' on the error paths (as long as it is
> done before device_register) is OK. I think that seperating
> the SysFS deletion from the pcpu deletion should be done to
> avoid races. Perhaps the SysFS deletion function should also
> remove the pcpu from the list.

How about static array pcpu[NR_CPUS]?
It seems solve all issues we argued :)

Thanks,
Jinsong

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [Xen-devel] [PATCH 3/3] Xen physical cpus interface
  2012-05-11 20:31                 ` Liu, Jinsong
@ 2012-05-11 20:48                   ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 11+ messages in thread
From: Konrad Rzeszutek Wilk @ 2012-05-11 20:48 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: 'xen-devel@lists.xensource.com',
	'linux-kernel@vger.kernel.org'

On Fri, May 11, 2012 at 08:31:15PM +0000, Liu, Jinsong wrote:
> Konrad Rzeszutek Wilk wrote:
> > On Fri, May 11, 2012 at 06:04:21PM +0000, Liu, Jinsong wrote:
> >> Konrad Rzeszutek Wilk wrote:
> >>> On Fri, May 11, 2012 at 01:12:13PM +0000, Liu, Jinsong wrote:
> >>>> Liu, Jinsong wrote:
> >>>>> Just notice your reply (so quick :)
> >>>>> 
> >>>>> Agree and will update later, except 1 concern below.
> >>>>> 
> >>>>> Konrad Rzeszutek Wilk wrote:
> >>>>>>> 
> >>>>>>> Hmm, it's good if it's convenient to do it automatically via
> >>>>>>> dev->release. However, dev container (pcpu) would be free at
> >>>>>>> some other error cases, so I prefer do it 'manually'.
> >>>>>> 
> >>>>>> You could also call pcpu_release(..) to do it manually.
> >>>>>> 
> >>>>> 
> >>>>> that means kfree(pcpu) would be done twice at some error cases,
> >>>>> do you think it really good? 
> >>>>> 
> >>>> 
> >>>> Ping.
> >>>> 
> >>>> I think error recovery should be kept inside error logic level
> >>>> itself, if try to recover upper level error would bring trouble.
> >>>> 
> >>>> In our example, there are 2 logic levels:
> >>>> pcpu level (as container), and dev level (subfield used for sys)
> >>> 
> >>> So you need to untangle free_pcpu from doing both. Meaning one does
> >>> the SysFS and the other deals with free-ing the structure and
> >>> removing itself from the list.
> >>> 
> >> 
> >> free_cpu is very samll, just consist of the 2 parts your said:
> >> * pcpu_sys_remove() deal with sysfs
> >> * list_del/kfree(pcpu) deal with pcpu
> >> 
> >>> 
> >>>> dev->release should only recover error occurred at dev/sys level,
> >>>> and the pcpu error should be recovered at pcpu level.
> >>>> 
> >>>> If dev->release try to recover its container pcpu level error, like
> >>>> list_del/kfree(pcpu), it would make confusing. i.e., considering
> >>>> pcpu_sys_create(), 2 error cases: device_register fail, and
> >>>> device_create_file fail --> how can the caller decide kfree(pcpu)
> >>>> or not?
> >>> 
> >>> Then you should free it manually. But you can do this by a wrapper
> >>> function: 
> >>> 
> >>> __pcpu_release(..) {
> >>> 	..
> >>> 	/* Does the removing itself from the list and kfree the pcpu */ }
> >>> pcpu_release(..) {
> >>> 	struct pcpcu *p= container_of(..)
> >>> 	__pcpu_release(p);
> >>> }
> >>> 
> >>> dev->release = &pcpu_release;
> >>> 
> >> 
> >> Too weird way. If we want to release dev itself it's good to use
> >> dev->release, but for pcpu I doubt it. (consider the example I gave
> >> --> why we create issue (it maybe solved in weird method I agree),
> >> just for using dev->release?)  
> >> 
> >> In kernel many dev->release keep NULL.
> >> An example of using dev->release is cpu/mcheck/mce.c -->
> >> mce_device_release(), it *just* deal dev itself. 
> > 
> > OK? I am not sure what are we arguing here anymore?
> > I think using 'kfree(pcpu)' on the error paths (as long as it is
> > done before device_register) is OK. I think that seperating
> > the SysFS deletion from the pcpu deletion should be done to
> > avoid races. Perhaps the SysFS deletion function should also
> > remove the pcpu from the list.
> 
> How about static array pcpu[NR_CPUS]?
> It seems solve all issues we argued :)

Ugh. That could mean a structure of more than 4K of items.
Lets stick with making it dynamic.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-05-11 20:54 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-19 13:33 [PATCH 3/3] Xen physical cpus interface Liu, Jinsong
2012-04-20 19:24 ` Konrad Rzeszutek Wilk
2012-05-10 14:54   ` Liu, Jinsong
2012-05-10 14:57     ` Konrad Rzeszutek Wilk
2012-05-10 15:20       ` Liu, Jinsong
     [not found]       ` <DE8DF0795D48FD4CA783C40EC82923351B5A2E@SHSMSX101.ccr.corp.intel.com>
2012-05-11 13:12         ` Liu, Jinsong
2012-05-11 14:27           ` Konrad Rzeszutek Wilk
2012-05-11 18:04             ` Liu, Jinsong
2012-05-11 19:00               ` [Xen-devel] " Konrad Rzeszutek Wilk
2012-05-11 20:31                 ` Liu, Jinsong
2012-05-11 20:48                   ` Konrad Rzeszutek Wilk

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).