All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] pseries: Move CPU dlpar into the kernel
@ 2015-06-22 20:54 Nathan Fontenot
  2015-06-22 20:56 ` [PATCH 1/6] pseries: Consolidate CPU hotplug code to hotplug-cpu.c Nathan Fontenot
                   ` (5 more replies)
  0 siblings, 6 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 20:54 UTC (permalink / raw)
  To: linuxppc-dev

To better support CPU hotplug in PowerKVM and PowerVM environments, the
handling of CPU dlpar should be done entirely in the kernel. This will allow
a common entry point for PowerVM and PowerKVM CPU dlpar requests. For both
environments, the entry point is the same one introduced in a previous
patch set that moved memory hotplug into the kernel. The entry point accepts
a rtas hotplug event which is either constructed when using the 
/sys/kernel/dlpar interface or is passed to the kernel when handling a
ras epow interrupt.

-Nathan

Patch 1/6:
- Consolidate cpu hotplug code from pseries/dlpar.c to pseries/hotplug-cpu.c

Patch 2/6:
- Factor out common code pieces for both environments

Patch 3/6:
- Update cpu dlpar error recovery

Patch 4/6:
- Add cpu hotplug remove capability

Patch 5/6:
- Add cpu hotplug add capability

Patch 6/6:
- Enable sysfs interface for cpu hotplug

 dlpar.c       |  195 -------------------
 hotplug-cpu.c |  578 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 pseries.h     |    5 
 3 files changed, 549 insertions(+), 229 deletions(-)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH 1/6] pseries: Consolidate CPU hotplug code to hotplug-cpu.c
  2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
@ 2015-06-22 20:56 ` Nathan Fontenot
  2015-06-22 20:58 ` [PATCH 2/6] pseries: Factor out common cpu hotplug code Nathan Fontenot
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 20:56 UTC (permalink / raw)
  To: linuxppc-dev

No functional changes, this patch is simply a move of the cpu hotplug
code from pseries/dlpar.c to pseries/hotplug-cpu.c. This is in an effort
to consolidate all of the cpu hotplug code in a common place.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/dlpar.c       |  189 --------------------------
 arch/powerpc/platforms/pseries/hotplug-cpu.c |  183 +++++++++++++++++++++++++
 2 files changed, 184 insertions(+), 188 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 47d9cebe..4463e9a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -18,7 +18,6 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include "offline_states.h"
 #include "pseries.h"
 
 #include <asm/prom.h>
@@ -359,183 +358,6 @@ int dlpar_release_drc(u32 drc_index)
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-
-static int dlpar_online_cpu(struct device_node *dn)
-{
-	int rc = 0;
-	unsigned int cpu;
-	int len, nthreads, i;
-	const __be32 *intserv;
-	u32 thread;
-
-	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
-	if (!intserv)
-		return -EINVAL;
-
-	nthreads = len / sizeof(u32);
-
-	cpu_maps_update_begin();
-	for (i = 0; i < nthreads; i++) {
-		thread = be32_to_cpu(intserv[i]);
-		for_each_present_cpu(cpu) {
-			if (get_hard_smp_processor_id(cpu) != thread)
-				continue;
-			BUG_ON(get_cpu_current_state(cpu)
-					!= CPU_STATE_OFFLINE);
-			cpu_maps_update_done();
-			rc = device_online(get_cpu_device(cpu));
-			if (rc)
-				goto out;
-			cpu_maps_update_begin();
-
-			break;
-		}
-		if (cpu == num_possible_cpus())
-			printk(KERN_WARNING "Could not find cpu to online "
-			       "with physical id 0x%x\n", thread);
-	}
-	cpu_maps_update_done();
-
-out:
-	return rc;
-
-}
-
-static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
-{
-	struct device_node *dn, *parent;
-	u32 drc_index;
-	int rc;
-
-	rc = kstrtou32(buf, 0, &drc_index);
-	if (rc)
-		return -EINVAL;
-
-	rc = dlpar_acquire_drc(drc_index);
-	if (rc)
-		return -EINVAL;
-
-	parent = of_find_node_by_path("/cpus");
-	if (!parent)
-		return -ENODEV;
-
-	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
-	of_node_put(parent);
-	if (!dn)
-		return -EINVAL;
-
-	rc = dlpar_attach_node(dn);
-	if (rc) {
-		dlpar_release_drc(drc_index);
-		dlpar_free_cc_nodes(dn);
-		return rc;
-	}
-
-	rc = dlpar_online_cpu(dn);
-	if (rc)
-		return rc;
-
-	return count;
-}
-
-static int dlpar_offline_cpu(struct device_node *dn)
-{
-	int rc = 0;
-	unsigned int cpu;
-	int len, nthreads, i;
-	const __be32 *intserv;
-	u32 thread;
-
-	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
-	if (!intserv)
-		return -EINVAL;
-
-	nthreads = len / sizeof(u32);
-
-	cpu_maps_update_begin();
-	for (i = 0; i < nthreads; i++) {
-		thread = be32_to_cpu(intserv[i]);
-		for_each_present_cpu(cpu) {
-			if (get_hard_smp_processor_id(cpu) != thread)
-				continue;
-
-			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
-				break;
-
-			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
-				set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-				cpu_maps_update_done();
-				rc = device_offline(get_cpu_device(cpu));
-				if (rc)
-					goto out;
-				cpu_maps_update_begin();
-				break;
-
-			}
-
-			/*
-			 * The cpu is in CPU_STATE_INACTIVE.
-			 * Upgrade it's state to CPU_STATE_OFFLINE.
-			 */
-			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-			BUG_ON(plpar_hcall_norets(H_PROD, thread)
-								!= H_SUCCESS);
-			__cpu_die(cpu);
-			break;
-		}
-		if (cpu == num_possible_cpus())
-			printk(KERN_WARNING "Could not find cpu to offline "
-			       "with physical id 0x%x\n", thread);
-	}
-	cpu_maps_update_done();
-
-out:
-	return rc;
-
-}
-
-static ssize_t dlpar_cpu_release(const char *buf, size_t count)
-{
-	struct device_node *dn;
-	u32 drc_index;
-	int rc;
-
-	dn = of_find_node_by_path(buf);
-	if (!dn)
-		return -EINVAL;
-
-	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
-	if (rc) {
-		of_node_put(dn);
-		return -EINVAL;
-	}
-
-	rc = dlpar_offline_cpu(dn);
-	if (rc) {
-		of_node_put(dn);
-		return -EINVAL;
-	}
-
-	rc = dlpar_release_drc(drc_index);
-	if (rc) {
-		of_node_put(dn);
-		return rc;
-	}
-
-	rc = dlpar_detach_node(dn);
-	if (rc) {
-		dlpar_acquire_drc(drc_index);
-		return rc;
-	}
-
-	of_node_put(dn);
-
-	return count;
-}
-
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-
 static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
 	int rc;
@@ -643,16 +465,7 @@ static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store);
 
 static int __init pseries_dlpar_init(void)
 {
-	int rc;
-
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-	ppc_md.cpu_probe = dlpar_cpu_probe;
-	ppc_md.cpu_release = dlpar_cpu_release;
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-
-	rc = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
-
-	return rc;
+	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
 machine_device_initcall(pseries, pseries_dlpar_init);
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6247544..c55cdbc 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -32,6 +32,7 @@
 #include <asm/xics.h>
 #include <asm/plpar_wrappers.h>
 
+#include "pseries.h"
 #include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
@@ -339,6 +340,183 @@ static void pseries_remove_processor(struct device_node *np)
 	cpu_maps_update_done();
 }
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+static int dlpar_online_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+			BUG_ON(get_cpu_current_state(cpu)
+					!= CPU_STATE_OFFLINE);
+			cpu_maps_update_done();
+			rc = device_online(get_cpu_device(cpu));
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
+
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to online "
+			       "with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	struct device_node *dn, *parent;
+	u32 drc_index;
+	int rc;
+
+	rc = kstrtou32(buf, 0, &drc_index);
+	if (rc)
+		return -EINVAL;
+
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc)
+		return -EINVAL;
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent)
+		return -ENODEV;
+
+	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
+	of_node_put(parent);
+	if (!dn)
+		return -EINVAL;
+
+	rc = dlpar_attach_node(dn);
+	if (rc) {
+		dlpar_release_drc(drc_index);
+		dlpar_free_cc_nodes(dn);
+		return rc;
+	}
+
+	rc = dlpar_online_cpu(dn);
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+				break;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+				set_preferred_offline_state(cpu,
+							    CPU_STATE_OFFLINE);
+				cpu_maps_update_done();
+				rc = device_offline(get_cpu_device(cpu));
+				if (rc)
+					goto out;
+				cpu_maps_update_begin();
+				break;
+
+			}
+
+			/*
+			 * The cpu is in CPU_STATE_INACTIVE.
+			 * Upgrade it's state to CPU_STATE_OFFLINE.
+			 */
+			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+			BUG_ON(plpar_hcall_norets(H_PROD, thread)
+								!= H_SUCCESS);
+			__cpu_die(cpu);
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_release(const char *buf, size_t count)
+{
+	struct device_node *dn;
+	u32 drc_index;
+	int rc;
+
+	dn = of_find_node_by_path(buf);
+	if (!dn)
+		return -EINVAL;
+
+	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
+	if (rc) {
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	rc = dlpar_offline_cpu(dn);
+	if (rc) {
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc) {
+		of_node_put(dn);
+		return rc;
+	}
+
+	rc = dlpar_detach_node(dn);
+	if (rc) {
+		dlpar_acquire_drc(drc_index);
+		return rc;
+	}
+
+	of_node_put(dn);
+
+	return count;
+}
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 static int pseries_smp_notifier(struct notifier_block *nb,
 				unsigned long action, void *data)
 {
@@ -385,6 +563,11 @@ static int __init pseries_cpu_hotplug_init(void)
 	int cpu;
 	int qcss_tok;
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 	for_each_node_by_name(np, "interrupt-controller") {
 		typep = of_get_property(np, "compatible", NULL);
 		if (strstr(typep, "open-pic")) {

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 2/6] pseries: Factor out common cpu hotplug code
  2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
  2015-06-22 20:56 ` [PATCH 1/6] pseries: Consolidate CPU hotplug code to hotplug-cpu.c Nathan Fontenot
@ 2015-06-22 20:58 ` Nathan Fontenot
  2015-06-22 20:59 ` [PATCH 3/6] pseries: Update CPU hotplug error recovery Nathan Fontenot
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 20:58 UTC (permalink / raw)
  To: linuxppc-dev

Re-factor the cpu hotplug code to support cpu hotplug completely in
the kernel and using the existing sysfs probe/release interface. Move
pieces of the existing cpu hotplug code that will be common to both
interfaces into common routines. This patch does not introduce any
functional changes.
    
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   83 ++++++++++++++------------
 1 file changed, 46 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index c55cdbc..f58d902 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -340,8 +340,6 @@ static void pseries_remove_processor(struct device_node *np)
 	cpu_maps_update_done();
 }
 
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
 	int rc = 0;
@@ -383,26 +381,16 @@ out:
 
 }
 
-static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+static ssize_t dlpar_cpu_add(struct device_node *parent, u32 drc_index)
 {
-	struct device_node *dn, *parent;
-	u32 drc_index;
+	struct device_node *dn;
 	int rc;
 
-	rc = kstrtou32(buf, 0, &drc_index);
-	if (rc)
-		return -EINVAL;
-
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc)
 		return -EINVAL;
 
-	parent = of_find_node_by_path("/cpus");
-	if (!parent)
-		return -ENODEV;
-
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
-	of_node_put(parent);
 	if (!dn)
 		return -EINVAL;
 
@@ -414,10 +402,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	}
 
 	rc = dlpar_online_cpu(dn);
-	if (rc)
-		return rc;
-
-	return count;
+	return rc;
 }
 
 static int dlpar_offline_cpu(struct device_node *dn)
@@ -476,6 +461,47 @@ out:
 
 }
 
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+{
+	int rc;
+
+	rc = dlpar_offline_cpu(dn);
+	if (rc)
+		return -EINVAL;
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc)
+		return rc;
+
+	rc = dlpar_detach_node(dn);
+	if (rc)
+		dlpar_acquire_drc(drc_index);
+
+	return rc;
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	struct device_node *parent;
+	u32 drc_index;
+	int rc;
+
+	rc = kstrtou32(buf, 0, &drc_index);
+	if (rc)
+		return -EINVAL;
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent)
+		return -ENODEV;
+
+	rc = dlpar_cpu_add(parent, drc_index);
+	of_node_put(parent);
+
+	return rc ? rc : count;
+}
+
 static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 {
 	struct device_node *dn;
@@ -492,27 +518,10 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
-	rc = dlpar_offline_cpu(dn);
-	if (rc) {
-		of_node_put(dn);
-		return -EINVAL;
-	}
-
-	rc = dlpar_release_drc(drc_index);
-	if (rc) {
-		of_node_put(dn);
-		return rc;
-	}
-
-	rc = dlpar_detach_node(dn);
-	if (rc) {
-		dlpar_acquire_drc(drc_index);
-		return rc;
-	}
-
+	rc = dlpar_cpu_remove(dn, drc_index);
 	of_node_put(dn);
 
-	return count;
+	return rc ? rc : count;
 }
 
 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 3/6] pseries: Update CPU hotplug error recovery
  2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
  2015-06-22 20:56 ` [PATCH 1/6] pseries: Consolidate CPU hotplug code to hotplug-cpu.c Nathan Fontenot
  2015-06-22 20:58 ` [PATCH 2/6] pseries: Factor out common cpu hotplug code Nathan Fontenot
@ 2015-06-22 20:59 ` Nathan Fontenot
  2015-07-21  4:46   ` [3/6] " Michael Ellerman
  2015-06-22 21:00 ` [PATCH 4/6] pseries: Add CPU dlpar remove functionality Nathan Fontenot
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 20:59 UTC (permalink / raw)
  To: linuxppc-dev

Update the cpu dlpar add/remove paths to do better error recovery when 
a failure occurs during the add/remove operation. This includes adding
some pr_info and pr_debug statements.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   34 +++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index f58d902..7890b2f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -18,6 +18,8 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt
+
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -386,22 +388,35 @@ static ssize_t dlpar_cpu_add(struct device_node *parent, u32 drc_index)
 	struct device_node *dn;
 	int rc;
 
+	pr_info("Attempting to add CPU, drc index %x\n", drc_index);
+
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc)
 		return -EINVAL;
 
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
-	if (!dn)
+	if (!dn) {
+		pr_debug("Failed call to configure-connector\n");
+		dlpar_release_drc(drc_index);
 		return -EINVAL;
+	}
 
 	rc = dlpar_attach_node(dn);
 	if (rc) {
+		pr_debug("Failed to attach node (%d)\n", rc);
 		dlpar_release_drc(drc_index);
 		dlpar_free_cc_nodes(dn);
 		return rc;
 	}
 
 	rc = dlpar_online_cpu(dn);
+	if (rc) {
+		pr_debug("Failed to online cpu (%d)\n", rc);
+		dlpar_detach_node(dn);
+		dlpar_release_drc(drc_index);
+	}
+
+	pr_info("Successfully added CPU, drc index %x\n", drc_index);
 	return rc;
 }
 
@@ -465,18 +480,29 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 {
 	int rc;
 
+	pr_info("Attemping to remove CPU, drc index %x\n", drc_index);
+
 	rc = dlpar_offline_cpu(dn);
-	if (rc)
+	if (rc) {
+		pr_debug("Failed top offline cpu (%d)\n", rc);
 		return -EINVAL;
+	}
 
 	rc = dlpar_release_drc(drc_index);
-	if (rc)
+	if (rc) {
+		pr_debug("Failed to release drc (%d)\n", rc);
+		dlpar_online_cpu(dn);
 		return rc;
+	}
 
 	rc = dlpar_detach_node(dn);
-	if (rc)
+	if (rc) {
+		pr_debug("Failed to detach cpu node (%d)\n", rc);
 		dlpar_acquire_drc(drc_index);
+		dlpar_online_cpu(dn);
+	}
 
+	pr_info("Successfully removed CPU, drc index %x\n", drc_index);
 	return rc;
 }
 

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 4/6] pseries: Add CPU dlpar remove functionality
  2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
                   ` (2 preceding siblings ...)
  2015-06-22 20:59 ` [PATCH 3/6] pseries: Update CPU hotplug error recovery Nathan Fontenot
@ 2015-06-22 21:00 ` Nathan Fontenot
  2015-07-21  9:27   ` [4/6] " Michael Ellerman
  2015-07-21  9:46   ` Michael Ellerman
  2015-06-22 21:01 ` [PATCH 5/6] pseries: Add CPU dlpar add functionality Nathan Fontenot
  2015-06-22 21:02 ` [PATCH 6/6] pseries: Enable kernel CPU dlpar from sysfs Nathan Fontenot
  5 siblings, 2 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 21:00 UTC (permalink / raw)
  To: linuxppc-dev

Add the ability to dlpar remove CPUs via hotplug rtas events, either by
specifying the drc-index of the CPU to remove or providing a count of cpus 
to remove.

To accomplish we create a list of possible dr cpus and their drc indexes
so we can easily traverse the list looking for candidates to remove and
easily clean up in any cases of failure.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |  202 ++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/pseries.h     |    5 +
 2 files changed, 207 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7890b2f..49b7196 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>	/* for idle_task_exit */
 #include <linux/cpu.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/firmware.h>
@@ -506,6 +507,207 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 	return rc;
 }
 
+static struct device_node *cpu_drc_index_to_dn(struct device_node *parent,
+					       u32 drc_index)
+{
+	struct device_node *dn;
+	u32 my_index;
+	int rc;
+
+	for_each_child_of_node(parent, dn) {
+		if (of_node_cmp(dn->type, "cpu") != 0)
+			continue;
+
+		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
+		if (rc)
+			continue;
+
+		if (my_index == drc_index)
+			break;
+	}
+
+	return dn;
+}
+
+static int dlpar_cpu_remove_by_index(struct device_node *parent,
+				     u32 drc_index)
+{
+	struct device_node *dn;
+	int rc;
+
+	dn = cpu_drc_index_to_dn(parent, drc_index);
+	if (!dn)
+		return -ENODEV;
+
+	rc = dlpar_cpu_remove(dn, drc_index);
+	of_node_put(dn);
+	return rc;
+}
+
+static int dlpar_cpus_possible(struct device_node *parent)
+{
+	int dr_cpus_possible;
+
+	/* The first u32 in the ibm,drc-indexes property is the numnber
+	 * of drc entries in the property, which is the possible number
+	 * number of dr capable cpus.
+	 */
+	of_property_read_u32(parent, "ibm,drc-indexes", &dr_cpus_possible);
+	return dr_cpus_possible;
+}
+
+struct dr_cpu {
+	u32     drc_index;
+	bool    present;
+	bool    modified;
+};
+
+static struct dr_cpu *get_dlpar_cpus(struct device_node *parent)
+{
+	struct device_node *dn;
+	struct property *prop;
+	struct dr_cpu *dr_cpus;
+	const __be32 *p;
+	u32 drc_index;
+	int dr_cpus_possible, index;
+	bool first;
+
+	dr_cpus_possible = dlpar_cpus_possible(parent);
+	dr_cpus = kcalloc(dr_cpus_possible, sizeof(*dr_cpus), GFP_KERNEL);
+	if (!dr_cpus)
+		return NULL;
+
+	first = true;
+	index = 0;
+	of_property_for_each_u32(parent, "ibm,drc-indexes", prop, p,
+				 drc_index) {
+		if (first) {
+			/* The first entry is the number of drc indexes in
+			 * the property, skip it.
+			 */
+			first = false;
+			continue;
+		}
+
+		dr_cpus[index].drc_index = drc_index;
+
+		dn = cpu_drc_index_to_dn(parent, drc_index);
+		if (dn) {
+			dr_cpus[index].present = true;
+			of_node_put(dn);
+		}
+
+		index++;
+	}
+
+	return dr_cpus;
+}
+
+static int dlpar_cpu_remove_by_count(struct device_node *parent,
+				     u32 cpus_to_remove)
+{
+	struct dr_cpu *dr_cpus;
+	int dr_cpus_removed = 0;
+	int dr_cpus_present = 0;
+	int dr_cpus_possible;
+	int i, rc;
+
+	pr_info("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
+
+	dr_cpus = get_dlpar_cpus(parent);
+	if (!dr_cpus) {
+		pr_info("Could not gather dr CPU info\n");
+		return -EINVAL;
+	}
+
+	dr_cpus_possible = dlpar_cpus_possible(parent);
+
+	for (i = 0; i < dr_cpus_possible; i++) {
+		if (dr_cpus[i].present)
+			dr_cpus_present++;
+	}
+
+	/* Validate the available CPUs to remove.
+	 * NOTE: we can't remove the last CPU.
+	 */
+	if (cpus_to_remove >= dr_cpus_present) {
+		pr_err("Insufficient CPUs (%d) to satisfy remove request\n",
+		       dr_cpus_present);
+		kfree(dr_cpus);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dr_cpus_possible; i++) {
+		if (dr_cpus_removed == cpus_to_remove)
+			break;
+
+		if (!dr_cpus[i].present)
+			continue;
+
+		rc = dlpar_cpu_remove_by_index(parent, dr_cpus[i].drc_index);
+		if (!rc) {
+			dr_cpus_removed++;
+			dr_cpus[i].modified = true;
+		}
+	}
+
+	if (dr_cpus_removed != cpus_to_remove) {
+		pr_info("CPU hot-remove failed, adding any removed CPUs\n");
+
+		for (i = 0; i < dr_cpus_possible; i++) {
+			if (!dr_cpus[i].modified)
+				continue;
+
+			rc = dlpar_cpu_add(parent, dr_cpus[i].drc_index);
+			if (rc)
+				pr_info("Failed to re-add CPU (%x)\n",
+					dr_cpus[i].drc_index);
+		}
+
+		rc = -EINVAL;
+	} else {
+		rc = 0;
+	}
+
+	kfree(dr_cpus);
+	return rc;
+}
+
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	struct device_node *parent;
+	u32 count, drc_index;
+	int rc;
+
+	count = hp_elog->_drc_u.drc_count;
+	drc_index = hp_elog->_drc_u.drc_index;
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent)
+		return -ENODEV;
+
+	lock_device_hotplug();
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
+			rc = dlpar_cpu_remove_by_count(parent, count);
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+			rc = dlpar_cpu_remove_by_index(parent, drc_index);
+		else
+			rc = -EINVAL;
+		break;
+	default:
+		pr_err("Invalid action (%d) specified\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+
+	unlock_device_hotplug();
+	of_node_put(parent);
+	return rc;
+}
+
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 
 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 8411c27..58892f1 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -66,11 +66,16 @@ extern int dlpar_release_drc(u32 drc_index);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
 #else
 static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
 {
 	return -EOPNOTSUPP;
 }
+static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 /* PCI root bridge prepare function override for pseries */

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 5/6] pseries: Add CPU dlpar add functionality
  2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
                   ` (3 preceding siblings ...)
  2015-06-22 21:00 ` [PATCH 4/6] pseries: Add CPU dlpar remove functionality Nathan Fontenot
@ 2015-06-22 21:01 ` Nathan Fontenot
  2015-06-22 21:02 ` [PATCH 6/6] pseries: Enable kernel CPU dlpar from sysfs Nathan Fontenot
  5 siblings, 0 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 21:01 UTC (permalink / raw)
  To: linuxppc-dev

Add the ability to hotplug add cpus via rtas hotplug events by either
specifying the drc index of the CPU to add, or providing a count of the
number of CPUs to add.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c |   76 ++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 49b7196..a5d4c89 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -673,6 +673,74 @@ static int dlpar_cpu_remove_by_count(struct device_node *parent,
 	return rc;
 }
 
+static int dlpar_cpu_add_by_count(struct device_node *parent, u32 cpus_to_add)
+{
+	struct dr_cpu *dr_cpus;
+	int dr_cpus_added = 0;
+	int dr_cpus_available = 0;
+	int dr_cpus_possible;
+	int i, rc;
+
+	pr_info("Attempting to hot-add %d CPUs\n", cpus_to_add);
+
+	dr_cpus = get_dlpar_cpus(parent);
+	if (!dr_cpus) {
+		pr_info("Could not gather dr CPU info\n");
+		return -EINVAL;
+	}
+
+	dr_cpus_possible = dlpar_cpus_possible(parent);
+
+	for (i = 0; i < dr_cpus_possible; i++) {
+		if (!dr_cpus[i].present)
+			dr_cpus_available++;
+	}
+
+	/* Validate the available CPUs to add. */
+	if (cpus_to_add > dr_cpus_available) {
+		pr_err("Insufficient CPUs (%d) to satisfy add request\n",
+		       dr_cpus_available);
+		kfree(dr_cpus);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dr_cpus_possible; i++) {
+		if (dr_cpus_added == cpus_to_add)
+			break;
+
+		if (dr_cpus[i].present)
+			continue;
+
+		rc = dlpar_cpu_add(parent, dr_cpus[i].drc_index);
+		if (!rc) {
+			dr_cpus_added++;
+			dr_cpus[i].modified = true;
+		}
+	}
+
+	if (dr_cpus_added < cpus_to_add) {
+		pr_err("CPU hot-add failed, removing any added CPUs\n");
+
+		for (i = 0; i < dr_cpus_possible; i++) {
+			if (!dr_cpus[i].modified)
+				continue;
+
+			rc = dlpar_cpu_remove_by_index(parent,
+						       dr_cpus[i].drc_index);
+			if (rc)
+				pr_info("Failed to remove added CPU (%x)\n",
+					dr_cpus[i].drc_index);
+		}
+
+		rc = -EINVAL;
+	} else {
+		rc = 0;
+	}
+
+	kfree(dr_cpus);
+	return rc;
+}
+
 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 {
 	struct device_node *parent;
@@ -697,6 +765,14 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		else
 			rc = -EINVAL;
 		break;
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
+			rc = dlpar_cpu_add_by_count(parent, count);
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+			rc = dlpar_cpu_add(parent, drc_index);
+		else
+			rc = -EINVAL;
+		break;
 	default:
 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
 		rc = -EINVAL;

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 6/6] pseries: Enable kernel CPU dlpar from sysfs
  2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
                   ` (4 preceding siblings ...)
  2015-06-22 21:01 ` [PATCH 5/6] pseries: Add CPU dlpar add functionality Nathan Fontenot
@ 2015-06-22 21:02 ` Nathan Fontenot
  5 siblings, 0 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-06-22 21:02 UTC (permalink / raw)
  To: linuxppc-dev

Enable new kernel cpu hotplug functionality by allowing cpu dlpar requests
to be initiated from sysfs.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/dlpar.c |    6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 4463e9a..85b8d9e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -377,6 +377,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 	case PSERIES_HP_ELOG_RESOURCE_MEM:
 		rc = dlpar_memory(hp_elog);
 		break;
+	case PSERIES_HP_ELOG_RESOURCE_CPU:
+		rc = dlpar_cpu(hp_elog);
+		break;
 	default:
 		pr_warn_ratelimited("Invalid resource (%d) specified\n",
 				    hp_elog->resource);
@@ -406,6 +409,9 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
 	if (!strncmp(arg, "memory", 6)) {
 		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
 		arg += strlen("memory ");
+	} else if (!strncmp(arg, "cpu", 3)) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU;
+		arg += strlen("cpu ");
 	} else {
 		pr_err("Invalid resource specified: \"%s\"\n", buf);
 		rc = -EINVAL;

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [3/6] pseries: Update CPU hotplug error recovery
  2015-06-22 20:59 ` [PATCH 3/6] pseries: Update CPU hotplug error recovery Nathan Fontenot
@ 2015-07-21  4:46   ` Michael Ellerman
  2015-07-21 19:14     ` Nathan Fontenot
  0 siblings, 1 reply; 15+ messages in thread
From: Michael Ellerman @ 2015-07-21  4:46 UTC (permalink / raw)
  To: Nathan Fontenot, linuxppc-dev

On Mon, 2015-22-06 at 20:59:20 UTC, Nathan Fontenot wrote:
> Update the cpu dlpar add/remove paths to do better error recovery when 
> a failure occurs during the add/remove operation. This includes adding
> some pr_info and pr_debug statements.

So I'm happy with the idea there, but ..

> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index f58d902..7890b2f 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -18,6 +18,8 @@
>   *      2 of the License, or (at your option) any later version.
>   */
>  
> +#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt

This is good.

>  #include <linux/kernel.h>
>  #include <linux/interrupt.h>
>  #include <linux/delay.h>
> @@ -386,22 +388,35 @@ static ssize_t dlpar_cpu_add(struct device_node *parent, u32 drc_index)
>  	struct device_node *dn;
>  	int rc;
>  
> +	pr_info("Attempting to add CPU, drc index %x\n", drc_index);
> +
>  	rc = dlpar_acquire_drc(drc_index);
>  	if (rc)
>  		return -EINVAL;
>  
>  	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
> -	if (!dn)
> +	if (!dn) {
> +		pr_debug("Failed call to configure-connector\n");
> +		dlpar_release_drc(drc_index);
>  		return -EINVAL;
> +	}
>  
>  	rc = dlpar_attach_node(dn);
>  	if (rc) {
> +		pr_debug("Failed to attach node (%d)\n", rc);
>  		dlpar_release_drc(drc_index);
>  		dlpar_free_cc_nodes(dn);
>  		return rc;
>  	}
>  
>  	rc = dlpar_online_cpu(dn);
> +	if (rc) {
> +		pr_debug("Failed to online cpu (%d)\n", rc);
> +		dlpar_detach_node(dn);
> +		dlpar_release_drc(drc_index);
> +	}
> +
> +	pr_info("Successfully added CPU, drc index %x\n", drc_index);
>  	return rc;


But this is the opposite of what we want.

By default this will print two info lines for each successful cpu hotplug, but
when we get an error nothing will be printed - because pr_debug() is off by
default. What's worse, if dlpar_online_cpu() fails, the pr_debug() will produce
nothing but we will *still* print "Successfully added CPU".

So the pr_info()s should go entirely and the pr_debugs() should become
pr_warns(). The warning messages should become more verbose so they stand on
their own, ie. include the drc_index.

When everything goes perfectly there should be no output.


> @@ -465,18 +480,29 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
>  {
>  	int rc;
>  
> +	pr_info("Attemping to remove CPU, drc index %x\n", drc_index);
> +
>  	rc = dlpar_offline_cpu(dn);
> -	if (rc)
> +	if (rc) {
> +		pr_debug("Failed top offline cpu (%d)\n", rc);
                                   ^
				   should be to

>  		return -EINVAL;
> +	}
>  
>  	rc = dlpar_release_drc(drc_index);
> -	if (rc)
> +	if (rc) {
> +		pr_debug("Failed to release drc (%d)\n", rc);
> +		dlpar_online_cpu(dn);
>  		return rc;
> +	}
>  
>  	rc = dlpar_detach_node(dn);
> -	if (rc)
> +	if (rc) {
> +		pr_debug("Failed to detach cpu node (%d)\n", rc);
>  		dlpar_acquire_drc(drc_index);

But that can fail?

> +		dlpar_online_cpu(dn);

And if it did this would presumably not be safe?


cheers

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [4/6] pseries: Add CPU dlpar remove functionality
  2015-06-22 21:00 ` [PATCH 4/6] pseries: Add CPU dlpar remove functionality Nathan Fontenot
@ 2015-07-21  9:27   ` Michael Ellerman
  2015-07-21 21:34     ` Nathan Fontenot
  2015-07-21  9:46   ` Michael Ellerman
  1 sibling, 1 reply; 15+ messages in thread
From: Michael Ellerman @ 2015-07-21  9:27 UTC (permalink / raw)
  To: Nathan Fontenot, linuxppc-dev

On Mon, 2015-22-06 at 21:00:49 UTC, Nathan Fontenot wrote:
> Add the ability to dlpar remove CPUs via hotplug rtas events, either by
> specifying the drc-index of the CPU to remove or providing a count of cpus 
> to remove.
> 
> To accomplish we create a list of possible dr cpus and their drc indexes

What does "dr" mean? I know but not everyone does. If it's an acronymn it
should be uppercase and spelt out at its first usage.

> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 7890b2f..49b7196 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -506,6 +507,207 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
>  	return rc;
>  }
>  
> +static struct device_node *cpu_drc_index_to_dn(struct device_node *parent,
> +					       u32 drc_index)
> +{
> +	struct device_node *dn;
> +	u32 my_index;
> +	int rc;
> +
> +	for_each_child_of_node(parent, dn) {
> +		if (of_node_cmp(dn->type, "cpu") != 0)
> +			continue;

parent is always "/cpus", so I wonder if this should just be:

	for_each_node_by_type(dn, "cpu") {

And then you don't need parent in here at all.

Or do we realistically expect to be looking for cpus under a node other than "/cpus" ?

> +		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
> +		if (rc)
> +			continue;
> +
> +		if (my_index == drc_index)
> +			break;
> +	}
> +
> +	return dn;
> +}
> +
> +static int dlpar_cpu_remove_by_index(struct device_node *parent,
> +				     u32 drc_index)
> +{
> +	struct device_node *dn;
> +	int rc;
> +
> +	dn = cpu_drc_index_to_dn(parent, drc_index);
> +	if (!dn)
> +		return -ENODEV;
> +
> +	rc = dlpar_cpu_remove(dn, drc_index);
> +	of_node_put(dn);
> +	return rc;
> +}
> +
> +static int dlpar_cpus_possible(struct device_node *parent)
> +{
> +	int dr_cpus_possible;
> +
> +	/* The first u32 in the ibm,drc-indexes property is the numnber
> +	 * of drc entries in the property, which is the possible number
> +	 * number of dr capable cpus.
> +	 */
> +	of_property_read_u32(parent, "ibm,drc-indexes", &dr_cpus_possible);
> +	return dr_cpus_possible;
> +}
> +
> +struct dr_cpu {
> +	u32     drc_index;
> +	bool    present;
> +	bool    modified;
> +};
> +
> +static struct dr_cpu *get_dlpar_cpus(struct device_node *parent)
> +{
> +	struct device_node *dn;
> +	struct property *prop;
> +	struct dr_cpu *dr_cpus;
> +	const __be32 *p;
> +	u32 drc_index;
> +	int dr_cpus_possible, index;
> +	bool first;
> +
> +	dr_cpus_possible = dlpar_cpus_possible(parent);
> +	dr_cpus = kcalloc(dr_cpus_possible, sizeof(*dr_cpus), GFP_KERNEL);
> +	if (!dr_cpus)
> +		return NULL;
> +
> +	first = true;
> +	index = 0;
> +	of_property_for_each_u32(parent, "ibm,drc-indexes", prop, p,
> +				 drc_index) {
> +		if (first) {

What about:

		if (index == 0) {

Then you don't need first at all?

> +			/* The first entry is the number of drc indexes in
> +			 * the property, skip it.
> +			 */
> +			first = false;
> +			continue;
> +		}
> +
> +		dr_cpus[index].drc_index = drc_index;
> +
> +		dn = cpu_drc_index_to_dn(parent, drc_index);

The outer loop is iterating over all drc indexes (ie. one per cpu usually?),
and then cpu_drc_index_to_dn() will iterate over all cpus.

So that's n^2 for n = nr_cpus which is not ideal.

I realise we can't do much better, but what we can do is limit the number of
iterations of the outer loop. Because usually we will be here because we want
to offline less than nr_cpus cpus.

ie. if I ask to offline 1 cpu on a 4096 cpu system we will still do up to 16M
iterations in here, when all we needed to do was one iteration of the outer
loop.

So I think this routine should take the number of cpus we're trying to remove
and only iterate until it's found that many.

> +		if (dn) {
> +			dr_cpus[index].present = true;
> +			of_node_put(dn);
> +		}

Why do we put non present ones in the dr_cpus array at all? It just means you
have to skip them later? (in two separate places)

> +
> +		index++;
> +	}
> +
> +	return dr_cpus;
> +}
> +
> +static int dlpar_cpu_remove_by_count(struct device_node *parent,
> +				     u32 cpus_to_remove)
> +{
> +	struct dr_cpu *dr_cpus;
> +	int dr_cpus_removed = 0;
> +	int dr_cpus_present = 0;
> +	int dr_cpus_possible;
> +	int i, rc;
> +
> +	pr_info("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
> +
> +	dr_cpus = get_dlpar_cpus(parent);

So I think this should be:

> +	dr_cpus = get_dlpar_cpus(parent, cpus_to_remove);

> +	if (!dr_cpus) {
> +		pr_info("Could not gather dr CPU info\n");
> +		return -EINVAL;
> +	}

And get_dlpar_cpus() should return cpus_to_remove worth of present cpus, or it
should error, meaning the below then won't be needed:

> +	dr_cpus_possible = dlpar_cpus_possible(parent);
> +
> +	for (i = 0; i < dr_cpus_possible; i++) {
> +		if (dr_cpus[i].present)
> +			dr_cpus_present++;
> +	}
> +
> +	/* Validate the available CPUs to remove.
> +	 * NOTE: we can't remove the last CPU.
> +	 */
> +	if (cpus_to_remove >= dr_cpus_present) {
> +		pr_err("Insufficient CPUs (%d) to satisfy remove request\n",
> +		       dr_cpus_present);
> +		kfree(dr_cpus);
> +		return -EINVAL;
> +	}
> +

Having only present cpus in dr_cpus should also simplify this loop because you
don't need to skip non-present ones:

> +	for (i = 0; i < dr_cpus_possible; i++) {
> +		if (dr_cpus_removed == cpus_to_remove)
> +			break;
> +
> +		if (!dr_cpus[i].present)
> +			continue;
> +
> +		rc = dlpar_cpu_remove_by_index(parent, dr_cpus[i].drc_index);
> +		if (!rc) {
> +			dr_cpus_removed++;
> +			dr_cpus[i].modified = true;
> +		}

else .. ?

Surely you should bail on the first error?

Definitely you should, because you're going to undo everything below anyway:

> +	}
> +
> +	if (dr_cpus_removed != cpus_to_remove) {
> +		pr_info("CPU hot-remove failed, adding any removed CPUs\n");
> +
> +		for (i = 0; i < dr_cpus_possible; i++) {
> +			if (!dr_cpus[i].modified)
> +				continue;

And if you bail on the first error above you shouldn't need modified, instead
you just iterate in reverse from i using a new counter, eg. j.

> +
> +			rc = dlpar_cpu_add(parent, dr_cpus[i].drc_index);
> +			if (rc)
> +				pr_info("Failed to re-add CPU (%x)\n",
> +					dr_cpus[i].drc_index);
> +		}
> +
> +		rc = -EINVAL;
> +	} else {
> +		rc = 0;
> +	}
> +
> +	kfree(dr_cpus);
> +	return rc;
> +}
> +
> +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
> +{
> +	struct device_node *parent;
> +	u32 count, drc_index;
> +	int rc;
> +
> +	count = hp_elog->_drc_u.drc_count;
> +	drc_index = hp_elog->_drc_u.drc_index;

Those both need be32_to_cpu().

   arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15: warning: incorrect type in assignment (different base types)
   arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15:    expected unsigned int [unsigned] [usertype] count
   arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15:    got restricted __be32 [usertype] drc_count
   arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19: warning: incorrect type in assignment (different base types)
   arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19:    expected unsigned int [unsigned] [usertype] drc_index
   arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19:    got restricted __be32 [usertype] drc_index


Though looking closer I don't see where we ever pass or receive a
pseries_hp_errorlog to or from firmware? So I'm a bit confused why we're
bothering with the __be32 shenanigans. Hopefully I've just missed a detail
somewhere.

> +	parent = of_find_node_by_path("/cpus");
> +	if (!parent)
> +		return -ENODEV;
> +
> +	lock_device_hotplug();
> +
> +	switch (hp_elog->action) {
> +	case PSERIES_HP_ELOG_ACTION_REMOVE:
> +		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
> +			rc = dlpar_cpu_remove_by_count(parent, count);
> +		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
> +			rc = dlpar_cpu_remove_by_index(parent, drc_index);
> +		else
> +			rc = -EINVAL;
> +		break;
> +	default:
> +		pr_err("Invalid action (%d) specified\n", hp_elog->action);
> +		rc = -EINVAL;
> +		break;
> +	}
> +
> +	unlock_device_hotplug();
> +	of_node_put(parent);
> +	return rc;
> +}
> +
>  #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
>  
>  static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 8411c27..58892f1 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -66,11 +66,16 @@ extern int dlpar_release_drc(u32 drc_index);
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
> +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);

I don't think this should be under CONFIG_MEMORY_HOTPLUG ?


cheers

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [4/6] pseries: Add CPU dlpar remove functionality
  2015-06-22 21:00 ` [PATCH 4/6] pseries: Add CPU dlpar remove functionality Nathan Fontenot
  2015-07-21  9:27   ` [4/6] " Michael Ellerman
@ 2015-07-21  9:46   ` Michael Ellerman
  1 sibling, 0 replies; 15+ messages in thread
From: Michael Ellerman @ 2015-07-21  9:46 UTC (permalink / raw)
  To: Nathan Fontenot, linuxppc-dev

On Mon, 2015-22-06 at 21:00:49 UTC, Nathan Fontenot wrote:
> Add the ability to dlpar remove CPUs via hotplug rtas events, either by
> specifying the drc-index of the CPU to remove or providing a count of cpus 
> to remove.
> 
> To accomplish we create a list of possible dr cpus and their drc indexes

What does "dr" mean? I know but not everyone does. If it's an acronymn it
should be uppercase and spelt out at its first usage.

> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 7890b2f..49b7196 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -506,6 +507,207 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
>  	return rc;
>  }
>  
> +static struct device_node *cpu_drc_index_to_dn(struct device_node *parent,
> +					       u32 drc_index)
> +{
> +	struct device_node *dn;
> +	u32 my_index;
> +	int rc;
> +
> +	for_each_child_of_node(parent, dn) {
> +		if (of_node_cmp(dn->type, "cpu") != 0)
> +			continue;

parent is always "/cpus", so I wonder if this should just be:

	for_each_node_by_type(dn, "cpu") {

And then you don't need parent in here at all.

Or do we realistically expect to be looking for cpus under a node other than "/cpus" ?

> +		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
> +		if (rc)
> +			continue;
> +
> +		if (my_index == drc_index)
> +			break;
> +	}
> +
> +	return dn;
> +}
> +
> +static int dlpar_cpu_remove_by_index(struct device_node *parent,
> +				     u32 drc_index)
> +{
> +	struct device_node *dn;
> +	int rc;
> +
> +	dn = cpu_drc_index_to_dn(parent, drc_index);
> +	if (!dn)
> +		return -ENODEV;
> +
> +	rc = dlpar_cpu_remove(dn, drc_index);
> +	of_node_put(dn);
> +	return rc;
> +}
> +
> +static int dlpar_cpus_possible(struct device_node *parent)
> +{
> +	int dr_cpus_possible;
> +
> +	/* The first u32 in the ibm,drc-indexes property is the numnber
> +	 * of drc entries in the property, which is the possible number
> +	 * number of dr capable cpus.
> +	 */
> +	of_property_read_u32(parent, "ibm,drc-indexes", &dr_cpus_possible);
> +	return dr_cpus_possible;
> +}
> +
> +struct dr_cpu {
> +	u32     drc_index;
> +	bool    present;
> +	bool    modified;
> +};
> +
> +static struct dr_cpu *get_dlpar_cpus(struct device_node *parent)
> +{
> +	struct device_node *dn;
> +	struct property *prop;
> +	struct dr_cpu *dr_cpus;
> +	const __be32 *p;
> +	u32 drc_index;
> +	int dr_cpus_possible, index;
> +	bool first;
> +
> +	dr_cpus_possible = dlpar_cpus_possible(parent);
> +	dr_cpus = kcalloc(dr_cpus_possible, sizeof(*dr_cpus), GFP_KERNEL);
> +	if (!dr_cpus)
> +		return NULL;
> +
> +	first = true;
> +	index = 0;
> +	of_property_for_each_u32(parent, "ibm,drc-indexes", prop, p,
> +				 drc_index) {
> +		if (first) {

What about:

		if (index == 0) {

Then you don't need first at all?

> +			/* The first entry is the number of drc indexes in
> +			 * the property, skip it.
> +			 */
> +			first = false;
> +			continue;
> +		}
> +
> +		dr_cpus[index].drc_index = drc_index;
> +
> +		dn = cpu_drc_index_to_dn(parent, drc_index);

The outer loop is iterating over all drc indexes (ie. one per cpu usually?),
and then cpu_drc_index_to_dn() will iterate over all cpus.

So that's n^2 for n = nr_cpus which is not ideal.

I realise we can't do much better, but what we can do is limit the number of
iterations of the outer loop. Because usually we will be here because we want
to offline less than nr_cpus cpus.

ie. if I ask to offline 1 cpu on a 4096 cpu system we will still do up to 16M
iterations in here, when all we needed to do was one iteration of the outer
loop.

So I think this routine should take the number of cpus we're trying to remove
and only iterate until it's found that many.

> +		if (dn) {
> +			dr_cpus[index].present = true;
> +			of_node_put(dn);
> +		}

Why do we put non present ones in the dr_cpus array at all? It just means you
have to skip them later? (in two separate places)

> +
> +		index++;
> +	}
> +
> +	return dr_cpus;
> +}
> +
> +static int dlpar_cpu_remove_by_count(struct device_node *parent,
> +				     u32 cpus_to_remove)
> +{
> +	struct dr_cpu *dr_cpus;
> +	int dr_cpus_removed = 0;
> +	int dr_cpus_present = 0;
> +	int dr_cpus_possible;
> +	int i, rc;
> +
> +	pr_info("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
> +
> +	dr_cpus = get_dlpar_cpus(parent);

So I think this should be:

> +	dr_cpus = get_dlpar_cpus(parent, cpus_to_remove);

> +	if (!dr_cpus) {
> +		pr_info("Could not gather dr CPU info\n");
> +		return -EINVAL;
> +	}

And get_dlpar_cpus() should return cpus_to_remove worth of present cpus, or it
should error, meaning the below then won't be needed:

> +	dr_cpus_possible = dlpar_cpus_possible(parent);
> +
> +	for (i = 0; i < dr_cpus_possible; i++) {
> +		if (dr_cpus[i].present)
> +			dr_cpus_present++;
> +	}
> +
> +	/* Validate the available CPUs to remove.
> +	 * NOTE: we can't remove the last CPU.
> +	 */
> +	if (cpus_to_remove >= dr_cpus_present) {
> +		pr_err("Insufficient CPUs (%d) to satisfy remove request\n",
> +		       dr_cpus_present);
> +		kfree(dr_cpus);
> +		return -EINVAL;
> +	}
> +

Having only present cpus in dr_cpus should also simplify this loop because you
don't need to skip non-present ones:

> +	for (i = 0; i < dr_cpus_possible; i++) {
> +		if (dr_cpus_removed == cpus_to_remove)
> +			break;
> +
> +		if (!dr_cpus[i].present)
> +			continue;
> +
> +		rc = dlpar_cpu_remove_by_index(parent, dr_cpus[i].drc_index);
> +		if (!rc) {
> +			dr_cpus_removed++;
> +			dr_cpus[i].modified = true;
> +		}

else .. ?

Surely you should bail on the first error?

Definitely you should, because you're going to undo everything below anyway:

> +	}
> +
> +	if (dr_cpus_removed != cpus_to_remove) {
> +		pr_info("CPU hot-remove failed, adding any removed CPUs\n");
> +
> +		for (i = 0; i < dr_cpus_possible; i++) {
> +			if (!dr_cpus[i].modified)
> +				continue;

And if you bail on the first error above you shouldn't need modified, instead
you just iterate in reverse from i using a new counter, eg. j.

> +
> +			rc = dlpar_cpu_add(parent, dr_cpus[i].drc_index);
> +			if (rc)
> +				pr_info("Failed to re-add CPU (%x)\n",
> +					dr_cpus[i].drc_index);
> +		}
> +
> +		rc = -EINVAL;
> +	} else {
> +		rc = 0;
> +	}
> +
> +	kfree(dr_cpus);
> +	return rc;
> +}
> +
> +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
> +{
> +	struct device_node *parent;
> +	u32 count, drc_index;
> +	int rc;
> +
> +	count = hp_elog->_drc_u.drc_count;
> +	drc_index = hp_elog->_drc_u.drc_index;

Those both need be32_to_cpu().

   arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15: warning: incorrect type in assignment (different base types)
   arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15:    expected unsigned int [unsigned] [usertype] count
   arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15:    got restricted __be32 [usertype] drc_count
   arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19: warning: incorrect type in assignment (different base types)
   arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19:    expected unsigned int [unsigned] [usertype] drc_index
   arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19:    got restricted __be32 [usertype] drc_index


Though looking closer I don't see where we ever pass or receive a
pseries_hp_errorlog to or from firmware? So I'm a bit confused why we're
bothering with the __be32 shenanigans. Hopefully I've just missed a detail
somewhere.

> +	parent = of_find_node_by_path("/cpus");
> +	if (!parent)
> +		return -ENODEV;
> +
> +	lock_device_hotplug();
> +
> +	switch (hp_elog->action) {
> +	case PSERIES_HP_ELOG_ACTION_REMOVE:
> +		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
> +			rc = dlpar_cpu_remove_by_count(parent, count);
> +		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
> +			rc = dlpar_cpu_remove_by_index(parent, drc_index);
> +		else
> +			rc = -EINVAL;
> +		break;
> +	default:
> +		pr_err("Invalid action (%d) specified\n", hp_elog->action);
> +		rc = -EINVAL;
> +		break;
> +	}
> +
> +	unlock_device_hotplug();
> +	of_node_put(parent);
> +	return rc;
> +}
> +
>  #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
>  
>  static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 8411c27..58892f1 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -66,11 +66,16 @@ extern int dlpar_release_drc(u32 drc_index);
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
> +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);

I don't think this should be under CONFIG_MEMORY_HOTPLUG ?


cheers

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [3/6] pseries: Update CPU hotplug error recovery
  2015-07-21  4:46   ` [3/6] " Michael Ellerman
@ 2015-07-21 19:14     ` Nathan Fontenot
  2015-07-22  1:14       ` Michael Ellerman
  0 siblings, 1 reply; 15+ messages in thread
From: Nathan Fontenot @ 2015-07-21 19:14 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev

On 07/20/2015 11:46 PM, Michael Ellerman wrote:
> On Mon, 2015-22-06 at 20:59:20 UTC, Nathan Fontenot wrote:
>> Update the cpu dlpar add/remove paths to do better error recovery when 
>> a failure occurs during the add/remove operation. This includes adding
>> some pr_info and pr_debug statements.
> 
> So I'm happy with the idea there, but ..
> 
>> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> index f58d902..7890b2f 100644
>> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> @@ -18,6 +18,8 @@
>>   *      2 of the License, or (at your option) any later version.
>>   */
>>  
>> +#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt
> 
> This is good.
> 
>>  #include <linux/kernel.h>
>>  #include <linux/interrupt.h>
>>  #include <linux/delay.h>
>> @@ -386,22 +388,35 @@ static ssize_t dlpar_cpu_add(struct device_node *parent, u32 drc_index)
>>  	struct device_node *dn;
>>  	int rc;
>>  
>> +	pr_info("Attempting to add CPU, drc index %x\n", drc_index);
>> +
>>  	rc = dlpar_acquire_drc(drc_index);
>>  	if (rc)
>>  		return -EINVAL;
>>  
>>  	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
>> -	if (!dn)
>> +	if (!dn) {
>> +		pr_debug("Failed call to configure-connector\n");
>> +		dlpar_release_drc(drc_index);
>>  		return -EINVAL;
>> +	}
>>  
>>  	rc = dlpar_attach_node(dn);
>>  	if (rc) {
>> +		pr_debug("Failed to attach node (%d)\n", rc);
>>  		dlpar_release_drc(drc_index);
>>  		dlpar_free_cc_nodes(dn);
>>  		return rc;
>>  	}
>>  
>>  	rc = dlpar_online_cpu(dn);
>> +	if (rc) {
>> +		pr_debug("Failed to online cpu (%d)\n", rc);
>> +		dlpar_detach_node(dn);
>> +		dlpar_release_drc(drc_index);
>> +	}
>> +
>> +	pr_info("Successfully added CPU, drc index %x\n", drc_index);
>>  	return rc;
> 
> 
> But this is the opposite of what we want.
> 
> By default this will print two info lines for each successful cpu hotplug, but
> when we get an error nothing will be printed - because pr_debug() is off by
> default. What's worse, if dlpar_online_cpu() fails, the pr_debug() will produce
> nothing but we will *still* print "Successfully added CPU".
> 
> So the pr_info()s should go entirely and the pr_debugs() should become
> pr_warns(). The warning messages should become more verbose so they stand on
> their own, ie. include the drc_index.
> 
> When everything goes perfectly there should be no output.
>

So... good idea, bad implementation :)

I have a feeling I may have messed this up somewhere else in the patch set too
so I'll take a look at all the pr_* calls.
 
> 
>> @@ -465,18 +480,29 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
>>  {
>>  	int rc;
>>  
>> +	pr_info("Attemping to remove CPU, drc index %x\n", drc_index);
>> +
>>  	rc = dlpar_offline_cpu(dn);
>> -	if (rc)
>> +	if (rc) {
>> +		pr_debug("Failed top offline cpu (%d)\n", rc);
>                                    ^
> 				   should be to
> 
>>  		return -EINVAL;
>> +	}
>>  
>>  	rc = dlpar_release_drc(drc_index);
>> -	if (rc)
>> +	if (rc) {
>> +		pr_debug("Failed to release drc (%d)\n", rc);
>> +		dlpar_online_cpu(dn);
>>  		return rc;
>> +	}
>>  
>>  	rc = dlpar_detach_node(dn);
>> -	if (rc)
>> +	if (rc) {
>> +		pr_debug("Failed to detach cpu node (%d)\n", rc);
>>  		dlpar_acquire_drc(drc_index);
> 
> But that can fail?
> 
>> +		dlpar_online_cpu(dn);
> 
> And if it did this would presumably not be safe?

Ahh, good catch. I'll fix in the next version of patches.

Thanks for the review.

-Nathan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [4/6] pseries: Add CPU dlpar remove functionality
  2015-07-21  9:27   ` [4/6] " Michael Ellerman
@ 2015-07-21 21:34     ` Nathan Fontenot
  2015-07-22  1:11       ` Michael Ellerman
  0 siblings, 1 reply; 15+ messages in thread
From: Nathan Fontenot @ 2015-07-21 21:34 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev

On 07/21/2015 04:27 AM, Michael Ellerman wrote:
> On Mon, 2015-22-06 at 21:00:49 UTC, Nathan Fontenot wrote:
>> Add the ability to dlpar remove CPUs via hotplug rtas events, either by
>> specifying the drc-index of the CPU to remove or providing a count of cpus 
>> to remove.
>>
>> To accomplish we create a list of possible dr cpus and their drc indexes
> 
> What does "dr" mean? I know but not everyone does. If it's an acronymn it
> should be uppercase and spelt out at its first usage.
> 

Good point.

>> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> index 7890b2f..49b7196 100644
>> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
>> @@ -506,6 +507,207 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
>>  	return rc;
>>  }
>>  
>> +static struct device_node *cpu_drc_index_to_dn(struct device_node *parent,
>> +					       u32 drc_index)
>> +{
>> +	struct device_node *dn;
>> +	u32 my_index;
>> +	int rc;
>> +
>> +	for_each_child_of_node(parent, dn) {
>> +		if (of_node_cmp(dn->type, "cpu") != 0)
>> +			continue;
> 
> parent is always "/cpus", so I wonder if this should just be:
> 
> 	for_each_node_by_type(dn, "cpu") {
> 
> And then you don't need parent in here at all.
> 
> Or do we realistically expect to be looking for cpus under a node other than "/cpus" ?

This, combined with the comments below about iterating over the list
of dr_cpus, is going away in the next version of the patch. The 'parent'
node pointer is not passed around anymore.

> 
>> +		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
>> +		if (rc)
>> +			continue;
>> +
>> +		if (my_index == drc_index)
>> +			break;
>> +	}
>> +
>> +	return dn;
>> +}
>> +
>> +static int dlpar_cpu_remove_by_index(struct device_node *parent,
>> +				     u32 drc_index)
>> +{
>> +	struct device_node *dn;
>> +	int rc;
>> +
>> +	dn = cpu_drc_index_to_dn(parent, drc_index);
>> +	if (!dn)
>> +		return -ENODEV;
>> +
>> +	rc = dlpar_cpu_remove(dn, drc_index);
>> +	of_node_put(dn);
>> +	return rc;
>> +}
>> +
>> +static int dlpar_cpus_possible(struct device_node *parent)
>> +{
>> +	int dr_cpus_possible;
>> +
>> +	/* The first u32 in the ibm,drc-indexes property is the numnber
>> +	 * of drc entries in the property, which is the possible number
>> +	 * number of dr capable cpus.
>> +	 */
>> +	of_property_read_u32(parent, "ibm,drc-indexes", &dr_cpus_possible);
>> +	return dr_cpus_possible;
>> +}
>> +
>> +struct dr_cpu {
>> +	u32     drc_index;
>> +	bool    present;
>> +	bool    modified;
>> +};
>> +
>> +static struct dr_cpu *get_dlpar_cpus(struct device_node *parent)
>> +{
>> +	struct device_node *dn;
>> +	struct property *prop;
>> +	struct dr_cpu *dr_cpus;
>> +	const __be32 *p;
>> +	u32 drc_index;
>> +	int dr_cpus_possible, index;
>> +	bool first;
>> +
>> +	dr_cpus_possible = dlpar_cpus_possible(parent);
>> +	dr_cpus = kcalloc(dr_cpus_possible, sizeof(*dr_cpus), GFP_KERNEL);
>> +	if (!dr_cpus)
>> +		return NULL;
>> +
>> +	first = true;
>> +	index = 0;
>> +	of_property_for_each_u32(parent, "ibm,drc-indexes", prop, p,
>> +				 drc_index) {
>> +		if (first) {
> 
> What about:
> 
> 		if (index == 0) {
> 
> Then you don't need first at all?
>

This block of code is getting re-written, no need to look for the
first node in the new version.
 
>> +			/* The first entry is the number of drc indexes in
>> +			 * the property, skip it.
>> +			 */
>> +			first = false;
>> +			continue;
>> +		}
>> +
>> +		dr_cpus[index].drc_index = drc_index;
>> +
>> +		dn = cpu_drc_index_to_dn(parent, drc_index);
> 
> The outer loop is iterating over all drc indexes (ie. one per cpu usually?),
> and then cpu_drc_index_to_dn() will iterate over all cpus.
> 
> So that's n^2 for n = nr_cpus which is not ideal.
> 
> I realise we can't do much better, but what we can do is limit the number of
> iterations of the outer loop. Because usually we will be here because we want
> to offline less than nr_cpus cpus.
> 
> ie. if I ask to offline 1 cpu on a 4096 cpu system we will still do up to 16M
> iterations in here, when all we needed to do was one iteration of the outer
> loop.
> 
> So I think this routine should take the number of cpus we're trying to remove
> and only iterate until it's found that many.
> 

Yeah, now that you point it out there is a lot of no good in that code.

Re-writing this with a new approach to only create lists of what is present.

>> +		if (dn) {
>> +			dr_cpus[index].present = true;
>> +			of_node_put(dn);
>> +		}
> 
> Why do we put non present ones in the dr_cpus array at all? It just means you
> have to skip them later? (in two separate places)
>
>> +
>> +		index++;
>> +	}
>> +
>> +	return dr_cpus;
>> +}
>> +
>> +static int dlpar_cpu_remove_by_count(struct device_node *parent,
>> +				     u32 cpus_to_remove)
>> +{
>> +	struct dr_cpu *dr_cpus;
>> +	int dr_cpus_removed = 0;
>> +	int dr_cpus_present = 0;
>> +	int dr_cpus_possible;
>> +	int i, rc;
>> +
>> +	pr_info("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
>> +
>> +	dr_cpus = get_dlpar_cpus(parent);
> 
> So I think this should be:
> 
>> +	dr_cpus = get_dlpar_cpus(parent, cpus_to_remove);
> 
>> +	if (!dr_cpus) {
>> +		pr_info("Could not gather dr CPU info\n");
>> +		return -EINVAL;
>> +	}
> 
> And get_dlpar_cpus() should return cpus_to_remove worth of present cpus, or it
> should error, meaning the below then won't be needed:
>

When adding cpus by count we may need more than just cpus_to_remove worth
of present cpus. The goal was to provide all possibilities so we could
continue trying to satisfy the request even if one or more cpus fails
to remove.

>From this comment and comments below I think your approach is that we
should bail if any error occurs during cpu remove. Is this what we
should be doing?
 
>> +	dr_cpus_possible = dlpar_cpus_possible(parent);
>> +
>> +	for (i = 0; i < dr_cpus_possible; i++) {
>> +		if (dr_cpus[i].present)
>> +			dr_cpus_present++;
>> +	}
>> +
>> +	/* Validate the available CPUs to remove.
>> +	 * NOTE: we can't remove the last CPU.
>> +	 */
>> +	if (cpus_to_remove >= dr_cpus_present) {
>> +		pr_err("Insufficient CPUs (%d) to satisfy remove request\n",
>> +		       dr_cpus_present);
>> +		kfree(dr_cpus);
>> +		return -EINVAL;
>> +	}
>> +
> 
> Having only present cpus in dr_cpus should also simplify this loop because you
> don't need to skip non-present ones:

Fixed in updated code.

> 
>> +	for (i = 0; i < dr_cpus_possible; i++) {
>> +		if (dr_cpus_removed == cpus_to_remove)
>> +			break;
>> +
>> +		if (!dr_cpus[i].present)
>> +			continue;
>> +
>> +		rc = dlpar_cpu_remove_by_index(parent, dr_cpus[i].drc_index);
>> +		if (!rc) {
>> +			dr_cpus_removed++;
>> +			dr_cpus[i].modified = true;
>> +		}
> 
> else .. ?
> 
> Surely you should bail on the first error?
> 
> Definitely you should, because you're going to undo everything below anyway:

See comment above, I kept going here in the hopes that we could satisfy the
request even if a failure occurred.

> 
>> +	}
>> +
>> +	if (dr_cpus_removed != cpus_to_remove) {
>> +		pr_info("CPU hot-remove failed, adding any removed CPUs\n");
>> +
>> +		for (i = 0; i < dr_cpus_possible; i++) {
>> +			if (!dr_cpus[i].modified)
>> +				continue;
> 
> And if you bail on the first error above you shouldn't need modified, instead
> you just iterate in reverse from i using a new counter, eg. j.

Yep, I'll work that into the new code.

> 
>> +
>> +			rc = dlpar_cpu_add(parent, dr_cpus[i].drc_index);
>> +			if (rc)
>> +				pr_info("Failed to re-add CPU (%x)\n",
>> +					dr_cpus[i].drc_index);
>> +		}
>> +
>> +		rc = -EINVAL;
>> +	} else {
>> +		rc = 0;
>> +	}
>> +
>> +	kfree(dr_cpus);
>> +	return rc;
>> +}
>> +
>> +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
>> +{
>> +	struct device_node *parent;
>> +	u32 count, drc_index;
>> +	int rc;
>> +
>> +	count = hp_elog->_drc_u.drc_count;
>> +	drc_index = hp_elog->_drc_u.drc_index;
> 
> Those both need be32_to_cpu().
> 
>    arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15: warning: incorrect type in assignment (different base types)
>    arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15:    expected unsigned int [unsigned] [usertype] count
>    arch/powerpc/platforms/pseries/hotplug-cpu.c:750:15:    got restricted __be32 [usertype] drc_count
>    arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19: warning: incorrect type in assignment (different base types)
>    arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19:    expected unsigned int [unsigned] [usertype] drc_index
>    arch/powerpc/platforms/pseries/hotplug-cpu.c:751:19:    got restricted __be32 [usertype] drc_index
>

Will fix.
 
> 
> Though looking closer I don't see where we ever pass or receive a
> pseries_hp_errorlog to or from firmware? So I'm a bit confused why we're
> bothering with the __be32 shenanigans. Hopefully I've just missed a detail
> somewhere.
> 

That patch is coming. For hotplug in KVM guests the pseries_hp_errorlog
is received when we call rtas_check_exception(). Currently these are
sent up to rtas_errd in userspace, When this partchset goes in I planned
on sending a patch to have cpu and memory hotplug requests handled entirely
in the kernel instead of going to userspace.

>> +	parent = of_find_node_by_path("/cpus");
>> +	if (!parent)
>> +		return -ENODEV;
>> +
>> +	lock_device_hotplug();
>> +
>> +	switch (hp_elog->action) {
>> +	case PSERIES_HP_ELOG_ACTION_REMOVE:
>> +		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
>> +			rc = dlpar_cpu_remove_by_count(parent, count);
>> +		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
>> +			rc = dlpar_cpu_remove_by_index(parent, drc_index);
>> +		else
>> +			rc = -EINVAL;
>> +		break;
>> +	default:
>> +		pr_err("Invalid action (%d) specified\n", hp_elog->action);
>> +		rc = -EINVAL;
>> +		break;
>> +	}
>> +
>> +	unlock_device_hotplug();
>> +	of_node_put(parent);
>> +	return rc;
>> +}
>> +
>>  #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
>>  
>>  static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
>> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
>> index 8411c27..58892f1 100644
>> --- a/arch/powerpc/platforms/pseries/pseries.h
>> +++ b/arch/powerpc/platforms/pseries/pseries.h
>> @@ -66,11 +66,16 @@ extern int dlpar_release_drc(u32 drc_index);
>>  
>>  #ifdef CONFIG_MEMORY_HOTPLUG
>>  int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
>> +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
> 
> I don't think this should be under CONFIG_MEMORY_HOTPLUG ?
> 

No it should not.

Thanks for the review.

-Nathan

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [4/6] pseries: Add CPU dlpar remove functionality
  2015-07-21 21:34     ` Nathan Fontenot
@ 2015-07-22  1:11       ` Michael Ellerman
  2015-07-22 15:42         ` Nathan Fontenot
  0 siblings, 1 reply; 15+ messages in thread
From: Michael Ellerman @ 2015-07-22  1:11 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linuxppc-dev

On Tue, 2015-07-21 at 16:34 -0500, Nathan Fontenot wrote:
> On 07/21/2015 04:27 AM, Michael Ellerman wrote:
> > On Mon, 2015-22-06 at 21:00:49 UTC, Nathan Fontenot wrote:
> >> +static int dlpar_cpu_remove_by_count(struct device_node *parent,
> >> +				     u32 cpus_to_remove)
> >> +{
> >> +	struct dr_cpu *dr_cpus;
> >> +	int dr_cpus_removed = 0;
> >> +	int dr_cpus_present = 0;
> >> +	int dr_cpus_possible;
> >> +	int i, rc;
> >> +
> >> +	pr_info("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
> >> +
> >> +	dr_cpus = get_dlpar_cpus(parent);
> > 
> > So I think this should be:
> > 
> >> +	dr_cpus = get_dlpar_cpus(parent, cpus_to_remove);
> > 
> >> +	if (!dr_cpus) {
> >> +		pr_info("Could not gather dr CPU info\n");
> >> +		return -EINVAL;
> >> +	}
> > 
> > And get_dlpar_cpus() should return cpus_to_remove worth of present cpus, or it
> > should error, meaning the below then won't be needed:
> 
> When adding cpus by count we may need more than just cpus_to_remove worth
> of present cpus. The goal was to provide all possibilities so we could
> continue trying to satisfy the request even if one or more cpus fails
> to remove.
> 
> From this comment and comments below I think your approach is that we
> should bail if any error occurs during cpu remove. Is this what we
> should be doing?

I think so. But you can convince me otherwise if you like :)

It seems to me that we don't expect failures in the general case, so a failure
genuinely indicates something has gone wrong. In which case it's best to stop
and back out the request, rather than trying to continue and possibly making
things worse.

There's also the dilemma that if you get an error offlining one cpu, but then
continue and manage to offline enough cpus, should you report an error to the
caller (userspace)?

So I think it's better to bail on the first error, undo what's been done, and
then report the error to the caller.

> > Though looking closer I don't see where we ever pass or receive a
> > pseries_hp_errorlog to or from firmware? So I'm a bit confused why we're
> > bothering with the __be32 shenanigans. Hopefully I've just missed a detail
> > somewhere.
> > 
> 
> That patch is coming. For hotplug in KVM guests the pseries_hp_errorlog
> is received when we call rtas_check_exception(). Currently these are
> sent up to rtas_errd in userspace, When this partchset goes in I planned
> on sending a patch to have cpu and memory hotplug requests handled entirely
> in the kernel instead of going to userspace.

OK that makes sense.

I'll wait to see that patch, but when it comes I'll probably tell you to do the
endian swaps once when we receive the error log, rather than at each usage.

cheers

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [3/6] pseries: Update CPU hotplug error recovery
  2015-07-21 19:14     ` Nathan Fontenot
@ 2015-07-22  1:14       ` Michael Ellerman
  0 siblings, 0 replies; 15+ messages in thread
From: Michael Ellerman @ 2015-07-22  1:14 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linuxppc-dev

On Tue, 2015-07-21 at 14:14 -0500, Nathan Fontenot wrote:
> On 07/20/2015 11:46 PM, Michael Ellerman wrote:
> > So the pr_info()s should go entirely and the pr_debugs() should become
> > pr_warns(). The warning messages should become more verbose so they stand on
> > their own, ie. include the drc_index.
> > 
> > When everything goes perfectly there should be no output.
> >
> 
> So... good idea, bad implementation :)

Well also a matter of perspective. For you these info messages are helpful
because you're working on the cpu hotplug code. But for other folks they're
just log spam.

> I have a feeling I may have messed this up somewhere else in the patch set too
> so I'll take a look at all the pr_* calls.

OK thanks.

cheers

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [4/6] pseries: Add CPU dlpar remove functionality
  2015-07-22  1:11       ` Michael Ellerman
@ 2015-07-22 15:42         ` Nathan Fontenot
  0 siblings, 0 replies; 15+ messages in thread
From: Nathan Fontenot @ 2015-07-22 15:42 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

On 07/21/2015 08:11 PM, Michael Ellerman wrote:
> On Tue, 2015-07-21 at 16:34 -0500, Nathan Fontenot wrote:
>> On 07/21/2015 04:27 AM, Michael Ellerman wrote:
>>> On Mon, 2015-22-06 at 21:00:49 UTC, Nathan Fontenot wrote:
>>>> +static int dlpar_cpu_remove_by_count(struct device_node *parent,
>>>> +				     u32 cpus_to_remove)
>>>> +{
>>>> +	struct dr_cpu *dr_cpus;
>>>> +	int dr_cpus_removed = 0;
>>>> +	int dr_cpus_present = 0;
>>>> +	int dr_cpus_possible;
>>>> +	int i, rc;
>>>> +
>>>> +	pr_info("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
>>>> +
>>>> +	dr_cpus = get_dlpar_cpus(parent);
>>>
>>> So I think this should be:
>>>
>>>> +	dr_cpus = get_dlpar_cpus(parent, cpus_to_remove);
>>>
>>>> +	if (!dr_cpus) {
>>>> +		pr_info("Could not gather dr CPU info\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>
>>> And get_dlpar_cpus() should return cpus_to_remove worth of present cpus, or it
>>> should error, meaning the below then won't be needed:
>>
>> When adding cpus by count we may need more than just cpus_to_remove worth
>> of present cpus. The goal was to provide all possibilities so we could
>> continue trying to satisfy the request even if one or more cpus fails
>> to remove.
>>
>> From this comment and comments below I think your approach is that we
>> should bail if any error occurs during cpu remove. Is this what we
>> should be doing?
> 
> I think so. But you can convince me otherwise if you like :)
> 
> It seems to me that we don't expect failures in the general case, so a failure
> genuinely indicates something has gone wrong. In which case it's best to stop
> and back out the request, rather than trying to continue and possibly making
> things worse.
> 
> There's also the dilemma that if you get an error offlining one cpu, but then
> continue and manage to offline enough cpus, should you report an error to the
> caller (userspace)?
> 
> So I think it's better to bail on the first error, undo what's been done, and
> then report the error to the caller.

Thinking through this and I cannot come up with a reason good enough to justify
not bailing on the first error. I'll re-work the patch for this and resend.

> 
>>> Though looking closer I don't see where we ever pass or receive a
>>> pseries_hp_errorlog to or from firmware? So I'm a bit confused why we're
>>> bothering with the __be32 shenanigans. Hopefully I've just missed a detail
>>> somewhere.
>>>
>>
>> That patch is coming. For hotplug in KVM guests the pseries_hp_errorlog
>> is received when we call rtas_check_exception(). Currently these are
>> sent up to rtas_errd in userspace, When this partchset goes in I planned
>> on sending a patch to have cpu and memory hotplug requests handled entirely
>> in the kernel instead of going to userspace.
> 
> OK that makes sense.
> 
> I'll wait to see that patch, but when it comes I'll probably tell you to do the
> endian swaps once when we receive the error log, rather than at each usage.
> 

I'll make a note to look into this. The issue I find is that it gets ugly because
we need to use some of these values (such as drc_index) in cpu format at times 
and in BE at different times (such as comparing to device tree values or making
rtas calls). My goal was to pass around the values in cpu format and
do the endian swaps when needed.

-Nathan

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2015-07-22 15:42 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-22 20:54 [PATCH 0/6] pseries: Move CPU dlpar into the kernel Nathan Fontenot
2015-06-22 20:56 ` [PATCH 1/6] pseries: Consolidate CPU hotplug code to hotplug-cpu.c Nathan Fontenot
2015-06-22 20:58 ` [PATCH 2/6] pseries: Factor out common cpu hotplug code Nathan Fontenot
2015-06-22 20:59 ` [PATCH 3/6] pseries: Update CPU hotplug error recovery Nathan Fontenot
2015-07-21  4:46   ` [3/6] " Michael Ellerman
2015-07-21 19:14     ` Nathan Fontenot
2015-07-22  1:14       ` Michael Ellerman
2015-06-22 21:00 ` [PATCH 4/6] pseries: Add CPU dlpar remove functionality Nathan Fontenot
2015-07-21  9:27   ` [4/6] " Michael Ellerman
2015-07-21 21:34     ` Nathan Fontenot
2015-07-22  1:11       ` Michael Ellerman
2015-07-22 15:42         ` Nathan Fontenot
2015-07-21  9:46   ` Michael Ellerman
2015-06-22 21:01 ` [PATCH 5/6] pseries: Add CPU dlpar add functionality Nathan Fontenot
2015-06-22 21:02 ` [PATCH 6/6] pseries: Enable kernel CPU dlpar from sysfs Nathan Fontenot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.