All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support
@ 2017-06-05 12:32 Anju T Sudhakar
  2017-06-05 12:32 ` [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging Anju T Sudhakar
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Anju T Sudhakar @ 2017-06-05 12:32 UTC (permalink / raw)
  To: mpe
  Cc: linux-kernel, linuxppc-dev, ego, bsingharora, anton, sukadev,
	mikey, stewart, dja, eranian, hemant, maddy, anju, tglx

Adds cpumask attribute to be used by each IMC pmu. Only one cpu (any            
online CPU) from each chip for nest PMUs is designated to read counters.        
                                                                                
On CPU hotplug, dying CPU is checked to see whether it is one of the            
designated cpus, if yes, next online cpu from the same chip (for nest           
units) is designated as new cpu to read counters. For this purpose, we          
introduce a new state : CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE.                  
                                                                                
Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>                        
Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>                         
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com> 
---
 arch/powerpc/include/asm/imc-pmu.h             |   3 +
 arch/powerpc/include/asm/opal-api.h            |  12 +-
 arch/powerpc/include/asm/opal.h                |   4 +
 arch/powerpc/perf/imc-pmu.c                    | 224 ++++++++++++++++++++++++-
 arch/powerpc/platforms/powernv/opal-wrappers.S |   3 +
 include/linux/cpuhotplug.h                     |   1 +
 6 files changed, 245 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 303faa8..6b1d887 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -96,6 +96,9 @@ struct imc_pmu {
  */
 #define IMC_DOMAIN_NEST		1
 
+#define IMC_COUNTER_ENABLE     1
+#define IMC_COUNTER_DISABLE    0
+
 extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 extern int __init init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index aa150f0..e0c5c66 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -190,7 +190,10 @@
 #define OPAL_NPU_INIT_CONTEXT			146
 #define OPAL_NPU_DESTROY_CONTEXT		147
 #define OPAL_NPU_MAP_LPAR			148
-#define OPAL_LAST				148
+#define OPAL_IMC_COUNTERS_INIT                 	149
+#define OPAL_IMC_COUNTERS_START                	150
+#define OPAL_IMC_COUNTERS_STOP                 	151
+#define OPAL_LAST                              	151
 
 /* Device tree flags */
 
@@ -1009,6 +1012,13 @@ enum {
 	IMC_COUNTER_PER_SOCKET          = 0x20,
 };
 
+/* Argument to OPAL_IMC_COUNTERS_*  */
+enum {
+	OPAL_IMC_COUNTERS_NEST = 1,
+	OPAL_IMC_COUNTERS_CORE = 2,
+	OPAL_IMC_COUNTERS_THREAD = 3,
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 588fb1c..cdf78b6 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -268,6 +268,10 @@ int64_t opal_xive_free_irq(uint32_t girq);
 int64_t opal_xive_sync(uint32_t type, uint32_t id);
 int64_t opal_xive_dump(uint32_t type, uint32_t id);
 
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu);
+int64_t opal_imc_counters_start(uint32_t type);
+int64_t opal_imc_counters_stop(uint32_t type);
+
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
 				   int depth, void *data);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 242328ee..976ba31 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -21,6 +21,12 @@
 /* Needed for sanity check */
 extern u64 nest_max_offset;
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
+static cpumask_t nest_imc_cpumask;
+static int nest_imc_cpumask_initialized;
+
+static atomic_t nest_events;
+/* Used to avoid races in calling enable/disable nest-pmu units */
+static DEFINE_MUTEX(imc_nest_reserve);
 
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
@@ -38,9 +44,200 @@ static struct attribute_group imc_format_group = {
 	.attrs = imc_format_attrs,
 };
 
+/* Get the cpumask printed to a buffer "buf" */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	cpumask_t *active_mask;
+
+	active_mask = &nest_imc_cpumask;
+	return cpumap_print_to_pagebuf(true, buf, active_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *imc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group imc_pmu_cpumask_attr_group = {
+	.attrs = imc_pmu_cpumask_attrs,
+};
+
+/*
+ * nest_imc_stop : Does OPAL call to stop nest engine.
+ */
+static void nest_imc_stop(int *cpu_opal_rc)
+{
+	int rc;
+
+	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
+	if (rc)
+		cpu_opal_rc[smp_processor_id()] = 1;
+}
+
+/* nest_imc_start: Does the OPAL call to enable nest counters. */
+static void nest_imc_start(int *cpu_opal_rc)
+{
+	int rc;
+
+	rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST);
+	if (rc)
+		cpu_opal_rc[smp_processor_id()] = 1;
+}
+
+/*
+ * nest_imc_control: Function to start and stop nest imc engine.
+ *		     The function is primarily called from event init
+ * 		     and event destroy.
+ */
+static int nest_imc_control(int operation)
+{
+	int *cpus_opal_rc, cpu;
+
+	/*
+	 * Memory for OPAL call return value.
+	 */
+	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
+	if (!cpus_opal_rc)
+		return -ENOMEM;
+	switch (operation) {
+	case IMC_COUNTER_ENABLE:
+			/* Initialize Nest PMUs in each node using designated cpus */
+			on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_start,
+						cpus_opal_rc, 1);
+			break;
+	case IMC_COUNTER_DISABLE:
+			/* Disable the counters */
+			on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_stop,
+						cpus_opal_rc, 1);
+			break;
+	default:
+		kfree(cpus_opal_rc);
+		return -EINVAL;
+
+	}
+
+	/* Check return value array for any OPAL call failure */
+	for_each_cpu(cpu, &nest_imc_cpumask) {
+		if (cpus_opal_rc[cpu]) {
+			kfree(cpus_opal_rc);
+			return -ENODEV;
+		}
+	}
+	kfree(cpus_opal_rc);
+	return 0;
+}
+
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	struct imc_pmu **pn = per_nest_pmu_arr;
+	int i;
+
+	if (old_cpu < 0 || new_cpu < 0)
+		return;
+
+	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
+		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
+
+}
+
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+	int nid, target = -1;
+	const struct cpumask *l_cpumask;
+
+	/*
+	 * Check in the designated list for this cpu. Dont bother
+	 * if not one of them.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+		return 0;
+
+	/*
+	 * Now that this cpu is one of the designated,
+	 * find a next cpu a) which is online and b) in same chip.
+	 */
+	nid = cpu_to_node(cpu);
+	l_cpumask = cpumask_of_node(nid);
+	target = cpumask_next(cpu, l_cpumask);
+
+	/*
+	 * Update the cpumask with the target cpu and
+	 * migrate the context if needed
+	 */
+	if (target >= 0 && target < nr_cpu_ids) {
+		cpumask_set_cpu(target, &nest_imc_cpumask);
+		nest_change_cpu_context(cpu, target);
+	} else {
+		target = -1;
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
+		nest_change_cpu_context(cpu, target);
+	}
+	return 0;
+}
+
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+	const struct cpumask *l_cpumask;
+	static struct cpumask tmp_mask;
+	int res;
+
+	/* Get the cpumask of this node */
+	l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+	/*
+	 * If this is not the first online CPU on this node, then
+	 * just return.
+	 */
+	if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
+		return 0;
+
+	/*
+	 * If this is the first online cpu on this node
+	 * disable the nest counters by making an OPAL call.
+	 */
+	res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
+	if (res)
+		return res;
+
+	/* Make this CPU the designated target for counter collection */
+	cpumask_set_cpu(cpu, &nest_imc_cpumask);
+	nest_change_cpu_context(-1, cpu);
+	return 0;
+}
+
+static int nest_pmu_cpumask_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+				 "perf/powerpc/imc:online",
+				 ppc_nest_imc_cpu_online,
+				 ppc_nest_imc_cpu_offline);
+}
+
+static void nest_imc_counters_release(struct perf_event *event)
+{
+	int rc;
+	/*
+	 * See if we need to disable the nest PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * enable or disable the nest counters.
+	 */
+	if (atomic_dec_return(&nest_events) == 0) {
+		mutex_lock(&imc_nest_reserve);
+		rc = nest_imc_control(IMC_COUNTER_DISABLE);
+		mutex_unlock(&imc_nest_reserve);
+		if (rc)
+			pr_err("IMC: Disable counters failed\n");
+	}
+}
+
 static int nest_imc_event_init(struct perf_event *event)
 {
-	int chip_id;
+	int chip_id, rc;
 	u32 config = event->attr.config;
 	struct imc_mem_info *pcni;
 	struct imc_pmu *pmu;
@@ -80,6 +277,20 @@ static int nest_imc_event_init(struct perf_event *event)
 	 * "chip_id" and add "config" to it".
 	 */
 	event->hw.event_base = pcni->vbase[config/PAGE_SIZE] + (config & ~PAGE_MASK);
+	/*
+	 * Nest pmu units are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the mutex lock and enable the nest counters.
+	 * If not, just increment the count in nest_events.
+	 */
+	if (atomic_inc_return(&nest_events) == 1) {
+		mutex_lock(&imc_nest_reserve);
+		rc = nest_imc_control(IMC_COUNTER_ENABLE);
+		mutex_unlock(&imc_nest_reserve);
+		if (rc)
+			pr_err("IMC: Unable to start the counters\n");
+	}
+	event->destroy = nest_imc_counters_release;
 	return 0;
 }
 
@@ -157,6 +368,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
 	pmu->pmu.start = imc_event_start;
 	pmu->pmu.stop = imc_event_stop;
 	pmu->pmu.read = imc_perf_event_update;
+	pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
 	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
 	pmu->pmu.attr_groups = pmu->attr_groups;
 
@@ -226,12 +438,22 @@ static int update_events_in_group(struct imc_events *events,
  * @events:	events memory for this pmu.
  * @idx:	number of event entries created.
  * @pmu_ptr:	memory allocated for this pmu.
+ *
+ * init_imc_pmu() setup the cpu mask information for these pmus and setup
+ * the state machine hotplug notifiers as well.
  */
 int __init init_imc_pmu(struct imc_events *events, int idx,
 		 struct imc_pmu *pmu_ptr)
 {
 	int ret = -ENODEV;
 
+	/* Add cpumask and register for hotplug notification */
+	if (!nest_imc_cpumask_initialized) {
+		ret = nest_pmu_cpumask_init();
+		if (ret)
+			return ret;
+		nest_imc_cpumask_initialized = 1;
+	}
 	ret = update_events_in_group(events, idx, pmu_ptr);
 	if (ret)
 		goto err_free;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f620572..1828b24f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -310,3 +310,6 @@ OPAL_CALL(opal_xive_dump,			OPAL_XIVE_DUMP);
 OPAL_CALL(opal_npu_init_context,		OPAL_NPU_INIT_CONTEXT);
 OPAL_CALL(opal_npu_destroy_context,		OPAL_NPU_DESTROY_CONTEXT);
 OPAL_CALL(opal_npu_map_lpar,			OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init,              OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start,             OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop,              OPAL_IMC_COUNTERS_STOP);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f2a803..dca7f2b 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -139,6 +139,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
+	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging
  2017-06-05 12:32 [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support Anju T Sudhakar
@ 2017-06-05 12:32 ` Anju T Sudhakar
  2017-06-06 10:09   ` Thomas Gleixner
  2017-06-05 12:32 ` [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support Anju T Sudhakar
  2017-06-06  8:39 ` [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and " Thomas Gleixner
  2 siblings, 1 reply; 11+ messages in thread
From: Anju T Sudhakar @ 2017-06-05 12:32 UTC (permalink / raw)
  To: mpe
  Cc: linux-kernel, linuxppc-dev, ego, bsingharora, anton, sukadev,
	mikey, stewart, dja, eranian, hemant, maddy, anju, tglx

From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>                            
                                                                                
Code to add PMU function to initialize a core IMC event. It also                
adds cpumask initialization function for core IMC PMU. For                      
initialization, memory is allocated per core where the data                     
for core IMC counters will be accumulated. The base address for this            
page is sent to OPAL via an OPAL call which initializes various SCOMs           
related to Core IMC initialization. Upon any errors, the pages are              
free'ed and core IMC counters are disabled using the same OPAL call.            
                                                                                
For CPU hotplugging, a cpumask is initialized which contains an online          
CPU from each core. If a cpu goes offline, we check whether that cpu            
belongs to the core imc cpumask, if yes, then, we migrate the PMU               
context to any other online cpu (if available) in that core. If a cpu           
comes back online, then this cpu will be added to the core imc cpumask          
only if there was no other cpu from that core in the previous cpumask.          
                                                                                
To register the hotplug functions for core_imc, a new state                     
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE is added to the list of existing          
states.                                                                         
                                                                                
Patch also adds OPAL device shutdown callback. Needed to disable the            
IMC core engine to handle kexec.                                                
                                                                                
Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>                         
Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>                        
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>  
---
 arch/powerpc/include/asm/imc-pmu.h        |   1 +
 arch/powerpc/perf/imc-pmu.c               | 327 +++++++++++++++++++++++++++++-
 arch/powerpc/platforms/powernv/opal-imc.c |   7 +
 include/linux/cpuhotplug.h                |   1 +
 4 files changed, 330 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 54784a5..5227660 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -102,5 +102,6 @@ struct imc_pmu {
 
 extern struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 extern struct imc_pmu *core_imc_pmu;
+extern int core_imc_control(int operation);
 extern int __init init_imc_pmu(struct imc_events *events, int idx, struct imc_pmu *pmu_ptr);
 #endif /* PPC_POWERNV_IMC_PMU_DEF_H */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 463425c..6d32c3f 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1,5 +1,5 @@
 /*
- * Nest Performance Monitor counter support.
+ * IMC Performance Monitor counter support.
  *
  * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
  *           (C) 2017 Anju T Sudhakar, IBM Corporation.
@@ -24,11 +24,15 @@ extern u64 core_max_offset;
 
 struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS];
 static cpumask_t nest_imc_cpumask;
+static cpumask_t core_imc_cpumask;
 static int nest_imc_cpumask_initialized;
 
 static atomic_t nest_events;
+static atomic_t core_events;
 /* Used to avoid races in calling enable/disable nest-pmu units */
 static DEFINE_MUTEX(imc_nest_reserve);
+/* Used to avoid races in calling enable/disable nest-pmu units */
+static DEFINE_MUTEX(imc_core_reserve);
 
 struct imc_pmu *core_imc_pmu;
 
@@ -53,9 +57,15 @@ static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
 					struct device_attribute *attr,
 					char *buf)
 {
+	struct pmu *pmu = dev_get_drvdata(dev);
 	cpumask_t *active_mask;
 
-	active_mask = &nest_imc_cpumask;
+	if (!strncmp(pmu->name, "nest_", strlen("nest_")))
+		active_mask = &nest_imc_cpumask;
+	else if (!strncmp(pmu->name, "core_", strlen("core_")))
+		active_mask = &core_imc_cpumask;
+	else
+		return 0;
 	return cpumap_print_to_pagebuf(true, buf, active_mask);
 }
 
@@ -239,6 +249,232 @@ static void nest_imc_counters_release(struct perf_event *event)
 	}
 }
 
+static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
+{
+	struct imc_mem_info *ptr = pmu_ptr->mem_info;
+
+	if (!ptr)
+		return;
+	for (; ptr; ptr++) {
+		if (ptr->vbase[0] != 0)
+			free_pages(ptr->vbase[0], 0);
+	}
+	kfree(pmu_ptr->mem_info);
+}
+
+/* Enabling of Core Engine needs a scom operation */
+static void core_imc_control_enable(int *cpu_opal_rc)
+{
+	int rc;
+
+	rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE);
+	if (rc)
+		cpu_opal_rc[smp_processor_id()] = 1;
+}
+
+/*
+ * Disabling of IMC Core Engine needs a scom operation
+ */
+static void core_imc_control_disable(int *cpu_opal_rc)
+{
+	int rc;
+
+	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);
+	if (rc)
+		cpu_opal_rc[smp_processor_id()] = 1;
+}
+
+int core_imc_control(int operation)
+{
+	int cpu, *cpus_opal_rc;
+
+	/* Memory for OPAL call return value. */
+	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
+	if (!cpus_opal_rc)
+		return -ENOMEM;
+
+	/*
+	 * Enable or disable the core IMC PMU on each core using the
+	 * core_imc_cpumask.
+	 */
+	switch (operation) {
+
+	case IMC_COUNTER_DISABLE:
+		on_each_cpu_mask(&core_imc_cpumask,
+				(smp_call_func_t)core_imc_control_disable,
+				cpus_opal_rc, 1);
+		break;
+	case IMC_COUNTER_ENABLE:
+		on_each_cpu_mask(&core_imc_cpumask,
+				(smp_call_func_t)core_imc_control_enable,
+				cpus_opal_rc, 1);
+		break;
+	default:
+		goto fail;
+	}
+	/* Check return value array for any OPAL call failure */
+	for_each_cpu(cpu, &core_imc_cpumask) {
+		if (cpus_opal_rc[cpu])
+			goto fail;
+	}
+	return 0;
+fail:
+	if (cpus_opal_rc)
+		kfree(cpus_opal_rc);
+		return -EINVAL;
+}
+
+static void core_imc_counters_release(struct perf_event *event)
+{
+	int rc;
+	/*
+	 * See if we need to disable the IMC PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * enable or disable the core counters.
+	 */
+	if (atomic_dec_return(&core_events) == 0) {
+		mutex_lock(&imc_core_reserve);
+		rc = core_imc_control(IMC_COUNTER_DISABLE);
+		mutex_unlock(&imc_core_reserve);
+		if (rc)
+			pr_err("IMC: Disable counters failed\n");
+	}
+}
+
+/*
+ * core_imc_mem_init : Initializes memory for the current core.
+ *
+ * Uses alloc_pages_exact_nid() and uses the returned address as an argument to
+ * an opal call to configure the pdbar. The address sent as an argument is
+ * converted to physical address before the opal call is made. This is the
+ * base address at which the core imc counters are populated.
+ */
+static int  __meminit core_imc_mem_init(int cpu, int size)
+{
+	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	struct imc_mem_info *mem_info = NULL;
+
+	phys_id = topology_physical_package_id(cpu);
+	/*
+	 * alloc_pages_exact_nid() will allocate memory for core in the
+	 * local node only.
+	 */
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	mem_info->id = core_id;
+	mem_info->vbase[0] = (u64) alloc_pages_exact_nid(phys_id,
+				(size_t)size, GFP_KERNEL | __GFP_ZERO);
+	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+				(u64)virt_to_phys((void *)mem_info->vbase[0]),
+				get_hard_smp_processor_id(cpu));
+	if (rc) {
+		kfree(&mem_info->vbase[0]);
+		mem_info->vbase[0] = 0;
+	}
+	return rc;
+}
+
+bool is_core_imc_mem_inited(int cpu)
+{
+	struct imc_mem_info *mem_info = NULL;
+	int core_id = (cpu / threads_per_core);
+
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	if ((mem_info->id == core_id) && (mem_info->vbase[0] != 0))
+		return true;
+	return false;
+}
+
+/*
+ * imc_mem_init : Function to support memory allocation for core imc.
+ */
+static int imc_mem_init(struct imc_pmu *pmu_ptr)
+{
+	int nr_cores;
+
+	if (pmu_ptr->imc_counter_mmaped)
+		return 0;
+	nr_cores = num_present_cpus() / threads_per_core;
+	pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
+	if (!pmu_ptr->mem_info)
+		return -ENOMEM;
+	return 0;
+}
+
+static void core_imc_change_cpu_context(int old_cpu, int new_cpu)
+{
+	if (!core_imc_pmu)
+		return;
+	if (old_cpu < 0 || new_cpu < 0)
+		return;
+	perf_pmu_migrate_context(&core_imc_pmu->pmu, old_cpu, new_cpu);
+}
+
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+	const struct cpumask *l_cpumask;
+	static struct cpumask tmp_mask;
+	int ret = 0;
+
+	/* Get the cpumask for this core */
+	l_cpumask = cpu_sibling_mask(cpu);
+
+	/* If a cpu for this core is already set, then, don't do anything */
+	if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
+		return 0;
+
+	if (!is_core_imc_mem_inited(cpu)) {
+		ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+		if (ret) {
+			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+			return ret;
+		}
+	} else
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);
+
+	/* set the cpu in the mask, and change the context */
+	cpumask_set_cpu(cpu, &core_imc_cpumask);
+	core_imc_change_cpu_context(-1, cpu);
+	return 0;
+}
+
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+	int target;
+	unsigned int ncpu;
+
+	/*
+	 * clear this cpu out of the mask, if not present in the mask,
+	 * don't bother doing anything.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+		return 0;
+
+	/* Find any online cpu in that core except the current "cpu" */
+	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+
+	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
+		target = ncpu;
+		cpumask_set_cpu(target, &core_imc_cpumask);
+	} else {
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);
+		target = -1;
+	}
+
+	/* migrate the context */
+	core_imc_change_cpu_context(cpu, target);
+
+	return 0;
+}
+
+static int core_imc_pmu_cpumask_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+				 "perf/powerpc/imc_core:online",
+				 ppc_core_imc_cpu_online,
+				 ppc_core_imc_cpu_offline);
+}
+
 static int nest_imc_event_init(struct perf_event *event)
 {
 	int chip_id, rc;
@@ -298,6 +534,63 @@ static int nest_imc_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int core_imc_event_init(struct perf_event *event)
+{
+	int core_id, rc;
+	u64 config = event->attr.config;
+	struct imc_mem_info *pcmi;
+	struct imc_pmu *pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+
+	/* Sanity check for config (event offset) */
+	if (config > core_max_offset)
+		return -EINVAL;
+
+	pmu = imc_event_to_pmu(event);
+	if (!is_core_imc_mem_inited(event->cpu))
+		return -ENODEV;
+	core_id = event->cpu / threads_per_core;
+	pcmi = &pmu->mem_info[core_id];
+	if ((pcmi->id != core_id) || (pcmi->vbase[0] == 0))
+		return -ENODEV;
+	event->hw.event_base = pcmi->vbase[0] + config;
+	/*
+	 * Core pmu units are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the mutex lock and enable the core counters.
+	 * If not, just increment the count in core_events.
+	 */
+	if (atomic_inc_return(&core_events) == 1) {
+		mutex_lock(&imc_core_reserve);
+		rc = core_imc_control(IMC_COUNTER_ENABLE);
+		mutex_unlock(&imc_core_reserve);
+		if (rc)
+			pr_err("IMC: Unable to start the counters\n");
+	}
+	event->destroy = core_imc_counters_release;
+	return 0;
+}
+
 static void imc_read_counter(struct perf_event *event)
 {
 	u64 *addr, data;
@@ -366,7 +659,11 @@ static int update_pmu_ops(struct imc_pmu *pmu)
 		return -EINVAL;
 
 	pmu->pmu.task_ctx_nr = perf_invalid_context;
-	pmu->pmu.event_init = nest_imc_event_init;
+	if (pmu->domain == IMC_DOMAIN_NEST) {
+		pmu->pmu.event_init = nest_imc_event_init;
+	} else if (pmu->domain == IMC_DOMAIN_CORE) {
+		pmu->pmu.event_init = core_imc_event_init;
+	}
 	pmu->pmu.add = imc_event_add;
 	pmu->pmu.del = imc_event_stop;
 	pmu->pmu.start = imc_event_start;
@@ -451,12 +748,27 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
 {
 	int ret = -ENODEV;
 
+	ret = imc_mem_init(pmu_ptr);
+	if (ret)
+		goto err_free;
+
 	/* Add cpumask and register for hotplug notification */
-	if (!nest_imc_cpumask_initialized) {
-		ret = nest_pmu_cpumask_init();
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		if (!nest_imc_cpumask_initialized) {
+			ret = nest_pmu_cpumask_init();
+			if (ret)
+				return ret;
+			nest_imc_cpumask_initialized = 1;
+		}
+		break;
+	case IMC_DOMAIN_CORE:
+		ret = core_imc_pmu_cpumask_init();
 		if (ret)
 			return ret;
-		nest_imc_cpumask_initialized = 1;
+		break;
+	default:
+		return -1;  /* Unknown domain */
 	}
 	ret = update_events_in_group(events, idx, pmu_ptr);
 	if (ret)
@@ -482,6 +794,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
 			kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
 	}
+	/* For core_imc, we have allocated memory, we need to free it */
+	if (pmu_ptr->domain == IMC_DOMAIN_CORE)
+		cleanup_all_core_imc_memory(pmu_ptr);
 
 	return ret;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index a997f83..d0d26dd 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -532,6 +532,12 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+	/* Disable the IMC Core functions */
+	core_imc_control(IMC_COUNTER_DISABLE);
+}
+
 static const struct of_device_id opal_imc_match[] = {
 	{ .compatible = IMC_DTB_COMPAT },
 	{},
@@ -543,6 +549,7 @@ static struct platform_driver opal_imc_driver = {
 		.of_match_table = opal_imc_match,
 	},
 	.probe = opal_imc_counters_probe,
+	.shutdown = opal_imc_counters_shutdown,
 };
 
 MODULE_DEVICE_TABLE(of, opal_imc_match);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index dca7f2b..e145fff 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -140,6 +140,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support
  2017-06-05 12:32 [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support Anju T Sudhakar
  2017-06-05 12:32 ` [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging Anju T Sudhakar
@ 2017-06-05 12:32 ` Anju T Sudhakar
  2017-06-06 10:27   ` Thomas Gleixner
  2017-06-06  8:39 ` [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and " Thomas Gleixner
  2 siblings, 1 reply; 11+ messages in thread
From: Anju T Sudhakar @ 2017-06-05 12:32 UTC (permalink / raw)
  To: mpe
  Cc: linux-kernel, linuxppc-dev, ego, bsingharora, anton, sukadev,
	mikey, stewart, dja, eranian, hemant, maddy, anju, tglx

Code to add support for thread IMC on cpuhotplug.

When a cpu goes offline, the LDBAR for that cpu is disabled, and when it comes
back online the previous ldbar value is written back to the LDBAR for that cpu.

To register the hotplug functions for thread_imc, a new state
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE is added to the list of existing
states.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
---
 arch/powerpc/perf/imc-pmu.c | 35 +++++++++++++++++++++++++++++++++--
 include/linux/cpuhotplug.h  |  1 +
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 13ff6dc..ee582b0 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -396,14 +396,22 @@ bool is_core_imc_mem_inited(int cpu)
  */
 static void thread_imc_mem_alloc(int cpu_id)
 {
-	u64 ldbar_addr, ldbar_value;
 	int phys_id = topology_physical_package_id(cpu_id);
 
 	per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
 			(size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);
+}
+
+static void thread_imc_update_ldbar(unsigned int cpu_id)
+{
+	u64 ldbar_addr, ldbar_value;
+
+	if (per_cpu_add[cpu_id] == 0)
+		thread_imc_mem_alloc(cpu_id);
+
 	ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
 	ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
-		(u64)THREAD_IMC_ENABLE;
+			(u64)THREAD_IMC_ENABLE;
 	mtspr(SPRN_LDBAR, ldbar_value);
 }
 
@@ -442,6 +450,26 @@ static void core_imc_change_cpu_context(int old_cpu, int new_cpu)
 	perf_pmu_migrate_context(&core_imc_pmu->pmu, old_cpu, new_cpu);
 }
 
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+	thread_imc_update_ldbar(cpu);
+	return 0;
+}
+
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+void thread_imc_cpu_init(void)
+{
+	cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+			  "perf/powerpc/imc_thread:online",
+			  ppc_thread_imc_cpu_online,
+			  ppc_thread_imc_cpu_offline);
+}
+
 static int ppc_core_imc_cpu_online(unsigned int cpu)
 {
 	const struct cpumask *l_cpumask;
@@ -953,6 +981,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
 		if (ret)
 			return ret;
 		break;
+	case IMC_DOMAIN_THREAD:
+		thread_imc_cpu_init();
+		break;
 	default:
 		return -1;  /* Unknown domain */
 	}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index e145fff..937d1ec 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -141,6 +141,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support
  2017-06-05 12:32 [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support Anju T Sudhakar
  2017-06-05 12:32 ` [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging Anju T Sudhakar
  2017-06-05 12:32 ` [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support Anju T Sudhakar
@ 2017-06-06  8:39 ` Thomas Gleixner
  2017-06-07  5:26   ` Anju T Sudhakar
  2 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2017-06-06  8:39 UTC (permalink / raw)
  To: Anju T Sudhakar
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy

On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
> +/*
> + * nest_imc_stop : Does OPAL call to stop nest engine.
> + */
> +static void nest_imc_stop(int *cpu_opal_rc)
> +{
> +	int rc;
> +
> +	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
> +	if (rc)
> +		cpu_opal_rc[smp_processor_id()] = 1;

What's wrong with just assigning the result directly?

> +/* nest_imc_start: Does the OPAL call to enable nest counters. */
> +static void nest_imc_start(int *cpu_opal_rc)
> +{
> +	int rc;
> +
> +	rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST);
> +	if (rc)
> +		cpu_opal_rc[smp_processor_id()] = 1;
> +}
> +
> +/*
> + * nest_imc_control: Function to start and stop nest imc engine.
> + *		     The function is primarily called from event init
> + * 		     and event destroy.

As I don't see other call sites, what's the secondary usage?

> + */
> +static int nest_imc_control(int operation)
> +{
> +	int *cpus_opal_rc, cpu;
> +
> +	/*
> +	 * Memory for OPAL call return value.
> +	 */
> +	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
> +	if (!cpus_opal_rc)
> +		return -ENOMEM;
> +	switch (operation) {
> +	case IMC_COUNTER_ENABLE:
> +			/* Initialize Nest PMUs in each node using designated cpus */
> +			on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_start,
> +						cpus_opal_rc, 1);

That's one indentation level too deep. 

> +			break;
> +	case IMC_COUNTER_DISABLE:
> +			/* Disable the counters */
> +			on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_stop,
> +						cpus_opal_rc, 1);
> +			break;
> +	default:
> +		kfree(cpus_opal_rc);
> +		return -EINVAL;
> +
> +	}
> +
> +	/* Check return value array for any OPAL call failure */
> +	for_each_cpu(cpu, &nest_imc_cpumask) {
> +		if (cpus_opal_rc[cpu]) {
> +			kfree(cpus_opal_rc);
> +			return -ENODEV;
> +		}
> +	}
> +	kfree(cpus_opal_rc);

So you have THREE places where you do kfree(cpus_opal_rc). Sigh.

You can spare that hole alloc/free dance by simply having

static struct cpumask nest_imc_result;

	mutex_lock(&imc_nest_reserve);
	memset(&nest_imc_result, 0, sizeof(nest_imc_result));

	switch (op) {
	case IMC_COUNTER_ENABLE:
		on_each_cpu_mask(&nest_imc_cpumask, nest_imc_start, NULL, 1);
		break;
	....
	}

	res = cpumask_empty(&nest_imc_result) ? 0 : -ENODEV;
	mutex_unlock(&imc_nest_reserve);
	return res;

And in the start/stop functions:

	if (opal_imc_counters_xxx(OPAL_IMC_COUNTERS_NEST))
		cpumask_set_cpu(smp_processor_id(), &nest_imc_result);

> +static void nest_change_cpu_context(int old_cpu, int new_cpu)
> +{
> +	struct imc_pmu **pn = per_nest_pmu_arr;
> +	int i;
> +
> +	if (old_cpu < 0 || new_cpu < 0)
> +		return;
> +
> +	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
> +		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
> +
> +}
> +
> +static int ppc_nest_imc_cpu_offline(unsigned int cpu)
> +{
> +	int nid, target = -1;
> +	const struct cpumask *l_cpumask;
> +
> +	/*
> +	 * Check in the designated list for this cpu. Dont bother
> +	 * if not one of them.
> +	 */
> +	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
> +		return 0;
> +
> +	/*
> +	 * Now that this cpu is one of the designated,
> +	 * find a next cpu a) which is online and b) in same chip.
> +	 */
> +	nid = cpu_to_node(cpu);
> +	l_cpumask = cpumask_of_node(nid);
> +	target = cpumask_next(cpu, l_cpumask);

So if the outgoing CPU is the highest numbered CPU on the node, then
cpumask_next() will not find a CPU, while there still might be other lower
numbered CPUs online in the node.

	target = cpumask_any_but(l_cpumask, cpu);

is what you want.

> +
> +	/*
> +	 * Update the cpumask with the target cpu and
> +	 * migrate the context if needed
> +	 */
> +	if (target >= 0 && target < nr_cpu_ids) {
> +		cpumask_set_cpu(target, &nest_imc_cpumask);
> +		nest_change_cpu_context(cpu, target);
> +	} else {
> +		target = -1;
> +		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
> +		nest_change_cpu_context(cpu, target);

Why are you calling nest_change_cpu_context()? You already know that it
will return immediately due to target < 0.

> +	}
> +	return 0;
> +}
> +
> +static int ppc_nest_imc_cpu_online(unsigned int cpu)
> +{
> +	const struct cpumask *l_cpumask;
> +	static struct cpumask tmp_mask;
> +	int res;
> +
> +	/* Get the cpumask of this node */
> +	l_cpumask = cpumask_of_node(cpu_to_node(cpu));
> +
> +	/*
> +	 * If this is not the first online CPU on this node, then
> +	 * just return.
> +	 */
> +	if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
> +		return 0;
> +
> +	/*
> +	 * If this is the first online cpu on this node
> +	 * disable the nest counters by making an OPAL call.
> +	 */
> +	res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
> +	if (res)
> +		return res;
> +
> +	/* Make this CPU the designated target for counter collection */
> +	cpumask_set_cpu(cpu, &nest_imc_cpumask);
> +	nest_change_cpu_context(-1, cpu);

Ditto

>  static int nest_imc_event_init(struct perf_event *event)
>  {
> -	int chip_id;
> +	int chip_id, rc;
>  	u32 config = event->attr.config;
>  	struct imc_mem_info *pcni;
>  	struct imc_pmu *pmu;
> @@ -80,6 +277,20 @@ static int nest_imc_event_init(struct perf_event *event)
>  	 * "chip_id" and add "config" to it".
>  	 */
>  	event->hw.event_base = pcni->vbase[config/PAGE_SIZE] + (config & ~PAGE_MASK);
> +	/*
> +	 * Nest pmu units are enabled only when it is used.
> +	 * See if this is triggered for the first time.
> +	 * If yes, take the mutex lock and enable the nest counters.
> +	 * If not, just increment the count in nest_events.
> +	 */
> +	if (atomic_inc_return(&nest_events) == 1) {
> +		mutex_lock(&imc_nest_reserve);
> +		rc = nest_imc_control(IMC_COUNTER_ENABLE);
> +		mutex_unlock(&imc_nest_reserve);
> +		if (rc)
> +			pr_err("IMC: Unable to start the counters\n");

So if that fails, nest_events will stay incremented. Is that on purpose?

> +	}
> +	event->destroy = nest_imc_counters_release;
>  	return 0;

And this happily returns success even if the enable call failed ....

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging
  2017-06-05 12:32 ` [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging Anju T Sudhakar
@ 2017-06-06 10:09   ` Thomas Gleixner
  2017-06-07  5:44     ` Anju T Sudhakar
  0 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2017-06-06 10:09 UTC (permalink / raw)
  To: Anju T Sudhakar
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy

On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
> +static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
> +{
> +	struct imc_mem_info *ptr = pmu_ptr->mem_info;
> +
> +	if (!ptr)
> +		return;

That's pointless.

> +	for (; ptr; ptr++) {

  	for (ptr = pmu_ptr->mem_info; ptr; ptr++) {

will do the right thing.

> +		if (ptr->vbase[0] != 0)
> +			free_pages(ptr->vbase[0], 0);
> +	}

and kfree can be called with a NULL pointer.

> +	kfree(pmu_ptr->mem_info);
> +}
> +
> +/* Enabling of Core Engine needs a scom operation */
> +static void core_imc_control_enable(int *cpu_opal_rc)
> +{
> +	int rc;
> +
> +	rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE);
> +	if (rc)
> +		cpu_opal_rc[smp_processor_id()] = 1;
> +}
> +
> +/*
> + * Disabling of IMC Core Engine needs a scom operation
> + */
> +static void core_imc_control_disable(int *cpu_opal_rc)
> +{
> +	int rc;
> +
> +	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);
> +	if (rc)
> +		cpu_opal_rc[smp_processor_id()] = 1;
> +}
> +
> +int core_imc_control(int operation)
> +{
> +	int cpu, *cpus_opal_rc;
> +
> +	/* Memory for OPAL call return value. */
> +	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
> +	if (!cpus_opal_rc)
> +		return -ENOMEM;
> +
> +	/*
> +	 * Enable or disable the core IMC PMU on each core using the
> +	 * core_imc_cpumask.
> +	 */
> +	switch (operation) {
> +
> +	case IMC_COUNTER_DISABLE:
> +		on_each_cpu_mask(&core_imc_cpumask,
> +				(smp_call_func_t)core_imc_control_disable,
> +				cpus_opal_rc, 1);
> +		break;
> +	case IMC_COUNTER_ENABLE:
> +		on_each_cpu_mask(&core_imc_cpumask,
> +				(smp_call_func_t)core_imc_control_enable,
> +				cpus_opal_rc, 1);
> +		break;
> +	default:
> +		goto fail;
> +	}
> +	/* Check return value array for any OPAL call failure */
> +	for_each_cpu(cpu, &core_imc_cpumask) {
> +		if (cpus_opal_rc[cpu])
> +			goto fail;
> +	}
> +	return 0;
> +fail:
> +	if (cpus_opal_rc)
> +		kfree(cpus_opal_rc);
> +		return -EINVAL;
> +}

Interesting. You copied the other function and then implemented it
differently.

But, what's worse is that this is just duplicated code for no value. Both
the nest and core control functions call

    opal_imc_counters_xxxx(WHICH_COUNTER);

So the obvious thing to do is:

static struct cpumask imc_result_mask;
static DEFINE_MUTEX(imc_control_mutex);

static void opal_imc_XXX(void *which)
{
	if (opal_imc_counters_XXX((unsigned long) which))
		cpumask_set_cpu(smp_processor_id(), &imc_result_mask);
}

static int imc_control(unsigned long which, bool start)
{
	smp_call_func_t *fn;
	int res;

	mutex_lock(&imc_control_mutex);
	memset(&imc_result_mask, 0, sizeof(imc_result_mask));
	
	switch (which) {
	case OPAL_IMC_COUNTERS_CORE:
	case OPAL_IMC_COUNTERS_NEST:
		break;
	default:
		res = -EINVAL;
		goto out;
	}

	fn = start ? opal_imc_start : opal_imc_stop;

	on_each_cpu_mask(&core_imc_cpumask, fn, (void *) which, 1);

	res = cpumask_empty(&nest_imc_result) ? 0 : -ENODEV;
out:
	mutex_unlock(&imc_control_mutex);
	return res;

That would allow sharing of too much code, right?

> +/*
> + * core_imc_mem_init : Initializes memory for the current core.
> + *
> + * Uses alloc_pages_exact_nid() and uses the returned address as an argument to
> + * an opal call to configure the pdbar. The address sent as an argument is
> + * converted to physical address before the opal call is made. This is the
> + * base address at which the core imc counters are populated.
> + */
> +static int  __meminit core_imc_mem_init(int cpu, int size)
> +{
> +	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
> +	struct imc_mem_info *mem_info = NULL;

What's that NULL initialization for?

> +
> +	phys_id = topology_physical_package_id(cpu);
> +	/*
> +	 * alloc_pages_exact_nid() will allocate memory for core in the
> +	 * local node only.
> +	 */
> +	mem_info = &core_imc_pmu->mem_info[core_id];
> +	mem_info->id = core_id;
> +	mem_info->vbase[0] = (u64) alloc_pages_exact_nid(phys_id,
> +				(size_t)size, GFP_KERNEL | __GFP_ZERO);

That allocation is guaranteed not to fail?

> +	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
> +				(u64)virt_to_phys((void *)mem_info->vbase[0]),
> +				get_hard_smp_processor_id(cpu));
> +	if (rc) {
> +		kfree(&mem_info->vbase[0]);

Freeing memory allocated with alloc_pages_exact_nid() via kfree() is an
interesting approach.

> +		mem_info->vbase[0] = 0;

   		mem_info->vbase[0] = NULL;
  
> +	}
> +	return rc;
> +}
> +
> +bool is_core_imc_mem_inited(int cpu)
> +{
> +	struct imc_mem_info *mem_info = NULL;
> +	int core_id = (cpu / threads_per_core);
> +
> +	mem_info = &core_imc_pmu->mem_info[core_id];
> +	if ((mem_info->id == core_id) && (mem_info->vbase[0] != 0))

  != NULL

> +		return true;
> +	return false;
> +}
> +
> +/*
> + * imc_mem_init : Function to support memory allocation for core imc.
> + */
> +static int imc_mem_init(struct imc_pmu *pmu_ptr)
> +{
> +	int nr_cores;
> +
> +	if (pmu_ptr->imc_counter_mmaped)
> +		return 0;
> +	nr_cores = num_present_cpus() / threads_per_core;
> +	pmu_ptr->mem_info = kzalloc((sizeof(struct imc_mem_info) * nr_cores), GFP_KERNEL);
> +	if (!pmu_ptr->mem_info)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
> +static void core_imc_change_cpu_context(int old_cpu, int new_cpu)
> +{
> +	if (!core_imc_pmu)
> +		return;

Why is that called if core_imc_pmu == NULL?

> +	if (old_cpu < 0 || new_cpu < 0)
> +		return;

And again, like in the previous one. Why is that called at all if any of
those is < 0?

> +	perf_pmu_migrate_context(&core_imc_pmu->pmu, old_cpu, new_cpu);
> +}

> +static int ppc_core_imc_cpu_online(unsigned int cpu)
> +{
> +	const struct cpumask *l_cpumask;
> +	static struct cpumask tmp_mask;
> +	int ret = 0;
> +
> +	/* Get the cpumask for this core */
> +	l_cpumask = cpu_sibling_mask(cpu);
> +
> +	/* If a cpu for this core is already set, then, don't do anything */
> +	if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
> +		return 0;
> +
> +	if (!is_core_imc_mem_inited(cpu)) {
> +		ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
> +		if (ret) {
> +			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
> +			return ret;
> +		}
> +	} else
> +		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);

The else clause wants to have curly brackets as well.

> +
> +	/* set the cpu in the mask, and change the context */
> +	cpumask_set_cpu(cpu, &core_imc_cpumask);
> +	core_imc_change_cpu_context(-1, cpu);

Sigh

> +	return 0;
> +}
> +
> +static int ppc_core_imc_cpu_offline(unsigned int cpu)
> +{
> +	int target;
> +	unsigned int ncpu;
> +
> +	/*
> +	 * clear this cpu out of the mask, if not present in the mask,
> +	 * don't bother doing anything.
> +	 */
> +	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
> +		return 0;
> +
> +	/* Find any online cpu in that core except the current "cpu" */
> +	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
> +
> +	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
> +		target = ncpu;
> +		cpumask_set_cpu(target, &core_imc_cpumask);
> +	} else {
> +		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);
> +		target = -1;
> +	}
> +
> +	/* migrate the context */
> +	core_imc_change_cpu_context(cpu, target);

It's really weird that more or less identical functions (aside of the mask
and the counter) are implemented differently. In this version you use
cpumask_any_but() correctly.

> +
> +	return 0;
> +}

> +static int core_imc_event_init(struct perf_event *event)
> +{

...

> +	/*
> +	 * Core pmu units are enabled only when it is used.
> +	 * See if this is triggered for the first time.
> +	 * If yes, take the mutex lock and enable the core counters.
> +	 * If not, just increment the count in core_events.
> +	 */
> +	if (atomic_inc_return(&core_events) == 1) {
> +		mutex_lock(&imc_core_reserve);
> +		rc = core_imc_control(IMC_COUNTER_ENABLE);
> +		mutex_unlock(&imc_core_reserve);
> +		if (rc)
> +			pr_err("IMC: Unable to start the counters\n");
> +	}
> +	event->destroy = core_imc_counters_release;
> +	return 0;

That's at least consistently returning success even if the counter start
failed.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support
  2017-06-05 12:32 ` [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support Anju T Sudhakar
@ 2017-06-06 10:27   ` Thomas Gleixner
  2017-06-08 18:05     ` Madhavan Srinivasan
  0 siblings, 1 reply; 11+ messages in thread
From: Thomas Gleixner @ 2017-06-06 10:27 UTC (permalink / raw)
  To: Anju T Sudhakar
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy

On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
>  static void thread_imc_mem_alloc(int cpu_id)
>  {
> -	u64 ldbar_addr, ldbar_value;
>  	int phys_id = topology_physical_package_id(cpu_id);
>  
>  	per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
>  			(size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);

It took me a while to understand that per_cpu_add is an array and not a new
fangled per cpu function which adds something to a per cpu variable. 

So why is that storing the address as u64?

And why is this a NR_CPUS sized array instead of a regular per cpu variable?

> +}
> +
> +static void thread_imc_update_ldbar(unsigned int cpu_id)
> +{
> +	u64 ldbar_addr, ldbar_value;
> +
> +	if (per_cpu_add[cpu_id] == 0)
> +		thread_imc_mem_alloc(cpu_id);

So if that allocation fails then you happily proceed. Optmistic
programming, right?

> +
>  	ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);

Ah, it's stored as u64 because you have to convert it back to a void
pointer at the place where it is actually used. Interesting approach.

>  	ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
> -		(u64)THREAD_IMC_ENABLE;
> +			(u64)THREAD_IMC_ENABLE;
>  	mtspr(SPRN_LDBAR, ldbar_value);
>  }
>  
> @@ -442,6 +450,26 @@ static void core_imc_change_cpu_context(int old_cpu, int new_cpu)
>  	perf_pmu_migrate_context(&core_imc_pmu->pmu, old_cpu, new_cpu);
>  }
>  
> +static int ppc_thread_imc_cpu_online(unsigned int cpu)
> +{
> +	thread_imc_update_ldbar(cpu);
> +	return 0;
> +}
> +
> +static int ppc_thread_imc_cpu_offline(unsigned int cpu)
> +{
> +	mtspr(SPRN_LDBAR, 0);
> +	return 0;
> +}
> +
> +void thread_imc_cpu_init(void)
> +{
> +	cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
> +			  "perf/powerpc/imc_thread:online",
> +			  ppc_thread_imc_cpu_online,
> +			  ppc_thread_imc_cpu_offline);
> +}
> +
>  static int ppc_core_imc_cpu_online(unsigned int cpu)
>  {
>  	const struct cpumask *l_cpumask;
> @@ -953,6 +981,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
>  		if (ret)
>  			return ret;
>  		break;
> +	case IMC_DOMAIN_THREAD:
> +		thread_imc_cpu_init();
> +		break;
>  	default:
>  		return -1;  /* Unknown domain */
>  	}

Just a general observation. If anything in init_imc_pmu() fails, then all
the hotplug callbacks are staying registered, at least I haven't seen
anything undoing it.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support
  2017-06-06  8:39 ` [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and " Thomas Gleixner
@ 2017-06-07  5:26   ` Anju T Sudhakar
  0 siblings, 0 replies; 11+ messages in thread
From: Anju T Sudhakar @ 2017-06-07  5:26 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy

Hi Thomas,

Thank you for reviewing the patch.


On Tuesday 06 June 2017 02:09 PM, Thomas Gleixner wrote:
> On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
>> +/*
>> + * nest_imc_stop : Does OPAL call to stop nest engine.
>> + */
>> +static void nest_imc_stop(int *cpu_opal_rc)
>> +{
>> +	int rc;
>> +
>> +	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
>> +	if (rc)
>> +		cpu_opal_rc[smp_processor_id()] = 1;
> What's wrong with just assigning the result directly?


yes. We can assign it directly. Will do this.

>
>> +/* nest_imc_start: Does the OPAL call to enable nest counters. */
>> +static void nest_imc_start(int *cpu_opal_rc)
>> +{
>> +	int rc;
>> +
>> +	rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST);
>> +	if (rc)
>> +		cpu_opal_rc[smp_processor_id()] = 1;
>> +}
>> +
>> +/*
>> + * nest_imc_control: Function to start and stop nest imc engine.
>> + *		     The function is primarily called from event init
>> + * 		     and event destroy.
> As I don't see other call sites, what's the secondary usage?


This function is called from event init and event destroy only.
Will fix the comment here.

>
>> + */
>> +static int nest_imc_control(int operation)
>> +{
>> +	int *cpus_opal_rc, cpu;
>> +
>> +	/*
>> +	 * Memory for OPAL call return value.
>> +	 */
>> +	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
>> +	if (!cpus_opal_rc)
>> +		return -ENOMEM;
>> +	switch (operation) {
>> +	case IMC_COUNTER_ENABLE:
>> +			/* Initialize Nest PMUs in each node using designated cpus */
>> +			on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_start,
>> +						cpus_opal_rc, 1);
> That's one indentation level too deep.

My bad. Will fix this.

>> +			break;
>> +	case IMC_COUNTER_DISABLE:
>> +			/* Disable the counters */
>> +			on_each_cpu_mask(&nest_imc_cpumask, (smp_call_func_t)nest_imc_stop,
>> +						cpus_opal_rc, 1);
>> +			break;
>> +	default:
>> +		kfree(cpus_opal_rc);
>> +		return -EINVAL;
>> +
>> +	}
>> +
>> +	/* Check return value array for any OPAL call failure */
>> +	for_each_cpu(cpu, &nest_imc_cpumask) {
>> +		if (cpus_opal_rc[cpu]) {
>> +			kfree(cpus_opal_rc);
>> +			return -ENODEV;
>> +		}
>> +	}
>> +	kfree(cpus_opal_rc);
> So you have THREE places where you do kfree(cpus_opal_rc). Sigh.
>
> You can spare that hole alloc/free dance by simply having
>
> static struct cpumask nest_imc_result;
>
> 	mutex_lock(&imc_nest_reserve);
> 	memset(&nest_imc_result, 0, sizeof(nest_imc_result));
>
> 	switch (op) {
> 	case IMC_COUNTER_ENABLE:
> 		on_each_cpu_mask(&nest_imc_cpumask, nest_imc_start, NULL, 1);
> 		break;
> 	....
> 	}
>
> 	res = cpumask_empty(&nest_imc_result) ? 0 : -ENODEV;
> 	mutex_unlock(&imc_nest_reserve);
> 	return res;
>
> And in the start/stop functions:
>
> 	if (opal_imc_counters_xxx(OPAL_IMC_COUNTERS_NEST))
> 		cpumask_set_cpu(smp_processor_id(), &nest_imc_result);

Ok.

>> +static void nest_change_cpu_context(int old_cpu, int new_cpu)
>> +{
>> +	struct imc_pmu **pn = per_nest_pmu_arr;
>> +	int i;
>> +
>> +	if (old_cpu < 0 || new_cpu < 0)
>> +		return;
>> +
>> +	for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++)
>> +		perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu);
>> +
>> +}
>> +
>> +static int ppc_nest_imc_cpu_offline(unsigned int cpu)
>> +{
>> +	int nid, target = -1;
>> +	const struct cpumask *l_cpumask;
>> +
>> +	/*
>> +	 * Check in the designated list for this cpu. Dont bother
>> +	 * if not one of them.
>> +	 */
>> +	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
>> +		return 0;
>> +
>> +	/*
>> +	 * Now that this cpu is one of the designated,
>> +	 * find a next cpu a) which is online and b) in same chip.
>> +	 */
>> +	nid = cpu_to_node(cpu);
>> +	l_cpumask = cpumask_of_node(nid);
>> +	target = cpumask_next(cpu, l_cpumask);
> So if the outgoing CPU is the highest numbered CPU on the node, then
> cpumask_next() will not find a CPU, while there still might be other lower
> numbered CPUs online in the node.
>
> 	target = cpumask_any_but(l_cpumask, cpu);
>
> is what you want.

In the previous revisions we designated the smallest cpu in the mask. 
And then we re wrote the
code to pick next available cpu. But this is a nice catch.  :-) Thanks!
I will fix this.

>> +
>> +	/*
>> +	 * Update the cpumask with the target cpu and
>> +	 * migrate the context if needed
>> +	 */
>> +	if (target >= 0 && target < nr_cpu_ids) {
>> +		cpumask_set_cpu(target, &nest_imc_cpumask);
>> +		nest_change_cpu_context(cpu, target);
>> +	} else {
>> +		target = -1;
>> +		opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
>> +		nest_change_cpu_context(cpu, target);
> Why are you calling nest_change_cpu_context()? You already know that it
> will return immediately due to target < 0.

Yes right. Will remove the function call here.

>> +	}
>> +	return 0;
>> +}
>> +
>> +static int ppc_nest_imc_cpu_online(unsigned int cpu)
>> +{
>> +	const struct cpumask *l_cpumask;
>> +	static struct cpumask tmp_mask;
>> +	int res;
>> +
>> +	/* Get the cpumask of this node */
>> +	l_cpumask = cpumask_of_node(cpu_to_node(cpu));
>> +
>> +	/*
>> +	 * If this is not the first online CPU on this node, then
>> +	 * just return.
>> +	 */
>> +	if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask))
>> +		return 0;
>> +
>> +	/*
>> +	 * If this is the first online cpu on this node
>> +	 * disable the nest counters by making an OPAL call.
>> +	 */
>> +	res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST);
>> +	if (res)
>> +		return res;
>> +
>> +	/* Make this CPU the designated target for counter collection */
>> +	cpumask_set_cpu(cpu, &nest_imc_cpumask);
>> +	nest_change_cpu_context(-1, cpu);
> Ditto
>
>>   static int nest_imc_event_init(struct perf_event *event)
>>   {
>> -	int chip_id;
>> +	int chip_id, rc;
>>   	u32 config = event->attr.config;
>>   	struct imc_mem_info *pcni;
>>   	struct imc_pmu *pmu;
>> @@ -80,6 +277,20 @@ static int nest_imc_event_init(struct perf_event *event)
>>   	 * "chip_id" and add "config" to it".
>>   	 */
>>   	event->hw.event_base = pcni->vbase[config/PAGE_SIZE] + (config & ~PAGE_MASK);
>> +	/*
>> +	 * Nest pmu units are enabled only when it is used.
>> +	 * See if this is triggered for the first time.
>> +	 * If yes, take the mutex lock and enable the nest counters.
>> +	 * If not, just increment the count in nest_events.
>> +	 */
>> +	if (atomic_inc_return(&nest_events) == 1) {
>> +		mutex_lock(&imc_nest_reserve);
>> +		rc = nest_imc_control(IMC_COUNTER_ENABLE);
>> +		mutex_unlock(&imc_nest_reserve);
>> +		if (rc)
>> +			pr_err("IMC: Unable to start the counters\n");
> So if that fails, nest_events will stay incremented. Is that on purpose?

Nice catch. Will return here and decrement the count.


Thanks and Regards,

Anju
>> +	}
>> +	event->destroy = nest_imc_counters_release;
>>   	return 0;
> And this happily returns success even if the enable call failed ....
>
> Thanks,
>
> 	tglx
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging
  2017-06-06 10:09   ` Thomas Gleixner
@ 2017-06-07  5:44     ` Anju T Sudhakar
  2017-06-07 10:34       ` Anju T Sudhakar
  2017-06-12 22:44       ` Thomas Gleixner
  0 siblings, 2 replies; 11+ messages in thread
From: Anju T Sudhakar @ 2017-06-07  5:44 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy

Hi Thomas,

On Tuesday 06 June 2017 03:39 PM, Thomas Gleixner wrote:
> On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
>> +static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
>> +{
>> +	struct imc_mem_info *ptr = pmu_ptr->mem_info;
>> +
>> +	if (!ptr)
>> +		return;
> That's pointless.

No, it is not.  We may end up here from imc_mem_init() when the memory 
allocation for
pmu_ptr->mem_info fails. So in that case we can just return from here, 
and kfree wont be
called with a NULL pointer.

>> +	for (; ptr; ptr++) {
>    	for (ptr = pmu_ptr->mem_info; ptr; ptr++) {
>
> will do the right thing.
>
>> +		if (ptr->vbase[0] != 0)
>> +			free_pages(ptr->vbase[0], 0);
>> +	}
> and kfree can be called with a NULL pointer.
>
>> +	kfree(pmu_ptr->mem_info);
>> +}
>> +
>> +/* Enabling of Core Engine needs a scom operation */
>> +static void core_imc_control_enable(int *cpu_opal_rc)
>> +{
>> +	int rc;
>> +
>> +	rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE);
>> +	if (rc)
>> +		cpu_opal_rc[smp_processor_id()] = 1;
>> +}
>> +
>> +/*
>> + * Disabling of IMC Core Engine needs a scom operation
>> + */
>> +static void core_imc_control_disable(int *cpu_opal_rc)
>> +{
>> +	int rc;
>> +
>> +	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE);
>> +	if (rc)
>> +		cpu_opal_rc[smp_processor_id()] = 1;
>> +}
>> +
>> +int core_imc_control(int operation)
>> +{
>> +	int cpu, *cpus_opal_rc;
>> +
>> +	/* Memory for OPAL call return value. */
>> +	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
>> +	if (!cpus_opal_rc)
>> +		return -ENOMEM;
>> +
>> +	/*
>> +	 * Enable or disable the core IMC PMU on each core using the
>> +	 * core_imc_cpumask.
>> +	 */
>> +	switch (operation) {
>> +
>> +	case IMC_COUNTER_DISABLE:
>> +		on_each_cpu_mask(&core_imc_cpumask,
>> +				(smp_call_func_t)core_imc_control_disable,
>> +				cpus_opal_rc, 1);
>> +		break;
>> +	case IMC_COUNTER_ENABLE:
>> +		on_each_cpu_mask(&core_imc_cpumask,
>> +				(smp_call_func_t)core_imc_control_enable,
>> +				cpus_opal_rc, 1);
>> +		break;
>> +	default:
>> +		goto fail;
>> +	}
>> +	/* Check return value array for any OPAL call failure */
>> +	for_each_cpu(cpu, &core_imc_cpumask) {
>> +		if (cpus_opal_rc[cpu])
>> +			goto fail;
>> +	}
>> +	return 0;
>> +fail:
>> +	if (cpus_opal_rc)
>> +		kfree(cpus_opal_rc);
>> +		return -EINVAL;
>> +}
> Interesting. You copied the other function and then implemented it
> differently.
>
> But, what's worse is that this is just duplicated code for no value. Both
> the nest and core control functions call
>
>      opal_imc_counters_xxxx(WHICH_COUNTER);
>
> So the obvious thing to do is:
>
> static struct cpumask imc_result_mask;
> static DEFINE_MUTEX(imc_control_mutex);
>
> static void opal_imc_XXX(void *which)
> {
> 	if (opal_imc_counters_XXX((unsigned long) which))
> 		cpumask_set_cpu(smp_processor_id(), &imc_result_mask);
> }
>
> static int imc_control(unsigned long which, bool start)
> {
> 	smp_call_func_t *fn;
> 	int res;
>
> 	mutex_lock(&imc_control_mutex);
> 	memset(&imc_result_mask, 0, sizeof(imc_result_mask));
> 	
> 	switch (which) {
> 	case OPAL_IMC_COUNTERS_CORE:
> 	case OPAL_IMC_COUNTERS_NEST:
> 		break;
> 	default:
> 		res = -EINVAL;
> 		goto out;
> 	}
>
> 	fn = start ? opal_imc_start : opal_imc_stop;
>
> 	on_each_cpu_mask(&core_imc_cpumask, fn, (void *) which, 1);
>
> 	res = cpumask_empty(&nest_imc_result) ? 0 : -ENODEV;
> out:
> 	mutex_unlock(&imc_control_mutex);
> 	return res;
>
> That would allow sharing of too much code, right?

Yes. This looks really good. Thanks!
I will rework the code.

>> +/*
>> + * core_imc_mem_init : Initializes memory for the current core.
>> + *
>> + * Uses alloc_pages_exact_nid() and uses the returned address as an argument to
>> + * an opal call to configure the pdbar. The address sent as an argument is
>> + * converted to physical address before the opal call is made. This is the
>> + * base address at which the core imc counters are populated.
>> + */
>> +static int  __meminit core_imc_mem_init(int cpu, int size)
>> +{
>> +	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
>> +	struct imc_mem_info *mem_info = NULL;
> What's that NULL initialization for?
>
>> +
>> +	phys_id = topology_physical_package_id(cpu);
>> +	/*
>> +	 * alloc_pages_exact_nid() will allocate memory for core in the
>> +	 * local node only.
>> +	 */
>> +	mem_info = &core_imc_pmu->mem_info[core_id];
>> +	mem_info->id = core_id;
>> +	mem_info->vbase[0] = (u64) alloc_pages_exact_nid(phys_id,
>> +				(size_t)size, GFP_KERNEL | __GFP_ZERO);
> That allocation is guaranteed not to fail?

Nice catch. We need a check here. Will fix this.


Thanks and Regards,

Anju

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging
  2017-06-07  5:44     ` Anju T Sudhakar
@ 2017-06-07 10:34       ` Anju T Sudhakar
  2017-06-12 22:44       ` Thomas Gleixner
  1 sibling, 0 replies; 11+ messages in thread
From: Anju T Sudhakar @ 2017-06-07 10:34 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy



On Wednesday 07 June 2017 11:14 AM, Anju T Sudhakar wrote:
> Hi Thomas,
>
> On Tuesday 06 June 2017 03:39 PM, Thomas Gleixner wrote:
>> On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
>>> +static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
>>> +{
>>> +    struct imc_mem_info *ptr = pmu_ptr->mem_info;
>>> +
>>> +    if (!ptr)
>>> +        return;
>> That's pointless.
>
> No, it is not.  We may end up here from imc_mem_init() when the memory 
> allocation for
> pmu_ptr->mem_info fails. So in that case we can just return from here, 
> and kfree wont be
> called with a NULL pointer.
>
>>> +    for (; ptr; ptr++) {
>>        for (ptr = pmu_ptr->mem_info; ptr; ptr++) {
>>
>> will do the right thing.

Sorry, replied too soon on this. You are right, with this we can remove 
the if (!ptr) check.
thanks.
>>
>>> +        if (ptr->vbase[0] != 0)
>>> +            free_pages(ptr->vbase[0], 0);
>>> +    }
>> and kfree can be called with a NULL pointer.
>>

Yes right.

Regards,
Anju

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support
  2017-06-06 10:27   ` Thomas Gleixner
@ 2017-06-08 18:05     ` Madhavan Srinivasan
  0 siblings, 0 replies; 11+ messages in thread
From: Madhavan Srinivasan @ 2017-06-08 18:05 UTC (permalink / raw)
  To: Thomas Gleixner, Anju T Sudhakar
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant



On Tuesday 06 June 2017 03:57 PM, Thomas Gleixner wrote:
> On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
>>   static void thread_imc_mem_alloc(int cpu_id)
>>   {
>> -	u64 ldbar_addr, ldbar_value;
>>   	int phys_id = topology_physical_package_id(cpu_id);
>>   
>>   	per_cpu_add[cpu_id] = (u64)alloc_pages_exact_nid(phys_id,
>>   			(size_t)IMC_THREAD_COUNTER_MEM, GFP_KERNEL | __GFP_ZERO);
> It took me a while to understand that per_cpu_add is an array and not a new
> fangled per cpu function which adds something to a per cpu variable.
>
> So why is that storing the address as u64?

Value that we need to load to the ldbar spr also has other bits
apart from memory addr. So we made it as an array. But this
should be a per_cpu pointer.  Will fix it.


>
> And why is this a NR_CPUS sized array instead of a regular per cpu variable?

Yes. will fix it.

>> +}
>> +
>> +static void thread_imc_update_ldbar(unsigned int cpu_id)
>> +{
>> +	u64 ldbar_addr, ldbar_value;
>> +
>> +	if (per_cpu_add[cpu_id] == 0)
>> +		thread_imc_mem_alloc(cpu_id);
> So if that allocation fails then you happily proceed. Optmistic
> programming, right?

Will add return value check.

>
>> +
>>   	ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
> Ah, it's stored as u64 because you have to convert it back to a void
> pointer at the place where it is actually used. Interesting approach.
>
>>   	ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
>> -		(u64)THREAD_IMC_ENABLE;
>> +			(u64)THREAD_IMC_ENABLE;
>>   	mtspr(SPRN_LDBAR, ldbar_value);
>>   }
>>   
>> @@ -442,6 +450,26 @@ static void core_imc_change_cpu_context(int old_cpu, int new_cpu)
>>   	perf_pmu_migrate_context(&core_imc_pmu->pmu, old_cpu, new_cpu);
>>   }
>>   
>> +static int ppc_thread_imc_cpu_online(unsigned int cpu)
>> +{
>> +	thread_imc_update_ldbar(cpu);
>> +	return 0;
>> +}
>> +
>> +static int ppc_thread_imc_cpu_offline(unsigned int cpu)
>> +{
>> +	mtspr(SPRN_LDBAR, 0);
>> +	return 0;
>> +}
>> +
>> +void thread_imc_cpu_init(void)
>> +{
>> +	cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
>> +			  "perf/powerpc/imc_thread:online",
>> +			  ppc_thread_imc_cpu_online,
>> +			  ppc_thread_imc_cpu_offline);
>> +}
>> +
>>   static int ppc_core_imc_cpu_online(unsigned int cpu)
>>   {
>>   	const struct cpumask *l_cpumask;
>> @@ -953,6 +981,9 @@ int __init init_imc_pmu(struct imc_events *events, int idx,
>>   		if (ret)
>>   			return ret;
>>   		break;
>> +	case IMC_DOMAIN_THREAD:
>> +		thread_imc_cpu_init();
>> +		break;
>>   	default:
>>   		return -1;  /* Unknown domain */
>>   	}
> Just a general observation. If anything in init_imc_pmu() fails, then all
> the hotplug callbacks are staying registered, at least I haven't seen
> anything undoing it.
Yes. We did not add code to unregister the call back on failure.
Will fix that in next version.

Thanks for the review.
Maddy

> Thanks,
>
> 	tglx
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging
  2017-06-07  5:44     ` Anju T Sudhakar
  2017-06-07 10:34       ` Anju T Sudhakar
@ 2017-06-12 22:44       ` Thomas Gleixner
  1 sibling, 0 replies; 11+ messages in thread
From: Thomas Gleixner @ 2017-06-12 22:44 UTC (permalink / raw)
  To: Anju T Sudhakar
  Cc: mpe, linux-kernel, linuxppc-dev, ego, bsingharora, anton,
	sukadev, mikey, stewart, dja, eranian, hemant, maddy

On Wed, 7 Jun 2017, Anju T Sudhakar wrote:
> On Tuesday 06 June 2017 03:39 PM, Thomas Gleixner wrote:
> > On Mon, 5 Jun 2017, Anju T Sudhakar wrote:
> > > +static void cleanup_all_core_imc_memory(struct imc_pmu *pmu_ptr)
> > > +{
> > > +	struct imc_mem_info *ptr = pmu_ptr->mem_info;
> > > +
> > > +	if (!ptr)
> > > +		return;
> > That's pointless.
> 
> No, it is not.  We may end up here from imc_mem_init() when the memory
> allocation for pmu_ptr->mem_info fails. So in that case we can just
> return from here, and kfree wont be called with a NULL pointer.

What's the problem with that. kfree() CAN be called with a NULL pointer. It
has a check already.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-06-12 22:44 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-05 12:32 [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and cpuhotplug support Anju T Sudhakar
2017-06-05 12:32 ` [PATCH v9 07/10] powerpc/perf: PMU functions for Core IMC and hotplugging Anju T Sudhakar
2017-06-06 10:09   ` Thomas Gleixner
2017-06-07  5:44     ` Anju T Sudhakar
2017-06-07 10:34       ` Anju T Sudhakar
2017-06-12 22:44       ` Thomas Gleixner
2017-06-05 12:32 ` [PATCH v9 10/10] powerpc/perf: Thread imc cpuhotplug support Anju T Sudhakar
2017-06-06 10:27   ` Thomas Gleixner
2017-06-08 18:05     ` Madhavan Srinivasan
2017-06-06  8:39 ` [PATCH v9 05/10] powerpc/perf: IMC pmu cpumask and " Thomas Gleixner
2017-06-07  5:26   ` Anju T Sudhakar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.