* [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
@ 2020-04-08 16:49 Robin Murphy
  2020-04-09  7:02 ` John Garry
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Robin Murphy @ 2020-04-08 16:49 UTC (permalink / raw)
  To: will, mark.rutland
  Cc: linux-arm-kernel, shameerali.kolothum.thodi, john.garry, harb,
	tuanphan, linux-kernel

IRQF_SHARED is dangerous, since it allows other agents to retarget the
IRQ's affinity without migrating PMU contexts to match, breaking the way
in which perf manages mutual exclusion for accessing events. Although
this means it's not realistically possible to support PMU IRQs being
shared with other drivers, we *can* handle sharing between multiple PMU
instances with some explicit affinity bookkeeping and manual interrupt
multiplexing.

RCU helps us handle interrupts efficiently without having to worry about
fine-grained locking for relatively-theoretical race conditions with the
probe/remove/CPU hotplug slow paths. The resulting machinery ends up
looking largely generic, so it should be feasible to factor out with a
"system PMU" base class for similar multi-instance drivers.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---

RFC because I don't have the means to test it, and if the general
approach passes muster then I'd want to tackle the aforementioned
factoring-out before merging anything anyway.

Robin.


 drivers/perf/arm_smmuv3_pmu.c | 215 ++++++++++++++++++++++++----------
 1 file changed, 152 insertions(+), 63 deletions(-)

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index d704eccc548f..8daa7ac6e801 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -47,8 +47,11 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/msi.h>
+#include <linux/mutex.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
+#include <linux/rculist.h>
+#include <linux/refcount.h>
 #include <linux/smp.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
@@ -98,13 +101,24 @@
 
 static int cpuhp_state_num;
 
-struct smmu_pmu {
+static LIST_HEAD(smmu_pmu_affinities);
+static DEFINE_MUTEX(smmu_pmu_affinity_lock);
+
+struct smmu_pmu_affinity {
 	struct hlist_node node;
+	struct list_head affinity_list;
+	struct list_head instance_list;
+	refcount_t refcount;
+	unsigned int irq;
+	unsigned int cpu;
+};
+
+struct smmu_pmu {
 	struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
 	DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
 	DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
-	unsigned int irq;
-	unsigned int on_cpu;
+	struct smmu_pmu_affinity *affinity;
+	struct list_head affinity_list;
 	struct pmu pmu;
 	unsigned int num_counters;
 	struct device *dev;
@@ -394,7 +408,7 @@ static int smmu_pmu_event_init(struct perf_event *event)
 	 * Ensure all events are on the same cpu so all events are in the
 	 * same cpu context, to avoid races on pmu_enable etc.
 	 */
-	event->cpu = smmu_pmu->on_cpu;
+	event->cpu = smmu_pmu->affinity->cpu;
 
 	return 0;
 }
@@ -478,9 +492,10 @@ static ssize_t smmu_pmu_cpumask_show(struct device *dev,
 				     struct device_attribute *attr,
 				     char *buf)
 {
-	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct smmu_pmu_affinity *aff = to_smmu_pmu(pmu)->affinity;
 
-	return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(aff->cpu));
 }
 
 static struct device_attribute smmu_pmu_cpumask_attr =
@@ -584,50 +599,140 @@ static const struct attribute_group *smmu_pmu_attr_grps[] = {
 
 static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
-	struct smmu_pmu *smmu_pmu;
+	struct smmu_pmu_affinity *aff;
+	struct smmu_pmu *pmu;
 	unsigned int target;
 
-	smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
-	if (cpu != smmu_pmu->on_cpu)
+	aff = hlist_entry_safe(node, struct smmu_pmu_affinity, node);
+	if (cpu != aff->cpu)
 		return 0;
 
 	target = cpumask_any_but(cpu_online_mask, cpu);
 	if (target >= nr_cpu_ids)
 		return 0;
 
-	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
-	smmu_pmu->on_cpu = target;
-	WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));
+	/* We're only reading, but this isn't the place to be involving RCU */
+	mutex_lock(&smmu_pmu_affinity_lock);
+	list_for_each_entry(pmu, &aff->instance_list, affinity_list)
+		perf_pmu_migrate_context(&pmu->pmu, aff->cpu, target);
+	mutex_unlock(&smmu_pmu_affinity_lock);
+
+	WARN_ON(irq_set_affinity_hint(aff->irq, cpumask_of(target)));
+	aff->cpu = target;
 
 	return 0;
 }
 
 static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
 {
-	struct smmu_pmu *smmu_pmu = data;
-	u64 ovsr;
-	unsigned int idx;
+	struct smmu_pmu_affinity *aff = data;
+	struct smmu_pmu *smmu_pmu;
+	irqreturn_t ret = IRQ_NONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(smmu_pmu, &aff->instance_list, affinity_list) {
+		unsigned int idx;
+		u64 ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
 
-	ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
-	if (!ovsr)
-		return IRQ_NONE;
+		if (!ovsr)
+			continue;
 
-	writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
+		writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
 
-	for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
-		struct perf_event *event = smmu_pmu->events[idx];
-		struct hw_perf_event *hwc;
+		for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
+			struct perf_event *event = smmu_pmu->events[idx];
 
-		if (WARN_ON_ONCE(!event))
-			continue;
+			if (WARN_ON_ONCE(!event))
+				continue;
+
+			smmu_pmu_event_update(event);
+			smmu_pmu_set_period(smmu_pmu, &event->hw);
+		}
+		ret = IRQ_HANDLED;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static struct smmu_pmu_affinity *__smmu_pmu_get_affinity(int irq)
+{
+	struct smmu_pmu_affinity *aff;
+	int ret;
+
+	list_for_each_entry(aff, &smmu_pmu_affinities, affinity_list)
+		if (aff->irq == irq && refcount_inc_not_zero(&aff->refcount))
+			return aff;
+
+	aff = kzalloc(sizeof(*aff), GFP_KERNEL);
+	if (!aff)
+		return ERR_PTR(-ENOMEM);
+
+	/* Pick one CPU to be the preferred one to use */
+	aff->cpu = raw_smp_processor_id();
+	refcount_set(&aff->refcount, 1);
+
+	ret = request_irq(irq, smmu_pmu_handle_irq,
+			  IRQF_NOBALANCING | IRQF_NO_THREAD,
+			  "smmuv3-pmu", aff);
+	if (ret)
+		goto out_free_aff;
+
+	ret = irq_set_affinity_hint(irq, cpumask_of(aff->cpu));
+	if (ret)
+		goto out_free_irq;
+
+	ret = cpuhp_state_add_instance_nocalls(cpuhp_state_num, &aff->node);
+	if (ret)
+		goto out_free_irq;
+
+	list_add(&aff->affinity_list, &smmu_pmu_affinities);
+	return aff;
+
+out_free_irq:
+	free_irq(irq, aff);
+out_free_aff:
+	kfree(aff);
+	return ERR_PTR(ret);
+}
+
+static int smmu_pmu_get_affinity(struct smmu_pmu *pmu, int irq)
+{
+	struct smmu_pmu_affinity *aff;
+
+	mutex_lock(&smmu_pmu_affinity_lock);
+	aff = __smmu_pmu_get_affinity(irq);
+	mutex_unlock(&smmu_pmu_affinity_lock);
+
+	if (IS_ERR(aff))
+		return PTR_ERR(aff);
+
+	pmu->affinity = aff;
+	mutex_lock(&smmu_pmu_affinity_lock);
+	list_add_rcu(&pmu->affinity_list, &aff->instance_list);
+	mutex_unlock(&smmu_pmu_affinity_lock);
+
+	return 0;
+}
+
+static void smmu_pmu_put_affinity(struct smmu_pmu *pmu)
+{
+	struct smmu_pmu_affinity *aff = pmu->affinity;
 
-		smmu_pmu_event_update(event);
-		hwc = &event->hw;
+	mutex_lock(&smmu_pmu_affinity_lock);
+	list_del_rcu(&pmu->affinity_list);
 
-		smmu_pmu_set_period(smmu_pmu, hwc);
+	if (!refcount_dec_and_test(&aff->refcount)) {
+		mutex_unlock(&smmu_pmu_affinity_lock);
+		return;
 	}
 
-	return IRQ_HANDLED;
+	list_del(&aff->affinity_list);
+	mutex_unlock(&smmu_pmu_affinity_lock);
+
+	free_irq(aff->irq, aff);
+	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &aff->node);
+	kfree(aff);
 }
 
 static void smmu_pmu_free_msis(void *data)
@@ -652,7 +757,7 @@ static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
 		       pmu->reg_base + SMMU_PMCG_IRQ_CFG2);
 }
 
-static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
+static int smmu_pmu_setup_msi(struct smmu_pmu *pmu)
 {
 	struct msi_desc *desc;
 	struct device *dev = pmu->dev;
@@ -663,34 +768,34 @@ static void smmu_pmu_setup_msi(struct smmu_pmu *pmu)
 
 	/* MSI supported or not */
 	if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI))
-		return;
+		return 0;
 
 	ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg);
 	if (ret) {
 		dev_warn(dev, "failed to allocate MSIs\n");
-		return;
+		return ret;
 	}
 
 	desc = first_msi_entry(dev);
 	if (desc)
-		pmu->irq = desc->irq;
+		ret = desc->irq;
 
 	/* Add callback to free MSIs on teardown */
 	devm_add_action(dev, smmu_pmu_free_msis, dev);
+	return ret;
 }
 
 static int smmu_pmu_setup_irq(struct smmu_pmu *pmu)
 {
-	unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD;
-	int irq, ret = -ENXIO;
+	int irq;
 
-	smmu_pmu_setup_msi(pmu);
+	irq = smmu_pmu_setup_msi(pmu);
+	if (irq <= 0)
+		irq = platform_get_irq(to_platform_device(pmu->dev), 0);
+	if (irq < 0)
+		return irq;
 
-	irq = pmu->irq;
-	if (irq)
-		ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq,
-				       flags, "smmuv3-pmu", pmu);
-	return ret;
+	return smmu_pmu_get_affinity(pmu, irq);
 }
 
 static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu)
@@ -730,7 +835,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 	struct resource *res_0;
 	u32 cfgr, reg_size;
 	u64 ceid_64[2];
-	int irq, err;
+	int err;
 	char *name;
 	struct device *dev = &pdev->dev;
 
@@ -771,10 +876,6 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 		smmu_pmu->reloc_base = smmu_pmu->reg_base;
 	}
 
-	irq = platform_get_irq(pdev, 0);
-	if (irq > 0)
-		smmu_pmu->irq = irq;
-
 	ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0);
 	ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1);
 	bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64,
@@ -789,12 +890,6 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 
 	smmu_pmu_reset(smmu_pmu);
 
-	err = smmu_pmu_setup_irq(smmu_pmu);
-	if (err) {
-		dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
-		return err;
-	}
-
 	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx",
 			      (res_0->start) >> SMMU_PMCG_PA_SHIFT);
 	if (!name) {
@@ -804,16 +899,9 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 
 	smmu_pmu_get_acpi_options(smmu_pmu);
 
-	/* Pick one CPU to be the preferred one to use */
-	smmu_pmu->on_cpu = raw_smp_processor_id();
-	WARN_ON(irq_set_affinity_hint(smmu_pmu->irq,
-				      cpumask_of(smmu_pmu->on_cpu)));
-
-	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
-					       &smmu_pmu->node);
+	err = smmu_pmu_setup_irq(smmu_pmu);
 	if (err) {
-		dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
-			err, &res_0->start);
+		dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start);
 		return err;
 	}
 
@@ -832,7 +920,8 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 	return 0;
 
 out_unregister:
-	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
+	smmu_pmu_put_affinity(smmu_pmu);
+	synchronize_rcu();
 	return err;
 }
 
@@ -840,9 +929,9 @@ static int smmu_pmu_remove(struct platform_device *pdev)
 {
 	struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev);
 
+	smmu_pmu_put_affinity(smmu_pmu);
+	/* perf will synchronise RCU before devres can free smmu_pmu */
 	perf_pmu_unregister(&smmu_pmu->pmu);
-	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-
 	return 0;
 }
 
-- 
2.23.0.dirty



* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-04-08 16:49 [RFC PATCH] perf/smmuv3: Fix shared interrupt handling Robin Murphy
@ 2020-04-09  7:02 ` John Garry
  2020-04-09  9:54   ` Robin Murphy
  2020-04-30 22:11 ` Tuan Phan
  2020-06-24 11:48 ` Robin Murphy
  2 siblings, 1 reply; 9+ messages in thread
From: John Garry @ 2020-04-09  7:02 UTC (permalink / raw)
  To: Robin Murphy, will, mark.rutland
  Cc: linux-arm-kernel, Shameerali Kolothum Thodi, harb, tuanphan,
	linux-kernel

On 08/04/2020 17:49, Robin Murphy wrote:
> IRQF_SHARED is dangerous, since it allows other agents to retarget the
> IRQ's affinity without migrating PMU contexts to match, breaking the way
> in which perf manages mutual exclusion for accessing events. Although
> this means it's not realistically possible to support PMU IRQs being
> shared with other drivers, we *can* handle sharing between multiple PMU
> instances with some explicit affinity bookkeeping and manual interrupt
> multiplexing.

Hi Robin,

Out of curiosity, do we even need to support shared interrupts for any 
implementations today?

D06 board:

john@ubuntu:~$ more /proc/interrupts | grep smmuv3-pmu

  989:  0  0  0  0  ITS-pMSI 133120 Edge  smmuv3-pmu
  990:  0  0  0  0  ITS-pMSI 135168 Edge  smmuv3-pmu
  991:  0  0  0  0  ITS-pMSI 137216 Edge  smmuv3-pmu
  992:  0  0  0  0  ITS-pMSI 139264 Edge  smmuv3-pmu
  993:  0  0  0  0  ITS-pMSI 141312 Edge  smmuv3-pmu
  994:  0  0  0  0  ITS-pMSI 143360 Edge  smmuv3-pmu
  995:  0  0  0  0  ITS-pMSI 145408 Edge  smmuv3-pmu
  996:  0  0  0  0  ITS-pMSI 147456 Edge  smmuv3-pmu

Thanks,
John



* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-04-09  7:02 ` John Garry
@ 2020-04-09  9:54   ` Robin Murphy
  0 siblings, 0 replies; 9+ messages in thread
From: Robin Murphy @ 2020-04-09  9:54 UTC (permalink / raw)
  To: John Garry, will, mark.rutland
  Cc: linux-arm-kernel, Shameerali Kolothum Thodi, harb, tuanphan,
	linux-kernel

On 2020-04-09 8:02 am, John Garry wrote:
> On 08/04/2020 17:49, Robin Murphy wrote:
>> IRQF_SHARED is dangerous, since it allows other agents to retarget the
>> IRQ's affinity without migrating PMU contexts to match, breaking the way
>> in which perf manages mutual exclusion for accessing events. Although
>> this means it's not realistically possible to support PMU IRQs being
>> shared with other drivers, we *can* handle sharing between multiple PMU
>> instances with some explicit affinity bookkeeping and manual interrupt
>> multiplexing.
> 
> Hi Robin,
> 
> Out of curiosity, do we even need to support shared interrupts for any 
> implementations today?

Not that I know of, but we need the mitigation in general for future 
drivers[1], and since this one already had a suspicious IRQF_SHARED, it 
was the ideal victim for prototyping. I haven't dared ask about Ampere's 
SMMU story... :)

> D06 board:
> 
> john@ubuntu:~$ more /proc/interrupts | grep smmuv3-pmu
> 
>   989:  0  0  0  0  ITS-pMSI 133120 Edge  smmuv3-pmu
>   990:  0  0  0  0  ITS-pMSI 135168 Edge  smmuv3-pmu
>   991:  0  0  0  0  ITS-pMSI 137216 Edge  smmuv3-pmu
>   992:  0  0  0  0  ITS-pMSI 139264 Edge  smmuv3-pmu
>   993:  0  0  0  0  ITS-pMSI 141312 Edge  smmuv3-pmu
>   994:  0  0  0  0  ITS-pMSI 143360 Edge  smmuv3-pmu
>   995:  0  0  0  0  ITS-pMSI 145408 Edge  smmuv3-pmu
>   996:  0  0  0  0  ITS-pMSI 147456 Edge  smmuv3-pmu

Yeah, MSIs are the best way to defeat any interrupt wiring!

Robin.

[1] 
https://lore.kernel.org/linux-arm-kernel/3efa118a-5c85-6af9-e676-44087f1d398e@arm.com/


* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-04-08 16:49 [RFC PATCH] perf/smmuv3: Fix shared interrupt handling Robin Murphy
  2020-04-09  7:02 ` John Garry
@ 2020-04-30 22:11 ` Tuan Phan
  2020-06-24 11:48 ` Robin Murphy
  2 siblings, 0 replies; 9+ messages in thread
From: Tuan Phan @ 2020-04-30 22:11 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Will Deacon, Mark Rutland, linux-arm-kernel,
	shameerali.kolothum.thodi, john.garry, harb, tuanphan,
	linux-kernel

Hi Robin,

> On Apr 8, 2020, at 9:49 AM, Robin Murphy <robin.murphy@arm.com> wrote:
> 
> IRQF_SHARED is dangerous, since it allows other agents to retarget the
> IRQ's affinity without migrating PMU contexts to match, breaking the way
> in which perf manages mutual exclusion for accessing events. Although
> this means it's not realistically possible to support PMU IRQs being
> shared with other drivers, we *can* handle sharing between multiple PMU
> instances with some explicit affinity bookkeeping and manual interrupt
> multiplexing.
> 
> RCU helps us handle interrupts efficiently without having to worry about
> fine-grained locking for relatively-theoretical race conditions with the
> probe/remove/CPU hotplug slow paths. The resulting machinery ends up
> looking largely generic, so it should be feasible to factor out with a
> "system PMU" base class for similar multi-instance drivers.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> RFC because I don't have the means to test it, and if the general
> approach passes muster then I'd want to tackle the aforementioned
> factoring-out before merging anything anyway.
> 
> Robin.
> 
> 
> drivers/perf/arm_smmuv3_pmu.c | 215 ++++++++++++++++++++++++----------
> 1 file changed, 152 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
> index d704eccc548f..8daa7ac6e801 100644
> --- a/drivers/perf/arm_smmuv3_pmu.c
> +++ b/drivers/perf/arm_smmuv3_pmu.c
> @@ -47,8 +47,11 @@
> #include <linux/kernel.h>
> #include <linux/list.h>
> #include <linux/msi.h>
> +#include <linux/mutex.h>
> #include <linux/perf_event.h>
> #include <linux/platform_device.h>
> +#include <linux/rculist.h>
> +#include <linux/refcount.h>
> #include <linux/smp.h>
> #include <linux/sysfs.h>
> #include <linux/types.h>
> @@ -98,13 +101,24 @@
> 
> static int cpuhp_state_num;
> 
> -struct smmu_pmu {
> +static LIST_HEAD(smmu_pmu_affinities);
> +static DEFINE_MUTEX(smmu_pmu_affinity_lock);
> +
> +struct smmu_pmu_affinity {
> 	struct hlist_node node;
> +	struct list_head affinity_list;
> +	struct list_head instance_list;
> +	refcount_t refcount;
> +	unsigned int irq;
> +	unsigned int cpu;
> +};
> +
> +struct smmu_pmu {
> 	struct perf_event *events[SMMU_PMCG_MAX_COUNTERS];
> 	DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS);
> 	DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS);
> -	unsigned int irq;
> -	unsigned int on_cpu;
> +	struct smmu_pmu_affinity *affinity;
> +	struct list_head affinity_list;
> 	struct pmu pmu;
> 	unsigned int num_counters;
> 	struct device *dev;
> @@ -394,7 +408,7 @@ static int smmu_pmu_event_init(struct perf_event *event)
> 	 * Ensure all events are on the same cpu so all events are in the
> 	 * same cpu context, to avoid races on pmu_enable etc.
> 	 */
> -	event->cpu = smmu_pmu->on_cpu;
> +	event->cpu = smmu_pmu->affinity->cpu;
> 
> 	return 0;
> }
> @@ -478,9 +492,10 @@ static ssize_t smmu_pmu_cpumask_show(struct device *dev,
> 				     struct device_attribute *attr,
> 				     char *buf)
> {
> -	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
> +	struct pmu *pmu = dev_get_drvdata(dev);
> +	struct smmu_pmu_affinity *aff = to_smmu_pmu(pmu)->affinity;
> 
> -	return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu));
> +	return cpumap_print_to_pagebuf(true, buf, cpumask_of(aff->cpu));
> }
> 
> static struct device_attribute smmu_pmu_cpumask_attr =
> @@ -584,50 +599,140 @@ static const struct attribute_group *smmu_pmu_attr_grps[] = {
> 
> static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> {
> -	struct smmu_pmu *smmu_pmu;
> +	struct smmu_pmu_affinity *aff;
> +	struct smmu_pmu *pmu;
> 	unsigned int target;
> 
> -	smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node);
> -	if (cpu != smmu_pmu->on_cpu)
> +	aff = hlist_entry_safe(node, struct smmu_pmu_affinity, node);
> +	if (cpu != aff->cpu)
> 		return 0;
> 
> 	target = cpumask_any_but(cpu_online_mask, cpu);
> 	if (target >= nr_cpu_ids)
> 		return 0;
> 
> -	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
> -	smmu_pmu->on_cpu = target;
> -	WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));
> +	/* We're only reading, but this isn't the place to be involving RCU */
> +	mutex_lock(&smmu_pmu_affinity_lock);
> +	list_for_each_entry(pmu, &aff->instance_list, affinity_list)
> +		perf_pmu_migrate_context(&pmu->pmu, aff->cpu, target);
> +	mutex_unlock(&smmu_pmu_affinity_lock);
> +
> +	WARN_ON(irq_set_affinity_hint(aff->irq, cpumask_of(target)));
> +	aff->cpu = target;
> 
> 	return 0;
> }
> 
> static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data)
> {
> -	struct smmu_pmu *smmu_pmu = data;
> -	u64 ovsr;
> -	unsigned int idx;
> +	struct smmu_pmu_affinity *aff = data;
> +	struct smmu_pmu *smmu_pmu;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	rcu_read_lock();
> +	list_for_each_entry_rcu(smmu_pmu, &aff->instance_list, affinity_list) {
> +		unsigned int idx;
> +		u64 ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
> 
> -	ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0);
> -	if (!ovsr)
> -		return IRQ_NONE;
> +		if (!ovsr)
> +			continue;
> 
> -	writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
> +		writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0);
> 
> -	for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
> -		struct perf_event *event = smmu_pmu->events[idx];
> -		struct hw_perf_event *hwc;
> +		for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) {
> +			struct perf_event *event = smmu_pmu->events[idx];
> 
> -		if (WARN_ON_ONCE(!event))
> -			continue;
> +			if (WARN_ON_ONCE(!event))
> +				continue;
> +
> +			smmu_pmu_event_update(event);
> +			smmu_pmu_set_period(smmu_pmu, &event->hw);
> +		}
> +		ret = IRQ_HANDLED;
> +	}
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
> +static struct smmu_pmu_affinity *__smmu_pmu_get_affinity(int irq)
> +{
> +	struct smmu_pmu_affinity *aff;
> +	int ret;
> +
> +	list_for_each_entry(aff, &smmu_pmu_affinities, affinity_list)
> +		if (aff->irq == irq && refcount_inc_not_zero(&aff->refcount))
> +			return aff;
> +
> +	aff = kzalloc(sizeof(*aff), GFP_KERNEL);
> +	if (!aff)
> +		return ERR_PTR(-ENOMEM);
> +
Do we need to initialize aff->instance_list?

> +	/* Pick one CPU to be the preferred one to use */
> +	aff->cpu = raw_smp_processor_id();
> +	refcount_set(&aff->refcount, 1);
> +
> +	ret = request_irq(irq, smmu_pmu_handle_irq,
> +			  IRQF_NOBALANCING | IRQF_NO_THREAD,
> +			  "smmuv3-pmu", aff);
Do we need to save irq to aff->irq?


* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-04-08 16:49 [RFC PATCH] perf/smmuv3: Fix shared interrupt handling Robin Murphy
  2020-04-09  7:02 ` John Garry
  2020-04-30 22:11 ` Tuan Phan
@ 2020-06-24 11:48 ` Robin Murphy
  2020-06-24 12:50   ` Will Deacon
  2 siblings, 1 reply; 9+ messages in thread
From: Robin Murphy @ 2020-06-24 11:48 UTC (permalink / raw)
  To: will, mark.rutland
  Cc: tuanphan, john.garry, linux-kernel, shameerali.kolothum.thodi,
	harb, linux-arm-kernel

Hi Will, Mark,

On 2020-04-08 17:49, Robin Murphy wrote:
> IRQF_SHARED is dangerous, since it allows other agents to retarget the
> IRQ's affinity without migrating PMU contexts to match, breaking the way
> in which perf manages mutual exclusion for accessing events. Although
> this means it's not realistically possible to support PMU IRQs being
> shared with other drivers, we *can* handle sharing between multiple PMU
> instances with some explicit affinity bookkeeping and manual interrupt
> multiplexing.
> 
> RCU helps us handle interrupts efficiently without having to worry about
> fine-grained locking for relatively-theoretical race conditions with the
> probe/remove/CPU hotplug slow paths. The resulting machinery ends up
> looking largely generic, so it should be feasible to factor out with a
> "system PMU" base class for similar multi-instance drivers.
> 
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> 
> RFC because I don't have the means to test it, and if the general
> approach passes muster then I'd want to tackle the aforementioned
> factoring-out before merging anything anyway.

Any comments on whether it's worth pursuing this?

Robin.


* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-06-24 11:48 ` Robin Murphy
@ 2020-06-24 12:50   ` Will Deacon
  2020-06-24 13:08     ` Robin Murphy
  0 siblings, 1 reply; 9+ messages in thread
From: Will Deacon @ 2020-06-24 12:50 UTC (permalink / raw)
  To: Robin Murphy
  Cc: mark.rutland, tuanphan, john.garry, linux-kernel,
	shameerali.kolothum.thodi, harb, linux-arm-kernel

On Wed, Jun 24, 2020 at 12:48:14PM +0100, Robin Murphy wrote:
> On 2020-04-08 17:49, Robin Murphy wrote:
> > IRQF_SHARED is dangerous, since it allows other agents to retarget the
> > IRQ's affinity without migrating PMU contexts to match, breaking the way
> > in which perf manages mutual exclusion for accessing events. Although
> > this means it's not realistically possible to support PMU IRQs being
> > shared with other drivers, we *can* handle sharing between multiple PMU
> > instances with some explicit affinity bookkeeping and manual interrupt
> > multiplexing.
> > 
> > RCU helps us handle interrupts efficiently without having to worry about
> > fine-grained locking for relatively-theoretical race conditions with the
> > probe/remove/CPU hotplug slow paths. The resulting machinery ends up
> > looking largely generic, so it should be feasible to factor out with a
> > "system PMU" base class for similar multi-instance drivers.
> > 
> > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > ---
> > 
> > RFC because I don't have the means to test it, and if the general
> > approach passes muster then I'd want to tackle the aforementioned
> > factoring-out before merging anything anyway.
> 
> Any comments on whether it's worth pursuing this?

Sorry, I don't really get the problem that it's solving. Is there a crash
log somewhere I can look at? If all the users of the IRQ are managed by
this driver, why is IRQF_SHARED dangerous?

Will


* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-06-24 12:50   ` Will Deacon
@ 2020-06-24 13:08     ` Robin Murphy
  2020-07-03 13:42       ` Will Deacon
  0 siblings, 1 reply; 9+ messages in thread
From: Robin Murphy @ 2020-06-24 13:08 UTC (permalink / raw)
  To: Will Deacon
  Cc: mark.rutland, tuanphan, john.garry, linux-kernel,
	shameerali.kolothum.thodi, harb, linux-arm-kernel

On 2020-06-24 13:50, Will Deacon wrote:
> On Wed, Jun 24, 2020 at 12:48:14PM +0100, Robin Murphy wrote:
>> On 2020-04-08 17:49, Robin Murphy wrote:
>>> IRQF_SHARED is dangerous, since it allows other agents to retarget the
>>> IRQ's affinity without migrating PMU contexts to match, breaking the way
>>> in which perf manages mutual exclusion for accessing events. Although
>>> this means it's not realistically possible to support PMU IRQs being
>>> shared with other drivers, we *can* handle sharing between multiple PMU
>>> instances with some explicit affinity bookkeeping and manual interrupt
>>> multiplexing.
>>>
>>> RCU helps us handle interrupts efficiently without having to worry about
>>> fine-grained locking for relatively-theoretical race conditions with the
>>> probe/remove/CPU hotplug slow paths. The resulting machinery ends up
>>> looking largely generic, so it should be feasible to factor out with a
>>> "system PMU" base class for similar multi-instance drivers.
>>>
>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>> ---
>>>
>>> RFC because I don't have the means to test it, and if the general
>>> approach passes muster then I'd want to tackle the aforementioned
>>> factoring-out before merging anything anyway.
>>
>> Any comments on whether it's worth pursuing this?
> 
> Sorry, I don't really get the problem that it's solving. Is there a crash
> log somewhere I can look at? If all the users of the IRQ are managed by
> this driver, why is IRQF_SHARED dangerous?

Because as-is, multiple PMU instances may make different choices about 
which CPU they associate with, change the shared IRQ affinity behind 
each other's backs, and break the "IRQ handler runs on event->cpu" 
assumption that perf core relies on for correctness. I'm not sure how 
likely it would be to actually crash rather than just lead to subtle 
nastiness, but either way it's not good, and since people seem to be 
tempted to wire up system PMU instances this way, we could do with a 
general approach for dealing with it.
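
To make that concrete, the pre-patch arrangement boils down to each 
instance doing roughly the following (a rough sketch, helper name made 
up):

static int sketch_setup_one(struct smmu_pmu *smmu_pmu, int irq)
{
	int ret;

	/* Each instance independently picks its own preferred CPU... */
	smmu_pmu->on_cpu = raw_smp_processor_id();

	ret = devm_request_irq(smmu_pmu->dev, irq, smmu_pmu_handle_irq,
			       IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD,
			       "smmuv3-pmu", smmu_pmu);
	if (ret)
		return ret;

	/*
	 * ...then retargets the shared line for *all* sharers, so earlier
	 * instances' events are left on a CPU that no longer receives the
	 * interrupt.
	 */
	return irq_set_affinity_hint(irq, cpumask_of(smmu_pmu->on_cpu));
}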

Robin.


* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-06-24 13:08     ` Robin Murphy
@ 2020-07-03 13:42       ` Will Deacon
  2020-07-03 14:42         ` Robin Murphy
  0 siblings, 1 reply; 9+ messages in thread
From: Will Deacon @ 2020-07-03 13:42 UTC (permalink / raw)
  To: Robin Murphy
  Cc: mark.rutland, tuanphan, john.garry, linux-kernel,
	shameerali.kolothum.thodi, harb, linux-arm-kernel

On Wed, Jun 24, 2020 at 02:08:30PM +0100, Robin Murphy wrote:
> On 2020-06-24 13:50, Will Deacon wrote:
> > On Wed, Jun 24, 2020 at 12:48:14PM +0100, Robin Murphy wrote:
> > > On 2020-04-08 17:49, Robin Murphy wrote:
> > > > IRQF_SHARED is dangerous, since it allows other agents to retarget the
> > > > IRQ's affinity without migrating PMU contexts to match, breaking the way
> > > > in which perf manages mutual exclusion for accessing events. Although
> > > > this means it's not realistically possible to support PMU IRQs being
> > > > shared with other drivers, we *can* handle sharing between multiple PMU
> > > > instances with some explicit affinity bookkeeping and manual interrupt
> > > > multiplexing.
> > > > 
> > > > RCU helps us handle interrupts efficiently without having to worry about
> > > > fine-grained locking for relatively-theoretical race conditions with the
> > > > probe/remove/CPU hotplug slow paths. The resulting machinery ends up
> > > > looking largely generic, so it should be feasible to factor out with a
> > > > "system PMU" base class for similar multi-instance drivers.
> > > > 
> > > > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > > > ---
> > > > 
> > > > RFC because I don't have the means to test it, and if the general
> > > > approach passes muster then I'd want to tackle the aforementioned
> > > > factoring-out before merging anything anyway.
> > > 
> > > Any comments on whether it's worth pursuing this?
> > 
> > Sorry, I don't really get the problem that it's solving. Is there a crash
> > log somewhere I can look at? If all the users of the IRQ are managed by
> > this driver, why is IRQF_SHARED dangerous?
> 
> Because as-is, multiple PMU instances may make different choices about which
> CPU they associate with, change the shared IRQ affinity behind each other's
> backs, and break the "IRQ handler runs on event->cpu" assumption that perf
> core relies on for correctness. I'm not sure how likely it would be to
> actually crash rather than just lead to subtle nastiness, but either way
> it's not good, and since people seem to be tempted to wire up system PMU
> instances this way, we could do with a general approach for dealing with it.

Ok, thanks for the explanation. If we're just talking about multiple
instances of the same driver, why is it not sufficient to have a static
atomic_t initialised to -1 which tracks the current affinity and then just
CAS that during probe()? Hotplug notifiers can just check whether or not
it points to an online CPU.
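
Something like this, say (just a sketch, names invented):

static atomic_t smmu_pmu_target_cpu = ATOMIC_INIT(-1);

/* First instance to probe claims a CPU; later instances reuse it */
static int smmu_pmu_claim_cpu(void)
{
	int cpu = raw_smp_processor_id();
	int old = atomic_cmpxchg(&smmu_pmu_target_cpu, -1, cpu);

	return old == -1 ? cpu : old;
}

The hotplug callback would then only need to update the atomic (and 
migrate the contexts and IRQ affinity) when the CPU it points to goes 
offline.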

Will


* Re: [RFC PATCH] perf/smmuv3: Fix shared interrupt handling
  2020-07-03 13:42       ` Will Deacon
@ 2020-07-03 14:42         ` Robin Murphy
  0 siblings, 0 replies; 9+ messages in thread
From: Robin Murphy @ 2020-07-03 14:42 UTC (permalink / raw)
  To: Will Deacon
  Cc: mark.rutland, tuanphan, john.garry, linux-kernel,
	shameerali.kolothum.thodi, harb, linux-arm-kernel

On 2020-07-03 14:42, Will Deacon wrote:
> On Wed, Jun 24, 2020 at 02:08:30PM +0100, Robin Murphy wrote:
>> On 2020-06-24 13:50, Will Deacon wrote:
>>> On Wed, Jun 24, 2020 at 12:48:14PM +0100, Robin Murphy wrote:
>>>> On 2020-04-08 17:49, Robin Murphy wrote:
>>>>> IRQF_SHARED is dangerous, since it allows other agents to retarget the
>>>>> IRQ's affinity without migrating PMU contexts to match, breaking the way
>>>>> in which perf manages mutual exclusion for accessing events. Although
>>>>> this means it's not realistically possible to support PMU IRQs being
>>>>> shared with other drivers, we *can* handle sharing between multiple PMU
>>>>> instances with some explicit affinity bookkeeping and manual interrupt
>>>>> multiplexing.
>>>>>
>>>>> RCU helps us handle interrupts efficiently without having to worry about
>>>>> fine-grained locking for relatively-theoretical race conditions with the
>>>>> probe/remove/CPU hotplug slow paths. The resulting machinery ends up
>>>>> looking largely generic, so it should be feasible to factor out with a
>>>>> "system PMU" base class for similar multi-instance drivers.
>>>>>
>>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>>>> ---
>>>>>
>>>>> RFC because I don't have the means to test it, and if the general
>>>>> approach passes muster then I'd want to tackle the aforementioned
>>>>> factoring-out before merging anything anyway.
>>>>
>>>> Any comments on whether it's worth pursuing this?
>>>
>>> Sorry, I don't really get the problem that it's solving. Is there a crash
>>> log somewhere I can look at? If all the users of the IRQ are managed by
>>> this driver, why is IRQF_SHARED dangerous?
>>
>> Because as-is, multiple PMU instances may make different choices about which
>> CPU they associate with, change the shared IRQ affinity behind each other's
>> backs, and break the "IRQ handler runs on event->cpu" assumption that perf
>> core relies on for correctness. I'm not sure how likely it would be to
>> actually crash rather than just lead to subtle nastiness, but either way
>> it's not good, and since people seem to be tempted to wire up system PMU
>> instances this way, we could do with a general approach for dealing with it.
> 
> Ok, thanks for the explanation. If we're just talking about multiple
> instances of the same driver, why is it not sufficient to have a static
> atomic_t initialised to -1 which tracks the current affinity and then just
> CAS that during probe()? Hotplug notifiers can just check whether or not
> it points to an online CPU.

Yeah, forcing *all* PMUs owned by a driver to be affine to the same CPU 
is another way to go about it; however, it slightly penalises systems 
that are wired up sensibly and *would* otherwise be able to distribute 
non-shared affinities around in a balanced manner (optimising the 
initial pmu->cpu selection in the face of NUMA is an exercise still on 
the table in some cases).

And we'd still have to have all the "has another instance already 
requested this IRQ or not yet?" logic (the general condition is "1 <= 
number of IRQs <= number of PMUs"), plus some way for the global 
affinity to migrate all the PMU contexts and IRQs at once in a 
controlled and race-free manner, so things wouldn't be *massively* 
simpler even then.

Robin.
