linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4] ARM: imx: Added perf functionality to mmdc driver
@ 2016-08-31 17:00 Zhengyu Shen
  2016-09-05  2:34 ` Shawn Guo
  2016-09-05 14:10 ` Mark Rutland
  0 siblings, 2 replies; 3+ messages in thread
From: Zhengyu Shen @ 2016-08-31 17:00 UTC (permalink / raw)
  To: shawnguo
  Cc: frank.li, linux-arm-kernel, linux-kernel, peterz, mingo, acme,
	alexander.shishkin, lznuaa, mark.rutland, Zhengyu Shen

MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64
and LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high
performance, and optimized. MMDC is present on i.MX6 Quad and i.MX6
QuadPlus devices, but this driver only supports i.MX6 Quad at the moment.
MMDC provides registers for performance counters which read via this
driver to help debug memory throughput and similar issues.

$ perf stat -a -e mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd if=/dev/zero of=/dev/null bs=1M count=5000
Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':

         898021787      mmdc/busy-cycles/
          14819600      mmdc/read-accesses/
            471.30 MB   mmdc/read-bytes/
        2815419216      mmdc/total-cycles/
          13367354      mmdc/write-accesses/
            427.76 MB   mmdc/write-bytes/

       5.334757334 seconds time elapsed

Signed-off-by: Zhengyu Shen <zhengyu.shen@nxp.com>
Signed-off-by: Frank Li <frank.li@nxp.com>
---
Changes from v3 to v4:
    Tested and fixed crash relating to removing events with perf fuzzer
    Adjusted formatting
    Moved all perf event code under CONFIG_PERF_EVENTS
	Switched cpuhp_setup_state to cpuhp_setup_state_nocalls

Changes from v2 to v3:
    Use WARN_ONCE instead of returning generic error values
    Replace CPU Notifiers with newer state machine hotplug
    Added additional checks on event_init for grouping and sampling
    Remove useless mmdc_enable_profiling function
    Added comments
    Moved start index of events from 0x01 to 0x00
    Added a counter to pmu_mmdc to only stop hrtimer after all events are finished
    Replace readl_relaxed and writel_relaxed with readl and writel
    Removed duplicate update function
    Used devm_kasprintf when naming mmdcs probed

Changes from v1 to v2:
    Added cpumask and migration handling support to driver
    Validated event during event_init
    Added code to properly stop counters
    Used perf_invalid_context instead of perf_sw_context
    Added hrtimer to poll for overflow
    Added better description
    Added support for multiple mmdcs

 arch/arm/mach-imx/mmdc.c | 423 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 421 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
index db9621c..77357c3 100644
--- a/arch/arm/mach-imx/mmdc.c
+++ b/arch/arm/mach-imx/mmdc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011 Freescale Semiconductor, Inc.
+ * Copyright 2011,2016 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -10,12 +10,17 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 
+#include <linux/hrtimer.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 
 #include "common.h"
 
@@ -27,8 +32,421 @@
 #define BM_MMDC_MDMISC_DDR_TYPE	0x18
 #define BP_MMDC_MDMISC_DDR_TYPE	0x3
 
+#define TOTAL_CYCLES		0x0
+#define BUSY_CYCLES		0x1
+#define READ_ACCESSES		0x2
+#define WRITE_ACCESSES		0x3
+#define READ_BYTES		0x4
+#define WRITE_BYTES		0x5
+
+/* Enables, resets, freezes, overflow profiling*/
+#define DBG_DIS			0x0
+#define DBG_EN			0x1
+#define DBG_RST			0x2
+#define PRF_FRZ			0x4
+#define CYC_OVF			0x8
+
+#define MMDC_MADPCR0	0x410
+#define MMDC_MADPSR0	0x418
+#define MMDC_MADPSR1	0x41C
+#define MMDC_MADPSR2	0x420
+#define MMDC_MADPSR3	0x424
+#define MMDC_MADPSR4	0x428
+#define MMDC_MADPSR5	0x42C
+
+#define MMDC_NUM_COUNTERS	6
+
+#define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu)
+
 static int ddr_type;
 
+#ifdef CONFIG_PERF_EVENTS
+
+static DEFINE_IDA(mmdc_ida);
+
+PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x00")
+PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x01")
+PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x02")
+PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "config=0x03")
+PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x04")
+PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.000001");
+PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x05")
+PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
+PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.000001");
+
+struct mmdc_pmu {
+	struct pmu pmu;
+	void __iomem *mmdc_base;
+	cpumask_t cpu;
+	struct hrtimer hrtimer;
+	unsigned int irq;
+	unsigned int active_events;
+	struct device *dev;
+	struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
+	spinlock_t mmdc_active_events_lock;
+};
+
+static struct mmdc_pmu *cpuhp_mmdc_pmu;
+
+/*
+ * Polling period is set to one second, overflow of total-cycles (the fastest
+ * increasing counter) takes ten seconds so one second is safe
+ */
+static unsigned int mmdc_poll_period_us = 1000000;
+
+module_param_named(pmu_poll_period_us, mmdc_poll_period_us, uint,
+		S_IRUGO | S_IWUSR);
+
+static ktime_t mmdc_timer_period(void)
+{
+	return ns_to_ktime((u64)mmdc_poll_period_us * 1000);
+}
+
+static ssize_t mmdc_cpumask_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmdc_pmu *pmu_mmdc = dev_get_drvdata(dev);
+
+	return cpumap_print_to_pagebuf(true, buf, &pmu_mmdc->cpu);
+}
+
+static struct device_attribute mmdc_cpumask_attr =
+__ATTR(cpumask, S_IRUGO, mmdc_cpumask_show, NULL);
+
+static struct attribute *mmdc_cpumask_attrs[] = {
+	&mmdc_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group mmdc_cpumask_attr_group = {
+	.attrs = mmdc_cpumask_attrs,
+};
+
+static struct attribute *mmdc_events_attrs[] = {
+	&mmdc_total_cycles.attr.attr,
+	&mmdc_busy_cycles.attr.attr,
+	&mmdc_read_accesses.attr.attr,
+	&mmdc_write_accesses.attr.attr,
+	&mmdc_read_bytes.attr.attr,
+	&mmdc_read_bytes_unit.attr.attr,
+	&mmdc_read_bytes_scale.attr.attr,
+	&mmdc_write_bytes.attr.attr,
+	&mmdc_write_bytes_unit.attr.attr,
+	&mmdc_write_bytes_scale.attr.attr,
+	NULL,
+};
+
+static struct attribute_group mmdc_events_attr_group = {
+	.name = "events",
+	.attrs = mmdc_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+static struct attribute *mmdc_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group mmdc_format_attr_group = {
+	.name = "format",
+	.attrs = mmdc_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+	&mmdc_events_attr_group,
+	&mmdc_format_attr_group,
+	&mmdc_cpumask_attr_group,
+	NULL,
+};
+
+static u32 mmdc_read_counter(struct mmdc_pmu *pmu_mmdc, int cfg)
+{
+	void __iomem *mmdc_base, *reg;
+
+	mmdc_base = pmu_mmdc->mmdc_base;
+
+	switch (cfg) {
+	case TOTAL_CYCLES:
+		reg = mmdc_base + MMDC_MADPSR0;
+		break;
+	case BUSY_CYCLES:
+		reg = mmdc_base + MMDC_MADPSR1;
+		break;
+	case READ_ACCESSES:
+		reg = mmdc_base + MMDC_MADPSR2;
+		break;
+	case WRITE_ACCESSES:
+		reg = mmdc_base + MMDC_MADPSR3;
+		break;
+	case READ_BYTES:
+		reg = mmdc_base + MMDC_MADPSR4;
+		break;
+	case WRITE_BYTES:
+		reg = mmdc_base + MMDC_MADPSR5;
+		break;
+	default:
+		return WARN_ONCE(1,
+			"invalid configuration %d for mmdc counter", cfg);
+	}
+	return readl(reg);
+}
+
+static int mmdc_pmu_offline_cpu(unsigned int cpu)
+{
+	struct mmdc_pmu *pmu_mmdc = cpuhp_mmdc_pmu;
+	int target;
+
+	if (!cpumask_test_and_clear_cpu(cpu, &pmu_mmdc->cpu))
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&pmu_mmdc->pmu, cpu, target);
+	cpumask_set_cpu(target, &pmu_mmdc->cpu);
+	if (pmu_mmdc->irq)
+		WARN_ON(irq_set_affinity_hint(pmu_mmdc->irq, &pmu_mmdc->cpu) != 0);
+
+	return 0;
+}
+
+static int mmdc_event_init(struct perf_event *event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = event->attr.config;
+	struct perf_event *sibling;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (event->cpu < 0) {
+		dev_warn(pmu_mmdc->dev, "Can't provide per-task data!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->attr.exclude_user		||
+			event->attr.exclude_kernel	||
+			event->attr.exclude_hv		||
+			event->attr.exclude_idle	||
+			event->attr.exclude_host	||
+			event->attr.exclude_guest	||
+			event->attr.sample_period)
+		return -EINVAL;
+
+	if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS)
+		return -EINVAL;
+
+	if (event->group_leader->pmu != event->pmu &&
+			!is_software_event(event->group_leader))
+		return -EINVAL;
+
+	list_for_each_entry(sibling, &event->group_leader->sibling_list,
+			group_entry)
+		if (sibling->pmu != event->pmu &&
+				!is_software_event(sibling))
+			return -EINVAL;
+
+	event->cpu = cpumask_first(&pmu_mmdc->cpu);
+	return 0;
+}
+
+static void mmdc_event_update(struct perf_event *event)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	u32 val;
+	u64 prev_val;
+
+	prev_val = local64_read(&event->count);
+	val = mmdc_read_counter(pmu_mmdc, (int) event->attr.config);
+	local64_add(val - (u32)(prev_val & 0xFFFFFFFF), &event->count);
+}
+
+static void mmdc_event_start(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	void __iomem *mmdc_base, *reg;
+
+	mmdc_base = pmu_mmdc->mmdc_base;
+	reg = mmdc_base + MMDC_MADPCR0;
+
+	/*
+	 * hrtimer is required because mmdc does not provide an interrupt so
+	 * polling is necessary
+	 */
+	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
+			HRTIMER_MODE_REL_PINNED);
+
+	writel(DBG_RST, reg);
+	writel(DBG_EN, reg);
+}
+
+static int mmdc_event_add(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = (int)event->attr.config;
+
+	if (WARN_ONCE((cfg < 0 || cfg >= MMDC_NUM_COUNTERS),
+				"invalid configuration %d for mmdc", cfg))
+		return -1;
+
+	local64_set(&event->count, 0);
+	if (flags & PERF_EF_START)
+		mmdc_event_start(event, flags);
+
+	spin_lock(&pmu_mmdc->mmdc_active_events_lock);
+	pmu_mmdc->mmdc_events[cfg] = event;
+	pmu_mmdc->active_events++;
+	spin_unlock(&pmu_mmdc->mmdc_active_events_lock);
+
+	return 0;
+}
+
+static void mmdc_event_stop(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	void __iomem *mmdc_base, *reg;
+	int cfg = (int)event->attr.config;
+
+	mmdc_base = pmu_mmdc->mmdc_base;
+	reg = mmdc_base + MMDC_MADPCR0;
+
+	if (WARN_ONCE((cfg < 0 || cfg >= MMDC_NUM_COUNTERS),
+				"invalid configuration %d for mmdc counter", cfg))
+		return;
+
+	spin_lock(&pmu_mmdc->mmdc_active_events_lock);
+	if (pmu_mmdc->active_events <= 0)
+		hrtimer_cancel(&pmu_mmdc->hrtimer);
+	spin_unlock(&pmu_mmdc->mmdc_active_events_lock);
+
+	writel(PRF_FRZ, reg);
+	mmdc_event_update(event);
+}
+
+static void mmdc_event_del(struct perf_event *event, int flags)
+{
+	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
+	int cfg = (int)event->attr.config;
+
+	spin_lock(&pmu_mmdc->mmdc_active_events_lock);
+	pmu_mmdc->mmdc_events[cfg] = NULL;
+	pmu_mmdc->active_events--;
+	spin_unlock(&pmu_mmdc->mmdc_active_events_lock);
+
+	mmdc_event_stop(event, PERF_EF_UPDATE);
+}
+
+static void mmdc_overflow_handler(struct mmdc_pmu *pmu_mmdc)
+{
+	int i;
+
+	for (i = 0; i < MMDC_NUM_COUNTERS; i++) {
+		struct perf_event *event = pmu_mmdc->mmdc_events[i];
+
+		if (event)
+			mmdc_event_update(event);
+	}
+}
+
+static enum hrtimer_restart mmdc_timer_handler(struct hrtimer *hrtimer)
+{
+	struct mmdc_pmu *pmu_mmdc = container_of(hrtimer, struct mmdc_pmu,
+			hrtimer);
+
+	mmdc_overflow_handler(pmu_mmdc);
+	hrtimer_forward_now(hrtimer, mmdc_timer_period());
+
+	return HRTIMER_RESTART;
+}
+
+static int mmdc_pmu_init(struct mmdc_pmu *pmu_mmdc,
+		void __iomem *mmdc_base, struct device *dev)
+{
+	int mmdc_num;
+
+	*pmu_mmdc = (struct mmdc_pmu) {
+		.pmu = (struct pmu) {
+			.task_ctx_nr    = perf_invalid_context,
+			.attr_groups    = attr_groups,
+			.event_init     = mmdc_event_init,
+			.add            = mmdc_event_add,
+			.del            = mmdc_event_del,
+			.start          = mmdc_event_start,
+			.stop           = mmdc_event_stop,
+			.read           = mmdc_event_update,
+		},
+		.mmdc_base = mmdc_base,
+		.dev = dev,
+		.active_events = 0,
+	};
+
+	mmdc_num = ida_simple_get(&mmdc_ida, 0, 0, GFP_KERNEL);
+	cpumask_set_cpu(smp_processor_id(), &pmu_mmdc->cpu);
+	spin_lock_init(&pmu_mmdc->mmdc_active_events_lock);
+
+	cpuhp_mmdc_pmu = pmu_mmdc;
+	cpuhp_setup_state_nocalls(CPUHP_ONLINE,
+			"MMDC_ONLINE", NULL,
+			mmdc_pmu_offline_cpu);
+
+	return mmdc_num;
+}
+
+static int imx_mmdc_remove(struct platform_device *pdev)
+{
+	struct mmdc_pmu *pmu_mmdc = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pmu_mmdc->pmu);
+	cpuhp_remove_state_nocalls(CPUHP_ONLINE);
+	cpuhp_mmdc_pmu = NULL;
+	kfree(pmu_mmdc);
+	return 0;
+}
+
+static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base)
+{
+	struct mmdc_pmu *pmu_mmdc;
+	char *name;
+	int mmdc_num;
+	int ret;
+
+	pmu_mmdc = kzalloc(sizeof(*pmu_mmdc), GFP_KERNEL);
+	if (!pmu_mmdc) {
+		pr_err("failed to allocate PMU device!\n");
+		return -ENOMEM;
+	}
+
+	mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
+	if (mmdc_num == 0)
+		name = "mmdc";
+	else
+		name = devm_kasprintf(&pdev->dev,
+				GFP_KERNEL, "mmdc_%d", mmdc_num);
+
+	hrtimer_init(&pmu_mmdc->hrtimer, CLOCK_MONOTONIC,
+			HRTIMER_MODE_REL);
+	pmu_mmdc->hrtimer.function = mmdc_timer_handler;
+
+	ret = perf_pmu_register(&(pmu_mmdc->pmu), name, -1);
+	platform_set_drvdata(pdev, pmu_mmdc);
+	if (ret)
+		goto pmu_register_err;
+	return 0;
+
+pmu_register_err:
+	pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret);
+	hrtimer_cancel(&pmu_mmdc->hrtimer);
+	kfree(pmu_mmdc);
+	return ret;
+}
+
+#else
+#define imx_mmdc_remove NULL
+#define imx_mmdc_perf_init(pdev, mmdc_base) 0
+#endif
+
 static int imx_mmdc_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -62,7 +480,7 @@ static int imx_mmdc_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	return 0;
+	return imx_mmdc_perf_init(pdev, mmdc_base);
 }
 
 int imx_mmdc_get_ddr_type(void)
@@ -81,6 +499,7 @@ static struct platform_driver imx_mmdc_driver = {
 		.of_match_table = imx_mmdc_dt_ids,
 	},
 	.probe		= imx_mmdc_probe,
+	.remove		= imx_mmdc_remove,
 };
 
 static int __init imx_mmdc_init(void)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v4] ARM: imx: Added perf functionality to mmdc driver
  2016-08-31 17:00 [PATCH v4] ARM: imx: Added perf functionality to mmdc driver Zhengyu Shen
@ 2016-09-05  2:34 ` Shawn Guo
  2016-09-05 14:10 ` Mark Rutland
  1 sibling, 0 replies; 3+ messages in thread
From: Shawn Guo @ 2016-09-05  2:34 UTC (permalink / raw)
  To: Zhengyu Shen
  Cc: mark.rutland, peterz, frank.li, linux-kernel, acme,
	alexander.shishkin, mingo, lznuaa, linux-arm-kernel

On Wed, Aug 31, 2016 at 12:00:29PM -0500, Zhengyu Shen wrote:
> MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64
> and LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high
> performance, and optimized. MMDC is present on i.MX6 Quad and i.MX6
> QuadPlus devices, but this driver only supports i.MX6 Quad at the moment.
> MMDC provides registers for performance counters which read via this
> driver to help debug memory throughput and similar issues.
> 
> $ perf stat -a -e mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd if=/dev/zero of=/dev/null bs=1M count=5000
> Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':
> 
>          898021787      mmdc/busy-cycles/
>           14819600      mmdc/read-accesses/
>             471.30 MB   mmdc/read-bytes/
>         2815419216      mmdc/total-cycles/
>           13367354      mmdc/write-accesses/
>             427.76 MB   mmdc/write-bytes/
> 
>        5.334757334 seconds time elapsed
> 
> Signed-off-by: Zhengyu Shen <zhengyu.shen@nxp.com>
> Signed-off-by: Frank Li <frank.li@nxp.com>

I'm fine with this version, but still want a Reviewed-by tag from Mark
to apply the patch.

Shawn

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v4] ARM: imx: Added perf functionality to mmdc driver
  2016-08-31 17:00 [PATCH v4] ARM: imx: Added perf functionality to mmdc driver Zhengyu Shen
  2016-09-05  2:34 ` Shawn Guo
@ 2016-09-05 14:10 ` Mark Rutland
  1 sibling, 0 replies; 3+ messages in thread
From: Mark Rutland @ 2016-09-05 14:10 UTC (permalink / raw)
  To: Zhengyu Shen
  Cc: shawnguo, frank.li, linux-arm-kernel, linux-kernel, peterz,
	mingo, acme, alexander.shishkin, lznuaa

Hi,

On Wed, Aug 31, 2016 at 12:00:29PM -0500, Zhengyu Shen wrote:
> MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64
> and LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high
> performance, and optimized. MMDC is present on i.MX6 Quad and i.MX6
> QuadPlus devices, but this driver only supports i.MX6 Quad at the moment.
> MMDC provides registers for performance counters which read via this
> driver to help debug memory throughput and similar issues.

This is largely looking better, but there a few issues which remain,
some of which are major (e.g. erroneous use of perf_event::count, which
will result in misleading output). Full details below.

> +PMU_EVENT_ATTR_STRING(total-cycles, mmdc_total_cycles, "event=0x00")
> +PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_busy_cycles, "event=0x01")
> +PMU_EVENT_ATTR_STRING(read-accesses, mmdc_read_accesses, "event=0x02")
> +PMU_EVENT_ATTR_STRING(write-accesses, mmdc_write_accesses, "config=0x03")
> +PMU_EVENT_ATTR_STRING(read-bytes, mmdc_read_bytes, "event=0x04")
> +PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_read_bytes_unit, "MB");
> +PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_read_bytes_scale, "0.000001");
> +PMU_EVENT_ATTR_STRING(write-bytes, mmdc_write_bytes, "event=0x05")
> +PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_write_bytes_unit, "MB");
> +PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_write_bytes_scale, "0.000001");
> +
> +struct mmdc_pmu {
> +	struct pmu pmu;
> +	void __iomem *mmdc_base;
> +	cpumask_t cpu;
> +	struct hrtimer hrtimer;
> +	unsigned int irq;

As mmdc_pmu::irq is never set, it's pointless. Please remove it.

> +	unsigned int active_events;
> +	struct device *dev;
> +	struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
> +	spinlock_t mmdc_active_events_lock;

The core code serialises pmu::{add,del,start,stop} by taking ctx->lock
and disabling IRQs, so you shouldn't need to enforce that locally.
Please remove this lock.

[...]

> +static struct mmdc_pmu *cpuhp_mmdc_pmu;

Elsewhere in the driver you use IDA, implying there can be multiple
instances. So this is insufficient. Use the new multi-instance support
[1], as we have for the CCN [2]. That way you don't need a global
singleton.

[...]

> +static int mmdc_pmu_offline_cpu(unsigned int cpu)
> +{
> +	struct mmdc_pmu *pmu_mmdc = cpuhp_mmdc_pmu;
> +	int target;
> +
> +	if (!cpumask_test_and_clear_cpu(cpu, &pmu_mmdc->cpu))
> +		return 0;
> +
> +	target = cpumask_any_but(cpu_online_mask, cpu);
> +	if (target >= nr_cpu_ids)
> +		return 0;
> +
> +	perf_pmu_migrate_context(&pmu_mmdc->pmu, cpu, target);
> +	cpumask_set_cpu(target, &pmu_mmdc->cpu);
> +	if (pmu_mmdc->irq)
> +		WARN_ON(irq_set_affinity_hint(pmu_mmdc->irq, &pmu_mmdc->cpu) != 0);

As mmdc_pmu::irq is never set, I think we can get rid of the IRQ
manipulation logic here, and just have the context migration.

> +
> +	return 0;
> +}
> +
> +static int mmdc_event_init(struct perf_event *event)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	int cfg = event->attr.config;
> +	struct perf_event *sibling;
> +
> +	if (event->attr.type != event->pmu->type)
> +		return -ENOENT;
> +
> +	if (event->cpu < 0) {
> +		dev_warn(pmu_mmdc->dev, "Can't provide per-task data!\n");
> +		return -EOPNOTSUPP;
> +	}

Please also check is_sampling_event(event) and event->attach_state. See
the l2x0 driver [3].

> +
> +	if (event->attr.exclude_user		||
> +			event->attr.exclude_kernel	||
> +			event->attr.exclude_hv		||
> +			event->attr.exclude_idle	||
> +			event->attr.exclude_host	||
> +			event->attr.exclude_guest	||
> +			event->attr.sample_period)
> +		return -EINVAL;
> +
> +	if (cfg < 0 || cfg >= MMDC_NUM_COUNTERS)
> +		return -EINVAL;

Do you ever plan to use the other config fields in future? It might be
worth sanity-checking that they are all zero.

> +	if (event->group_leader->pmu != event->pmu &&
> +			!is_software_event(event->group_leader))
> +		return -EINVAL;
> +
> +	list_for_each_entry(sibling, &event->group_leader->sibling_list,
> +			group_entry)
> +		if (sibling->pmu != event->pmu &&
> +				!is_software_event(sibling))
> +			return -EINVAL;

Given that several expressions are split over multiple lines, this would
be clearer with braces around the loop.

You should also ensure that the group is possible to schedule (i.e.
doesn't require more counters than the HW has). If all counters are
equivalent, that should just involve counting how many HW events the
group has. See the l2x0 driver [3] for an example.

> +
> +	event->cpu = cpumask_first(&pmu_mmdc->cpu);
> +	return 0;
> +}
> +
> +static void mmdc_event_update(struct perf_event *event)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	u32 val;
> +	u64 prev_val;
> +
> +	prev_val = local64_read(&event->count);
> +	val = mmdc_read_counter(pmu_mmdc, (int) event->attr.config);

That cast shouldn't be necessary.

> +	local64_add(val - (u32)(prev_val & 0xFFFFFFFF), &event->count);
> +}

This isn't right. You should be using hw_perf_event::prev_count to hold
the shadowed hardware counter value. The event::count field is an
aggregate, so you can't rely on the lower 32 bits in this way.

> +static void mmdc_event_start(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	void __iomem *mmdc_base, *reg;
> +
> +	mmdc_base = pmu_mmdc->mmdc_base;
> +	reg = mmdc_base + MMDC_MADPCR0;
> +
> +	/*
> +	 * hrtimer is required because mmdc does not provide an interrupt so
> +	 * polling is necessary
> +	 */
> +	hrtimer_start(&pmu_mmdc->hrtimer, mmdc_timer_period(),
> +			HRTIMER_MODE_REL_PINNED);

Do this in mmdc_event_add() where you manipulate the count. That will
make things easier to follow.

> +
> +	writel(DBG_RST, reg);

You should update the shadow value (hw_perf_event::prev_count) here.

> +	writel(DBG_EN, reg);
> +}
> +
> +static int mmdc_event_add(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	int cfg = (int)event->attr.config;

That cast shouldn't be necessary.

> +
> +	if (WARN_ONCE((cfg < 0 || cfg >= MMDC_NUM_COUNTERS),
> +				"invalid configuration %d for mmdc", cfg))
> +		return -1;
> +
> +	local64_set(&event->count, 0);

This is wrong. The count is supposed to be the aggregate count so far,
and should *never* be reset. As with the above comments, use
hw_perf_event::prev_count to hold a shadow count, and update this
whenever the HW is actually updated. i.e. zero it in mmdc_event_start()
where you actually zero the counter.

> +	if (flags & PERF_EF_START)
> +		mmdc_event_start(event, flags);
> +
> +	spin_lock(&pmu_mmdc->mmdc_active_events_lock);
> +	pmu_mmdc->mmdc_events[cfg] = event;
> +	pmu_mmdc->active_events++;
> +	spin_unlock(&pmu_mmdc->mmdc_active_events_lock);
> +
> +	return 0;
> +}
> +
> +static void mmdc_event_stop(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	void __iomem *mmdc_base, *reg;
> +	int cfg = (int)event->attr.config;

That cast shouldn't be necessary.

> +
> +	mmdc_base = pmu_mmdc->mmdc_base;
> +	reg = mmdc_base + MMDC_MADPCR0;
> +
> +	if (WARN_ONCE((cfg < 0 || cfg >= MMDC_NUM_COUNTERS),
> +				"invalid configuration %d for mmdc counter", cfg))
> +		return;
> +
> +	spin_lock(&pmu_mmdc->mmdc_active_events_lock);
> +	if (pmu_mmdc->active_events <= 0)
> +		hrtimer_cancel(&pmu_mmdc->hrtimer);
> +	spin_unlock(&pmu_mmdc->mmdc_active_events_lock);

Do this in mmdc_event_del() where you decrement the count.

> +
> +	writel(PRF_FRZ, reg);
> +	mmdc_event_update(event);
> +}
> +
> +static void mmdc_event_del(struct perf_event *event, int flags)
> +{
> +	struct mmdc_pmu *pmu_mmdc = to_mmdc_pmu(event->pmu);
> +	int cfg = (int)event->attr.config;

That cast shouldn't be necessary.

[...]

> +static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base)
> +{
> +	struct mmdc_pmu *pmu_mmdc;
> +	char *name;
> +	int mmdc_num;
> +	int ret;
> +
> +	pmu_mmdc = kzalloc(sizeof(*pmu_mmdc), GFP_KERNEL);
> +	if (!pmu_mmdc) {
> +		pr_err("failed to allocate PMU device!\n");
> +		return -ENOMEM;
> +	}
> +
> +	mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev);
> +	if (mmdc_num == 0)
> +		name = "mmdc";
> +	else
> +		name = devm_kasprintf(&pdev->dev,
> +				GFP_KERNEL, "mmdc_%d", mmdc_num);

Please use "mmdc%d" consistently.

Thanks,
Mark.

[1] https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/log/?h=smp/hotplug
[2] https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git/commit/?h=smp/hotplug&id=8df038725ad5351a9730759e0a24a5c5d96be661
[3] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-August/452843.html

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-09-05 14:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-31 17:00 [PATCH v4] ARM: imx: Added perf functionality to mmdc driver Zhengyu Shen
2016-09-05  2:34 ` Shawn Guo
2016-09-05 14:10 ` Mark Rutland

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).