All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual event read-bytes/write-bytes
@ 2019-07-09  5:46 Joakim Zhang
  2019-07-09 15:56 ` Will Deacon
  0 siblings, 1 reply; 4+ messages in thread
From: Joakim Zhang @ 2019-07-09  5:46 UTC (permalink / raw)
  To: will, mark.rutland, Frank Li
  Cc: linux-arm-kernel, dl-linux-imx, kernel, Joakim Zhang

We can calculate DDR bandwidth via the virtual events
read-bytes/write-bytes, which are based on the DDR burst width and
actually share the hardware events read-cycles/write-cycles. The burst
width is 32 bits on i.MX8 boards.

The DDR interface generates 2 rising edges and 2 falling edges in one
internal clock cycle, so it can transfer 4 beats of data per cycle. Each
beat carries 4 bytes when the DDR burst width is 32 bits, so every
counted read/write cycle accounts for 4 * 4 = 16 bytes.

for example:
perf stat -a -e imx8_ddr0/read-bytes/,imx8_ddr0/write-bytes/ ls

Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 96 ++++++++++++++++++++------------
 1 file changed, 61 insertions(+), 35 deletions(-)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 63fe21600072..e7dbaf4d2387 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -88,46 +88,53 @@ ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
 	struct perf_pmu_events_attr *pmu_attr;
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sprintf(page, "%s", pmu_attr->event_str);
 }
 
-#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)				\
+#define IMX8_DDR_PMU_EVENT_ATTR(_name, _str)				\
 	(&((struct perf_pmu_events_attr[]) {				\
 		{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
-		  .id = _id, }						\
+		  .id = 0,						\
+		  .event_str = _str, }					\
 	})[0].attr.attr)
 
 static struct attribute *ddr_perf_events_attrs[] = {
-	IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
-	IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01),
-	IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04),
-	IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05),
-	IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08),
-	IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09),
-	IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10),
-	IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11),
-	IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12),
-	IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20),
-	IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21),
-	IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22),
-	IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23),
-	IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24),
-	IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25),
-	IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26),
-	IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27),
-	IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29),
-	IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a),
-	IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b),
-	IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30),
-	IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31),
-	IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32),
-	IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33),
-	IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34),
-	IMX8_DDR_PMU_EVENT_ATTR(read, 0x35),
-	IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36),
-	IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
-	IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
-	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
+	IMX8_DDR_PMU_EVENT_ATTR(cycles, "event=0x00"),
+	IMX8_DDR_PMU_EVENT_ATTR(selfresh, "event=0x01"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-accesses, "event=0x04"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-accesses, "event=0x05"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, "event=0x08"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, "event=0x09"),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, "event=0x10"),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, "event=0x11"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, "event=0x12"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-command, "event=0x20"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-command, "event=0x21"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, "event=0x22"),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-read, "event=0x23"),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, "event=0x24"),
+	IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, "event=0x25"),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, "event=0x26"),
+	IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, "event=0x27"),
+	IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, "event=0x29"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-cycles, "event=0x2a"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-bytes, "event=0x12a"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-bytes.unit, "MB"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-bytes.scale, "0.000001"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-cycles, "event=0x2b"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-bytes, "event=0x12b"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-bytes.unit, "MB"),
+	IMX8_DDR_PMU_EVENT_ATTR(write-bytes.scale, "0.000001"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, "event=0x30"),
+	IMX8_DDR_PMU_EVENT_ATTR(precharge, "event=0x31"),
+	IMX8_DDR_PMU_EVENT_ATTR(activate, "event=0x32"),
+	IMX8_DDR_PMU_EVENT_ATTR(load-mode, "event=0x33"),
+	IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, "event=0x34"),
+	IMX8_DDR_PMU_EVENT_ATTR(read, "event=0x35"),
+	IMX8_DDR_PMU_EVENT_ATTR(read-activate, "event=0x36"),
+	IMX8_DDR_PMU_EVENT_ATTR(refresh, "event=0x37"),
+	IMX8_DDR_PMU_EVENT_ATTR(write, "event=0x38"),
+	IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, "event=0x39"),
 	NULL,
 };
 
@@ -136,7 +143,7 @@ static struct attribute_group ddr_perf_events_attr_group = {
 	.attrs = ddr_perf_events_attrs,
 };
 
-PMU_FORMAT_ATTR(event, "config:0-7");
+PMU_FORMAT_ATTR(event, "config:0-8");
 
 static struct attribute *ddr_perf_format_attrs[] = {
 	&format_attr_event.attr,
@@ -243,6 +250,17 @@ static void ddr_perf_event_update(struct perf_event *event)
 
 	delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
 
+	/*
+	 * Calculate ddr read/write bandwidth via read-bytes/write-bytes events,
+	 * actually using read-cycles/write-cycles events.
+	 *
+	 * The ddr interface will generate 2 up edges and 2 down edges in an
+	 * internal clock cycle, so it can pass 4 beats of data, and 4 bytes of
+	 * each beat if ddr burst width is 32 bit.
+	 */
+	if (event->attr.config == 0x12a || event->attr.config == 0x12b)
+		delta = delta * 4 * 4;
+
 	local64_add(delta, &event->count);
 }
 
@@ -260,7 +278,15 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
 		 */
 		writel(0, pmu->base + reg);
 		val = CNTL_EN | CNTL_CLEAR;
-		val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+		/*
+		 * Virtual events (read-bytes/write-bytes) share real
+		 * events (read-cycles/write-cycles).
+		 */
+		if (config == 0x12a || config == 0x12b)
+			val |= FIELD_PREP(CNTL_CSV_MASK, (config - 0x100));
+		else
+			val |= FIELD_PREP(CNTL_CSV_MASK, config);
 		writel(val, pmu->base + reg);
 	} else {
 		/* Disable counter */
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual event read-bytes/write-bytes
  2019-07-09  5:46 [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual event read-bytes/write-bytes Joakim Zhang
@ 2019-07-09 15:56 ` Will Deacon
  2019-07-12  7:18   ` Joakim Zhang
  0 siblings, 1 reply; 4+ messages in thread
From: Will Deacon @ 2019-07-09 15:56 UTC (permalink / raw)
  To: Joakim Zhang
  Cc: mark.rutland, Frank Li, dl-linux-imx, kernel, linux-arm-kernel

On Tue, Jul 09, 2019 at 05:46:44AM +0000, Joakim Zhang wrote:
> We can calculate ddr bandwidth via virtual event read-bytes/write-bytes
> based on ddr burst width, which actually share event
> read-cycles/write-cycles. Burst width is 32 bit on i.MX8 board.
> 
> The ddr interface will generate 2 up edges and 2 down edges in an
> internal clock cycle, so it can pass 4 beats of data. 4 bytes of each
> beat if ddr burst width is 32 bit.
> 
> for example:
> perf stat -a -e imx8_ddr0/read-bytes/,imx8_ddr0/write-bytes/ ls

I don't think you should be doing this in the kernel. Can you look at
implementing it in perf tool instead by adding a .json file for your
PMU and expressing this compound event using "MetricExpr"?

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual event read-bytes/write-bytes
  2019-07-09 15:56 ` Will Deacon
@ 2019-07-12  7:18   ` Joakim Zhang
  2019-07-31 18:01     ` Will Deacon
  0 siblings, 1 reply; 4+ messages in thread
From: Joakim Zhang @ 2019-07-12  7:18 UTC (permalink / raw)
  To: Will Deacon
  Cc: mark.rutland, Frank Li, dl-linux-imx, kernel, linux-arm-kernel


> -----Original Message-----
> From: Will Deacon <will@kernel.org>
> Sent: 2019年7月9日 23:56
> To: Joakim Zhang <qiangqing.zhang@nxp.com>
> Cc: mark.rutland@arm.com; Frank Li <frank.li@nxp.com>;
> kernel@pengutronix.de; dl-linux-imx <linux-imx@nxp.com>;
> linux-arm-kernel@lists.infradead.org
> Subject: Re: [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual
> event read-bytes/write-bytes
> 
> On Tue, Jul 09, 2019 at 05:46:44AM +0000, Joakim Zhang wrote:
> > We can calculate ddr bandwidth via virtual event
> > read-bytes/write-bytes based on ddr burst width, which actually share
> > event read-cycles/write-cycles. Burst width is 32 bit on i.MX8 board.
> >
> > The ddr interface will generate 2 up edges and 2 down edges in an
> > internal clock cycle, so it can pass 4 beats of data. 4 bytes of each
> > beat if ddr burst width is 32 bit.
> >
> > for example:
> > perf stat -a -e imx8_ddr0/read-bytes/,imx8_ddr0/write-bytes/ ls
> 
> I don't think you should be doing this in the kernel. Can you look at
> implementing it in perf tool instead by adding a .json file for your PMU and
> expressing this compound event using "MetricExpr"?

Hi Will,

I tried to implement it in the perf tool, but "perf list metricgroup" shows nothing.

A. Add JSON metric.
diff --git a/tools/perf/pmu-events/arch/arm64/imx/imx8/ddr-metric.json b/tools/perf/pmu-events/arch/arm64/imx/imx8/ddr-metric.json
new file mode 100644
index 000000000000..3588dc5a4f46
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/imx/imx8/ddr-metric.json
@@ -0,0 +1,20 @@
+[
+    {
+       "PublicDescription": "Calculate DDR read bandwidth based on read-cycles event. DDR interface generates 2 up and 2 down edges in an internal clock cycle, can pass 4 beats of data. 4 bytes of each beat if DDR burst width is 32 bit.",
+       "MetricName": "imx8_ddr0/read-bytes/",
+       "MetricGroup": "bandwidth",
+       "MetricExpr": "imx8_ddr0/read-cycles/ * 4 * 4",
+        "Unit": "MB",
+       "ScaleUnit": "0.000001",
+       "BriefDescription": "DDR read bandwidth"
+    },
+    {
+       "PublicDescription": "Calculate DDR write bandwidth based on write-cycles event. DDR interface generates 2 up and 2 down edges in an internal clock cycle, can pass 4 beats of data. 4 bytes of each beat if DDR burst width is 32 bit.",
+       "MetricName": "imx8_ddr0/write-bytes/",
+       "MetricGroup": "bandwidth",
+       "MetricExpr": "imx8_ddr0/write-cycles/ * 4 * 4",
+        "Unit": "MB",
+       "ScaleUnit": "0.000001",
+       "BriefDescription": "DDR write bandwidth"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 927fcddcb4aa..12d378dca0d1 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -20,3 +20,4 @@
 0x00000000430f0af0,v1,cavium/thunderx2,core
 0x00000000480fd010,v1,hisilicon/hip08,core
 0x00000000500f0000,v1,ampere/emag,core
+0x000000004108d040,v1,imx/imx8,core

B. Test
root@imx8qxpmek:~# perf list metricgroup

List of pre-defined events (to be used in -e):

root@imx8qxpmek:~#

According to tools/perf/pmu-events/README, the build should generate the C source file 'pmu-events.c', but that file is not generated on my side.

Please tell me if I missed something. Thanks a lot!

Best Regards,
Joakim Zhang
> Will
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual event read-bytes/write-bytes
  2019-07-12  7:18   ` Joakim Zhang
@ 2019-07-31 18:01     ` Will Deacon
  0 siblings, 0 replies; 4+ messages in thread
From: Will Deacon @ 2019-07-31 18:01 UTC (permalink / raw)
  To: Joakim Zhang
  Cc: mark.rutland, Frank Li, dl-linux-imx, kernel, linux-arm-kernel

On Fri, Jul 12, 2019 at 07:18:14AM +0000, Joakim Zhang wrote:
> > On Tue, Jul 09, 2019 at 05:46:44AM +0000, Joakim Zhang wrote:
> > > We can calculate ddr bandwidth via virtual event
> > > read-bytes/write-bytes based on ddr burst width, which actually share
> > > event read-cycles/write-cycles. Burst width is 32 bit on i.MX8 board.
> > >
> > > The ddr interface will generate 2 up edges and 2 down edges in an
> > > internal clock cycle, so it can pass 4 beats of data. 4 bytes of each
> > > beat if ddr burst width is 32 bit.
> > >
> > > for example:
> > > perf stat -a -e imx8_ddr0/read-bytes/,imx8_ddr0/write-bytes/ ls
> > 
> > I don't think you should be doing this in the kernel. Can you look at
> > implementing it in perf tool instead by adding a .json file for your PMU and
> > expressing this compound event using "MetricExpr"?
> 
> I try to implement it in perf tool, but it shows nothing with perf list
> metricgroup.
> 
> A. Add JSON metric.
> diff --git a/tools/perf/pmu-events/arch/arm64/imx/imx8/ddr-metric.json b/tools/perf/pmu-events/arch/arm64/imx/imx8/ddr-metric.json
> new file mode 100644
> index 000000000000..3588dc5a4f46
> --- /dev/null
> +++ b/tools/perf/pmu-events/arch/arm64/imx/imx8/ddr-metric.json
> @@ -0,0 +1,20 @@
> +[
> +    {
> +       "PublicDescription": "Calculate DDR read bandwidth based on read-cycles event. DDR interface generates 2 up and 2 down edges in an internal clock cycle, can pass 4 beats of data. 4 bytes of each beat if DDR burst width is 32 bit.",
> +       "MetricName": "imx8_ddr0/read-bytes/",
> +       "MetricGroup": "bandwidth",
> +       "MetricExpr": "imx8_ddr0/read-cycles/ * 4 * 4",

This is probably being parsed as:

	imx8_ddr0 / read - cycles / * 4 * 4

I think you may need to use something like:

	imx8_ddr0@read\\-cycles@ * 4 * 4

although it's a bit weird to hardcode the instance number '0' in there.
It would be better for perf to add that itself, imo.

Anyway, I haven't used this before, so you need to read the code and figure
out how it works. Some support does appear to be there, so that's what
you're going to have to work with.

Will

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-07-31 18:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-09  5:46 [PATCH] perf: imx8_ddr_perf: calculate ddr bandwidth via virtual event read-bytes/write-bytes Joakim Zhang
2019-07-09 15:56 ` Will Deacon
2019-07-12  7:18   ` Joakim Zhang
2019-07-31 18:01     ` Will Deacon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.