iommu.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
@ 2020-08-27  9:33 Barry Song
  2020-08-28  7:41 ` Jean-Philippe Brucker
  2020-08-28 10:29 ` Will Deacon
  0 siblings, 2 replies; 8+ messages in thread
From: Barry Song @ 2020-08-27  9:33 UTC (permalink / raw)
  To: iommu, linux-arm-kernel; +Cc: robin.murphy, will, linuxarm

cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This patch
adds tracepoints for it to help debug.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
---
 * can furthermore develop an eBPF program to benchmark using this trace

  cmdlistlat.c:
#include <uapi/linux/ptrace.h>

BPF_HASH(start, u32);
BPF_HISTOGRAM(dist);

TRACEPOINT_PROBE(arm_smmu_v3, issue_cmdlist_entry)
{
        u32 pid;
        u64 ts, *val;

        pid = bpf_get_current_pid_tgid();
        ts = bpf_ktime_get_ns();
        start.update(&pid, &ts);
        return 0;
}

TRACEPOINT_PROBE(arm_smmu_v3, issue_cmdlist_exit)
{
        u32 pid;
        u64 *tsp, delta;

        pid = bpf_get_current_pid_tgid();
        tsp = start.lookup(&pid);

        if (tsp != 0) {
                delta = bpf_ktime_get_ns() - *tsp;
                dist.increment(bpf_log2l(delta));
                start.delete(&pid);
        }

        return 0;
}

 cmdlistlat.py:
#!/usr/bin/python3
#
from __future__ import print_function
from bcc import BPF
from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep
from sys import argv

def usage():
        print("USAGE: %s [interval [count]]" % argv[0])
        exit()

# arguments
interval = 5
count = -1
if len(argv) > 1:
        try:
                interval = int(argv[1])
                if interval == 0:
                        raise
                if len(argv) > 2:
                        count = int(argv[2])
        except: # also catches -h, --help
                usage()

# load BPF program
b = BPF(src_file = "cmdlistlat.c")

# header
print("Tracing... Hit Ctrl-C to end.")

# output
loop = 0
do_exit = 0
while (1):
        if count > 0:
                loop += 1
                if loop > count:
                        exit()
        try:
                sleep(interval)
        except KeyboardInterrupt:
                pass; do_exit = 1

        print()
        b["dist"].print_log2_hist("nsecs")
        b["dist"].clear()
        if do_exit:
                exit()


 drivers/iommu/arm/arm-smmu-v3/Makefile        |  1 +
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h | 48 +++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  8 ++++
 3 files changed, 57 insertions(+)
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h

diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
index 569e24e9f162..dba1087f91f3 100644
--- a/drivers/iommu/arm/arm-smmu-v3/Makefile
+++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
+ccflags-y += -I$(src)                   # needed for trace events
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
new file mode 100644
index 000000000000..29ab96706124
--- /dev/null
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Hisilicon Limited.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM arm_smmu_v3
+
+#if !defined(_ARM_SMMU_V3_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _ARM_SMMU_V3_TRACE_H
+
+#include <linux/tracepoint.h>
+
+struct device;
+
+DECLARE_EVENT_CLASS(issue_cmdlist_class,
+	TP_PROTO(struct device *dev, int n, bool sync),
+	TP_ARGS(dev, n, sync),
+
+	TP_STRUCT__entry(
+		__string(device, dev_name(dev))
+		__field(int, n)
+		__field(bool, sync)
+	),
+	TP_fast_assign(
+		__assign_str(device, dev_name(dev));
+		__entry->n = n;
+		__entry->sync = sync;
+	),
+	TP_printk("%s cmd number=%d sync=%d",
+			__get_str(device), __entry->n, __entry->sync)
+);
+
+#define DEFINE_ISSUE_CMDLIST_EVENT(name)       \
+DEFINE_EVENT(issue_cmdlist_class, name,        \
+	TP_PROTO(struct device *dev, int n, bool sync), \
+	TP_ARGS(dev, n, sync))
+
+DEFINE_ISSUE_CMDLIST_EVENT(issue_cmdlist_entry);
+DEFINE_ISSUE_CMDLIST_EVENT(issue_cmdlist_exit);
+
+#endif /* _ARM_SMMU_V3_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE arm-smmu-v3-trace
+#include <trace/define_trace.h>
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 7332251dd8cd..e2d7d5f1d234 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -33,6 +33,8 @@
 
 #include <linux/amba/bus.h>
 
+#include "arm-smmu-v3-trace.h"
+
 /* MMIO registers */
 #define ARM_SMMU_IDR0			0x0
 #define IDR0_ST_LVL			GENMASK(28, 27)
@@ -1389,6 +1391,8 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	}, head = llq;
 	int ret = 0;
 
+	trace_issue_cmdlist_entry(smmu->dev, n, sync);
+
 	/* 1. Allocate some space in the queue */
 	local_irq_save(flags);
 	llq.val = READ_ONCE(cmdq->q.llq.val);
@@ -1493,6 +1497,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	}
 
 	local_irq_restore(flags);
+	trace_issue_cmdlist_exit(smmu->dev, n, sync);
 	return ret;
 }
 
@@ -4166,6 +4171,9 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
+#define CREATE_TRACE_POINTS
+#include "arm-smmu-v3-trace.h"
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
 MODULE_ALIAS("platform:arm-smmu-v3");
-- 
2.27.0


_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-27  9:33 [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist Barry Song
@ 2020-08-28  7:41 ` Jean-Philippe Brucker
  2020-08-28  7:55   ` Song Bao Hua (Barry Song)
  2020-08-28 10:29 ` Will Deacon
  1 sibling, 1 reply; 8+ messages in thread
From: Jean-Philippe Brucker @ 2020-08-28  7:41 UTC (permalink / raw)
  To: Barry Song; +Cc: will, iommu, robin.murphy, linuxarm, linux-arm-kernel

Hi,

On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
> cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This patch
> adds tracepoints for it to help debug.
> 
> Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> ---
>  * can furthermore develop an eBPF program to benchmark using this trace

Have you tried using kprobe and kretprobe instead of tracepoints?
Any noticeable performance drop?

Thanks,
Jean

> 
>   cmdlistlat.c:
> #include <uapi/linux/ptrace.h>
> 
> BPF_HASH(start, u32);
> BPF_HISTOGRAM(dist);
> 
> TRACEPOINT_PROBE(arm_smmu_v3, issue_cmdlist_entry)
> {
>         u32 pid;
>         u64 ts, *val;
> 
>         pid = bpf_get_current_pid_tgid();
>         ts = bpf_ktime_get_ns();
>         start.update(&pid, &ts);
>         return 0;
> }
> 
> TRACEPOINT_PROBE(arm_smmu_v3, issue_cmdlist_exit)
> {
>         u32 pid;
>         u64 *tsp, delta;
> 
>         pid = bpf_get_current_pid_tgid();
>         tsp = start.lookup(&pid);
> 
>         if (tsp != 0) {
>                 delta = bpf_ktime_get_ns() - *tsp;
>                 dist.increment(bpf_log2l(delta));
>                 start.delete(&pid);
>         }
> 
>         return 0;
> }
> 
>  cmdlistlat.py:
> #!/usr/bin/python3
> #
> from __future__ import print_function
> from bcc import BPF
> from ctypes import c_ushort, c_int, c_ulonglong
> from time import sleep
> from sys import argv
> 
> def usage():
>         print("USAGE: %s [interval [count]]" % argv[0])
>         exit()
> 
> # arguments
> interval = 5
> count = -1
> if len(argv) > 1:
>         try:
>                 interval = int(argv[1])
>                 if interval == 0:
>                         raise
>                 if len(argv) > 2:
>                         count = int(argv[2])
>         except: # also catches -h, --help
>                 usage()
> 
> # load BPF program
> b = BPF(src_file = "cmdlistlat.c")
> 
> # header
> print("Tracing... Hit Ctrl-C to end.")
> 
> # output
> loop = 0
> do_exit = 0
> while (1):
>         if count > 0:
>                 loop += 1
>                 if loop > count:
>                         exit()
>         try:
>                 sleep(interval)
>         except KeyboardInterrupt:
>                 pass; do_exit = 1
> 
>         print()
>         b["dist"].print_log2_hist("nsecs")
>         b["dist"].clear()
>         if do_exit:
>                 exit()
> 
> 
>  drivers/iommu/arm/arm-smmu-v3/Makefile        |  1 +
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h | 48 +++++++++++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  8 ++++
>  3 files changed, 57 insertions(+)
>  create mode 100644 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile
> index 569e24e9f162..dba1087f91f3 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/Makefile
> +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
> @@ -1,2 +1,3 @@
>  # SPDX-License-Identifier: GPL-2.0
> +ccflags-y += -I$(src)                   # needed for trace events
>  obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> new file mode 100644
> index 000000000000..29ab96706124
> --- /dev/null
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2020 Hisilicon Limited.
> + */
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM arm_smmu_v3
> +
> +#if !defined(_ARM_SMMU_V3_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _ARM_SMMU_V3_TRACE_H
> +
> +#include <linux/tracepoint.h>
> +
> +struct device;
> +
> +DECLARE_EVENT_CLASS(issue_cmdlist_class,
> +	TP_PROTO(struct device *dev, int n, bool sync),
> +	TP_ARGS(dev, n, sync),
> +
> +	TP_STRUCT__entry(
> +		__string(device, dev_name(dev))
> +		__field(int, n)
> +		__field(bool, sync)
> +	),
> +	TP_fast_assign(
> +		__assign_str(device, dev_name(dev));
> +		__entry->n = n;
> +		__entry->sync = sync;
> +	),
> +	TP_printk("%s cmd number=%d sync=%d",
> +			__get_str(device), __entry->n, __entry->sync)
> +);
> +
> +#define DEFINE_ISSUE_CMDLIST_EVENT(name)       \
> +DEFINE_EVENT(issue_cmdlist_class, name,        \
> +	TP_PROTO(struct device *dev, int n, bool sync), \
> +	TP_ARGS(dev, n, sync))
> +
> +DEFINE_ISSUE_CMDLIST_EVENT(issue_cmdlist_entry);
> +DEFINE_ISSUE_CMDLIST_EVENT(issue_cmdlist_exit);
> +
> +#endif /* _ARM_SMMU_V3_TRACE_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_PATH .
> +#define TRACE_INCLUDE_FILE arm-smmu-v3-trace
> +#include <trace/define_trace.h>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 7332251dd8cd..e2d7d5f1d234 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -33,6 +33,8 @@
>  
>  #include <linux/amba/bus.h>
>  
> +#include "arm-smmu-v3-trace.h"
> +
>  /* MMIO registers */
>  #define ARM_SMMU_IDR0			0x0
>  #define IDR0_ST_LVL			GENMASK(28, 27)
> @@ -1389,6 +1391,8 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
>  	}, head = llq;
>  	int ret = 0;
>  
> +	trace_issue_cmdlist_entry(smmu->dev, n, sync);
> +
>  	/* 1. Allocate some space in the queue */
>  	local_irq_save(flags);
>  	llq.val = READ_ONCE(cmdq->q.llq.val);
> @@ -1493,6 +1497,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
>  	}
>  
>  	local_irq_restore(flags);
> +	trace_issue_cmdlist_exit(smmu->dev, n, sync);
>  	return ret;
>  }
>  
> @@ -4166,6 +4171,9 @@ static struct platform_driver arm_smmu_driver = {
>  };
>  module_platform_driver(arm_smmu_driver);
>  
> +#define CREATE_TRACE_POINTS
> +#include "arm-smmu-v3-trace.h"
> +
>  MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
>  MODULE_AUTHOR("Will Deacon <will@kernel.org>");
>  MODULE_ALIAS("platform:arm-smmu-v3");
> -- 
> 2.27.0
> 
> 
> _______________________________________________
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-28  7:41 ` Jean-Philippe Brucker
@ 2020-08-28  7:55   ` Song Bao Hua (Barry Song)
  2020-08-28  8:33     ` Jean-Philippe Brucker
  0 siblings, 1 reply; 8+ messages in thread
From: Song Bao Hua (Barry Song) @ 2020-08-28  7:55 UTC (permalink / raw)
  To: Jean-Philippe Brucker
  Cc: will, iommu, robin.murphy, Linuxarm, linux-arm-kernel



> -----Original Message-----
> From: Jean-Philippe Brucker [mailto:jean-philippe@linaro.org]
> Sent: Friday, August 28, 2020 7:41 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
> robin.murphy@arm.com; will@kernel.org; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for
> cmdq_issue_cmdlist
> 
> Hi,
> 
> On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
> > cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This
> > patch adds tracepoints for it to help debug.
> >
> > Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> > ---
> >  * can furthermore develop an eBPF program to benchmark using this
> > trace
> 
> Have you tried using kprobe and kretprobe instead of tracepoints?
> Any noticeable performance drop?

Yes. Pls read this email.
kprobe overhead and OPTPROBES implementation on ARM64
https://www.spinics.net/lists/arm-kernel/msg828788.html

> 
> Thanks,
> Jean
> 
> >
> >   cmdlistlat.c:
> > #include <uapi/linux/ptrace.h>
> >
> > BPF_HASH(start, u32);
> > BPF_HISTOGRAM(dist);
> >
> > TRACEPOINT_PROBE(arm_smmu_v3, issue_cmdlist_entry) {
> >         u32 pid;
> >         u64 ts, *val;
> >
> >         pid = bpf_get_current_pid_tgid();
> >         ts = bpf_ktime_get_ns();
> >         start.update(&pid, &ts);
> >         return 0;
> > }
> >
> > TRACEPOINT_PROBE(arm_smmu_v3, issue_cmdlist_exit) {
> >         u32 pid;
> >         u64 *tsp, delta;
> >
> >         pid = bpf_get_current_pid_tgid();
> >         tsp = start.lookup(&pid);
> >
> >         if (tsp != 0) {
> >                 delta = bpf_ktime_get_ns() - *tsp;
> >                 dist.increment(bpf_log2l(delta));
> >                 start.delete(&pid);
> >         }
> >
> >         return 0;
> > }
> >
> >  cmdlistlat.py:
> > #!/usr/bin/python3
> > #
> > from __future__ import print_function
> > from bcc import BPF
> > from ctypes import c_ushort, c_int, c_ulonglong from time import sleep
> > from sys import argv
> >
> > def usage():
> >         print("USAGE: %s [interval [count]]" % argv[0])
> >         exit()
> >
> > # arguments
> > interval = 5
> > count = -1
> > if len(argv) > 1:
> >         try:
> >                 interval = int(argv[1])
> >                 if interval == 0:
> >                         raise
> >                 if len(argv) > 2:
> >                         count = int(argv[2])
> >         except: # also catches -h, --help
> >                 usage()
> >
> > # load BPF program
> > b = BPF(src_file = "cmdlistlat.c")
> >
> > # header
> > print("Tracing... Hit Ctrl-C to end.")
> >
> > # output
> > loop = 0
> > do_exit = 0
> > while (1):
> >         if count > 0:
> >                 loop += 1
> >                 if loop > count:
> >                         exit()
> >         try:
> >                 sleep(interval)
> >         except KeyboardInterrupt:
> >                 pass; do_exit = 1
> >
> >         print()
> >         b["dist"].print_log2_hist("nsecs")
> >         b["dist"].clear()
> >         if do_exit:
> >                 exit()
> >
> >
> >  drivers/iommu/arm/arm-smmu-v3/Makefile        |  1 +
> >  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h | 48
> +++++++++++++++++++
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  8 ++++
> >  3 files changed, 57 insertions(+)
> >  create mode 100644
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile
> > b/drivers/iommu/arm/arm-smmu-v3/Makefile
> > index 569e24e9f162..dba1087f91f3 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/Makefile
> > +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile
> > @@ -1,2 +1,3 @@
> >  # SPDX-License-Identifier: GPL-2.0
> > +ccflags-y += -I$(src)                   # needed for trace events
> >  obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o diff --git
> > a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> > new file mode 100644
> > index 000000000000..29ab96706124
> > --- /dev/null
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-trace.h
> > @@ -0,0 +1,48 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (C) 2020 Hisilicon Limited.
> > + */
> > +
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM arm_smmu_v3
> > +
> > +#if !defined(_ARM_SMMU_V3_TRACE_H) ||
> > +defined(TRACE_HEADER_MULTI_READ) #define _ARM_SMMU_V3_TRACE_H
> > +
> > +#include <linux/tracepoint.h>
> > +
> > +struct device;
> > +
> > +DECLARE_EVENT_CLASS(issue_cmdlist_class,
> > +	TP_PROTO(struct device *dev, int n, bool sync),
> > +	TP_ARGS(dev, n, sync),
> > +
> > +	TP_STRUCT__entry(
> > +		__string(device, dev_name(dev))
> > +		__field(int, n)
> > +		__field(bool, sync)
> > +	),
> > +	TP_fast_assign(
> > +		__assign_str(device, dev_name(dev));
> > +		__entry->n = n;
> > +		__entry->sync = sync;
> > +	),
> > +	TP_printk("%s cmd number=%d sync=%d",
> > +			__get_str(device), __entry->n, __entry->sync) );
> > +
> > +#define DEFINE_ISSUE_CMDLIST_EVENT(name)       \
> > +DEFINE_EVENT(issue_cmdlist_class, name,        \
> > +	TP_PROTO(struct device *dev, int n, bool sync), \
> > +	TP_ARGS(dev, n, sync))
> > +
> > +DEFINE_ISSUE_CMDLIST_EVENT(issue_cmdlist_entry);
> > +DEFINE_ISSUE_CMDLIST_EVENT(issue_cmdlist_exit);
> > +
> > +#endif /* _ARM_SMMU_V3_TRACE_H */
> > +
> > +#undef TRACE_INCLUDE_PATH
> > +#undef TRACE_INCLUDE_FILE
> > +#define TRACE_INCLUDE_PATH .
> > +#define TRACE_INCLUDE_FILE arm-smmu-v3-trace #include
> > +<trace/define_trace.h>
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index 7332251dd8cd..e2d7d5f1d234 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -33,6 +33,8 @@
> >
> >  #include <linux/amba/bus.h>
> >
> > +#include "arm-smmu-v3-trace.h"
> > +
> >  /* MMIO registers */
> >  #define ARM_SMMU_IDR0			0x0
> >  #define IDR0_ST_LVL			GENMASK(28, 27)
> > @@ -1389,6 +1391,8 @@ static int arm_smmu_cmdq_issue_cmdlist(struct
> arm_smmu_device *smmu,
> >  	}, head = llq;
> >  	int ret = 0;
> >
> > +	trace_issue_cmdlist_entry(smmu->dev, n, sync);
> > +
> >  	/* 1. Allocate some space in the queue */
> >  	local_irq_save(flags);
> >  	llq.val = READ_ONCE(cmdq->q.llq.val); @@ -1493,6 +1497,7 @@ static
> > int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
> >  	}
> >
> >  	local_irq_restore(flags);
> > +	trace_issue_cmdlist_exit(smmu->dev, n, sync);
> >  	return ret;
> >  }
> >
> > @@ -4166,6 +4171,9 @@ static struct platform_driver arm_smmu_driver =
> > {  };  module_platform_driver(arm_smmu_driver);
> >
> > +#define CREATE_TRACE_POINTS
> > +#include "arm-smmu-v3-trace.h"
> > +
> >  MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3
> > implementations");  MODULE_AUTHOR("Will Deacon <will@kernel.org>");
> > MODULE_ALIAS("platform:arm-smmu-v3");
> > --
> > 2.27.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-28  7:55   ` Song Bao Hua (Barry Song)
@ 2020-08-28  8:33     ` Jean-Philippe Brucker
  0 siblings, 0 replies; 8+ messages in thread
From: Jean-Philippe Brucker @ 2020-08-28  8:33 UTC (permalink / raw)
  To: Song Bao Hua (Barry Song)
  Cc: will, iommu, robin.murphy, Linuxarm, linux-arm-kernel

On Fri, Aug 28, 2020 at 07:55:18AM +0000, Song Bao Hua (Barry Song) wrote:
> 
> 
> > -----Original Message-----
> > From: Jean-Philippe Brucker [mailto:jean-philippe@linaro.org]
> > Sent: Friday, August 28, 2020 7:41 PM
> > To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> > Cc: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
> > robin.murphy@arm.com; will@kernel.org; Linuxarm <linuxarm@huawei.com>
> > Subject: Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for
> > cmdq_issue_cmdlist
> > 
> > Hi,
> > 
> > On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
> > > cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This
> > > patch adds tracepoints for it to help debug.
> > >
> > > Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> > > ---
> > >  * can furthermore develop an eBPF program to benchmark using this
> > > trace
> > 
> > Have you tried using kprobe and kretprobe instead of tracepoints?
> > Any noticeable performance drop?
> 
> Yes. Pls read this email.
> kprobe overhead and OPTPROBES implementation on ARM64
> https://www.spinics.net/lists/arm-kernel/msg828788.html

Thanks for the pointer. I'm asking because I've been working on BPF
trampolines (fentry/fexit) for arm64, which reduce the overhead of BPF
tracing, and am looking for confirmation that it's needed before spending
more time on it. Perhaps OPTPROBE would be a better first step, though, it
seems more generic.

Thanks,
Jean
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-27  9:33 [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist Barry Song
  2020-08-28  7:41 ` Jean-Philippe Brucker
@ 2020-08-28 10:29 ` Will Deacon
  2020-08-28 11:02   ` Song Bao Hua (Barry Song)
  1 sibling, 1 reply; 8+ messages in thread
From: Will Deacon @ 2020-08-28 10:29 UTC (permalink / raw)
  To: Barry Song; +Cc: iommu, robin.murphy, linux-arm-kernel, linuxarm

On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
> cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This patch
> adds tracepoints for it to help debug.
> 
> Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> ---
>  * can furthermore develop an eBPF program to benchmark using this trace

Hmm, don't these things have a history of becoming ABI? If so, I don't
really want them in the driver at all, sorry. Do other drivers overcome
this somehow?

Will
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-28 10:29 ` Will Deacon
@ 2020-08-28 11:02   ` Song Bao Hua (Barry Song)
  2020-08-28 11:18     ` Robin Murphy
  0 siblings, 1 reply; 8+ messages in thread
From: Song Bao Hua (Barry Song) @ 2020-08-28 11:02 UTC (permalink / raw)
  To: Will Deacon; +Cc: iommu, robin.murphy, linux-arm-kernel, Linuxarm



> -----Original Message-----
> From: Will Deacon [mailto:will@kernel.org]
> Sent: Friday, August 28, 2020 10:29 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
> robin.murphy@arm.com; joro@8bytes.org; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for
> cmdq_issue_cmdlist
> 
> On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
> > cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This patch
> > adds tracepoints for it to help debug.
> >
> > Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> > ---
> >  * can furthermore develop an eBPF program to benchmark using this trace
> 
> Hmm, don't these things have a history of becoming ABI? If so, I don't
> really want them in the driver at all, sorry. Do other drivers overcome
> this somehow?

This kind of tracepoints mainly works as a low-overhead probe point for debug purpose. I don't think any
application would depend on it. It is for debugging. And there are lots of tracepoints in other drivers
even in iommu driver core and intel_iommu driver :-)

developers use it in one of the below ways:

1. get trace print from the ring buffer by reading debugfs
root@ubuntu:/sys/kernel/debug/tracing/events/arm_smmu_v3# echo 1 > enable
# cat /sys/kernel/debug/tracing/trace_pipe
<idle>-0     [058] ..s1 125444.768083: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1                                    
          <idle>-0     [058] ..s1 125444.768084: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1                                   
          <idle>-0     [058] ..s1 125444.768085: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1                                    
          <idle>-0     [058] ..s1 125444.768165: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1                                   
          <idle>-0     [058] ..s1 125444.768168: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1                                    
          <idle>-0     [058] ..s1 125444.768169: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1                                   
          <idle>-0     [058] ..s1 125444.768171: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1
          <idle>-0     [058] ..s1 125444.768259: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1                                   
          ...

This can replace printk with much much lower overhead.

2. add a hook function in tracepoint to do some latency measure and time statistics just like the eBPF example
I gave after the commit log.

Using it, I can get the histogram of the execution time of cmdq_issue_cmdlist():
   nsecs               : count     distribution 
         0 -> 1          : 0        |                                        | 
         2 -> 3          : 0        |                                        | 
         4 -> 7          : 0        |                                        | 
         8 -> 15         : 0        |                                        | 
        16 -> 31         : 0        |                                        | 
        32 -> 63         : 0        |                                        | 
        64 -> 127        : 0        |                                        | 
       128 -> 255        : 0        |                                        | 
       256 -> 511        : 0        |                                        | 
       512 -> 1023       : 58       |                                        | 
      1024 -> 2047       : 22763    |****************************************| 
      2048 -> 4095       : 13238    |***********************                 | 

I feel it is very common to do this kind of things for analyzing the performance issue. For example, to easy the analysis
of softirq latency, softirq.c has the below code:

asmlinkage __visible void __softirq_entry __do_softirq(void)
{
	...
		trace_softirq_entry(vec_nr);
		h->action(h);
		trace_softirq_exit(vec_nr);
	...
}

> 
> Will

Thanks
Barry

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-28 11:02   ` Song Bao Hua (Barry Song)
@ 2020-08-28 11:18     ` Robin Murphy
  2020-08-28 11:58       ` Song Bao Hua (Barry Song)
  0 siblings, 1 reply; 8+ messages in thread
From: Robin Murphy @ 2020-08-28 11:18 UTC (permalink / raw)
  To: Song Bao Hua (Barry Song), Will Deacon; +Cc: iommu, linux-arm-kernel, Linuxarm

On 2020-08-28 12:02, Song Bao Hua (Barry Song) wrote:
> 
> 
>> -----Original Message-----
>> From: Will Deacon [mailto:will@kernel.org]
>> Sent: Friday, August 28, 2020 10:29 PM
>> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
>> Cc: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
>> robin.murphy@arm.com; joro@8bytes.org; Linuxarm <linuxarm@huawei.com>
>> Subject: Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for
>> cmdq_issue_cmdlist
>>
>> On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
>>> cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This patch
>>> adds tracepoints for it to help debug.
>>>
>>> Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
>>> ---
>>>   * can furthermore develop an eBPF program to benchmark using this trace
>>
>> Hmm, don't these things have a history of becoming ABI? If so, I don't
>> really want them in the driver at all, sorry. Do other drivers overcome
>> this somehow?
> 
> This kind of tracepoints mainly works as a low-overhead probe point for debug purpose. I don't think any
> application would depend on it. It is for debugging. And there are lots of tracepoints in other drivers
> even in iommu driver core and intel_iommu driver :-)
> 
> developers use it in one of the below ways:
> 
> 1. get trace print from the ring buffer by reading debugfs
> root@ubuntu:/sys/kernel/debug/tracing/events/arm_smmu_v3# echo 1 > enable
> # cat /sys/kernel/debug/tracing/trace_pipe
> <idle>-0     [058] ..s1 125444.768083: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768084: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768085: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768165: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768168: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768169: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768171: issue_cmdlist_exit: arm-smmu-v3.2.auto cmd number=1 sync=1
>            <idle>-0     [058] ..s1 125444.768259: issue_cmdlist_entry: arm-smmu-v3.2.auto cmd number=1 sync=1
>            ...
> 
> This can replace printk with much much lower overhead.
> 
> 2. add a hook function in tracepoint to do some latency measure and time statistics just like the eBPF example
> I gave after the commit log.
> 
> Using it, I can get the histogram of the execution time of cmdq_issue_cmdlist():
>     nsecs               : count     distribution
>           0 -> 1          : 0        |                                        |
>           2 -> 3          : 0        |                                        |
>           4 -> 7          : 0        |                                        |
>           8 -> 15         : 0        |                                        |
>          16 -> 31         : 0        |                                        |
>          32 -> 63         : 0        |                                        |
>          64 -> 127        : 0        |                                        |
>         128 -> 255        : 0        |                                        |
>         256 -> 511        : 0        |                                        |
>         512 -> 1023       : 58       |                                        |
>        1024 -> 2047       : 22763    |****************************************|
>        2048 -> 4095       : 13238    |***********************                 |
> 
> I feel it is very common to do this kind of things for analyzing the performance issue. For example, to easy the analysis
> of softirq latency, softirq.c has the below code:
> 
> asmlinkage __visible void __softirq_entry __do_softirq(void)
> {
> 	...
> 		trace_softirq_entry(vec_nr);
> 		h->action(h);
> 		trace_softirq_exit(vec_nr);
> 	...
> }

If you only want to measure entry and exit of one specific function, 
though, can't the function graph tracer already do that?

Otherwise, pursuing optprobes sounds like a worthwhile thing to do since 
that should benefit everyone, rather than just the 6 people on the 
planet who might care about arm_smmu_issue_cmdlist(). As long as it 
doesn't involve whole new ISA extensions like the RISC-V proposal ;)

Robin.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist
  2020-08-28 11:18     ` Robin Murphy
@ 2020-08-28 11:58       ` Song Bao Hua (Barry Song)
  0 siblings, 0 replies; 8+ messages in thread
From: Song Bao Hua (Barry Song) @ 2020-08-28 11:58 UTC (permalink / raw)
  To: Robin Murphy, Will Deacon; +Cc: iommu, linux-arm-kernel, Linuxarm



> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy@arm.com]
> Sent: Friday, August 28, 2020 11:18 PM
> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>; Will Deacon
> <will@kernel.org>
> Cc: iommu@lists.linux-foundation.org; linux-arm-kernel@lists.infradead.org;
> joro@8bytes.org; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for
> cmdq_issue_cmdlist
> 
> On 2020-08-28 12:02, Song Bao Hua (Barry Song) wrote:
> >
> >
> >> -----Original Message-----
> >> From: Will Deacon [mailto:will@kernel.org]
> >> Sent: Friday, August 28, 2020 10:29 PM
> >> To: Song Bao Hua (Barry Song) <song.bao.hua@hisilicon.com>
> >> Cc: iommu@lists.linux-foundation.org;
> linux-arm-kernel@lists.infradead.org;
> >> robin.murphy@arm.com; joro@8bytes.org; Linuxarm
> <linuxarm@huawei.com>
> >> Subject: Re: [PATCH] iommu/arm-smmu-v3: add tracepoints for
> >> cmdq_issue_cmdlist
> >>
> >> On Thu, Aug 27, 2020 at 09:33:51PM +1200, Barry Song wrote:
> >>> cmdq_issue_cmdlist() is the hotspot that uses a lot of time. This patch
> >>> adds tracepoints for it to help debug.
> >>>
> >>> Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
> >>> ---
> >>>   * can furthermore develop an eBPF program to benchmark using this
> trace
> >>
> >> Hmm, don't these things have a history of becoming ABI? If so, I don't
> >> really want them in the driver at all, sorry. Do other drivers overcome
> >> this somehow?
> >
> > This kind of tracepoints mainly works as a low-overhead probe point for
> debug purpose. I don't think any
> > application would depend on it. It is for debugging. And there are lots of
> tracepoints in other drivers
> > even in iommu driver core and intel_iommu driver :-)
> >
> > developers use it in one of the below ways:
> >
> > 1. get trace print from the ring buffer by reading debugfs
> > root@ubuntu:/sys/kernel/debug/tracing/events/arm_smmu_v3# echo 1 >
> enable
> > # cat /sys/kernel/debug/tracing/trace_pipe
> > <idle>-0     [058] ..s1 125444.768083: issue_cmdlist_exit:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768084: issue_cmdlist_entry:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768085: issue_cmdlist_exit:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768165: issue_cmdlist_entry:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768168: issue_cmdlist_exit:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768169: issue_cmdlist_entry:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768171: issue_cmdlist_exit:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            <idle>-0     [058] ..s1 125444.768259: issue_cmdlist_entry:
> arm-smmu-v3.2.auto cmd number=1 sync=1
> >            ...
> >
> > This can replace printk with much much lower overhead.
> >
> > 2. add a hook function in tracepoint to do some latency measure and time
> statistics just like the eBPF example
> > I gave after the commit log.
> >
> > Using it, I can get the histogram of the execution time of
> cmdq_issue_cmdlist():
> >     nsecs               : count     distribution
> >           0 -> 1          : 0        |
> |
> >           2 -> 3          : 0        |
> |
> >           4 -> 7          : 0        |
> |
> >           8 -> 15         : 0        |
> |
> >          16 -> 31         : 0        |
> |
> >          32 -> 63         : 0        |
> |
> >          64 -> 127        : 0        |
> |
> >         128 -> 255        : 0        |
> |
> >         256 -> 511        : 0        |
> |
> >         512 -> 1023       : 58       |
> |
> >        1024 -> 2047       : 22763
> |****************************************|
> >        2048 -> 4095       : 13238    |***********************
> |
> >
> > I feel it is very common to do this kind of things for analyzing the
> performance issue. For example, to easy the analysis
> > of softirq latency, softirq.c has the below code:
> >
> > asmlinkage __visible void __softirq_entry __do_softirq(void)
> > {
> > 	...
> > 		trace_softirq_entry(vec_nr);
> > 		h->action(h);
> > 		trace_softirq_exit(vec_nr);
> > 	...
> > }
> 
> If you only want to measure entry and exit of one specific function,
> though, can't the function graph tracer already do that?

Function graph is able to do this specific thing while it is not good to support developers
to use BPF code to do various analysis in various fancy ways. Another disadvanrage of
functiongraph is that it will add the overhead of ftrace of child functions to the parent
function:
a()
{
	b();
	c();
}
b()
{
	d();
}
We have some overhead of ftrace for b(), c(), d(), and all overhead will be added into a(). That can makes
the execution time of a() much longer.

On the other hand, in my original plan, the tracepoints in smmu-v3 driver would not be only in the entry and
exit of this function, it would be in some other places like
before and after the step 1, lock contention
before and after the step 5, wait for the completion of cmd_sync

and some other critical code path which can help analyze the latency of arm-smmu-v3.

I was using the two tracepoints to start the discussion. It happens these two can somehow be implemented
by function graph.
> 
> Otherwise, pursuing optprobes sounds like a worthwhile thing to do since
> that should benefit everyone, rather than just the 6 people on the
> planet who might care about arm_smmu_issue_cmdlist(). As long as it
> doesn't involve whole new ISA extensions like the RISC-V proposal ;)
>

It is a bit sad that only 6 people are caring about the function. Don't know where other
ARM64 server guys go :-)

It seems optprobes/kprobe and tracepoints will work side by side. They are not trying
to replace each other since they both have their own advantages and disadvantages.

If both you and Jean think optprobes is a good direction to go for arm64, I am happy to
start some feasibility study.

> Robin.

Thanks
Barry
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-08-28 11:58 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-27  9:33 [PATCH] iommu/arm-smmu-v3: add tracepoints for cmdq_issue_cmdlist Barry Song
2020-08-28  7:41 ` Jean-Philippe Brucker
2020-08-28  7:55   ` Song Bao Hua (Barry Song)
2020-08-28  8:33     ` Jean-Philippe Brucker
2020-08-28 10:29 ` Will Deacon
2020-08-28 11:02   ` Song Bao Hua (Barry Song)
2020-08-28 11:18     ` Robin Murphy
2020-08-28 11:58       ` Song Bao Hua (Barry Song)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).