From: Namhyung Kim <namhyung@kernel.org>
To: Song Liu <songliubraving@fb.com>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Mark Rutland <mark.rutland@arm.com>, Jiri Olsa <jolsa@redhat.com>,
	kernel-team@fb.com
Subject: Re: [PATCH v6 3/4] perf-stat: enable counting events for BPF programs
Date: Tue, 29 Dec 2020 16:22:15 +0900	[thread overview]
Message-ID: <CAM9d7ciBsQqp2C5jWnitaK1Lttrq46NMcTLwE70oaqm82T88+Q@mail.gmail.com> (raw)
In-Reply-To: <20201228174054.907740-4-songliubraving@fb.com>

On Tue, Dec 29, 2020 at 2:41 AM Song Liu <songliubraving@fb.com> wrote:
>
> Introduce the perf-stat -b option, which counts events for BPF programs, like:
>
> [root@localhost ~]# ~/perf stat -e ref-cycles,cycles -b 254 -I 1000
>      1.487903822            115,200      ref-cycles
>      1.487903822             86,012      cycles
>      2.489147029             80,560      ref-cycles
>      2.489147029             73,784      cycles
>      3.490341825             60,720      ref-cycles
>      3.490341825             37,797      cycles
>      4.491540887             37,120      ref-cycles
>      4.491540887             31,963      cycles
>
> The example above counts the cycles and ref-cycles of the BPF program
> with id 254.  This is similar to the bpftool-prog-profile command, but
> more flexible.
>
> perf-stat -b creates per-cpu perf_events and loads fentry/fexit BPF
> programs (monitor-progs) that attach to the target BPF program
> (target-prog).  The monitor-progs read the perf_event before and after
> the target-prog runs and aggregate the difference in a BPF map.  User
> space then reads the data from these maps.
>
> A new struct bpf_counter is introduced to provide a common interface
> that uses BPF programs/maps to count perf events.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/perf/Makefile.perf                      |   2 +-
>  tools/perf/builtin-stat.c                     |  77 ++++-
>  tools/perf/util/Build                         |   1 +
>  tools/perf/util/bpf_counter.c                 | 296 ++++++++++++++++++
>  tools/perf/util/bpf_counter.h                 |  72 +++++
>  .../util/bpf_skel/bpf_prog_profiler.bpf.c     |  93 ++++++
>  tools/perf/util/evsel.c                       |   9 +
>  tools/perf/util/evsel.h                       |   6 +
>  tools/perf/util/stat-display.c                |   4 +-
>  tools/perf/util/stat.c                        |   2 +-
>  tools/perf/util/target.c                      |  34 +-
>  tools/perf/util/target.h                      |  10 +
>  12 files changed, 588 insertions(+), 18 deletions(-)
>  create mode 100644 tools/perf/util/bpf_counter.c
>  create mode 100644 tools/perf/util/bpf_counter.h
>  create mode 100644 tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c
>
> diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
> index d182a2dbb9bbd..8c4e039c3b813 100644
> --- a/tools/perf/Makefile.perf
> +++ b/tools/perf/Makefile.perf
> @@ -1015,7 +1015,7 @@ python-clean:
>
>  SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
>  SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
> -SKELETONS :=
> +SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
>
>  ifdef BUILD_BPF_SKEL
>  BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 8cc24967bc273..09bffb3fbcdd4 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -67,6 +67,7 @@
>  #include "util/top.h"
>  #include "util/affinity.h"
>  #include "util/pfm.h"
> +#include "util/bpf_counter.h"
>  #include "asm/bug.h"
>
>  #include <linux/time64.h>
> @@ -409,12 +410,31 @@ static int read_affinity_counters(struct timespec *rs)
>         return 0;
>  }
>
> +static int read_bpf_map_counters(void)
> +{
> +       struct evsel *counter;
> +       int err;
> +
> +       evlist__for_each_entry(evsel_list, counter) {
> +               err = bpf_counter__read(counter);
> +               if (err)
> +                       return err;
> +       }
> +       return 0;
> +}
> +
>  static void read_counters(struct timespec *rs)
>  {
>         struct evsel *counter;
> +       int err;
>
> -       if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
> -               return;
> +       if (!stat_config.stop_read_counter) {
> +               err = read_bpf_map_counters();
> +               if (err == -EAGAIN)
> +                       err = read_affinity_counters(rs);

Instead of checking the error code, can we do something like

  if (target__has_bpf(target))
      read_bpf_map_counters();

?
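
For example, read_counters() could become something like this (a rough
sketch, untested; assuming the file-scope 'target' used elsewhere in
builtin-stat.c is visible here):

  static void read_counters(struct timespec *rs)
  {
          struct evsel *counter;

          if (!stat_config.stop_read_counter) {
                  if (target__has_bpf(&target)) {
                          if (read_bpf_map_counters() < 0)
                                  return;
                  } else if (read_affinity_counters(rs) < 0) {
                          return;
                  }
          }
          ...

That would also avoid overloading -EAGAIN to mean "no BPF counters".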

> +               if (err < 0)
> +                       return;
> +       }
>
>         evlist__for_each_entry(evsel_list, counter) {
>                 if (counter->err)
> @@ -496,11 +516,20 @@ static bool handle_interval(unsigned int interval, int *times)
>         return false;
>  }
>
> -static void enable_counters(void)
> +static int enable_counters(void)
>  {
> +       struct evsel *evsel;
> +       int err;
> +
> +       evlist__for_each_entry(evsel_list, evsel) {
> +               err = bpf_counter__enable(evsel);
> +               if (err)
> +                       return err;

Ditto.
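
I.e. here too, something like (untested):

  if (target__has_bpf(&target)) {
          evlist__for_each_entry(evsel_list, evsel) {
                  err = bpf_counter__enable(evsel);
                  if (err)
                          return err;
          }
  }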


> +       }
> +
>         if (stat_config.initial_delay < 0) {
>                 pr_info(EVLIST_DISABLED_MSG);
> -               return;
> +               return 0;
>         }
>
>         if (stat_config.initial_delay > 0) {
> @@ -518,6 +547,7 @@ static void enable_counters(void)
>                 if (stat_config.initial_delay > 0)
>                         pr_info(EVLIST_ENABLED_MSG);
>         }
> +       return 0;
>  }
>
>  static void disable_counters(void)
> @@ -720,7 +750,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
>         const bool forks = (argc > 0);
>         bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
>         struct affinity affinity;
> -       int i, cpu;
> +       int i, cpu, err;
>         bool second_pass = false;
>
>         if (forks) {
> @@ -737,6 +767,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
>         if (affinity__setup(&affinity) < 0)
>                 return -1;
>
> +       evlist__for_each_entry(evsel_list, counter) {
> +               if (bpf_counter__load(counter, &target))
> +                       return -1;
> +       }
> +

Ditto.


>         evlist__for_each_cpu (evsel_list, i, cpu) {
>                 affinity__set(&affinity, cpu);
>
> @@ -850,7 +885,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
>         }
>
>         if (STAT_RECORD) {
> -               int err, fd = perf_data__fd(&perf_stat.data);
> +               int fd = perf_data__fd(&perf_stat.data);
>
>                 if (is_pipe) {
>                         err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));

[SNIP]
>  perf-$(CONFIG_LIBELF) += probe-file.o
> diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
> new file mode 100644
> index 0000000000000..f2cb86a40c882
> --- /dev/null
> +++ b/tools/perf/util/bpf_counter.c
> @@ -0,0 +1,296 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Copyright (c) 2019 Facebook */
> +
> +#include <limits.h>
> +#include <unistd.h>
> +#include <sys/time.h>
> +#include <sys/resource.h>
> +#include <linux/err.h>
> +#include <linux/zalloc.h>
> +#include <bpf/bpf.h>
> +#include <bpf/btf.h>
> +#include <bpf/libbpf.h>
> +
> +#include "bpf_counter.h"
> +#include "counts.h"
> +#include "debug.h"
> +#include "evsel.h"
> +#include "target.h"
> +
> +#include "bpf_skel/bpf_prog_profiler.skel.h"
> +
> +static inline void *u64_to_ptr(__u64 ptr)
> +{
> +       return (void *)(unsigned long)ptr;
> +}
> +
> +static void set_max_rlimit(void)
> +{
> +       struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
> +
> +       setrlimit(RLIMIT_MEMLOCK, &rinf);
> +}

This looks scary.. it unconditionally bumps RLIMIT_MEMLOCK to infinity
and ignores the setrlimit() return value.
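
Could it at least check for failure, and raise only the soft limit up
to the current hard limit rather than to infinity?  Something like this
(untested sketch, function name made up):

  static int bump_memlock_rlimit(void)
  {
          struct rlimit rlim;

          if (getrlimit(RLIMIT_MEMLOCK, &rlim))
                  return -errno;

          /* raise the soft limit to the hard limit, not to infinity */
          rlim.rlim_cur = rlim.rlim_max;
          if (setrlimit(RLIMIT_MEMLOCK, &rlim))
                  return -errno;
          return 0;
  }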


> +
> +static struct bpf_counter *bpf_counter_alloc(void)
> +{
> +       struct bpf_counter *counter;
> +
> +       counter = zalloc(sizeof(*counter));
> +       if (counter)
> +               INIT_LIST_HEAD(&counter->list);
> +       return counter;
> +}
> +
> +static int bpf_program_profiler__destroy(struct evsel *evsel)
> +{
> +       struct bpf_counter *counter;
> +
> +       list_for_each_entry(counter, &evsel->bpf_counter_list, list)
> +               bpf_prog_profiler_bpf__destroy(counter->skel);
> +       INIT_LIST_HEAD(&evsel->bpf_counter_list);
> +       return 0;
> +}
> +
> +static char *bpf_target_prog_name(int tgt_fd)
> +{
> +       struct bpf_prog_info_linear *info_linear;
> +       struct bpf_func_info *func_info;
> +       const struct btf_type *t;
> +       char *name = NULL;
> +       struct btf *btf;
> +
> +       info_linear = bpf_program__get_prog_info_linear(
> +               tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
> +       if (IS_ERR_OR_NULL(info_linear)) {
> +               pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd);
> +               return NULL;
> +       }
> +
> +       if (info_linear->info.btf_id == 0 ||
> +           btf__get_from_id(info_linear->info.btf_id, &btf)) {
> +               pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
> +               goto out;
> +       }
> +
> +       func_info = u64_to_ptr(info_linear->info.func_info);
> +       t = btf__type_by_id(btf, func_info[0].type_id);
> +       if (!t) {
> +               pr_debug("btf %d doesn't have type %d\n",
> +                        info_linear->info.btf_id, func_info[0].type_id);
> +               goto out;
> +       }
> +       name = strdup(btf__name_by_offset(btf, t->name_off));
> +out:
> +       free(info_linear);
> +       return name;
> +}
> +
> +static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
> +{
> +       struct bpf_prog_profiler_bpf *skel;
> +       struct bpf_counter *counter;
> +       struct bpf_program *prog;
> +       char *prog_name;
> +       int prog_fd;
> +       int err;
> +
> +       prog_fd = bpf_prog_get_fd_by_id(prog_id);
> +       if (prog_fd < 0) {
> +               pr_err("Failed to open fd for bpf prog %u\n", prog_id);
> +               return -1;
> +       }
> +       counter = bpf_counter_alloc();
> +       if (!counter) {
> +               close(prog_fd);
> +               return -1;
> +       }
> +
> +       skel = bpf_prog_profiler_bpf__open();
> +       if (!skel) {
> +               pr_err("Failed to open bpf skeleton\n");
> +               goto err_out;
> +       }
> +       skel->rodata->num_cpu = evsel__nr_cpus(evsel);
> +
> +       bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
> +       bpf_map__resize(skel->maps.fentry_readings, 1);
> +       bpf_map__resize(skel->maps.accum_readings, 1);
> +
> +       prog_name = bpf_target_prog_name(prog_fd);
> +       if (!prog_name) {
> +               pr_err("Failed to get program name for bpf prog %u. Does it have BTF?\n", prog_id);
> +               goto err_out;
> +       }
> +
> +       bpf_object__for_each_program(prog, skel->obj) {
> +               err = bpf_program__set_attach_target(prog, prog_fd, prog_name);
> +               if (err) {
> +                       pr_err("bpf_program__set_attach_target failed.\n"
> +                              "Does bpf prog %u have BTF?\n", prog_id);
> +                       goto err_out;
> +               }
> +       }
> +       set_max_rlimit();
> +       err = bpf_prog_profiler_bpf__load(skel);
> +       if (err) {
> +               pr_err("bpf_prog_profiler_bpf__load failed\n");
> +               goto err_out;
> +       }
> +
> +       counter->skel = skel;
> +       list_add(&counter->list, &evsel->bpf_counter_list);
> +       close(prog_fd);
> +       return 0;
> +err_out:
> +       free(counter);
> +       close(prog_fd);

I don't know how the 'skel' part is managed on this error path - is it
safe to just leave it?
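
I'd expect the error path to destroy the skeleton as well, something
like (untested; 'skel' is either NULL or a valid skeleton once err_out
is reachable):

  err_out:
          if (skel)
                  bpf_prog_profiler_bpf__destroy(skel);
          free(counter);
          close(prog_fd);
          return -1;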


> +       return -1;
> +}
> +
> +static int bpf_program_profiler__load(struct evsel *evsel, struct target *target)
> +{
> +       char *bpf_str, *bpf_str_, *tok, *saveptr = NULL, *p;
> +       u32 prog_id;
> +       int ret;
> +
> +       bpf_str_ = bpf_str = strdup(target->bpf_str);
> +       if (!bpf_str)
> +               return -1;
> +
> +       while ((tok = strtok_r(bpf_str, ",", &saveptr)) != NULL) {
> +               prog_id = strtoul(tok, &p, 10);
> +               if (prog_id == 0 || prog_id == UINT_MAX ||
> +                   (*p != '\0' && *p != ',')) {
> +                       pr_err("Failed to parse bpf prog ids %s\n",
> +                              target->bpf_str);
> +                       return -1;
> +               }
> +
> +               ret = bpf_program_profiler_load_one(evsel, prog_id);
> +               if (ret) {
> +                       bpf_program_profiler__destroy(evsel);
> +                       free(bpf_str_);
> +                       return -1;
> +               }
> +               bpf_str = NULL;
> +       }
> +       free(bpf_str_);
> +       return 0;
> +}
> +
> +static int bpf_program_profiler__enable(struct evsel *evsel)
> +{
> +       struct bpf_counter *counter;
> +       int ret;
> +
> +       list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
> +               ret = bpf_prog_profiler_bpf__attach(counter->skel);
> +               if (ret) {
> +                       bpf_program_profiler__destroy(evsel);
> +                       return ret;
> +               }
> +       }
> +       return 0;
> +}
> +
> +static int bpf_program_profiler__read(struct evsel *evsel)
> +{
> +       int num_cpu = evsel__nr_cpus(evsel);
> +       struct bpf_perf_event_value values[num_cpu];
> +       struct bpf_counter *counter;
> +       int reading_map_fd;
> +       __u32 key = 0;
> +       int err, cpu;
> +
> +       if (list_empty(&evsel->bpf_counter_list))
> +               return -EAGAIN;
> +
> +       for (cpu = 0; cpu < num_cpu; cpu++) {
> +               perf_counts(evsel->counts, cpu, 0)->val = 0;
> +               perf_counts(evsel->counts, cpu, 0)->ena = 0;
> +               perf_counts(evsel->counts, cpu, 0)->run = 0;
> +       }

Hmm.. not sure it's correct to reset counters here.


> +       list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
> +               struct bpf_prog_profiler_bpf *skel = counter->skel;
> +
> +               reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
> +
> +               err = bpf_map_lookup_elem(reading_map_fd, &key, values);
> +               if (err) {
> +                       fprintf(stderr, "failed to read value\n");
> +                       return err;
> +               }
> +
> +               for (cpu = 0; cpu < num_cpu; cpu++) {
> +                       perf_counts(evsel->counts, cpu, 0)->val += values[cpu].counter;
> +                       perf_counts(evsel->counts, cpu, 0)->ena += values[cpu].enabled;
> +                       perf_counts(evsel->counts, cpu, 0)->run += values[cpu].running;
> +               }
> +       }

So this just aggregates the counters from all the target BPF programs,
right?


> +       return 0;
> +}
> +
> +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
> +                                           int fd)
> +{
> +       struct bpf_prog_profiler_bpf *skel;
> +       struct bpf_counter *counter;
> +       int ret;
> +
> +       list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
> +               skel = counter->skel;
> +               ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
> +                                         &cpu, &fd, BPF_ANY);
> +               if (ret)
> +                       return ret;
> +       }
> +       return 0;
> +}
> +
> +struct bpf_counter_ops bpf_program_profiler_ops = {
> +       .load       = bpf_program_profiler__load,
> +       .enable     = bpf_program_profiler__enable,
> +       .read       = bpf_program_profiler__read,
> +       .destroy    = bpf_program_profiler__destroy,
> +       .install_pe = bpf_program_profiler__install_pe,

What is 'pe'?

Btw, do you think other kinds of bpf programs will be added later?
It seems 'perf stat -b' is somewhat coupled with this profiler ops.
Will it be possible to run other ops on the same evsel?


> +};
> +
> +int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
> +{
> +       if (list_empty(&evsel->bpf_counter_list))
> +               return 0;
> +       return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
> +}
> +
> +int bpf_counter__load(struct evsel *evsel, struct target *target)
> +{
> +       if (target__has_bpf(target))
> +               evsel->bpf_counter_ops = &bpf_program_profiler_ops;
> +
> +       if (evsel->bpf_counter_ops)
> +               return evsel->bpf_counter_ops->load(evsel, target);
> +       return 0;
> +}
> +
> +int bpf_counter__enable(struct evsel *evsel)
> +{
> +       if (list_empty(&evsel->bpf_counter_list))
> +               return 0;
> +       return evsel->bpf_counter_ops->enable(evsel);
> +}
> +
> +int bpf_counter__read(struct evsel *evsel)
> +{
> +       if (list_empty(&evsel->bpf_counter_list))
> +               return -EAGAIN;
> +       return evsel->bpf_counter_ops->read(evsel);
> +}
> +
> +void bpf_counter__destroy(struct evsel *evsel)
> +{
> +       if (list_empty(&evsel->bpf_counter_list))
> +               return;
> +       evsel->bpf_counter_ops->destroy(evsel);
> +       evsel->bpf_counter_ops = NULL;
> +}

[SNIP]
> diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
> index 6ef01a83b24e9..f132c6c2eef81 100644
> --- a/tools/perf/util/target.h
> +++ b/tools/perf/util/target.h
> @@ -10,6 +10,7 @@ struct target {
>         const char   *tid;
>         const char   *cpu_list;
>         const char   *uid_str;
> +       const char   *bpf_str;
>         uid_t        uid;
>         bool         system_wide;
>         bool         uses_mmap;
> @@ -36,6 +37,10 @@ enum target_errno {
>         TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
>         TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
>         TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
> +       TARGET_ERRNO__BPF_OVERRIDE_CPU,
> +       TARGET_ERRNO__BPF_OVERRIDE_PID,
> +       TARGET_ERRNO__BPF_OVERRIDE_UID,
> +       TARGET_ERRNO__BPF_OVERRIDE_THREAD,
>
>         /* for target__parse_uid() */
>         TARGET_ERRNO__INVALID_UID,
> @@ -59,6 +64,11 @@ static inline bool target__has_cpu(struct target *target)
>         return target->system_wide || target->cpu_list;
>  }
>
> +static inline bool target__has_bpf(struct target *target)
> +{
> +       return target->bpf_str;
> +}
> +
>  static inline bool target__none(struct target *target)
>  {
>         return !target__has_task(target) && !target__has_cpu(target);

Shouldn't it have && !target__has_bpf(target) too?
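
I.e. (untested):

  static inline bool target__none(struct target *target)
  {
          return !target__has_task(target) && !target__has_cpu(target) &&
                 !target__has_bpf(target);
  }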

Thanks,
Namhyung

