* [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
@ 2019-03-01 15:41 ` Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
` (2 more replies)
2019-03-01 15:43 ` [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics Alexey Budankov
` (8 subsequent siblings)
9 siblings, 3 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 15:41 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implemented -f,--mmap-flush option that specifies minimal size of data
chunk that is extracted from mmaped kernel buffer to store into a trace.
$ tools/perf/perf record -f 1024 -e cycles -- matrix.gcc
$ tools/perf/perf record --aio -f 1024 -e cycles -- matrix.gcc
Option can serve two purposes the first one is to increase the compression
ratio of a trace data and the second one is to avoid live-lock-like self
monitoring in system wide (-a) profiling mode.
The default option value is 1 byte what means that every time trace
writing thread finds some new data in the mmaped buffer the data is
extracted, possibly compressed and written to a trace. Larger data chunks
are compressed more effectively in comparison to smaller chunks so
extraction of larger chunks from the kernel buffer is preferable from
perspective of trace size reduction. So the implemented option allows
specifying minimal data chunk size that is more than 1 byte to influence
data compression ratio. Also at some cases executing more write syscalls
with smaller data size can take longer than executing less write syscalls
with bigger data size due to syscall overhead so extracting bigger data
chunks specified by the option value could additionally decrease runtime
overhead.
Profiling in system wide mode with compression (-a -z) can additionally
induce data into the kernel buffers along with the data from monitored
processes. If performance data rate and volume from the monitored processes
is high then trace streaming and compression activity in the tool is also
high and it can lead to subtle live-lock effect of endless activity when
compression of single new byte from some of mmaped kernel buffer leads to
eneration of the next single byte at some mmaped buffer so perf tool trace
writing thread never stops on polling event file descriptors.
Implemented sync param is the mean to force data move independently from
the threshold value. Despite the provided flush value from the command
line, the tool needs capability to drain memory buffers, at least in the
end of the collection.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/Documentation/perf-record.txt | 13 ++++++
tools/perf/builtin-record.c | 53 +++++++++++++++++++++---
tools/perf/perf.h | 1 +
tools/perf/util/evlist.c | 6 +--
tools/perf/util/evlist.h | 3 +-
tools/perf/util/mmap.c | 4 +-
tools/perf/util/mmap.h | 3 +-
7 files changed, 71 insertions(+), 12 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 8f0c2be34848..9fa33ce9bc00 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -459,6 +459,19 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo
node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
cpu - thread affinity mask is set to cpu of the processed mmap buffer
+-f::
+--mmap-flush=n::
+Specify minimal number of bytes that is extracted from mmap data pages and stored
+into a trace. Maximal allowed value is a quarter of the size of mmaped data pages.
+The default option value is 1 what means that every time trace writing thread finds
+some new data in the mmaped buffer the data is extracted, possibly compressed (-z)
+and written to a trace. Larger data chunks are compressed more effectively in
+comparison to smaller chunks so extraction of larger chunks from the mmap data pages
+is preferable from perspective of trace size reduction. Also at some cases
+executing less trace write syscalls with bigger data size can take shorter than
+executing more trace write syscalls with smaller data size thus lowering runtime
+profiling overhead.
+
--all-kernel::
Configure all used events to run in kernel space.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f3f7f3100336..9b02a68e8c23 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -334,6 +334,29 @@ static int record__aio_enabled(struct record *rec)
return rec->opts.nr_cblocks > 0;
}
+#define MMAP_FLUSH_DEFAULT 1
+static int record__mmap_flush_parse(const struct option *opt,
+ const char *str,
+ int unset)
+{
+ int mmap_len;
+ struct record_opts *opts = (struct record_opts *)opt->value;
+
+ if (unset)
+ return 0;
+
+ if (str)
+ opts->mmap_flush = strtol(str, NULL, 0);
+ if (!opts->mmap_flush)
+ opts->mmap_flush = MMAP_FLUSH_DEFAULT;
+
+ mmap_len = perf_evlist__mmap_size(opts->mmap_pages);
+ if (opts->mmap_flush > mmap_len / 4)
+ opts->mmap_flush = mmap_len / 4;
+
+ return 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -543,7 +566,8 @@ static int record__mmap_evlist(struct record *rec,
if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode,
- opts->nr_cblocks, opts->affinity) < 0) {
+ opts->nr_cblocks, opts->affinity,
+ opts->mmap_flush) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -733,7 +757,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
}
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool overwrite)
+ bool overwrite, bool sync)
{
u64 bytes_written = rec->bytes_written;
int i;
@@ -756,12 +780,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
off = record__aio_get_pos(trace_fd);
for (i = 0; i < evlist->nr_mmaps; i++) {
+ u64 flush = MMAP_FLUSH_DEFAULT;
struct perf_mmap *map = &maps[i];
if (map->base) {
record__adjust_affinity(rec, map);
+ if (sync) {
+ flush = map->flush;
+ map->flush = MMAP_FLUSH_DEFAULT;
+ }
if (!record__aio_enabled(rec)) {
if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ if (sync)
+ map->flush = flush;
rc = -1;
goto out;
}
@@ -774,10 +805,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
idx = record__aio_sync(map, false);
if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
record__aio_set_pos(trace_fd, off);
+ if (sync)
+ map->flush = flush;
rc = -1;
goto out;
}
}
+ if (sync)
+ map->flush = flush;
}
if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
@@ -803,15 +838,15 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
return rc;
}
-static int record__mmap_read_all(struct record *rec)
+static int record__mmap_read_all(struct record *rec, bool sync)
{
int err;
- err = record__mmap_read_evlist(rec, rec->evlist, false);
+ err = record__mmap_read_evlist(rec, rec->evlist, false, sync);
if (err)
return err;
- return record__mmap_read_evlist(rec, rec->evlist, true);
+ return record__mmap_read_evlist(rec, rec->evlist, true, sync);
}
static void record__init_features(struct record *rec)
@@ -1310,7 +1345,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (trigger_is_hit(&switch_output_trigger) || done || draining)
perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
- if (record__mmap_read_all(rec) < 0) {
+ if (record__mmap_read_all(rec, false) < 0) {
trigger_error(&auxtrace_snapshot_trigger);
trigger_error(&switch_output_trigger);
err = -1;
@@ -1411,6 +1446,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
if (forks) {
@@ -1813,6 +1849,7 @@ static struct record record = {
.uses_mmap = true,
.default_per_cpu = true,
},
+ .mmap_flush = MMAP_FLUSH_DEFAULT,
},
.tool = {
.sample = process_sample_event,
@@ -1879,6 +1916,9 @@ static struct option __record_options[] = {
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
"number of mmap data pages and AUX area tracing mmap pages",
record__parse_mmap_pages),
+ OPT_CALLBACK('f', "mmap-flush", &record.opts, "bytes",
+ "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
+ record__mmap_flush_parse),
OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
@@ -2182,6 +2222,7 @@ int cmd_record(int argc, const char **argv)
pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
+ pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
err = __cmd_record(&record, argc, argv);
out:
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index b120e547ddc7..7886cc9771cf 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -85,6 +85,7 @@ struct record_opts {
u64 clockid_res_ns;
int nr_cblocks;
int affinity;
+ int mmap_flush;
};
enum perf_affinity {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 08cedb643ea6..937039faac59 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite, int nr_cblocks, int affinity)
+ bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
+ struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 744906dd4887..edf18811e39f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -165,7 +165,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite, int nr_cblocks, int affinity);
+ bool auxtrace_overwrite, int nr_cblocks,
+ int affinity, int flush);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdc7740fc181..ef3d79b2c90b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -440,6 +440,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
perf_mmap__setup_affinity_mask(map, mp);
+ map->flush = mp->flush;
+
if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
&mp->auxtrace_mp, map->base, fd))
return -1;
@@ -492,7 +494,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
md->start = md->overwrite ? head : old;
md->end = md->overwrite ? old : head;
- if (md->start == md->end)
+ if ((md->end - md->start) < md->flush)
return -EAGAIN;
size = md->end - md->start;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e566c19b242b..b82f8c2d55c4 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -39,6 +39,7 @@ struct perf_mmap {
} aio;
#endif
cpu_set_t affinity_mask;
+ u64 flush;
};
/*
@@ -70,7 +71,7 @@ enum bkw_mmap_state {
};
struct mmap_params {
- int prot, mask, nr_cblocks, affinity;
+ int prot, mask, nr_cblocks, affinity, flush;
struct auxtrace_mmap_params auxtrace_mp;
};
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-01 15:41 ` [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option Alexey Budankov
@ 2019-03-05 12:25 ` Jiri Olsa
2019-03-07 8:28 ` Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
2019-03-05 12:26 ` Jiri Olsa
2 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:25 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
SNIP
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index f3f7f3100336..9b02a68e8c23 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -334,6 +334,29 @@ static int record__aio_enabled(struct record *rec)
> return rec->opts.nr_cblocks > 0;
> }
>
> +#define MMAP_FLUSH_DEFAULT 1
> +static int record__mmap_flush_parse(const struct option *opt,
> + const char *str,
> + int unset)
> +{
> + int mmap_len;
> + struct record_opts *opts = (struct record_opts *)opt->value;
> +
> + if (unset)
> + return 0;
> +
> + if (str)
> + opts->mmap_flush = strtol(str, NULL, 0);
> + if (!opts->mmap_flush)
> + opts->mmap_flush = MMAP_FLUSH_DEFAULT;
> +
> + mmap_len = perf_evlist__mmap_size(opts->mmap_pages);
> + if (opts->mmap_flush > mmap_len / 4)
> + opts->mmap_flush = mmap_len / 4;
> +
if there's point in this config option, pelase make it
configurable as the other size options we have with
the B/K/M suffixes
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-07 8:28 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:28 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:25, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index f3f7f3100336..9b02a68e8c23 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -334,6 +334,29 @@ static int record__aio_enabled(struct record *rec)
>> return rec->opts.nr_cblocks > 0;
>> }
>>
>> +#define MMAP_FLUSH_DEFAULT 1
>> +static int record__mmap_flush_parse(const struct option *opt,
>> + const char *str,
>> + int unset)
>> +{
>> + int mmap_len;
>> + struct record_opts *opts = (struct record_opts *)opt->value;
>> +
>> + if (unset)
>> + return 0;
>> +
>> + if (str)
>> + opts->mmap_flush = strtol(str, NULL, 0);
>> + if (!opts->mmap_flush)
>> + opts->mmap_flush = MMAP_FLUSH_DEFAULT;
>> +
>> + mmap_len = perf_evlist__mmap_size(opts->mmap_pages);
>> + if (opts->mmap_flush > mmap_len / 4)
>> + opts->mmap_flush = mmap_len / 4;
>> +
>
> if there's point in this config option, pelase make it
> configurable as the other size options we have with
> the B/K/M suffixes
in v7.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-01 15:41 ` [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-05 12:25 ` Jiri Olsa
2019-03-07 8:28 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
2 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:25 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
SNIP
> static int process_synthesized_event(struct perf_tool *tool,
> union perf_event *event,
> struct perf_sample *sample __maybe_unused,
> @@ -543,7 +566,8 @@ static int record__mmap_evlist(struct record *rec,
> if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
> opts->auxtrace_mmap_pages,
> opts->auxtrace_snapshot_mode,
> - opts->nr_cblocks, opts->affinity) < 0) {
> + opts->nr_cblocks, opts->affinity,
> + opts->mmap_flush) < 0) {
> if (errno == EPERM) {
> pr_err("Permission error mapping pages.\n"
> "Consider increasing "
> @@ -733,7 +757,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
> }
>
> static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
> - bool overwrite)
> + bool overwrite, bool sync)
> {
> u64 bytes_written = rec->bytes_written;
> int i;
> @@ -756,12 +780,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
> off = record__aio_get_pos(trace_fd);
>
> for (i = 0; i < evlist->nr_mmaps; i++) {
> + u64 flush = MMAP_FLUSH_DEFAULT;
hum, why does this need the initial value?
> struct perf_mmap *map = &maps[i];
>
> if (map->base) {
> record__adjust_affinity(rec, map);
> + if (sync) {
> + flush = map->flush;
> + map->flush = MMAP_FLUSH_DEFAULT;
> + }
> if (!record__aio_enabled(rec)) {
> if (perf_mmap__push(map, rec, record__pushfn) != 0) {
> + if (sync)
> + map->flush = flush;
> rc = -1;
> goto out;
> }
> @@ -774,10 +805,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
> idx = record__aio_sync(map, false);
> if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
> record__aio_set_pos(trace_fd, off);
> + if (sync)
> + map->flush = flush;
> rc = -1;
> goto out;
> }
> }
> + if (sync)
> + map->flush = flush;
is there a point to set this back in case of sync == true?
we are going out at this point, right?
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-07 8:28 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:28 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:25, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>> static int process_synthesized_event(struct perf_tool *tool,
>> union perf_event *event,
>> struct perf_sample *sample __maybe_unused,
>> @@ -543,7 +566,8 @@ static int record__mmap_evlist(struct record *rec,
>> if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
>> opts->auxtrace_mmap_pages,
>> opts->auxtrace_snapshot_mode,
>> - opts->nr_cblocks, opts->affinity) < 0) {
>> + opts->nr_cblocks, opts->affinity,
>> + opts->mmap_flush) < 0) {
>> if (errno == EPERM) {
>> pr_err("Permission error mapping pages.\n"
>> "Consider increasing "
>> @@ -733,7 +757,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
>> }
>>
>> static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
>> - bool overwrite)
>> + bool overwrite, bool sync)
>> {
>> u64 bytes_written = rec->bytes_written;
>> int i;
>> @@ -756,12 +780,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
>> off = record__aio_get_pos(trace_fd);
>>
>> for (i = 0; i < evlist->nr_mmaps; i++) {
>> + u64 flush = MMAP_FLUSH_DEFAULT;
>
> hum, why does this need the initial value?
Compiler reported some warning or even error so staying on the safe side.
>
>> struct perf_mmap *map = &maps[i];
>>
>> if (map->base) {
>> record__adjust_affinity(rec, map);
>> + if (sync) {
>> + flush = map->flush;
>> + map->flush = MMAP_FLUSH_DEFAULT;
>> + }
>> if (!record__aio_enabled(rec)) {
>> if (perf_mmap__push(map, rec, record__pushfn) != 0) {
>> + if (sync)
>> + map->flush = flush;
>> rc = -1;
>> goto out;
>> }
>> @@ -774,10 +805,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
>> idx = record__aio_sync(map, false);
>> if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
>> record__aio_set_pos(trace_fd, off);
>> + if (sync)
>> + map->flush = flush;
>> rc = -1;
>> goto out;
>> }
>> }
>> + if (sync)
>> + map->flush = flush;
>
> is there a point to set this back in case of sync == true?
> we are going out at this point, right?
Right, but function should work independently of any external assumption.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-01 15:41 ` [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:42 ` Alexey Budankov
2019-03-07 8:54 ` Alexey Budankov
2 siblings, 2 replies; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
>
> Implemented -f,--mmap-flush option that specifies minimal size of data
> chunk that is extracted from mmaped kernel buffer to store into a trace.
>
> $ tools/perf/perf record -f 1024 -e cycles -- matrix.gcc
> $ tools/perf/perf record --aio -f 1024 -e cycles -- matrix.gcc
>
> Option can serve two purposes the first one is to increase the compression
> ratio of a trace data and the second one is to avoid live-lock-like self
> monitoring in system wide (-a) profiling mode.
>
> The default option value is 1 byte what means that every time trace
> writing thread finds some new data in the mmaped buffer the data is
> extracted, possibly compressed and written to a trace. Larger data chunks
> are compressed more effectively in comparison to smaller chunks so
> extraction of larger chunks from the kernel buffer is preferable from
> perspective of trace size reduction. So the implemented option allows
> specifying minimal data chunk size that is more than 1 byte to influence
> data compression ratio. Also at some cases executing more write syscalls
> with smaller data size can take longer than executing less write syscalls
> with bigger data size due to syscall overhead so extracting bigger data
> chunks specified by the option value could additionally decrease runtime
> overhead.
>
> Profiling in system wide mode with compression (-a -z) can additionally
> induce data into the kernel buffers along with the data from monitored
> processes. If performance data rate and volume from the monitored processes
> is high then trace streaming and compression activity in the tool is also
> high and it can lead to subtle live-lock effect of endless activity when
> compression of single new byte from some of mmaped kernel buffer leads to
> eneration of the next single byte at some mmaped buffer so perf tool trace
> writing thread never stops on polling event file descriptors.
>
> Implemented sync param is the mean to force data move independently from
> the threshold value. Despite the provided flush value from the command
> line, the tool needs capability to drain memory buffers, at least in the
> end of the collection.
>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
> tools/perf/Documentation/perf-record.txt | 13 ++++++
> tools/perf/builtin-record.c | 53 +++++++++++++++++++++---
> tools/perf/perf.h | 1 +
> tools/perf/util/evlist.c | 6 +--
> tools/perf/util/evlist.h | 3 +-
> tools/perf/util/mmap.c | 4 +-
> tools/perf/util/mmap.h | 3 +-
> 7 files changed, 71 insertions(+), 12 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 8f0c2be34848..9fa33ce9bc00 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -459,6 +459,19 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo
> node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
> cpu - thread affinity mask is set to cpu of the processed mmap buffer
>
> +-f::
> +--mmap-flush=n::
> +Specify minimal number of bytes that is extracted from mmap data pages and stored
> +into a trace. Maximal allowed value is a quarter of the size of mmaped data pages.
> +The default option value is 1 what means that every time trace writing thread finds
> +some new data in the mmaped buffer the data is extracted, possibly compressed (-z)
> +and written to a trace. Larger data chunks are compressed more effectively in
> +comparison to smaller chunks so extraction of larger chunks from the mmap data pages
> +is preferable from perspective of trace size reduction. Also at some cases
> +executing less trace write syscalls with bigger data size can take shorter than
> +executing more trace write syscalls with smaller data size thus lowering runtime
> +profiling overhead.
I was wondering if that's the same we would achieve with ring buffer
watermak config on kernel side.. but I guess it does not hurt to
have something on user side.. I'm just not sure it makes sense to have
a config option for that
I'd understand if we configure some sane value when compression is
enabled.. if it makes sense to have this option, I'd allow it only
when compression is enabled
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:42 ` Alexey Budankov
2019-03-07 8:54 ` Alexey Budankov
1 sibling, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:42 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
>>
>> Implemented -f,--mmap-flush option that specifies minimal size of data
>> chunk that is extracted from mmaped kernel buffer to store into a trace.
>>
>> $ tools/perf/perf record -f 1024 -e cycles -- matrix.gcc
>> $ tools/perf/perf record --aio -f 1024 -e cycles -- matrix.gcc
>>
>> Option can serve two purposes the first one is to increase the compression
>> ratio of a trace data and the second one is to avoid live-lock-like self
>> monitoring in system wide (-a) profiling mode.
>>
>> The default option value is 1 byte what means that every time trace
>> writing thread finds some new data in the mmaped buffer the data is
>> extracted, possibly compressed and written to a trace. Larger data chunks
>> are compressed more effectively in comparison to smaller chunks so
>> extraction of larger chunks from the kernel buffer is preferable from
>> perspective of trace size reduction. So the implemented option allows
>> specifying minimal data chunk size that is more than 1 byte to influence
>> data compression ratio. Also at some cases executing more write syscalls
>> with smaller data size can take longer than executing less write syscalls
>> with bigger data size due to syscall overhead so extracting bigger data
>> chunks specified by the option value could additionally decrease runtime
>> overhead.
>>
>> Profiling in system wide mode with compression (-a -z) can additionally
>> induce data into the kernel buffers along with the data from monitored
>> processes. If performance data rate and volume from the monitored processes
>> is high then trace streaming and compression activity in the tool is also
>> high and it can lead to subtle live-lock effect of endless activity when
>> compression of single new byte from some of mmaped kernel buffer leads to
>> eneration of the next single byte at some mmaped buffer so perf tool trace
>> writing thread never stops on polling event file descriptors.
>>
>> Implemented sync param is the mean to force data move independently from
>> the threshold value. Despite the provided flush value from the command
>> line, the tool needs capability to drain memory buffers, at least in the
>> end of the collection.
>>
>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>> ---
>> tools/perf/Documentation/perf-record.txt | 13 ++++++
>> tools/perf/builtin-record.c | 53 +++++++++++++++++++++---
>> tools/perf/perf.h | 1 +
>> tools/perf/util/evlist.c | 6 +--
>> tools/perf/util/evlist.h | 3 +-
>> tools/perf/util/mmap.c | 4 +-
>> tools/perf/util/mmap.h | 3 +-
>> 7 files changed, 71 insertions(+), 12 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
>> index 8f0c2be34848..9fa33ce9bc00 100644
>> --- a/tools/perf/Documentation/perf-record.txt
>> +++ b/tools/perf/Documentation/perf-record.txt
>> @@ -459,6 +459,19 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo
>> node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
>> cpu - thread affinity mask is set to cpu of the processed mmap buffer
>>
>> +-f::
>> +--mmap-flush=n::
>> +Specify minimal number of bytes that is extracted from mmap data pages and stored
>> +into a trace. Maximal allowed value is a quarter of the size of mmaped data pages.
>> +The default option value is 1 what means that every time trace writing thread finds
>> +some new data in the mmaped buffer the data is extracted, possibly compressed (-z)
>> +and written to a trace. Larger data chunks are compressed more effectively in
>> +comparison to smaller chunks so extraction of larger chunks from the mmap data pages
>> +is preferable from perspective of trace size reduction. Also at some cases
>> +executing less trace write syscalls with bigger data size can take shorter than
>> +executing more trace write syscalls with smaller data size thus lowering runtime
>> +profiling overhead.
>
> I was wondering if that's the same we would achieve with ring buffer
> watermak config on kernel side.. but I guess it does not hurt to
> have something on user side.. I'm just not sure it makes sense to have
> a config option for that
Watermark acts similar but not the same as this option. Watermark signals tool thread
that blocks on poll() and it is not the HPC case of high data rate and volume when the
thread mostly checks mmaped buffer content in CPU busy loop almost never blocking on poll().
~Alexey
>
> I'd understand if we configure some sane value when compression is
> enabled.. if it makes sense to have this option, I'd allow it only
> when compression is enabled
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option
2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:42 ` Alexey Budankov
@ 2019-03-07 8:54 ` Alexey Budankov
1 sibling, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:54 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:41:44PM +0300, Alexey Budankov wrote:
>>
>> Implemented -f,--mmap-flush option that specifies minimal size of data
>> chunk that is extracted from mmaped kernel buffer to store into a trace.
>>
>> $ tools/perf/perf record -f 1024 -e cycles -- matrix.gcc
>> $ tools/perf/perf record --aio -f 1024 -e cycles -- matrix.gcc
>>
>> Option can serve two purposes the first one is to increase the compression
>> ratio of a trace data and the second one is to avoid live-lock-like self
>> monitoring in system wide (-a) profiling mode.
>>
>> The default option value is 1 byte what means that every time trace
>> writing thread finds some new data in the mmaped buffer the data is
>> extracted, possibly compressed and written to a trace. Larger data chunks
>> are compressed more effectively in comparison to smaller chunks so
>> extraction of larger chunks from the kernel buffer is preferable from
>> perspective of trace size reduction. So the implemented option allows
>> specifying minimal data chunk size that is more than 1 byte to influence
>> data compression ratio. Also at some cases executing more write syscalls
>> with smaller data size can take longer than executing less write syscalls
>> with bigger data size due to syscall overhead so extracting bigger data
>> chunks specified by the option value could additionally decrease runtime
>> overhead.
>>
>> Profiling in system wide mode with compression (-a -z) can additionally
>> induce data into the kernel buffers along with the data from monitored
>> processes. If performance data rate and volume from the monitored processes
>> is high then trace streaming and compression activity in the tool is also
>> high and it can lead to subtle live-lock effect of endless activity when
>> compression of single new byte from some of mmaped kernel buffer leads to
>> eneration of the next single byte at some mmaped buffer so perf tool trace
>> writing thread never stops on polling event file descriptors.
>>
>> Implemented sync param is the mean to force data move independently from
>> the threshold value. Despite the provided flush value from the command
>> line, the tool needs capability to drain memory buffers, at least in the
>> end of the collection.
>>
>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>> ---
>> tools/perf/Documentation/perf-record.txt | 13 ++++++
>> tools/perf/builtin-record.c | 53 +++++++++++++++++++++---
>> tools/perf/perf.h | 1 +
>> tools/perf/util/evlist.c | 6 +--
>> tools/perf/util/evlist.h | 3 +-
>> tools/perf/util/mmap.c | 4 +-
>> tools/perf/util/mmap.h | 3 +-
>> 7 files changed, 71 insertions(+), 12 deletions(-)
>>
>> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
>> index 8f0c2be34848..9fa33ce9bc00 100644
>> --- a/tools/perf/Documentation/perf-record.txt
>> +++ b/tools/perf/Documentation/perf-record.txt
>> @@ -459,6 +459,19 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo
>> node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
>> cpu - thread affinity mask is set to cpu of the processed mmap buffer
>>
>> +-f::
>> +--mmap-flush=n::
>> +Specify minimal number of bytes that is extracted from mmap data pages and stored
>> +into a trace. Maximal allowed value is a quarter of the size of mmaped data pages.
>> +The default option value is 1 what means that every time trace writing thread finds
>> +some new data in the mmaped buffer the data is extracted, possibly compressed (-z)
>> +and written to a trace. Larger data chunks are compressed more effectively in
>> +comparison to smaller chunks so extraction of larger chunks from the mmap data pages
>> +is preferable from perspective of trace size reduction. Also at some cases
>> +executing less trace write syscalls with bigger data size can take shorter than
>> +executing more trace write syscalls with smaller data size thus lowering runtime
>> +profiling overhead.
>
> I was wondering if that's the same we would achieve with ring buffer
> watermak config on kernel side.. but I guess it does not hurt to
> have something on user side.. I'm just not sure it makes sense to have
> a config option for that
>
> I'd understand if we configure some sane value when compression is
> enabled.. if it makes sense to have this option, I'd allow it only
> when compression is enabled
This threshold already exists in the code with default value of 1 byte. New option
provides configuration capability for the threshold keeping default the same.
The option is the mean to get around the live lock issue in the tool in case of
intensive system wide profiling and the issue is not specific to the compression.
The option gives benefit jointly with compression. The bigger chink is compressed
the higher compression ratio it has, limit exists, of course.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
2019-03-01 15:41 ` [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option Alexey Budankov
@ 2019-03-01 15:43 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
2019-03-01 15:46 ` [PATCH v5 04/10] perf record: implement COMPRESSED event record and its attributes Alexey Budankov
` (7 subsequent siblings)
9 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Define bytes_transferred and bytes_compressed metrics to calculate
comp_ratio=transferred/compressed in the end of the data collection.
bytes_transferred accumulates the amount of bytes that was extracted from
the mmaped kernel buffers for compression. bytes_compressed accumulates
the amount of bytes that was received after applying compression to
move to a storage.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 8 ++++++++
tools/perf/util/env.h | 1 +
tools/perf/util/session.h | 2 ++
3 files changed, 11 insertions(+)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9b02a68e8c23..ab121bc27c6d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1449,6 +1449,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
+ if (!quiet && rec->session->bytes_transferred && rec->session->bytes_compressed) {
+ float ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
+
+ session->header.env.comp_ratio = ratio + 0.5;
+ fprintf(stderr, "[ perf record: Compressed %.3f MB to %.3f MB, ratio is %.3f ]\n",
+ rec->session->bytes_transferred / 1024.0 / 1024.0, rec->session->bytes_compressed / 1024.0 / 1024.0, ratio);
+ }
+
if (forks) {
int exit_status;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index d01b8355f4ca..fb39e9af128f 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -64,6 +64,7 @@ struct perf_env {
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
u64 clockid_res_ns;
+ u32 comp_ratio;
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d96eccd7d27f..0e14884f28b2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -35,6 +35,8 @@ struct perf_session {
struct ordered_events ordered_events;
struct perf_data *data;
struct perf_tool *tool;
+ u64 bytes_transferred;
+ u64 bytes_compressed;
};
struct perf_tool;
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-01 15:43 ` [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics Alexey Budankov
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:29 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:43:28PM +0300, Alexey Budankov wrote:
>
> Define bytes_transferred and bytes_compressed metrics to calculate
> comp_ratio=transferred/compressed in the end of the data collection.
>
> bytes_transferred accumulates the amount of bytes that was extracted from
> the mmaped kernel buffers for compression. bytes_compressed accumulates
> the amount of bytes that was received after applying compression to
> move to a storage.
>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
> tools/perf/builtin-record.c | 8 ++++++++
> tools/perf/util/env.h | 1 +
> tools/perf/util/session.h | 2 ++
> 3 files changed, 11 insertions(+)
>
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 9b02a68e8c23..ab121bc27c6d 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1449,6 +1449,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
> record__mmap_read_all(rec, true);
> record__aio_mmap_read_sync(rec);
>
> + if (!quiet && rec->session->bytes_transferred && rec->session->bytes_compressed) {
> + float ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
> +
> + session->header.env.comp_ratio = ratio + 0.5;
what's the + 0.5 for?
> + fprintf(stderr, "[ perf record: Compressed %.3f MB to %.3f MB, ratio is %.3f ]\n",
> + rec->session->bytes_transferred / 1024.0 / 1024.0, rec->session->bytes_compressed / 1024.0 / 1024.0, ratio);
> + }
> +
could this be below together with the current:
fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
thanks,
jirka
> if (forks) {
> int exit_status;
>
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index d01b8355f4ca..fb39e9af128f 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -64,6 +64,7 @@ struct perf_env {
> struct memory_node *memory_nodes;
> unsigned long long memory_bsize;
> u64 clockid_res_ns;
> + u32 comp_ratio;
> };
>
> extern struct perf_env perf_env;
> diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
> index d96eccd7d27f..0e14884f28b2 100644
> --- a/tools/perf/util/session.h
> +++ b/tools/perf/util/session.h
> @@ -35,6 +35,8 @@ struct perf_session {
> struct ordered_events ordered_events;
> struct perf_data *data;
> struct perf_tool *tool;
> + u64 bytes_transferred;
> + u64 bytes_compressed;
> };
>
> struct perf_tool;
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:29 ` Alexey Budankov
2019-03-11 8:19 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:29 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:43:28PM +0300, Alexey Budankov wrote:
>>
>> Define bytes_transferred and bytes_compressed metrics to calculate
>> comp_ratio=transferred/compressed in the end of the data collection.
>>
>> bytes_transferred accumulates the amount of bytes that was extracted from
>> the mmaped kernel buffers for compression. bytes_compressed accumulates
>> the amount of bytes that was received after applying compression to
>> move to a storage.
>>
>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>> ---
>> tools/perf/builtin-record.c | 8 ++++++++
>> tools/perf/util/env.h | 1 +
>> tools/perf/util/session.h | 2 ++
>> 3 files changed, 11 insertions(+)
>>
>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>> index 9b02a68e8c23..ab121bc27c6d 100644
>> --- a/tools/perf/builtin-record.c
>> +++ b/tools/perf/builtin-record.c
>> @@ -1449,6 +1449,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>> record__mmap_read_all(rec, true);
>> record__aio_mmap_read_sync(rec);
>>
>> + if (!quiet && rec->session->bytes_transferred && rec->session->bytes_compressed) {
>> + float ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
>> +
>> + session->header.env.comp_ratio = ratio + 0.5;
>
> what's the + 0.5 for?
Arithmetic rounding before type cast.
>
>> + fprintf(stderr, "[ perf record: Compressed %.3f MB to %.3f MB, ratio is %.3f ]\n",
>> + rec->session->bytes_transferred / 1024.0 / 1024.0, rec->session->bytes_compressed / 1024.0 / 1024.0, ratio);
>> + }
>> +
>
> could this be below together with the current:
>
> fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
Please provide an example of what you exactly mean.
~Alexey
>
> thanks,
> jirka
>
>> if (forks) {
>> int exit_status;
>>
>> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
>> index d01b8355f4ca..fb39e9af128f 100644
>> --- a/tools/perf/util/env.h
>> +++ b/tools/perf/util/env.h
>> @@ -64,6 +64,7 @@ struct perf_env {
>> struct memory_node *memory_nodes;
>> unsigned long long memory_bsize;
>> u64 clockid_res_ns;
>> + u32 comp_ratio;
>> };
>>
>> extern struct perf_env perf_env;
>> diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
>> index d96eccd7d27f..0e14884f28b2 100644
>> --- a/tools/perf/util/session.h
>> +++ b/tools/perf/util/session.h
>> @@ -35,6 +35,8 @@ struct perf_session {
>> struct ordered_events ordered_events;
>> struct perf_data *data;
>> struct perf_tool *tool;
>> + u64 bytes_transferred;
>> + u64 bytes_compressed;
>> };
>>
>> struct perf_tool;
>> --
>> 2.20.1
>>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-07 8:29 ` Alexey Budankov
@ 2019-03-11 8:19 ` Alexey Budankov
2019-03-11 12:33 ` Jiri Olsa
0 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-11 8:19 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 07.03.2019 11:29, Alexey Budankov wrote:
>
> On 05.03.2019 15:26, Jiri Olsa wrote:
>> On Fri, Mar 01, 2019 at 06:43:28PM +0300, Alexey Budankov wrote:
>>>
>>> Define bytes_transferred and bytes_compressed metrics to calculate
>>> comp_ratio=transferred/compressed in the end of the data collection.
>>>
>>> bytes_transferred accumulates the amount of bytes that was extracted from
>>> the mmaped kernel buffers for compression. bytes_compressed accumulates
>>> the amount of bytes that was received after applying compression to
>>> move to a storage.
>>>
>>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>>> ---
>>> tools/perf/builtin-record.c | 8 ++++++++
>>> tools/perf/util/env.h | 1 +
>>> tools/perf/util/session.h | 2 ++
>>> 3 files changed, 11 insertions(+)
>>>
>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>>> index 9b02a68e8c23..ab121bc27c6d 100644
>>> --- a/tools/perf/builtin-record.c
>>> +++ b/tools/perf/builtin-record.c
>>> @@ -1449,6 +1449,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>>> record__mmap_read_all(rec, true);
>>> record__aio_mmap_read_sync(rec);
>>>
>>> + if (!quiet && rec->session->bytes_transferred && rec->session->bytes_compressed) {
>>> + float ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
>>> +
>>> + session->header.env.comp_ratio = ratio + 0.5;
>>
>> what's the + 0.5 for?
>
> Arithmetic rounding before type cast.
>
>>
>>> + fprintf(stderr, "[ perf record: Compressed %.3f MB to %.3f MB, ratio is %.3f ]\n",
>>> + rec->session->bytes_transferred / 1024.0 / 1024.0, rec->session->bytes_compressed / 1024.0 / 1024.0, ratio);
>>> + }
>>> +
>>
>> could this be below together with the current:
>>
>> fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
>
> Please provide an example of what you exactly mean.
It could look like this:
[ perf record: Woken up 101 times to write data ]
[ perf record: Captured and wrote 5.173 MB perf.data (714077 samples), compressed 27.241 MB to 5.160 MB, ratio is 5.279 ]
Any comments?
~Alexey
>
> ~Alexey
>
>>
>> thanks,
>> jirka
>>
>>> if (forks) {
>>> int exit_status;
>>>
>>> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
>>> index d01b8355f4ca..fb39e9af128f 100644
>>> --- a/tools/perf/util/env.h
>>> +++ b/tools/perf/util/env.h
>>> @@ -64,6 +64,7 @@ struct perf_env {
>>> struct memory_node *memory_nodes;
>>> unsigned long long memory_bsize;
>>> u64 clockid_res_ns;
>>> + u32 comp_ratio;
>>> };
>>>
>>> extern struct perf_env perf_env;
>>> diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
>>> index d96eccd7d27f..0e14884f28b2 100644
>>> --- a/tools/perf/util/session.h
>>> +++ b/tools/perf/util/session.h
>>> @@ -35,6 +35,8 @@ struct perf_session {
>>> struct ordered_events ordered_events;
>>> struct perf_data *data;
>>> struct perf_tool *tool;
>>> + u64 bytes_transferred;
>>> + u64 bytes_compressed;
>>> };
>>>
>>> struct perf_tool;
>>> --
>>> 2.20.1
>>>
>>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-11 8:19 ` Alexey Budankov
@ 2019-03-11 12:33 ` Jiri Olsa
2019-03-11 13:41 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-11 12:33 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Mon, Mar 11, 2019 at 11:19:03AM +0300, Alexey Budankov wrote:
> On 07.03.2019 11:29, Alexey Budankov wrote:
> >
> > On 05.03.2019 15:26, Jiri Olsa wrote:
> >> On Fri, Mar 01, 2019 at 06:43:28PM +0300, Alexey Budankov wrote:
> >>>
> >>> Define bytes_transferred and bytes_compressed metrics to calculate
> >>> comp_ratio=transferred/compressed in the end of the data collection.
> >>>
> >>> bytes_transferred accumulates the amount of bytes that was extracted from
> >>> the mmaped kernel buffers for compression. bytes_compressed accumulates
> >>> the amount of bytes that was received after applying compression to
> >>> move to a storage.
> >>>
> >>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> >>> ---
> >>> tools/perf/builtin-record.c | 8 ++++++++
> >>> tools/perf/util/env.h | 1 +
> >>> tools/perf/util/session.h | 2 ++
> >>> 3 files changed, 11 insertions(+)
> >>>
> >>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> >>> index 9b02a68e8c23..ab121bc27c6d 100644
> >>> --- a/tools/perf/builtin-record.c
> >>> +++ b/tools/perf/builtin-record.c
> >>> @@ -1449,6 +1449,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
> >>> record__mmap_read_all(rec, true);
> >>> record__aio_mmap_read_sync(rec);
> >>>
> >>> + if (!quiet && rec->session->bytes_transferred && rec->session->bytes_compressed) {
> >>> + float ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
> >>> +
> >>> + session->header.env.comp_ratio = ratio + 0.5;
> >>
> >> what's the + 0.5 for?
> >
> > Arithmetic rounding before type cast.
> >
> >>
> >>> + fprintf(stderr, "[ perf record: Compressed %.3f MB to %.3f MB, ratio is %.3f ]\n",
> >>> + rec->session->bytes_transferred / 1024.0 / 1024.0, rec->session->bytes_compressed / 1024.0 / 1024.0, ratio);
> >>> + }
> >>> +
> >>
> >> could this be below together with the current:
> >>
> >> fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
> >
> > Please provide an example of what you exactly mean.
>
> It could look like this:
>
> [ perf record: Woken up 101 times to write data ]
> [ perf record: Captured and wrote 5.173 MB perf.data (714077 samples), compressed 27.241 MB to 5.160 MB, ratio is 5.279 ]
yes, maybe we don't need to repeat the original size twice:
[ perf record: Captured and wrote 5.173 MB perf.data (714077 samples), compressed (original 27.241 MB, ratio 5.279) ]
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-11 12:33 ` Jiri Olsa
@ 2019-03-11 13:41 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-11 13:41 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 11.03.2019 15:33, Jiri Olsa wrote:
> On Mon, Mar 11, 2019 at 11:19:03AM +0300, Alexey Budankov wrote:
>> On 07.03.2019 11:29, Alexey Budankov wrote:
>>>
>>> On 05.03.2019 15:26, Jiri Olsa wrote:
>>>> On Fri, Mar 01, 2019 at 06:43:28PM +0300, Alexey Budankov wrote:
>>>>>
>>>>> Define bytes_transferred and bytes_compressed metrics to calculate
>>>>> comp_ratio=transferred/compressed in the end of the data collection.
>>>>>
>>>>> bytes_transferred accumulates the amount of bytes that was extracted from
>>>>> the mmaped kernel buffers for compression. bytes_compressed accumulates
>>>>> the amount of bytes that was received after applying compression to
>>>>> move to a storage.
>>>>>
>>>>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>>>>> ---
>>>>> tools/perf/builtin-record.c | 8 ++++++++
>>>>> tools/perf/util/env.h | 1 +
>>>>> tools/perf/util/session.h | 2 ++
>>>>> 3 files changed, 11 insertions(+)
>>>>>
>>>>> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
>>>>> index 9b02a68e8c23..ab121bc27c6d 100644
>>>>> --- a/tools/perf/builtin-record.c
>>>>> +++ b/tools/perf/builtin-record.c
>>>>> @@ -1449,6 +1449,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>>>>> record__mmap_read_all(rec, true);
>>>>> record__aio_mmap_read_sync(rec);
>>>>>
>>>>> + if (!quiet && rec->session->bytes_transferred && rec->session->bytes_compressed) {
>>>>> + float ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
>>>>> +
>>>>> + session->header.env.comp_ratio = ratio + 0.5;
>>>>
>>>> what's the + 0.5 for?
>>>
>>> Arithmetic rounding before type cast.
>>>
>>>>
>>>>> + fprintf(stderr, "[ perf record: Compressed %.3f MB to %.3f MB, ratio is %.3f ]\n",
>>>>> + rec->session->bytes_transferred / 1024.0 / 1024.0, rec->session->bytes_compressed / 1024.0 / 1024.0, ratio);
>>>>> + }
>>>>> +
>>>>
>>>> could this be below together with the current:
>>>>
>>>> fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
>>>
>>> Please provide an example of what you exactly mean.
>>
>> It could look like this:
>>
>> [ perf record: Woken up 101 times to write data ]
>> [ perf record: Captured and wrote 5.173 MB perf.data (714077 samples), compressed 27.241 MB to 5.160 MB, ratio is 5.279 ]
>
> yes, maybe we don't need to repeat the original size twice:
>
> [ perf record: Captured and wrote 5.173 MB perf.data (714077 samples), compressed (original 27.241 MB, ratio 5.279) ]
Ok, lets have it like this.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 04/10] perf record: implement COMPRESSED event record and its attributes
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
2019-03-01 15:41 ` [PATCH v5 02/10] perf record: implement -f,--mmap-flush=<threshold> option Alexey Budankov
2019-03-01 15:43 ` [PATCH v5 03/10] perf session: define bytes_transferred and bytes_compressed metrics Alexey Budankov
@ 2019-03-01 15:46 ` Alexey Budankov
2019-03-01 15:52 ` [PATCH v5 06/10] perf util: introduce Zstd based streaming compression API Alexey Budankov
` (6 subsequent siblings)
9 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 15:46 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implemented PERF_RECORD_COMPRESSED event, related data types, header
feature and functions to write, read and print feature attributes
from the trace header section.
comp_mmap_len preserves the size of mmaped kernel buffer that was used
during collection. comp_mmap_len size is used on loading stage as the
size of decomp buffer for decompression of COMPRESSED events content.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 9 ++++++
tools/perf/perf.h | 1 +
tools/perf/util/env.h | 10 +++++++
tools/perf/util/event.c | 1 +
tools/perf/util/event.h | 7 +++++
| 55 ++++++++++++++++++++++++++++++++++++-
| 1 +
7 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ab121bc27c6d..9eae28d77291 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -357,6 +357,11 @@ static int record__mmap_flush_parse(const struct option *opt,
return 0;
}
+static int record__comp_enabled(struct record *rec)
+{
+ return rec->opts.comp_level > 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -872,6 +877,9 @@ static void record__init_features(struct record *rec)
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+ if (!record__comp_enabled(rec))
+ perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
+
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -1210,6 +1218,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = -1;
goto out_child;
}
+ session->header.env.comp_mmap_len = session->evlist->mmap_len;
err = bpf__apply_obj_config();
if (err) {
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 7886cc9771cf..2c6caad45b10 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -86,6 +86,7 @@ struct record_opts {
int nr_cblocks;
int affinity;
int mmap_flush;
+ unsigned int comp_level;
};
enum perf_affinity {
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index fb39e9af128f..7990d63ab764 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -65,6 +65,16 @@ struct perf_env {
unsigned long long memory_bsize;
u64 clockid_res_ns;
u32 comp_ratio;
+ u32 comp_ver;
+ u32 comp_type;
+ u32 comp_level;
+ u32 comp_mmap_len;
+};
+
+enum perf_compress_type {
+ PERF_COMP_NONE = 0,
+ PERF_COMP_ZSTD,
+ PERF_COMP_MAX
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ba7be74fad6e..d1ad6c419724 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -68,6 +68,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
[PERF_RECORD_TIME_CONV] = "TIME_CONV",
[PERF_RECORD_HEADER_FEATURE] = "FEATURE",
+ [PERF_RECORD_COMPRESSED] = "COMPRESSED",
};
static const char *perf_ns__names[] = {
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 36ae7e92dab1..8a13aefe734e 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -254,6 +254,7 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_EVENT_UPDATE = 78,
PERF_RECORD_TIME_CONV = 79,
PERF_RECORD_HEADER_FEATURE = 80,
+ PERF_RECORD_COMPRESSED = 81,
PERF_RECORD_HEADER_MAX
};
@@ -626,6 +627,11 @@ struct feature_event {
char data[];
};
+struct compressed_event {
+ struct perf_event_header header;
+ char data[];
+};
+
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
@@ -659,6 +665,7 @@ union perf_event {
struct feature_event feat;
struct ksymbol_event ksymbol_event;
struct bpf_event bpf_event;
+ struct compressed_event pack;
};
void perf_event__print_totals(void);
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 01b324c275b9..5dadc6e4df76 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1244,6 +1244,30 @@ static int write_mem_topology(struct feat_fd *ff __maybe_unused,
return ret;
}
+static int write_compressed(struct feat_fd *ff __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ int ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio));
+ if (ret)
+ return ret;
+
+ return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1531,6 +1555,13 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_compressed(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
+ ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown",
+ ff->ph->env.comp_level, ff->ph->env.comp_ratio);
+}
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -2373,6 +2404,27 @@ static int process_clockid(struct feat_fd *ff,
return 0;
}
+static int process_compressed(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_type)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_level)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
+ return -1;
+
+ return 0;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2432,7 +2484,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
- FEAT_OPR(CLOCKID, clockid, false)
+ FEAT_OPR(CLOCKID, clockid, false),
+ FEAT_OPR(COMPRESSED, compressed, false)
};
struct header_print_data {
--git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0d553ddca0a3..ee867075dc64 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -39,6 +39,7 @@ enum {
HEADER_SAMPLE_TIME,
HEADER_MEM_TOPOLOGY,
HEADER_CLOCKID,
+ HEADER_COMPRESSED,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v5 06/10] perf util: introduce Zstd based streaming compression API
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (2 preceding siblings ...)
2019-03-01 15:46 ` [PATCH v5 04/10] perf record: implement COMPRESSED event record and its attributes Alexey Budankov
@ 2019-03-01 15:52 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
` (5 subsequent siblings)
9 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 15:52 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implemented functions are based on Zstd streaming compression API.
The functions are used in runtime to compress data that come from
mmaped kernel buffer data and then stored into a trace.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/util/Build | 2 +
tools/perf/util/compress.h | 18 ++++++++
tools/perf/util/zstd.c | 95 ++++++++++++++++++++++++++++++++++++++
3 files changed, 115 insertions(+)
create mode 100644 tools/perf/util/zstd.c
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8dd3102301ea..920ee8bebd83 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,6 +145,8 @@ perf-y += scripting-engines/
perf-$(CONFIG_ZLIB) += zlib.o
perf-$(CONFIG_LZMA) += lzma.o
+perf-y += zstd.o
+
perf-y += demangle-java.o
perf-y += demangle-rust.o
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index 892e92e7e7fc..e0987616db94 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -2,6 +2,11 @@
#ifndef PERF_COMPRESS_H
#define PERF_COMPRESS_H
+#include <stdbool.h>
+#ifdef HAVE_ZSTD_SUPPORT
+#include <zstd.h>
+#endif
+
#ifdef HAVE_ZLIB_SUPPORT
int gzip_decompress_to_file(const char *input, int output_fd);
bool gzip_is_compressed(const char *input);
@@ -12,4 +17,17 @@ int lzma_decompress_to_file(const char *input, int output_fd);
bool lzma_is_compressed(const char *input);
#endif
+struct zstd_data {
+#ifdef HAVE_ZSTD_SUPPORT
+ ZSTD_CStream *cstream;
+#endif
+};
+
+int zstd_init(struct zstd_data *data, int level);
+int zstd_fini(struct zstd_data *data);
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data,
+ void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
+ size_t process_header(void *record, size_t increment));
+
#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000000..e5d44d0f6b5d
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "util/compress.h"
+#include "util/debug.h"
+
+#ifdef HAVE_ZSTD_SUPPORT
+
+int zstd_init(struct zstd_data *data, int level)
+{
+ size_t ret;
+
+ data->cstream = ZSTD_createCStream();
+ if (data->cstream == NULL) {
+ pr_err("Couldn't create compression stream.\n");
+ return -1;
+ }
+
+ ret = ZSTD_initCStream(data->cstream, level);
+ if (ZSTD_isError(ret)) {
+ pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
+ return -1;
+ }
+
+ return 0;
+}
+
+int zstd_fini(struct zstd_data *data)
+{
+ if (data->cstream) {
+ ZSTD_freeCStream(data->cstream);
+ data->cstream = NULL;
+ }
+
+ return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data,
+ void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
+ size_t process_header(void *record, size_t increment))
+{
+ size_t ret, size, compressed = 0;
+ ZSTD_inBuffer input = { src, src_size, 0 };
+ ZSTD_outBuffer output;
+ void *record;
+
+ while (input.pos < input.size) {
+ record = dst;
+ size = process_header(record, 0);
+ compressed += size;
+ dst += size;
+ dst_size -= size;
+ output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
+ max_record_size : dst_size, 0 };
+ ret = ZSTD_compressStream(data->cstream, &output, &input);
+ ZSTD_flushStream(data->cstream, &output);
+ if (ZSTD_isError(ret)) {
+ pr_err("failed to compress %ld bytes: %s\n",
+ (long)src_size, ZSTD_getErrorName(ret));
+ memcpy(dst, src, src_size);
+ return src_size;
+ }
+ size = output.pos;
+ size = process_header(record, size);
+ compressed += size;
+ dst += size;
+ dst_size -= size;
+ }
+
+ return compressed;
+}
+
+#else /* !HAVE_ZSTD_SUPPORT */
+
+int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
+{
+ return 0;
+}
+
+int zstd_fini(struct zstd_data *data __maybe_unused)
+{
+ return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+ void *dst __maybe_unused, size_t dst_size __maybe_unused,
+ void *src __maybe_unused, size_t src_size __maybe_unused,
+ size_t max_record_size __maybe_unused,
+ size_t process_header(void *record, size_t increment) __maybe_unused)
+{
+ return 0;
+}
+
+#endif
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 06/10] perf util: introduce Zstd based streaming compression API
2019-03-01 15:52 ` [PATCH v5 06/10] perf util: introduce Zstd based streaming compression API Alexey Budankov
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:29 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:52:10PM +0300, Alexey Budankov wrote:
>
> Implemented functions are based on Zstd streaming compression API.
> The functions are used in runtime to compress data that come from
> mmaped kernel buffer data and then stored into a trace.
please describe the usage of the api in the changelog
thanks,
jirka
>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
> tools/perf/util/Build | 2 +
> tools/perf/util/compress.h | 18 ++++++++
> tools/perf/util/zstd.c | 95 ++++++++++++++++++++++++++++++++++++++
> 3 files changed, 115 insertions(+)
> create mode 100644 tools/perf/util/zstd.c
>
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 8dd3102301ea..920ee8bebd83 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -145,6 +145,8 @@ perf-y += scripting-engines/
>
> perf-$(CONFIG_ZLIB) += zlib.o
> perf-$(CONFIG_LZMA) += lzma.o
> +perf-y += zstd.o
> +
> perf-y += demangle-java.o
> perf-y += demangle-rust.o
>
> diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
> index 892e92e7e7fc..e0987616db94 100644
> --- a/tools/perf/util/compress.h
> +++ b/tools/perf/util/compress.h
> @@ -2,6 +2,11 @@
> #ifndef PERF_COMPRESS_H
> #define PERF_COMPRESS_H
>
> +#include <stdbool.h>
> +#ifdef HAVE_ZSTD_SUPPORT
> +#include <zstd.h>
> +#endif
> +
> #ifdef HAVE_ZLIB_SUPPORT
> int gzip_decompress_to_file(const char *input, int output_fd);
> bool gzip_is_compressed(const char *input);
> @@ -12,4 +17,17 @@ int lzma_decompress_to_file(const char *input, int output_fd);
> bool lzma_is_compressed(const char *input);
> #endif
>
> +struct zstd_data {
> +#ifdef HAVE_ZSTD_SUPPORT
> + ZSTD_CStream *cstream;
> +#endif
> +};
> +
> +int zstd_init(struct zstd_data *data, int level);
> +int zstd_fini(struct zstd_data *data);
> +
> +size_t zstd_compress_stream_to_records(struct zstd_data *data,
> + void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
> + size_t process_header(void *record, size_t increment));
> +
> #endif /* PERF_COMPRESS_H */
> diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
> new file mode 100644
> index 000000000000..e5d44d0f6b5d
> --- /dev/null
> +++ b/tools/perf/util/zstd.c
> @@ -0,0 +1,95 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <string.h>
> +
> +#include "util/compress.h"
> +#include "util/debug.h"
> +
> +#ifdef HAVE_ZSTD_SUPPORT
> +
> +int zstd_init(struct zstd_data *data, int level)
> +{
> + size_t ret;
> +
> + data->cstream = ZSTD_createCStream();
> + if (data->cstream == NULL) {
> + pr_err("Couldn't create compression stream.\n");
> + return -1;
> + }
> +
> + ret = ZSTD_initCStream(data->cstream, level);
> + if (ZSTD_isError(ret)) {
> + pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +int zstd_fini(struct zstd_data *data)
> +{
> + if (data->cstream) {
> + ZSTD_freeCStream(data->cstream);
> + data->cstream = NULL;
> + }
> +
> + return 0;
> +}
> +
> +size_t zstd_compress_stream_to_records(struct zstd_data *data,
> + void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
> + size_t process_header(void *record, size_t increment))
> +{
> + size_t ret, size, compressed = 0;
> + ZSTD_inBuffer input = { src, src_size, 0 };
> + ZSTD_outBuffer output;
> + void *record;
> +
> + while (input.pos < input.size) {
> + record = dst;
> + size = process_header(record, 0);
> + compressed += size;
> + dst += size;
> + dst_size -= size;
> + output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
> + max_record_size : dst_size, 0 };
> + ret = ZSTD_compressStream(data->cstream, &output, &input);
> + ZSTD_flushStream(data->cstream, &output);
> + if (ZSTD_isError(ret)) {
> + pr_err("failed to compress %ld bytes: %s\n",
> + (long)src_size, ZSTD_getErrorName(ret));
> + memcpy(dst, src, src_size);
> + return src_size;
> + }
> + size = output.pos;
> + size = process_header(record, size);
> + compressed += size;
> + dst += size;
> + dst_size -= size;
> + }
> +
> + return compressed;
> +}
> +
> +#else /* !HAVE_ZSTD_SUPPORT */
> +
> +int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
> +{
> + return 0;
> +}
> +
> +int zstd_fini(struct zstd_data *data __maybe_unused)
> +{
> + return 0;
> +}
> +
> +size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
> + void *dst __maybe_unused, size_t dst_size __maybe_unused,
> + void *src __maybe_unused, size_t src_size __maybe_unused,
> + size_t max_record_size __maybe_unused,
> + size_t process_header(void *record, size_t increment) __maybe_unused)
> +{
> + return 0;
> +}
> +
> +#endif
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 06/10] perf util: introduce Zstd based streaming compression API
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:29 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:29 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:52:10PM +0300, Alexey Budankov wrote:
>>
>> Implemented functions are based on Zstd streaming compression API.
>> The functions are used in runtime to compress data that come from
>> mmaped kernel buffer data and then stored into a trace.
>
> please describe the usage of the api in the changelog
in v7.
zstd_init(), zstd_fini() are used for initialization and
finalization of API usage to allocate and deallocate zstd objects.
zstd_compress_stream_to_records() is used to convert parts of mmaped
kernel buffer into an array of PERF_EVENT_COMPRESSED records.
~Alexey
>
> thanks,
> jirka
>
>>
>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>> ---
>> tools/perf/util/Build | 2 +
>> tools/perf/util/compress.h | 18 ++++++++
>> tools/perf/util/zstd.c | 95 ++++++++++++++++++++++++++++++++++++++
>> 3 files changed, 115 insertions(+)
>> create mode 100644 tools/perf/util/zstd.c
>>
>> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
>> index 8dd3102301ea..920ee8bebd83 100644
>> --- a/tools/perf/util/Build
>> +++ b/tools/perf/util/Build
>> @@ -145,6 +145,8 @@ perf-y += scripting-engines/
>>
>> perf-$(CONFIG_ZLIB) += zlib.o
>> perf-$(CONFIG_LZMA) += lzma.o
>> +perf-y += zstd.o
>> +
>> perf-y += demangle-java.o
>> perf-y += demangle-rust.o
>>
>> diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
>> index 892e92e7e7fc..e0987616db94 100644
>> --- a/tools/perf/util/compress.h
>> +++ b/tools/perf/util/compress.h
>> @@ -2,6 +2,11 @@
>> #ifndef PERF_COMPRESS_H
>> #define PERF_COMPRESS_H
>>
>> +#include <stdbool.h>
>> +#ifdef HAVE_ZSTD_SUPPORT
>> +#include <zstd.h>
>> +#endif
>> +
>> #ifdef HAVE_ZLIB_SUPPORT
>> int gzip_decompress_to_file(const char *input, int output_fd);
>> bool gzip_is_compressed(const char *input);
>> @@ -12,4 +17,17 @@ int lzma_decompress_to_file(const char *input, int output_fd);
>> bool lzma_is_compressed(const char *input);
>> #endif
>>
>> +struct zstd_data {
>> +#ifdef HAVE_ZSTD_SUPPORT
>> + ZSTD_CStream *cstream;
>> +#endif
>> +};
>> +
>> +int zstd_init(struct zstd_data *data, int level);
>> +int zstd_fini(struct zstd_data *data);
>> +
>> +size_t zstd_compress_stream_to_records(struct zstd_data *data,
>> + void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
>> + size_t process_header(void *record, size_t increment));
>> +
>> #endif /* PERF_COMPRESS_H */
>> diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
>> new file mode 100644
>> index 000000000000..e5d44d0f6b5d
>> --- /dev/null
>> +++ b/tools/perf/util/zstd.c
>> @@ -0,0 +1,95 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#include <string.h>
>> +
>> +#include "util/compress.h"
>> +#include "util/debug.h"
>> +
>> +#ifdef HAVE_ZSTD_SUPPORT
>> +
>> +int zstd_init(struct zstd_data *data, int level)
>> +{
>> + size_t ret;
>> +
>> + data->cstream = ZSTD_createCStream();
>> + if (data->cstream == NULL) {
>> + pr_err("Couldn't create compression stream.\n");
>> + return -1;
>> + }
>> +
>> + ret = ZSTD_initCStream(data->cstream, level);
>> + if (ZSTD_isError(ret)) {
>> + pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
>> + return -1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +int zstd_fini(struct zstd_data *data)
>> +{
>> + if (data->cstream) {
>> + ZSTD_freeCStream(data->cstream);
>> + data->cstream = NULL;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +size_t zstd_compress_stream_to_records(struct zstd_data *data,
>> + void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
>> + size_t process_header(void *record, size_t increment))
>> +{
>> + size_t ret, size, compressed = 0;
>> + ZSTD_inBuffer input = { src, src_size, 0 };
>> + ZSTD_outBuffer output;
>> + void *record;
>> +
>> + while (input.pos < input.size) {
>> + record = dst;
>> + size = process_header(record, 0);
>> + compressed += size;
>> + dst += size;
>> + dst_size -= size;
>> + output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
>> + max_record_size : dst_size, 0 };
>> + ret = ZSTD_compressStream(data->cstream, &output, &input);
>> + ZSTD_flushStream(data->cstream, &output);
>> + if (ZSTD_isError(ret)) {
>> + pr_err("failed to compress %ld bytes: %s\n",
>> + (long)src_size, ZSTD_getErrorName(ret));
>> + memcpy(dst, src, src_size);
>> + return src_size;
>> + }
>> + size = output.pos;
>> + size = process_header(record, size);
>> + compressed += size;
>> + dst += size;
>> + dst_size -= size;
>> + }
>> +
>> + return compressed;
>> +}
>> +
>> +#else /* !HAVE_ZSTD_SUPPORT */
>> +
>> +int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
>> +{
>> + return 0;
>> +}
>> +
>> +int zstd_fini(struct zstd_data *data __maybe_unused)
>> +{
>> + return 0;
>> +}
>> +
>> +size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
>> + void *dst __maybe_unused, size_t dst_size __maybe_unused,
>> + void *src __maybe_unused, size_t src_size __maybe_unused,
>> + size_t max_record_size __maybe_unused,
>> + size_t process_header(void *record, size_t increment) __maybe_unused)
>> +{
>> + return 0;
>> +}
>> +
>> +#endif
>> --
>> 2.20.1
>>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (3 preceding siblings ...)
2019-03-01 15:52 ` [PATCH v5 06/10] perf util: introduce Zstd based streaming compression API Alexey Budankov
@ 2019-03-01 15:58 ` Alexey Budankov
2019-03-05 0:01 ` Andi Kleen
` (5 more replies)
2019-03-01 16:06 ` [PATCH v5 08/10] perf report: implement record trace decompression Alexey Budankov
` (4 subsequent siblings)
9 siblings, 6 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 15:58 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implemented -z,--compression_level=n option that enables compression
of mmaped kernel data buffers content in runtime during perf record
sampling collection.
Compression is implemented using the functions from zstd.c. As the
memory to operate on the compression employs mmap->data buffer in case
of serial trace writing and mmap AIO buffers in case of AIO trace
writing. If Zstd streaming compression API fails for some reason the
data to be compressed are just copied into the memory buffers using
memcpy().
Compressed trace frame consists of an array of PERF_RECORD_COMPRESSED
records. Each element of the array is not longer that 64KiB because of
u16 size limitation and comprised of perf_event_header followed by the
compressed chunk that is decompressed on the loading stage. --mmap-flush
option value can be used to avoid compression of every single byte of
data and possibly increase compression ratio.
Compression overhead has been measured for serial and AIO trace writing
when profiling matrix multiplication workload:
-------------------------------------------------------------
| SERIAL | AIO-1 |
-----------------------------------------------------------------
|-z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) |
|----------------------------------------------------------------
| 0 | 1,00 | 1,000 179,424 | 1,00 | 1,000 187,527 |
| 1 | 1,04 | 8,427 181,148 | 1,01 | 8,474 188,562 |
| 2 | 1,07 | 8,055 186,953 | 1,03 | 7,912 191,773 |
| 3 | 1,04 | 8,283 181,908 | 1,03 | 8,220 191,078 |
| 5 | 1,09 | 8,101 187,705 | 1,05 | 7,780 190,065 |
| 8 | 1,05 | 9,217 179,191 | 1,12 | 6,111 193,024 |
-----------------------------------------------------------------
OVH = (Execution time with -z N) / (Execution time with -z 0)
ratio - compression ratio
size - number of bytes that was compressed
size ~= trace size x ratio
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/Documentation/perf-record.txt | 5 ++
tools/perf/builtin-record.c | 85 ++++++++++++++++++++----
tools/perf/util/mmap.c | 31 ++++++---
tools/perf/util/mmap.h | 13 ++--
tools/perf/util/session.h | 2 +
5 files changed, 110 insertions(+), 26 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9fa33ce9bc00..872c20917df7 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -472,6 +472,11 @@ executing less trace write syscalls with bigger data sets can take shorter than
executing more trace write syscalls with smaller data sets thus lowering runtime
profiling overhead.
+-z::
+--compression-level=n::
+Produce compressed trace using specified compression level n (no compression: 0 - default,
+fastest compression: 1, smallest trace: 22)
+
--all-kernel::
Configure all used events to run in kernel space.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 954141c491b0..26f07b880a0a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -237,7 +237,7 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all)
} while (1);
}
-static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+static int record__aio_pushfn(void *to, void *bf, size_t size, off_t off, struct aiocb *cblock)
{
struct record *rec = to;
int ret, trace_fd = rec->session->data->file.fd;
@@ -264,13 +264,15 @@ static void record__aio_set_pos(int trace_fd, off_t pos)
lseek(trace_fd, pos, SEEK_SET);
}
+static int record__aio_enabled(struct record *rec);
+
static void record__aio_mmap_read_sync(struct record *rec)
{
int i;
struct perf_evlist *evlist = rec->evlist;
struct perf_mmap *maps = evlist->mmap;
- if (!rec->opts.nr_cblocks)
+ if (!record__aio_enabled(rec))
return;
for (i = 0; i < evlist->nr_mmaps; i++) {
@@ -292,13 +294,17 @@ static int record__aio_parse(const struct option *opt,
if (unset) {
opts->nr_cblocks = 0;
- } else {
- if (str)
- opts->nr_cblocks = strtol(str, NULL, 0);
- if (!opts->nr_cblocks)
- opts->nr_cblocks = nr_cblocks_default;
+ return 0;
}
+ if (str)
+ opts->nr_cblocks = strtol(str, NULL, 0);
+ if (!opts->nr_cblocks)
+ opts->nr_cblocks = nr_cblocks_default;
+
+ if (opts->nr_cblocks > nr_cblocks_max)
+ opts->nr_cblocks = nr_cblocks_max;
+
return 0;
}
#else /* HAVE_AIO_SUPPORT */
@@ -309,8 +315,9 @@ static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all _
return -1;
}
-static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
- void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+static int record__aio_pushfn(void *to __maybe_unused, void *bf __maybe_unused,
+ size_t size __maybe_unused, off_t off __maybe_unused,
+ struct aiocb *cblock __maybe_unused)
{
return -1;
}
@@ -761,6 +768,40 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
}
}
+static size_t record__process_comp_header(void *record, size_t increment)
+{
+ struct compressed_event *event = record;
+ size_t size = sizeof(struct compressed_event);
+
+ if (increment) {
+ event->header.size += increment;
+ return increment;
+ } else {
+ event->header.type = PERF_RECORD_COMPRESSED;
+ event->header.size = size;
+ return size;
+ }
+}
+
+static size_t record__zstd_compress(void *data, void *dst, size_t dst_size,
+ void *src, size_t src_size)
+{
+ size_t compressed;
+ struct perf_session *session = data;
+ /* maximum size of record data size (2^16 - 1 - header) */
+ const size_t max_record_size = (1 << 8 * sizeof(u16)) -
+ 1 - sizeof(struct compressed_event);
+
+ compressed = zstd_compress_stream_to_records(&(session->zstd_data),
+ dst, dst_size, src, src_size, max_record_size,
+ record__process_comp_header);
+
+ session->bytes_transferred += src_size;
+ session->bytes_compressed += compressed;
+
+ return compressed;
+}
+
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
bool overwrite, bool sync)
{
@@ -770,6 +811,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
struct perf_mmap *maps;
int trace_fd = rec->data.file.fd;
off_t off;
+ struct perf_session *session = rec->session;
+ perf_mmap__compress_fn_t compress_fn;
if (!evlist)
return 0;
@@ -781,6 +824,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
return 0;
+ compress_fn = record__comp_enabled(rec) ? record__zstd_compress : NULL;
+
if (record__aio_enabled(rec))
off = record__aio_get_pos(trace_fd);
@@ -795,7 +840,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
map->flush = MMAP_FLUSH_DEFAULT;
}
if (!record__aio_enabled(rec)) {
- if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(map, rec, record__pushfn,
+ compress_fn, session) != 0) {
if (sync)
map->flush = flush;
rc = -1;
@@ -808,7 +854,8 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
* becomes available after previous aio write request.
*/
idx = record__aio_sync(map, false);
- if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
+ if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off,
+ compress_fn, session) != 0) {
record__aio_set_pos(trace_fd, off);
if (sync)
map->flush = flush;
@@ -1189,6 +1236,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
fd = perf_data__fd(data);
rec->session = session;
+ if (zstd_init(&(session->zstd_data), rec->opts.comp_level) < 0) {
+ pr_err("Compression initialization failed.\n");
+ return -1;
+ }
+
+ session->header.env.comp_type = PERF_COMP_ZSTD;
+ session->header.env.comp_level = rec->opts.comp_level;
+
record__init_features(rec);
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@@ -1518,6 +1573,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
out_delete_session:
+ zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return status;
}
@@ -2038,6 +2094,10 @@ static struct option __record_options[] = {
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
+#ifdef HAVE_ZSTD_SUPPORT
+ OPT_UINTEGER('z', "compression-level", &record.opts.comp_level,
+ "Produce compressed trace (default: 0, fastest: 1, smallest: 22)"),
+#endif
OPT_END()
};
@@ -2235,8 +2295,7 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.nr_cblocks > nr_cblocks_max)
rec->opts.nr_cblocks = nr_cblocks_max;
- if (verbose > 0)
- pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+ pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index d85e73fc82e2..724237a253b4 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -291,14 +291,15 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
}
int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
- int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
- off_t *off)
+ int push(void *to, void *buf, size_t size, off_t off, struct aiocb *cblock),
+ off_t *off, perf_mmap__compress_fn_t compress, void *comp_data)
{
u64 head = perf_mmap__read_head(md);
unsigned char *data = md->base + page_size;
unsigned long size, size0 = 0;
void *buf;
int rc = 0;
+ size_t mmap_len = perf_mmap__mmap_len(md);
rc = perf_mmap__read_init(md);
if (rc < 0)
@@ -327,14 +328,20 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
buf = &data[md->start & md->mask];
size = md->mask + 1 - (md->start & md->mask);
md->start += size;
- memcpy(md->aio.data[idx], buf, size);
size0 = size;
+ if (!compress)
+ memcpy(md->aio.data[idx], buf, size);
+ else
+ size0 = compress(comp_data, md->aio.data[idx], mmap_len, buf, size);
}
buf = &data[md->start & md->mask];
size = md->end - md->start;
md->start += size;
- memcpy(md->aio.data[idx] + size0, buf, size);
+ if (!compress)
+ memcpy(md->aio.data[idx] + size0, buf, size);
+ else
+ size = compress(comp_data, md->aio.data[idx] + size0, mmap_len - size0, buf, size);
/*
* Increment md->refcount to guard md->data[idx] buffer
@@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
md->prev = head;
perf_mmap__consume(md);
- rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
+ rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
if (!rc) {
*off += size0 + size;
} else {
@@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
}
int perf_mmap__push(struct perf_mmap *md, void *to,
- int push(struct perf_mmap *map, void *to, void *buf, size_t size))
+ int push(struct perf_mmap *map, void *to, void *buf, size_t size),
+ perf_mmap__compress_fn_t compress, void *comp_data)
{
u64 head = perf_mmap__read_head(md);
unsigned char *data = md->base + page_size;
unsigned long size;
void *buf;
int rc = 0;
+ size_t mmap_len = perf_mmap__mmap_len(md);
rc = perf_mmap__read_init(md);
if (rc < 0)
@@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
buf = &data[md->start & md->mask];
size = md->mask + 1 - (md->start & md->mask);
md->start += size;
-
+ if (compress) {
+ size = compress(comp_data, md->data, mmap_len, buf, size);
+ buf = md->data;
+ }
if (push(md, to, buf, size) < 0) {
rc = -1;
goto out;
@@ -584,7 +596,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
buf = &data[md->start & md->mask];
size = md->end - md->start;
md->start += size;
-
+ if (compress) {
+ size = compress(comp_data, md->data, mmap_len, buf, size);
+ buf = md->data;
+ }
if (push(md, to, buf, size) < 0) {
rc = -1;
goto out;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index a02427d609c0..2df3882c4b83 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -99,16 +99,19 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+typedef size_t (*perf_mmap__compress_fn_t)(void *data, void *dst, size_t dst_size,
+ void *src, size_t src_size);
int perf_mmap__push(struct perf_mmap *md, void *to,
- int push(struct perf_mmap *map, void *to, void *buf, size_t size));
+ int push(struct perf_mmap *map, void *to, void *buf, size_t size),
+ perf_mmap__compress_fn_t compress, void *compress_data);
#ifdef HAVE_AIO_SUPPORT
int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
- int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
- off_t *off);
+ int push(void *to, void *buf, size_t size, off_t off, struct aiocb *cblock),
+ off_t *off, perf_mmap__compress_fn_t compress, void *compress_data);
#else
static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
- int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
- off_t *off __maybe_unused)
+ int push(void *to, void *buf, size_t size, off_t off, struct aiocb *cblock) __maybe_unused,
+ off_t *off __maybe_unused, perf_mmap__compress_fn_t compress __maybe_unused, void *compress_data __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 0e14884f28b2..6c984c895924 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -8,6 +8,7 @@
#include "machine.h"
#include "data.h"
#include "ordered-events.h"
+#include "util/compress.h"
#include <linux/kernel.h>
#include <linux/rbtree.h>
#include <linux/perf_event.h>
@@ -37,6 +38,7 @@ struct perf_session {
struct perf_tool *tool;
u64 bytes_transferred;
u64 bytes_compressed;
+ struct zstd_data zstd_data;
};
struct perf_tool;
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
@ 2019-03-05 0:01 ` Andi Kleen
2019-03-05 9:19 ` Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
` (4 subsequent siblings)
5 siblings, 1 reply; 52+ messages in thread
From: Andi Kleen @ 2019-03-05 0:01 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim,
Alexander Shishkin, Peter Zijlstra, Ingo Molnar, linux-kernel
On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
Could do this as a follow up patch, but at some point the new
records need to be documented in Documentation/perf.data-file-format.txt
-Andi
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-05 0:01 ` Andi Kleen
@ 2019-03-05 9:19 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-05 9:19 UTC (permalink / raw)
To: Andi Kleen
Cc: Arnaldo Carvalho de Melo, Jiri Olsa, Namhyung Kim,
Alexander Shishkin, Peter Zijlstra, Ingo Molnar, linux-kernel
On 05.03.2019 3:01, Andi Kleen wrote:
> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>
> Could do this as a follow up patch, but at some point the new
> records need to be documented in Documentation/perf.data-file-format.txt
Well, let's have it as a part of v6 04/10.
Thanks,
Alexey
>
> -Andi
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
2019-03-05 0:01 ` Andi Kleen
@ 2019-03-05 12:25 ` Jiri Olsa
2019-03-07 8:39 ` Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
` (3 subsequent siblings)
5 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:25 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
SNIP
>
> /*
> * Increment md->refcount to guard md->data[idx] buffer
> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
> md->prev = head;
> perf_mmap__consume(md);
>
> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
> if (!rc) {
> *off += size0 + size;
> } else {
> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
> }
>
> int perf_mmap__push(struct perf_mmap *md, void *to,
> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
> + perf_mmap__compress_fn_t compress, void *comp_data)
> {
> u64 head = perf_mmap__read_head(md);
> unsigned char *data = md->base + page_size;
> unsigned long size;
> void *buf;
> int rc = 0;
> + size_t mmap_len = perf_mmap__mmap_len(md);
>
> rc = perf_mmap__read_init(md);
> if (rc < 0)
> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
> buf = &data[md->start & md->mask];
> size = md->mask + 1 - (md->start & md->mask);
> md->start += size;
> -
> + if (compress) {
> + size = compress(comp_data, md->data, mmap_len, buf, size);
> + buf = md->data;
> + }
> if (push(md, to, buf, size) < 0) {
> rc = -1;
> goto out;
when we discussed the compress callback should be another layer
in perf_mmap__push I was thinking more of the layered/fifo design,
like:
normaly we call:
perf_mmap__push(... push = record__pushfn ...)
-> reads mmap data and calls push(data), which translates as:
record__pushfn(data);
- which stores the data
for compressed it'd be:
perf_mmap__push(... push = compressed_push ...)
-> reads mmap data and calls push(data), which translates as:
compressed_push(data)
-> reads data, compresses them and calls, next push callback in line:
record__pushfn(data)
- which stores the data
there'd need to be the logic for compressed_push to
remember the 'next push' function
but I think this was the original idea behind the
perf_mmap__push -> it gets the data and pushes them for
the next processing.. it should stay as simple as that
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-07 8:39 ` Alexey Budankov
2019-03-07 12:14 ` Jiri Olsa
0 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:39 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:25, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>>
>> /*
>> * Increment md->refcount to guard md->data[idx] buffer
>> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
>> md->prev = head;
>> perf_mmap__consume(md);
>>
>> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
>> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
>> if (!rc) {
>> *off += size0 + size;
>> } else {
>> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
>> }
>>
>> int perf_mmap__push(struct perf_mmap *md, void *to,
>> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
>> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
>> + perf_mmap__compress_fn_t compress, void *comp_data)
>> {
>> u64 head = perf_mmap__read_head(md);
>> unsigned char *data = md->base + page_size;
>> unsigned long size;
>> void *buf;
>> int rc = 0;
>> + size_t mmap_len = perf_mmap__mmap_len(md);
>>
>> rc = perf_mmap__read_init(md);
>> if (rc < 0)
>> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
>> buf = &data[md->start & md->mask];
>> size = md->mask + 1 - (md->start & md->mask);
>> md->start += size;
>> -
>> + if (compress) {
>> + size = compress(comp_data, md->data, mmap_len, buf, size);
>> + buf = md->data;
>> + }
>> if (push(md, to, buf, size) < 0) {
>> rc = -1;
>> goto out;
>
> when we discussed the compress callback should be another layer
> in perf_mmap__push I was thinking more of the layered/fifo design,
> like:
>
> normaly we call:
>
> perf_mmap__push(... push = record__pushfn ...)
> -> reads mmap data and calls push(data), which translates as:
>
> record__pushfn(data);
> - which stores the data
>
>
> for compressed it'd be:
>
> perf_mmap__push(... push = compressed_push ...)
>
> -> reads mmap data and calls push(data), which translates as:
>
> compressed_push(data)
> -> reads data, compresses them and calls, next push callback in line:
>
> record__pushfn(data)
> - which stores the data
>
>
> there'd need to be the logic for compressed_push to
> remember the 'next push' function
That is suboptimal for AIO. Also compression is an independent operation that
could be applied on any of push stages you mean.
>
> but I think this was the original idea behind the
> perf_mmap__push -> it gets the data and pushes them for
> the next processing.. it should stay as simple as that
Agree on keeping simplicity and, at the moment, there is no any push to the next
processing in the code so provided implementation fits as for serial as for AIO
at the same time sticking to simplicity as much as possibly. If you see something
that would fit better please speak up and share.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-07 8:39 ` Alexey Budankov
@ 2019-03-07 12:14 ` Jiri Olsa
2019-03-07 15:26 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-07 12:14 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Thu, Mar 07, 2019 at 11:39:46AM +0300, Alexey Budankov wrote:
>
> On 05.03.2019 15:25, Jiri Olsa wrote:
> > On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
> >
> > SNIP
> >
> >>
> >> /*
> >> * Increment md->refcount to guard md->data[idx] buffer
> >> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
> >> md->prev = head;
> >> perf_mmap__consume(md);
> >>
> >> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
> >> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
> >> if (!rc) {
> >> *off += size0 + size;
> >> } else {
> >> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
> >> }
> >>
> >> int perf_mmap__push(struct perf_mmap *md, void *to,
> >> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
> >> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
> >> + perf_mmap__compress_fn_t compress, void *comp_data)
> >> {
> >> u64 head = perf_mmap__read_head(md);
> >> unsigned char *data = md->base + page_size;
> >> unsigned long size;
> >> void *buf;
> >> int rc = 0;
> >> + size_t mmap_len = perf_mmap__mmap_len(md);
> >>
> >> rc = perf_mmap__read_init(md);
> >> if (rc < 0)
> >> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
> >> buf = &data[md->start & md->mask];
> >> size = md->mask + 1 - (md->start & md->mask);
> >> md->start += size;
> >> -
> >> + if (compress) {
> >> + size = compress(comp_data, md->data, mmap_len, buf, size);
> >> + buf = md->data;
> >> + }
> >> if (push(md, to, buf, size) < 0) {
> >> rc = -1;
> >> goto out;
> >
> > when we discussed the compress callback should be another layer
> > in perf_mmap__push I was thinking more of the layered/fifo design,
> > like:
> >
> > normaly we call:
> >
> > perf_mmap__push(... push = record__pushfn ...)
> > -> reads mmap data and calls push(data), which translates as:
> >
> > record__pushfn(data);
> > - which stores the data
> >
> >
> > for compressed it'd be:
> >
> > perf_mmap__push(... push = compressed_push ...)
> >
> > -> reads mmap data and calls push(data), which translates as:
> >
> > compressed_push(data)
> > -> reads data, compresses them and calls, next push callback in line:
> >
> > record__pushfn(data)
> > - which stores the data
> >
> >
> > there'd need to be the logic for compressed_push to
> > remember the 'next push' function
>
> That is suboptimal for AIO. Also compression is an independent operation that
> could be applied on any of push stages you mean.
not sure what you mean by suboptimal, but I think
that it can still happen in subsequent push callback
>
> >
> > but I think this was the original idea behind the
> > perf_mmap__push -> it gets the data and pushes them for
> > the next processing.. it should stay as simple as that
>
> Agree on keeping simplicity and, at the moment, there is no any push to the next
> processing in the code so provided implementation fits as for serial as for AIO
> at the same time sticking to simplicity as much as possibly. If you see something
> that would fit better please speak up and share.
I have to insist that perf_mmap__push stays untouched
and we do other processing in the push callbacks
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-07 12:14 ` Jiri Olsa
@ 2019-03-07 15:26 ` Alexey Budankov
2019-03-07 15:56 ` Alexey Budankov
2019-03-08 10:46 ` Jiri Olsa
0 siblings, 2 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 15:26 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 07.03.2019 15:14, Jiri Olsa wrote:
> On Thu, Mar 07, 2019 at 11:39:46AM +0300, Alexey Budankov wrote:
>>
>> On 05.03.2019 15:25, Jiri Olsa wrote:
>>> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>>>
>>> SNIP
>>>
>>>>
>>>> /*
>>>> * Increment md->refcount to guard md->data[idx] buffer
>>>> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
>>>> md->prev = head;
>>>> perf_mmap__consume(md);
>>>>
>>>> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
>>>> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
>>>> if (!rc) {
>>>> *off += size0 + size;
>>>> } else {
>>>> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
>>>> }
>>>>
>>>> int perf_mmap__push(struct perf_mmap *md, void *to,
>>>> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
>>>> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
>>>> + perf_mmap__compress_fn_t compress, void *comp_data)
>>>> {
>>>> u64 head = perf_mmap__read_head(md);
>>>> unsigned char *data = md->base + page_size;
>>>> unsigned long size;
>>>> void *buf;
>>>> int rc = 0;
>>>> + size_t mmap_len = perf_mmap__mmap_len(md);
>>>>
>>>> rc = perf_mmap__read_init(md);
>>>> if (rc < 0)
>>>> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
>>>> buf = &data[md->start & md->mask];
>>>> size = md->mask + 1 - (md->start & md->mask);
>>>> md->start += size;
>>>> -
>>>> + if (compress) {
>>>> + size = compress(comp_data, md->data, mmap_len, buf, size);
>>>> + buf = md->data;
>>>> + }
>>>> if (push(md, to, buf, size) < 0) {
>>>> rc = -1;
>>>> goto out;
>>>
>>> when we discussed the compress callback should be another layer
>>> in perf_mmap__push I was thinking more of the layered/fifo design,
>>> like:
>>>
>>> normaly we call:
>>>
>>> perf_mmap__push(... push = record__pushfn ...)
>>> -> reads mmap data and calls push(data), which translates as:
>>>
>>> record__pushfn(data);
>>> - which stores the data
>>>
>>>
>>> for compressed it'd be:
>>>
>>> perf_mmap__push(... push = compressed_push ...)
>>>
>>> -> reads mmap data and calls push(data), which translates as:
>>>
>>> compressed_push(data)
>>> -> reads data, compresses them and calls, next push callback in line:
>>>
>>> record__pushfn(data)
>>> - which stores the data
>>>
>>>
>>> there'd need to be the logic for compressed_push to
>>> remember the 'next push' function
>>
>> That is suboptimal for AIO. Also compression is an independent operation that
>> could be applied on any of push stages you mean.
>
> not sure what you mean by suboptimal, but I think
> that it can still happen in subsequent push callback
>
>>
>>>
>>> but I think this was the original idea behind the
>>> perf_mmap__push -> it gets the data and pushes them for
>>> the next processing.. it should stay as simple as that
>>
>> Agree on keeping simplicity and, at the moment, there is no any push to the next
>> processing in the code so provided implementation fits as for serial as for AIO
>> at the same time sticking to simplicity as much as possibly. If you see something
>> that would fit better please speak up and share.
>
> I have to insist that perf_mmap__push stays untouched
> and we do other processing in the push callbacks
What is about perf_mmap__aio_push()?
Without compression it does
memcpy(), memcpy(), aio_push()
With compression its does
memcpy_with_compression(), memcpy_with_compression(), aio_push()
and deviation that increases amount of copy operations i.e. implementing three or more
is suboptimal in terms of runtime overhead and data loss decrease
Compression for serial streaming can be implemented in push() callback.
AIO case would go with compression over a parameter in aio_push().
So the both trace writing schemas could be optimally extended.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-07 15:26 ` Alexey Budankov
@ 2019-03-07 15:56 ` Alexey Budankov
2019-03-08 10:46 ` Jiri Olsa
1 sibling, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 15:56 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 07.03.2019 18:26, Alexey Budankov wrote:
>
> On 07.03.2019 15:14, Jiri Olsa wrote:
>> On Thu, Mar 07, 2019 at 11:39:46AM +0300, Alexey Budankov wrote:
>>>
>>> On 05.03.2019 15:25, Jiri Olsa wrote:
>>>> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>>>>
>>>> SNIP
>>>>
>>>>>
>>>>> /*
>>>>> * Increment md->refcount to guard md->data[idx] buffer
>>>>> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
>>>>> md->prev = head;
>>>>> perf_mmap__consume(md);
>>>>>
>>>>> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
>>>>> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
>>>>> if (!rc) {
>>>>> *off += size0 + size;
>>>>> } else {
>>>>> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
>>>>> }
>>>>>
>>>>> int perf_mmap__push(struct perf_mmap *md, void *to,
>>>>> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
>>>>> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
>>>>> + perf_mmap__compress_fn_t compress, void *comp_data)
>>>>> {
>>>>> u64 head = perf_mmap__read_head(md);
>>>>> unsigned char *data = md->base + page_size;
>>>>> unsigned long size;
>>>>> void *buf;
>>>>> int rc = 0;
>>>>> + size_t mmap_len = perf_mmap__mmap_len(md);
>>>>>
>>>>> rc = perf_mmap__read_init(md);
>>>>> if (rc < 0)
>>>>> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
>>>>> buf = &data[md->start & md->mask];
>>>>> size = md->mask + 1 - (md->start & md->mask);
>>>>> md->start += size;
>>>>> -
>>>>> + if (compress) {
>>>>> + size = compress(comp_data, md->data, mmap_len, buf, size);
>>>>> + buf = md->data;
>>>>> + }
>>>>> if (push(md, to, buf, size) < 0) {
>>>>> rc = -1;
>>>>> goto out;
>>>>
>>>> when we discussed the compress callback should be another layer
>>>> in perf_mmap__push I was thinking more of the layered/fifo design,
>>>> like:
>>>>
>>>> normaly we call:
>>>>
>>>> perf_mmap__push(... push = record__pushfn ...)
>>>> -> reads mmap data and calls push(data), which translates as:
>>>>
>>>> record__pushfn(data);
>>>> - which stores the data
>>>>
>>>>
>>>> for compressed it'd be:
>>>>
>>>> perf_mmap__push(... push = compressed_push ...)
>>>>
>>>> -> reads mmap data and calls push(data), which translates as:
>>>>
>>>> compressed_push(data)
>>>> -> reads data, compresses them and calls, next push callback in line:
>>>>
>>>> record__pushfn(data)
>>>> - which stores the data
>>>>
>>>>
>>>> there'd need to be the logic for compressed_push to
>>>> remember the 'next push' function
>>>
>>> That is suboptimal for AIO. Also compression is an independent operation that
>>> could be applied on any of push stages you mean.
>>
>> not sure what you mean by suboptimal, but I think
>> that it can still happen in subsequent push callback
>>
>>>
>>>>
>>>> but I think this was the original idea behind the
>>>> perf_mmap__push -> it gets the data and pushes them for
>>>> the next processing.. it should stay as simple as that
>>>
>>> Agree on keeping simplicity and, at the moment, there is no any push to the next
>>> processing in the code so provided implementation fits as for serial as for AIO
>>> at the same time sticking to simplicity as much as possibly. If you see something
>>> that would fit better please speak up and share.
>>
>> I have to insist that perf_mmap__push stays untouched
>> and we do other processing in the push callbacks
>
> What is about perf_mmap__aio_push()?
>
> Without compression it does
> memcpy(), memcpy(), aio_push()
>
> With compression its does
> memcpy_with_compression(), memcpy_with_compression(), aio_push()
>
> and deviation that increases amount of copy operations i.e. implementing three or more
> is suboptimal in terms of runtime overhead and data loss decrease
>
> Compression for serial streaming can be implemented in push() callback.
> AIO case would go with compression over a parameter in aio_push().
I meant compress_fn and comp_data parameters _for_ aio_push().
> So the both trace writing schemas could be optimally extended.
>
> ~Alexey
>
>>
>> jirka
>>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-07 15:26 ` Alexey Budankov
2019-03-07 15:56 ` Alexey Budankov
@ 2019-03-08 10:46 ` Jiri Olsa
2019-03-10 15:55 ` Alexey Budankov
2019-03-10 16:17 ` Alexey Budankov
1 sibling, 2 replies; 52+ messages in thread
From: Jiri Olsa @ 2019-03-08 10:46 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Thu, Mar 07, 2019 at 06:26:47PM +0300, Alexey Budankov wrote:
>
> On 07.03.2019 15:14, Jiri Olsa wrote:
> > On Thu, Mar 07, 2019 at 11:39:46AM +0300, Alexey Budankov wrote:
> >>
> >> On 05.03.2019 15:25, Jiri Olsa wrote:
> >>> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
> >>>
> >>> SNIP
> >>>
> >>>>
> >>>> /*
> >>>> * Increment md->refcount to guard md->data[idx] buffer
> >>>> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
> >>>> md->prev = head;
> >>>> perf_mmap__consume(md);
> >>>>
> >>>> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
> >>>> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
> >>>> if (!rc) {
> >>>> *off += size0 + size;
> >>>> } else {
> >>>> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
> >>>> }
> >>>>
> >>>> int perf_mmap__push(struct perf_mmap *md, void *to,
> >>>> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
> >>>> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
> >>>> + perf_mmap__compress_fn_t compress, void *comp_data)
> >>>> {
> >>>> u64 head = perf_mmap__read_head(md);
> >>>> unsigned char *data = md->base + page_size;
> >>>> unsigned long size;
> >>>> void *buf;
> >>>> int rc = 0;
> >>>> + size_t mmap_len = perf_mmap__mmap_len(md);
> >>>>
> >>>> rc = perf_mmap__read_init(md);
> >>>> if (rc < 0)
> >>>> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
> >>>> buf = &data[md->start & md->mask];
> >>>> size = md->mask + 1 - (md->start & md->mask);
> >>>> md->start += size;
> >>>> -
> >>>> + if (compress) {
> >>>> + size = compress(comp_data, md->data, mmap_len, buf, size);
> >>>> + buf = md->data;
> >>>> + }
> >>>> if (push(md, to, buf, size) < 0) {
> >>>> rc = -1;
> >>>> goto out;
> >>>
> >>> when we discussed the compress callback should be another layer
> >>> in perf_mmap__push I was thinking more of the layered/fifo design,
> >>> like:
> >>>
> >>> normaly we call:
> >>>
> >>> perf_mmap__push(... push = record__pushfn ...)
> >>> -> reads mmap data and calls push(data), which translates as:
> >>>
> >>> record__pushfn(data);
> >>> - which stores the data
> >>>
> >>>
> >>> for compressed it'd be:
> >>>
> >>> perf_mmap__push(... push = compressed_push ...)
> >>>
> >>> -> reads mmap data and calls push(data), which translates as:
> >>>
> >>> compressed_push(data)
> >>> -> reads data, compresses them and calls, next push callback in line:
> >>>
> >>> record__pushfn(data)
> >>> - which stores the data
> >>>
> >>>
> >>> there'd need to be the logic for compressed_push to
> >>> remember the 'next push' function
> >>
> >> That is suboptimal for AIO. Also compression is an independent operation that
> >> could be applied on any of push stages you mean.
> >
> > not sure what you mean by suboptimal, but I think
> > that it can still happen in subsequent push callback
> >
> >>
> >>>
> >>> but I think this was the original idea behind the
> >>> perf_mmap__push -> it gets the data and pushes them for
> >>> the next processing.. it should stay as simple as that
> >>
> >> Agree on keeping simplicity and, at the moment, there is no any push to the next
> >> processing in the code so provided implementation fits as for serial as for AIO
> >> at the same time sticking to simplicity as much as possibly. If you see something
> >> that would fit better please speak up and share.
> >
> > I have to insist that perf_mmap__push stays untouched
> > and we do other processing in the push callbacks
>
> What is about perf_mmap__aio_push()?
>
> Without compression it does
> memcpy(), memcpy(), aio_push()
>
> With compression its does
> memcpy_with_compression(), memcpy_with_compression(), aio_push()
so to be on the same page.. normal processing without compression is:
perf_mmap__push does:
push(mmap buf)
record__pushfn
record__write
write(buf)
perf_mmap__aio_push does:
memcpy(aio buf, mmap buf)
push(aio buf)
record__aio_pushfn
record__aio_write
aio_write(aio buf)
and for compression it would be:
perf_mmap__push does:
push(mmap buf)
compress_push
memcpy(compress buffer, mmapbuf) EXTRA copy
record__pushfn
record__write
write(buf)
perf_mmap__aio_push does:
memcpy(aio buf, mmap buf)
memcpy(compress buffer, mmapbuf) EXTRA copy
push(aio buf)
record__aio_pushfn
record__aio_write
aio_write(aio buf)
side note: that actualy makes me think why do we even have perf_mmap__aio_push,
it looks like we could copy the buf in the callback push function with no harm?
so.. there's one extra memcpy for compression, is it right?
I might miss some part which makes this scheme unusable..
thanks,
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-08 10:46 ` Jiri Olsa
@ 2019-03-10 15:55 ` Alexey Budankov
2019-03-10 16:17 ` Alexey Budankov
1 sibling, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-10 15:55 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 08.03.2019 13:46, Jiri Olsa wrote:
> On Thu, Mar 07, 2019 at 06:26:47PM +0300, Alexey Budankov wrote:
>>
>> On 07.03.2019 15:14, Jiri Olsa wrote:
>>> On Thu, Mar 07, 2019 at 11:39:46AM +0300, Alexey Budankov wrote:
>>>>
>>>> On 05.03.2019 15:25, Jiri Olsa wrote:
>>>>> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>>>>>
>>>>> SNIP
>>>>>
>>>>>>
>>>>>> /*
>>>>>> * Increment md->refcount to guard md->data[idx] buffer
>>>>>> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
>>>>>> md->prev = head;
>>>>>> perf_mmap__consume(md);
>>>>>>
>>>>>> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
>>>>>> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
>>>>>> if (!rc) {
>>>>>> *off += size0 + size;
>>>>>> } else {
>>>>>> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
>>>>>> }
>>>>>>
>>>>>> int perf_mmap__push(struct perf_mmap *md, void *to,
>>>>>> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
>>>>>> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
>>>>>> + perf_mmap__compress_fn_t compress, void *comp_data)
>>>>>> {
>>>>>> u64 head = perf_mmap__read_head(md);
>>>>>> unsigned char *data = md->base + page_size;
>>>>>> unsigned long size;
>>>>>> void *buf;
>>>>>> int rc = 0;
>>>>>> + size_t mmap_len = perf_mmap__mmap_len(md);
>>>>>>
>>>>>> rc = perf_mmap__read_init(md);
>>>>>> if (rc < 0)
>>>>>> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
>>>>>> buf = &data[md->start & md->mask];
>>>>>> size = md->mask + 1 - (md->start & md->mask);
>>>>>> md->start += size;
>>>>>> -
>>>>>> + if (compress) {
>>>>>> + size = compress(comp_data, md->data, mmap_len, buf, size);
>>>>>> + buf = md->data;
>>>>>> + }
>>>>>> if (push(md, to, buf, size) < 0) {
>>>>>> rc = -1;
>>>>>> goto out;
>>>>>
>>>>> when we discussed the compress callback should be another layer
>>>>> in perf_mmap__push I was thinking more of the layered/fifo design,
>>>>> like:
>>>>>
>>>>> normaly we call:
>>>>>
>>>>> perf_mmap__push(... push = record__pushfn ...)
>>>>> -> reads mmap data and calls push(data), which translates as:
>>>>>
>>>>> record__pushfn(data);
>>>>> - which stores the data
>>>>>
>>>>>
>>>>> for compressed it'd be:
>>>>>
>>>>> perf_mmap__push(... push = compressed_push ...)
>>>>>
>>>>> -> reads mmap data and calls push(data), which translates as:
>>>>>
>>>>> compressed_push(data)
>>>>> -> reads data, compresses them and calls, next push callback in line:
>>>>>
>>>>> record__pushfn(data)
>>>>> - which stores the data
>>>>>
>>>>>
>>>>> there'd need to be the logic for compressed_push to
>>>>> remember the 'next push' function
>>>>
>>>> That is suboptimal for AIO. Also compression is an independent operation that
>>>> could be applied on any of push stages you mean.
>>>
>>> not sure what you mean by suboptimal, but I think
>>> that it can still happen in subsequent push callback
>>>
>>>>
>>>>>
>>>>> but I think this was the original idea behind the
>>>>> perf_mmap__push -> it gets the data and pushes them for
>>>>> the next processing.. it should stay as simple as that
>>>>
>>>> Agree on keeping simplicity and, at the moment, there is no any push to the next
>>>> processing in the code so provided implementation fits as for serial as for AIO
>>>> at the same time sticking to simplicity as much as possibly. If you see something
>>>> that would fit better please speak up and share.
>>>
>>> I have to insist that perf_mmap__push stays untouched
>>> and we do other processing in the push callbacks
>>
>> What is about perf_mmap__aio_push()?
>>
>> Without compression it does
>> memcpy(), memcpy(), aio_push()
>>
>> With compression its does
>> memcpy_with_compression(), memcpy_with_compression(), aio_push()
>
> so to be on the same page.. normal processing without compression is:
>
> perf_mmap__push does:
> push(mmap buf)
> record__pushfn
> record__write
> write(buf)
>
> perf_mmap__aio_push does:
> memcpy(aio buf, mmap buf)
> push(aio buf)
> record__aio_pushfn
> record__aio_write
> aio_write(aio buf)
>
>
> and for compression it would be:
>
> perf_mmap__push does:
> push(mmap buf)
> compress_push
> memcpy(compress buffer, mmapbuf) EXTRA copy
> record__pushfn
> record__write
> write(buf)
>
> perf_mmap__aio_push does:
> memcpy(aio buf, mmap buf)
> memcpy(compress buffer, mmapbuf) EXTRA copy
> push(aio buf)
> record__aio_pushfn
> record__aio_write
> aio_write(aio buf)
>
>
> side note: that actualy makes me think why do we even have perf_mmap__aio_push,
> it looks like we could copy the buf in the callback push function with no harm?
>
> so.. there's one extra memcpy for compression, is it right?
It is right for serial trace streaming.
For AIO there is no extra memcpy().
Plain memcpy() is replaced by the compressing one.
~Alexey
> I might miss some part which makes this scheme unusable..
>
> thanks,
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-08 10:46 ` Jiri Olsa
2019-03-10 15:55 ` Alexey Budankov
@ 2019-03-10 16:17 ` Alexey Budankov
2019-03-11 10:56 ` Jiri Olsa
1 sibling, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-10 16:17 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 08.03.2019 13:46, Jiri Olsa wrote:
> On Thu, Mar 07, 2019 at 06:26:47PM +0300, Alexey Budankov wrote:
>>
>> On 07.03.2019 15:14, Jiri Olsa wrote:
>>> On Thu, Mar 07, 2019 at 11:39:46AM +0300, Alexey Budankov wrote:
>>>>
>>>> On 05.03.2019 15:25, Jiri Olsa wrote:
>>>>> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>>>>>
>>>>> SNIP
>>>>>
>>>>>>
>>>>>> /*
>>>>>> * Increment md->refcount to guard md->data[idx] buffer
>>>>>> @@ -350,7 +357,7 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
>>>>>> md->prev = head;
>>>>>> perf_mmap__consume(md);
>>>>>>
>>>>>> - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
>>>>>> + rc = push(to, md->aio.data[idx], size0 + size, *off, &md->aio.cblocks[idx]);
>>>>>> if (!rc) {
>>>>>> *off += size0 + size;
>>>>>> } else {
>>>>>> @@ -556,13 +563,15 @@ int perf_mmap__read_init(struct perf_mmap *map)
>>>>>> }
>>>>>>
>>>>>> int perf_mmap__push(struct perf_mmap *md, void *to,
>>>>>> - int push(struct perf_mmap *map, void *to, void *buf, size_t size))
>>>>>> + int push(struct perf_mmap *map, void *to, void *buf, size_t size),
>>>>>> + perf_mmap__compress_fn_t compress, void *comp_data)
>>>>>> {
>>>>>> u64 head = perf_mmap__read_head(md);
>>>>>> unsigned char *data = md->base + page_size;
>>>>>> unsigned long size;
>>>>>> void *buf;
>>>>>> int rc = 0;
>>>>>> + size_t mmap_len = perf_mmap__mmap_len(md);
>>>>>>
>>>>>> rc = perf_mmap__read_init(md);
>>>>>> if (rc < 0)
>>>>>> @@ -574,7 +583,10 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
>>>>>> buf = &data[md->start & md->mask];
>>>>>> size = md->mask + 1 - (md->start & md->mask);
>>>>>> md->start += size;
>>>>>> -
>>>>>> + if (compress) {
>>>>>> + size = compress(comp_data, md->data, mmap_len, buf, size);
>>>>>> + buf = md->data;
>>>>>> + }
>>>>>> if (push(md, to, buf, size) < 0) {
>>>>>> rc = -1;
>>>>>> goto out;
>>>>>
>>>>> when we discussed the compress callback should be another layer
>>>>> in perf_mmap__push I was thinking more of the layered/fifo design,
>>>>> like:
>>>>>
>>>>> normaly we call:
>>>>>
>>>>> perf_mmap__push(... push = record__pushfn ...)
>>>>> -> reads mmap data and calls push(data), which translates as:
>>>>>
>>>>> record__pushfn(data);
>>>>> - which stores the data
>>>>>
>>>>>
>>>>> for compressed it'd be:
>>>>>
>>>>> perf_mmap__push(... push = compressed_push ...)
>>>>>
>>>>> -> reads mmap data and calls push(data), which translates as:
>>>>>
>>>>> compressed_push(data)
>>>>> -> reads data, compresses them and calls, next push callback in line:
>>>>>
>>>>> record__pushfn(data)
>>>>> - which stores the data
>>>>>
>>>>>
>>>>> there'd need to be the logic for compressed_push to
>>>>> remember the 'next push' function
>>>>
>>>> That is suboptimal for AIO. Also compression is an independent operation that
>>>> could be applied on any of push stages you mean.
>>>
>>> not sure what you mean by suboptimal, but I think
>>> that it can still happen in subsequent push callback
>>>
>>>>
>>>>>
>>>>> but I think this was the original idea behind the
>>>>> perf_mmap__push -> it gets the data and pushes them for
>>>>> the next processing.. it should stay as simple as that
>>>>
>>>> Agree on keeping simplicity and, at the moment, there is no any push to the next
>>>> processing in the code so provided implementation fits as for serial as for AIO
>>>> at the same time sticking to simplicity as much as possibly. If you see something
>>>> that would fit better please speak up and share.
>>>
>>> I have to insist that perf_mmap__push stays untouched
>>> and we do other processing in the push callbacks
>>
>> What is about perf_mmap__aio_push()?
>>
>> Without compression it does
>> memcpy(), memcpy(), aio_push()
>>
>> With compression its does
>> memcpy_with_compression(), memcpy_with_compression(), aio_push()
>
> so to be on the same page.. normal processing without compression is:
>
> perf_mmap__push does:
> push(mmap buf)
> record__pushfn
> record__write
> write(buf)
>
> perf_mmap__aio_push does:
> memcpy(aio buf, mmap buf)
> push(aio buf)
> record__aio_pushfn
> record__aio_write
> aio_write(aio buf)
>
>
> and for compression it would be:
>
> perf_mmap__push does:
> push(mmap buf)
> compress_push
> memcpy(compress buffer, mmapbuf) EXTRA copy
> record__pushfn
> record__write
> write(buf)
>
> perf_mmap__aio_push does:
> memcpy(aio buf, mmap buf)
> memcpy(compress buffer, mmapbuf) EXTRA copy
> push(aio buf)
> record__aio_pushfn
> record__aio_write
> aio_write(aio buf)
>
>
> side note: that actualy makes me think why do we even have perf_mmap__aio_push,
> it looks like we could copy the buf in the callback push function with no harm?
Well, yes, perf_mmap__aio_push() can be avoided and perf_mmap__push() can be used
as for serial as for AIO, moving all the specifics to record code from mmap.c,
like this:
Serial
perf_mmap__push(, record__pushfn)
push(), possibly two times
record__pushfn()
if (-z) zstd_compress(map->base => map->data) <-- compressing memcpy()
record__write(-z ? map->data, map->base)
AIO
record__aio_push()
perf_mmap__push(, record__aio_pushfn())
push(), possibly two times
record__aio_pushfn()
if (-z) zstd_compress(map->base => map->aio.data[i]) <--- compressing memcpy()
else memcpy(map->base => map->aio.data[i]) <--- plain memcpy()
record__aio_write(map->aio.data[i])
So now it looks optimal as from performance and data loss reduction
perspective as from design perspective. What do you think?
~Alexey
>
> so.. there's one extra memcpy for compression, is it right?
> I might miss some part which makes this scheme unusable..
>
> thanks,
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-10 16:17 ` Alexey Budankov
@ 2019-03-11 10:56 ` Jiri Olsa
0 siblings, 0 replies; 52+ messages in thread
From: Jiri Olsa @ 2019-03-11 10:56 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Sun, Mar 10, 2019 at 07:17:08PM +0300, Alexey Budankov wrote:
SNIP
> > so to be on the same page.. normal processing without compression is:
> >
> > perf_mmap__push does:
> > push(mmap buf)
> > record__pushfn
> > record__write
> > write(buf)
> >
> > perf_mmap__aio_push does:
> > memcpy(aio buf, mmap buf)
> > push(aio buf)
> > record__aio_pushfn
> > record__aio_write
> > aio_write(aio buf)
> >
> >
> > and for compression it would be:
> >
> > perf_mmap__push does:
> > push(mmap buf)
> > compress_push
> > memcpy(compress buffer, mmapbuf) EXTRA copy
> > record__pushfn
> > record__write
> > write(buf)
> >
> > perf_mmap__aio_push does:
> > memcpy(aio buf, mmap buf)
> > memcpy(compress buffer, mmapbuf) EXTRA copy
> > push(aio buf)
> > record__aio_pushfn
> > record__aio_write
> > aio_write(aio buf)
> >
> >
> > side note: that actualy makes me think why do we even have perf_mmap__aio_push,
> > it looks like we could copy the buf in the callback push function with no harm?
>
> Well, yes, perf_mmap__aio_push() can be avoided and perf_mmap__push() can be used
> as for serial as for AIO, moving all the specifics to record code from mmap.c,
> like this:
>
> Serial
> perf_mmap__push(, record__pushfn)
> push(), possibly two times
> record__pushfn()
> if (-z) zstd_compress(map->base => map->data) <-- compressing memcpy()
> record__write(-z ? map->data, map->base)
> AIO
> record__aio_push()
> perf_mmap__push(, record__aio_pushfn())
> push(), possibly two times
> record__aio_pushfn()
> if (-z) zstd_compress(map->base => map->aio.data[i]) <--- compressing memcpy()
> else memcpy(map->base => map->aio.data[i]) <--- plain memcpy()
> record__aio_write(map->aio.data[i])
>
> So now it looks optimal as from performance and data loss reduction
> perspective as from design perspective. What do you think?
yes, that looks much better.. so we'd have record__pushfn
for standard (serial) and record__aio_pushfn for AIO
thanks,
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
2019-03-05 0:01 ` Andi Kleen
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-05 12:25 ` Jiri Olsa
2019-03-07 8:26 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
` (2 subsequent siblings)
5 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:25 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
SNIP
> +static size_t record__zstd_compress(void *data, void *dst, size_t dst_size,
> + void *src, size_t src_size)
> +{
> + size_t compressed;
> + struct perf_session *session = data;
> + /* maximum size of record data size (2^16 - 1 - header) */
> + const size_t max_record_size = (1 << 8 * sizeof(u16)) -
I dont follow this calculation, could you just use PERF_SAMPLE_MAX_SIZE?
jirka
> + 1 - sizeof(struct compressed_event);
> +
SNIP
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-07 8:26 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:26 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:25, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>> +static size_t record__zstd_compress(void *data, void *dst, size_t dst_size,
>> + void *src, size_t src_size)
>> +{
>> + size_t compressed;
>> + struct perf_session *session = data;
>> + /* maximum size of record data size (2^16 - 1 - header) */
>> + const size_t max_record_size = (1 << 8 * sizeof(u16)) -
>
> I dont follow this calculation, could you just use PERF_SAMPLE_MAX_SIZE?
in v7.
~Alexey
>
> jirka
>
>> + 1 - sizeof(struct compressed_event);
>> +
>
> SNIP
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
` (2 preceding siblings ...)
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:26 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
2019-03-05 12:26 ` Jiri Olsa
5 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
SNIP
> +static size_t record__process_comp_header(void *record, size_t increment)
> +{
> + struct compressed_event *event = record;
> + size_t size = sizeof(struct compressed_event);
> +
> + if (increment) {
> + event->header.size += increment;
> + return increment;
> + } else {
> + event->header.type = PERF_RECORD_COMPRESSED;
> + event->header.size = size;
> + return size;
> + }
so zstd_compress_stream_to_records calls this in the loop:
while (input.pos < input.size) {
...
size = process_header(record, 0);
...
size = process_header(record, size);
}
the header.size will get overwritten with every new iteration, no?
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:26 ` Alexey Budankov
2019-03-07 11:59 ` Jiri Olsa
0 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:26 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>> +static size_t record__process_comp_header(void *record, size_t increment)
>> +{
>> + struct compressed_event *event = record;
>> + size_t size = sizeof(struct compressed_event);
>> +
>> + if (increment) {
>> + event->header.size += increment;
>> + return increment;
>> + } else {
>> + event->header.type = PERF_RECORD_COMPRESSED;
>> + event->header.size = size;
>> + return size;
>> + }
>
> so zstd_compress_stream_to_records calls this in the loop:
>
> while (input.pos < input.size) {
> ...
> size = process_header(record, 0);
> ...
> size = process_header(record, size);
> }
>
> the header.size will get overwritten with every new iteration, no?
Every new record is headed by PERF_RECORD_COMPRESSED header an updated with
resulted compressed frame size after compression.
process_header(, 0) puts header, then
compression puts compressed frame, then
process_header(, N) updates header.size value (+=).
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-07 8:26 ` Alexey Budankov
@ 2019-03-07 11:59 ` Jiri Olsa
2019-03-07 14:51 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-07 11:59 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Thu, Mar 07, 2019 at 11:26:16AM +0300, Alexey Budankov wrote:
>
> On 05.03.2019 15:26, Jiri Olsa wrote:
> > On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
> >
> > SNIP
> >
> >> +static size_t record__process_comp_header(void *record, size_t increment)
> >> +{
> >> + struct compressed_event *event = record;
> >> + size_t size = sizeof(struct compressed_event);
> >> +
> >> + if (increment) {
> >> + event->header.size += increment;
> >> + return increment;
> >> + } else {
> >> + event->header.type = PERF_RECORD_COMPRESSED;
> >> + event->header.size = size;
> >> + return size;
> >> + }
> >
> > so zstd_compress_stream_to_records calls this in the loop:
> >
> > while (input.pos < input.size) {
> > ...
> > size = process_header(record, 0);
> > ...
> > size = process_header(record, size);
> > }
> >
> > the header.size will get overwritten with every new iteration, no?
>
> Every new record is headed by PERF_RECORD_COMPRESSED header an updated with
> resulted compressed frame size after compression.
>
> process_header(, 0) puts header, then
> compression puts compressed frame, then
> process_header(, N) updates header.size value (+=).
and then process_header(, 0) is called again no?
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-07 11:59 ` Jiri Olsa
@ 2019-03-07 14:51 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 14:51 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 07.03.2019 14:59, Jiri Olsa wrote:
> On Thu, Mar 07, 2019 at 11:26:16AM +0300, Alexey Budankov wrote:
>>
>> On 05.03.2019 15:26, Jiri Olsa wrote:
>>> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>>>
>>> SNIP
>>>
>>>> +static size_t record__process_comp_header(void *record, size_t increment)
>>>> +{
>>>> + struct compressed_event *event = record;
>>>> + size_t size = sizeof(struct compressed_event);
>>>> +
>>>> + if (increment) {
>>>> + event->header.size += increment;
>>>> + return increment;
>>>> + } else {
>>>> + event->header.type = PERF_RECORD_COMPRESSED;
>>>> + event->header.size = size;
>>>> + return size;
>>>> + }
>>>
>>> so zstd_compress_stream_to_records calls this in the loop:
>>>
>>> while (input.pos < input.size) {
>>> ...
>>> size = process_header(record, 0);
>>> ...
>>> size = process_header(record, size);
>>> }
>>>
>>> the header.size will get overwritten with every new iteration, no?
>>
>> Every new record is headed by PERF_RECORD_COMPRESSED header an updated with
>> resulted compressed frame size after compression.
>>
>> process_header(, 0) puts header, then
>> compression puts compressed frame, then
>> process_header(, N) updates header.size value (+=).
>
> and then process_header(, 0) is called again no?
Yes, in the next iteration these three steps repeat for the next record.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
` (3 preceding siblings ...)
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:26 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
5 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
SNIP
>
> +static size_t record__process_comp_header(void *record, size_t increment)
> +{
> + struct compressed_event *event = record;
> + size_t size = sizeof(struct compressed_event);
> +
> + if (increment) {
> + event->header.size += increment;
> + return increment;
> + } else {
> + event->header.type = PERF_RECORD_COMPRESSED;
> + event->header.size = size;
> + return size;
> + }
> +}
> +
> +static size_t record__zstd_compress(void *data, void *dst, size_t dst_size,
we use the <name>__ to mark 'struct' related function,
which supposed to be the first argument.. IMO zstd_compress
will be fine in here
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:26 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:26 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>>
>> +static size_t record__process_comp_header(void *record, size_t increment)
>> +{
>> + struct compressed_event *event = record;
>> + size_t size = sizeof(struct compressed_event);
>> +
>> + if (increment) {
>> + event->header.size += increment;
>> + return increment;
>> + } else {
>> + event->header.type = PERF_RECORD_COMPRESSED;
>> + event->header.size = size;
>> + return size;
>> + }
>> +}
>> +
>> +static size_t record__zstd_compress(void *data, void *dst, size_t dst_size,
>
> we use the <name>__ to mark 'struct' related function,
> which supposed to be the first argument.. IMO zstd_compress
> will be fine in here
in v7.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
` (4 preceding siblings ...)
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:26 ` Alexey Budankov
5 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
SNIP
> +static int record__aio_enabled(struct record *rec);
> +
> static void record__aio_mmap_read_sync(struct record *rec)
> {
> int i;
> struct perf_evlist *evlist = rec->evlist;
> struct perf_mmap *maps = evlist->mmap;
>
> - if (!rec->opts.nr_cblocks)
> + if (!record__aio_enabled(rec))
> return;
>
> for (i = 0; i < evlist->nr_mmaps; i++) {
> @@ -292,13 +294,17 @@ static int record__aio_parse(const struct option *opt,
>
> if (unset) {
> opts->nr_cblocks = 0;
> - } else {
> - if (str)
> - opts->nr_cblocks = strtol(str, NULL, 0);
> - if (!opts->nr_cblocks)
> - opts->nr_cblocks = nr_cblocks_default;
> + return 0;
> }
>
> + if (str)
> + opts->nr_cblocks = strtol(str, NULL, 0);
> + if (!opts->nr_cblocks)
> + opts->nr_cblocks = nr_cblocks_default;
> +
> + if (opts->nr_cblocks > nr_cblocks_max)
> + opts->nr_cblocks = nr_cblocks_max;
> +
> return 0;
does not seem to be related to the patch, if that's the case please separate
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:26 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:26 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 06:58:32PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>> +static int record__aio_enabled(struct record *rec);
>> +
>> static void record__aio_mmap_read_sync(struct record *rec)
>> {
>> int i;
>> struct perf_evlist *evlist = rec->evlist;
>> struct perf_mmap *maps = evlist->mmap;
>>
>> - if (!rec->opts.nr_cblocks)
>> + if (!record__aio_enabled(rec))
>> return;
>>
>> for (i = 0; i < evlist->nr_mmaps; i++) {
>> @@ -292,13 +294,17 @@ static int record__aio_parse(const struct option *opt,
>>
>> if (unset) {
>> opts->nr_cblocks = 0;
>> - } else {
>> - if (str)
>> - opts->nr_cblocks = strtol(str, NULL, 0);
>> - if (!opts->nr_cblocks)
>> - opts->nr_cblocks = nr_cblocks_default;
>> + return 0;
>> }
>>
>> + if (str)
>> + opts->nr_cblocks = strtol(str, NULL, 0);
>> + if (!opts->nr_cblocks)
>> + opts->nr_cblocks = nr_cblocks_default;
>> +
>> + if (opts->nr_cblocks > nr_cblocks_max)
>> + opts->nr_cblocks = nr_cblocks_max;
>> +
>> return 0;
>
> does not seem to be related to the patch, if that's the case please separate
in v7.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 08/10] perf report: implement record trace decompression
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (4 preceding siblings ...)
2019-03-01 15:58 ` [PATCH v5 07/10] perf record: implement -z,--compression_level=n option and compression Alexey Budankov
@ 2019-03-01 16:06 ` Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
2019-03-01 16:07 ` [PATCH v5 09/10] perf inject: enable COMPRESSED records decompression Alexey Budankov
` (3 subsequent siblings)
9 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 16:06 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Trace frames containing PERF_RECORD_COMPRESSED records are
decompressed using functions from zstd.c into a linked list
of mmaped memory regions of mmap_comp_len size (struct decomp).
After decompression of one COMPRESSED record its content is
iterated and fetched for usual processing. The mmaped memory regions
with decompressed events are kept till the tool process termination.
When dumping raw trace (e.g., perf report -D --header) file
offsets of events from compressed records are printed as zero.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-report.c | 5 +-
tools/perf/util/compress.h | 4 ++
tools/perf/util/session.c | 124 +++++++++++++++++++++++++++++++++++-
tools/perf/util/session.h | 10 +++
tools/perf/util/tool.h | 2 +
tools/perf/util/zstd.c | 48 ++++++++++++++
6 files changed, 191 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1532ebde6c4b..a53eee3833f1 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1215,6 +1215,9 @@ int cmd_report(int argc, const char **argv)
if (session == NULL)
return -1;
+ if (zstd_init(&(session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
if (report.queue_size) {
ordered_events__set_alloc_size(&session->ordered_events,
report.queue_size);
@@ -1427,7 +1430,7 @@ int cmd_report(int argc, const char **argv)
error:
zfree(&report.ptime_range);
-
+ zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return ret;
}
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index e0987616db94..4f6672770ebb 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -20,6 +20,7 @@ bool lzma_is_compressed(const char *input);
struct zstd_data {
#ifdef HAVE_ZSTD_SUPPORT
ZSTD_CStream *cstream;
+ ZSTD_DStream *dstream;
#endif
};
@@ -30,4 +31,7 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data,
void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
size_t process_header(void *record, size_t increment));
+size_t zstd_decompress_stream(struct zstd_data *data,
+ void *src, size_t src_size, void *dst, size_t dst_size);
+
#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c764bbc91009..b1bf37c30461 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -29,6 +29,67 @@
#include "stat.h"
#include "arch/common.h"
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+ union perf_event *event, u64 file_offset)
+{
+ void *src;
+ size_t decomp_size, src_size;
+ u64 decomp_last_rem = 0;
+ size_t decomp_len = session->header.env.comp_mmap_len;
+ struct decomp *decomp, *decomp_last = session->decomp_last;
+
+ decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (decomp == MAP_FAILED) {
+ pr_err("Couldn't allocate memory for decompression\n");
+ return -1;
+ }
+
+ decomp->file_pos = file_offset;
+ decomp->head = 0;
+
+ if (decomp_last) {
+ decomp_last_rem = decomp_last->size - decomp_last->head;
+ memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+ decomp->size = decomp_last_rem;
+ }
+
+ src = (void *)event + sizeof(struct compressed_event);
+ src_size = event->pack.header.size - sizeof(struct compressed_event);
+
+ decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
+ &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+ if (!decomp_size) {
+ munmap(decomp, sizeof(struct decomp) + decomp_len);
+ pr_err("Couldn't decompress data\n");
+ return -1;
+ }
+
+ decomp->size += decomp_size;
+
+ if (session->decomp == NULL) {
+ session->decomp = decomp;
+ session->decomp_last = decomp;
+ } else {
+ session->decomp_last->next = decomp;
+ session->decomp_last = decomp;
+ }
+
+ pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+
+ return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+static int perf_session__process_compressed_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ u64 file_offset __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+#endif
+
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool,
@@ -196,12 +257,23 @@ static void perf_session__delete_threads(struct perf_session *session)
void perf_session__delete(struct perf_session *session)
{
+ struct decomp *next, *decomp;
+ size_t decomp_len;
if (session == NULL)
return;
auxtrace__free(session);
auxtrace_index__free(&session->auxtrace_index);
perf_session__destroy_kernel_maps(session);
perf_session__delete_threads(session);
+ next = session->decomp;
+ decomp_len = session->header.env.comp_mmap_len;
+ do {
+ decomp = next;
+ if (decomp == NULL)
+ break;
+ next = decomp->next;
+ munmap(decomp, decomp_len + sizeof(struct decomp));
+ } while (1);
perf_env__exit(&session->header.env);
machines__exit(&session->machines);
if (session->data)
@@ -427,6 +499,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->time_conv = process_event_op2_stub;
if (tool->feature == NULL)
tool->feature = process_event_op2_stub;
+ if (tool->compressed == NULL)
+ tool->compressed = perf_session__process_compressed_event;
}
static void swap_sample_id_all(union perf_event *event, void *data)
@@ -1370,7 +1444,8 @@ static s64 perf_session__process_user_event(struct perf_session *session,
int fd = perf_data__fd(session->data);
int err;
- dump_event(session->evlist, event, file_offset, &sample);
+ if (event->header.type != PERF_RECORD_COMPRESSED)
+ dump_event(session->evlist, event, file_offset, &sample);
/* These events are processed right away */
switch (event->header.type) {
@@ -1423,6 +1498,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
return tool->time_conv(session, event);
case PERF_RECORD_HEADER_FEATURE:
return tool->feature(session, event);
+ case PERF_RECORD_COMPRESSED:
+ err = tool->compressed(session, event, file_offset);
+ if (err)
+ dump_event(session->evlist, event, file_offset, &sample);
+ return 0;
default:
return -EINVAL;
}
@@ -1705,6 +1785,8 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
volatile int session_done;
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
static int __perf_session__process_pipe_events(struct perf_session *session)
{
struct ordered_events *oe = &session->ordered_events;
@@ -1785,6 +1867,10 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
if (skip > 0)
head += skip;
+ err = __perf_session__process_decomp_events(session);
+ if (err)
+ goto out_err;
+
if (!session_done())
goto more;
done:
@@ -1833,6 +1919,38 @@ fetch_mmaped_event(struct perf_session *session,
return event;
}
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+ s64 skip;
+ u64 size, file_pos = 0;
+ union perf_event *event;
+ struct decomp *decomp = session->decomp_last;
+
+ if (!decomp)
+ return 0;
+
+ while (decomp->head < decomp->size && !session_done()) {
+ event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);
+ if (!event)
+ break;
+
+ size = event->header.size;
+ if (size < sizeof(struct perf_event_header) ||
+ (skip = perf_session__process_event(session, event, file_pos)) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ decomp->file_pos + decomp->head, event->header.size, event->header.type);
+ return -EINVAL;
+ }
+
+ if (skip)
+ size += skip;
+
+ decomp->head += size;
+ }
+
+ return 0;
+}
+
/*
* On 64bit we can mmap the data file in one go. No need for tiny mmap
* slices. On 32bit we use 32MB.
@@ -1933,6 +2051,10 @@ reader__process_events(struct reader *rd, struct perf_session *session,
head += size;
file_pos += size;
+ err = __perf_session__process_decomp_events(session);
+ if (err)
+ goto out;
+
ui_progress__update(prog, size);
if (session_done())
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 6c984c895924..dd8920b745bc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -39,6 +39,16 @@ struct perf_session {
u64 bytes_transferred;
u64 bytes_compressed;
struct zstd_data zstd_data;
+ struct decomp *decomp;
+ struct decomp *decomp_last;
+};
+
+struct decomp {
+ struct decomp *next;
+ u64 file_pos;
+ u64 head;
+ size_t size;
+ char data[];
};
struct perf_tool;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 250391672f9f..9096a6e3de59 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
+typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data);
typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
@@ -72,6 +73,7 @@ struct perf_tool {
stat,
stat_round,
feature;
+ event_op4 compressed;
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index e5d44d0f6b5d..ca43538b8772 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -11,6 +11,21 @@ int zstd_init(struct zstd_data *data, int level)
{
size_t ret;
+ data->dstream = ZSTD_createDStream();
+ if (data->dstream == NULL) {
+ pr_err("Couldn't create decompression stream.\n");
+ return -1;
+ }
+
+ ret = ZSTD_initDStream(data->dstream);
+ if (ZSTD_isError(ret)) {
+ pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
+ return -1;
+ }
+
+ if (!level)
+ return 0;
+
data->cstream = ZSTD_createCStream();
if (data->cstream == NULL) {
pr_err("Couldn't create compression stream.\n");
@@ -28,6 +43,11 @@ int zstd_init(struct zstd_data *data, int level)
int zstd_fini(struct zstd_data *data)
{
+ if (data->dstream) {
+ ZSTD_freeDStream(data->dstream);
+ data->dstream = NULL;
+ }
+
if (data->cstream) {
ZSTD_freeCStream(data->cstream);
data->cstream = NULL;
@@ -71,6 +91,27 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data,
return compressed;
}
+size_t zstd_decompress_stream(struct zstd_data *data,
+ void *src, size_t src_size, void *dst, size_t dst_size)
+{
+ size_t ret;
+ ZSTD_inBuffer input = { src, src_size, 0 };
+ ZSTD_outBuffer output = { dst, dst_size, 0 };
+
+ while (input.pos < input.size) {
+ ret = ZSTD_decompressStream(data->dstream, &output, &input);
+ if (ZSTD_isError(ret)) {
+ pr_err("failed to decompress (B): %ld -> %ld : %s\n",
+ src_size, output.size, ZSTD_getErrorName(ret));
+ break;
+ }
+ output.dst = dst + output.pos;
+ output.size = dst_size - output.pos;
+ }
+
+ return output.pos;
+}
+
#else /* !HAVE_ZSTD_SUPPORT */
int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
@@ -92,4 +133,11 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
return 0;
}
+size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused,
+ size_t src_size __maybe_unused, void *dst __maybe_unused,
+ size_t dst_size __maybe_unused)
+{
+ return 0;
+}
+
#endif
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 08/10] perf report: implement record trace decompression
2019-03-01 16:06 ` [PATCH v5 08/10] perf report: implement record trace decompression Alexey Budankov
@ 2019-03-05 12:25 ` Jiri Olsa
2019-03-07 8:27 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:25 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 07:06:23PM +0300, Alexey Budankov wrote:
SNIP
> +static int __perf_session__process_decomp_events(struct perf_session *session)
> +{
> + s64 skip;
> + u64 size, file_pos = 0;
> + union perf_event *event;
> + struct decomp *decomp = session->decomp_last;
> +
> + if (!decomp)
> + return 0;
> +
> + while (decomp->head < decomp->size && !session_done()) {
> + event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);
> + if (!event)
> + break;
> +
> + size = event->header.size;
> + if (size < sizeof(struct perf_event_header) ||
> + (skip = perf_session__process_event(session, event, file_pos)) < 0) {
> + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
> + decomp->file_pos + decomp->head, event->header.size, event->header.type);
> + return -EINVAL;
> + }
> +
> + if (skip)
> + size += skip;
> +
> + decomp->head += size;
> + }
> +
> + return 0;
> +}
> +
> /*
> * On 64bit we can mmap the data file in one go. No need for tiny mmap
> * slices. On 32bit we use 32MB.
> @@ -1933,6 +2051,10 @@ reader__process_events(struct reader *rd, struct perf_session *session,
> head += size;
> file_pos += size;
>
> + err = __perf_session__process_decomp_events(session);
> + if (err)
> + goto out;
why don't we process decompressed events directly from the
perf_session__process_compressed_event callback?
there would be no need for 'struct decomp' list logic
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 08/10] perf report: implement record trace decompression
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-07 8:27 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:27 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:25, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 07:06:23PM +0300, Alexey Budankov wrote:
>
> SNIP
>
>> +static int __perf_session__process_decomp_events(struct perf_session *session)
>> +{
>> + s64 skip;
>> + u64 size, file_pos = 0;
>> + union perf_event *event;
>> + struct decomp *decomp = session->decomp_last;
>> +
>> + if (!decomp)
>> + return 0;
>> +
>> + while (decomp->head < decomp->size && !session_done()) {
>> + event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);
>> + if (!event)
>> + break;
>> +
>> + size = event->header.size;
>> + if (size < sizeof(struct perf_event_header) ||
>> + (skip = perf_session__process_event(session, event, file_pos)) < 0) {
>> + pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
>> + decomp->file_pos + decomp->head, event->header.size, event->header.type);
>> + return -EINVAL;
>> + }
>> +
>> + if (skip)
>> + size += skip;
>> +
>> + decomp->head += size;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> /*
>> * On 64bit we can mmap the data file in one go. No need for tiny mmap
>> * slices. On 32bit we use 32MB.
>> @@ -1933,6 +2051,10 @@ reader__process_events(struct reader *rd, struct perf_session *session,
>> head += size;
>> file_pos += size;
>>
>> + err = __perf_session__process_decomp_events(session);
>> + if (err)
>> + goto out;
>
> why don't we process decompressed events directly from the
> perf_session__process_compressed_event callback?
>
> there would be no need for 'struct decomp' list logic
It would, because events need to stay in memory after decompression.
It looks reasonable to keep new processing code at the same place
where the processing is implemented currently.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 09/10] perf inject: enable COMPRESSED records decompression
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (5 preceding siblings ...)
2019-03-01 16:06 ` [PATCH v5 08/10] perf report: implement record trace decompression Alexey Budankov
@ 2019-03-01 16:07 ` Alexey Budankov
2019-03-05 12:26 ` Jiri Olsa
2019-03-01 16:09 ` [PATCH v5 10/10] perf tests: implement Zstd comp/decomp integration test Alexey Budankov
` (2 subsequent siblings)
9 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 16:07 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Initialized decompression API so COMPRESSED record would be
decompressed into the resulting output data file.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-inject.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 24086b7f1b14..8e0e06d3edfc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -837,6 +837,9 @@ int cmd_inject(int argc, const char **argv)
if (inject.session == NULL)
return -1;
+ if (zstd_init(&(inject.session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed.\n");
+
if (inject.build_ids) {
/*
* to make sure the mmap records are ordered correctly
@@ -867,6 +870,7 @@ int cmd_inject(int argc, const char **argv)
ret = __cmd_inject(&inject);
out_delete:
+ zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
return ret;
}
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 09/10] perf inject: enable COMPRESSED records decompression
2019-03-01 16:07 ` [PATCH v5 09/10] perf inject: enable COMPRESSED records decompression Alexey Budankov
@ 2019-03-05 12:26 ` Jiri Olsa
2019-03-07 8:28 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:26 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 07:07:49PM +0300, Alexey Budankov wrote:
>
> Initialized decompression API so COMPRESSED record would be
> decompressed into the resulting output data file.
>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
> tools/perf/builtin-inject.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index 24086b7f1b14..8e0e06d3edfc 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -837,6 +837,9 @@ int cmd_inject(int argc, const char **argv)
> if (inject.session == NULL)
> return -1;
>
> + if (zstd_init(&(inject.session->zstd_data), 0) < 0)
> + pr_warning("Decompression initialization failed.\n");
hum, I wonder we should put this inside perf_session__new/perf_session__delete
jirka
> +
> if (inject.build_ids) {
> /*
> * to make sure the mmap records are ordered correctly
> @@ -867,6 +870,7 @@ int cmd_inject(int argc, const char **argv)
> ret = __cmd_inject(&inject);
>
> out_delete:
> + zstd_fini(&(inject.session->zstd_data));
> perf_session__delete(inject.session);
> return ret;
> }
> --
> 2.20.1
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 09/10] perf inject: enable COMPRESSED records decompression
2019-03-05 12:26 ` Jiri Olsa
@ 2019-03-07 8:28 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:28 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:26, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 07:07:49PM +0300, Alexey Budankov wrote:
>>
>> Initialized decompression API so COMPRESSED record would be
>> decompressed into the resulting output data file.
>>
>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>> ---
>> tools/perf/builtin-inject.c | 4 ++++
>> 1 file changed, 4 insertions(+)
>>
>> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
>> index 24086b7f1b14..8e0e06d3edfc 100644
>> --- a/tools/perf/builtin-inject.c
>> +++ b/tools/perf/builtin-inject.c
>> @@ -837,6 +837,9 @@ int cmd_inject(int argc, const char **argv)
>> if (inject.session == NULL)
>> return -1;
>>
>> + if (zstd_init(&(inject.session->zstd_data), 0) < 0)
>> + pr_warning("Decompression initialization failed.\n");
>
> hum, I wonder we should put this inside perf_session__new/perf_session__delete
comp_level parameter passing would extend the signature of __new()
struct perf_session *perf_session__new(struct perf_data *data,
bool repipe, struct perf_tool *tool,
int comp_level)
~Alexey
>
> jirka
>
>> +
>> if (inject.build_ids) {
>> /*
>> * to make sure the mmap records are ordered correctly
>> @@ -867,6 +870,7 @@ int cmd_inject(int argc, const char **argv)
>> ret = __cmd_inject(&inject);
>>
>> out_delete:
>> + zstd_fini(&(inject.session->zstd_data));
>> perf_session__delete(inject.session);
>> return ret;
>> }
>> --
>> 2.20.1
>>
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 10/10] perf tests: implement Zstd comp/decomp integration test
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (6 preceding siblings ...)
2019-03-01 16:07 ` [PATCH v5 09/10] perf inject: enable COMPRESSED records decompression Alexey Budankov
@ 2019-03-01 16:09 ` Alexey Budankov
2019-03-05 12:25 ` Jiri Olsa
2019-03-01 16:37 ` [PATCH v5 05/10] perf mmap: implement dedicated memory buffer for data compression Alexey Budankov
2019-03-01 16:38 ` [PATCH v5 01/10] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines Alexey Budankov
9 siblings, 1 reply; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 16:09 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implemented basic integration test for Zstd based trace
compression/decompression in record and report modes.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
.../tests/shell/record+zstd_comp_decomp.sh | 28 +++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100755 tools/perf/tests/shell/record+zstd_comp_decomp.sh
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
new file mode 100755
index 000000000000..5cb2ab8e2112
--- /dev/null
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# record trace Zstd compression/decompression
+
+trace_file=$(mktemp /tmp/perf.data.XXX)
+perf_tool=perf
+output=/dev/null
+
+skip_if_no_z_record() {
+ $perf_tool record -h 2>&1 | grep '\-z, \-\-compression\-level <n>'
+}
+
+collect_z_record() {
+ echo "Collecting compressed record trace file:"
+ $perf_tool record -o $trace_file -a -z 1 -F 25000 -e cycles -- \
+ dd count=2000 if=/dev/random of=/dev/null > $output 2>&1
+}
+
+check_compressed_stats() {
+ echo "Checking compressed events stats:"
+ $perf_tool report -i $trace_file --header --stats | \
+ grep -E "(# compressed : Zstd,)|(COMPRESSED events:)" > $output 2>&1
+}
+
+skip_if_no_z_record || exit 2
+collect_z_record && check_compressed_stats
+err=$?
+rm -f $trace_file
+exit $err
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v5 10/10] perf tests: implement Zstd comp/decomp integration test
2019-03-01 16:09 ` [PATCH v5 10/10] perf tests: implement Zstd comp/decomp integration test Alexey Budankov
@ 2019-03-05 12:25 ` Jiri Olsa
2019-03-07 8:29 ` Alexey Budankov
0 siblings, 1 reply; 52+ messages in thread
From: Jiri Olsa @ 2019-03-05 12:25 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On Fri, Mar 01, 2019 at 07:09:26PM +0300, Alexey Budankov wrote:
>
> Implemented basic integration test for Zstd based trace
> compression/decompression in record and report modes.
>
> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
> ---
> .../tests/shell/record+zstd_comp_decomp.sh | 28 +++++++++++++++++++
> 1 file changed, 28 insertions(+)
> create mode 100755 tools/perf/tests/shell/record+zstd_comp_decomp.sh
>
> diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
> new file mode 100755
> index 000000000000..5cb2ab8e2112
> --- /dev/null
> +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
> @@ -0,0 +1,28 @@
> +#!/bin/sh
> +# record trace Zstd compression/decompression
> +
> +trace_file=$(mktemp /tmp/perf.data.XXX)
> +perf_tool=perf
> +output=/dev/null
> +
> +skip_if_no_z_record() {
> + $perf_tool record -h 2>&1 | grep '\-z, \-\-compression\-level <n>'
> +}
> +
> +collect_z_record() {
> + echo "Collecting compressed record trace file:"
> + $perf_tool record -o $trace_file -a -z 1 -F 25000 -e cycles -- \
> + dd count=2000 if=/dev/random of=/dev/null > $output 2>&1
> +}
> +
> +check_compressed_stats() {
> + echo "Checking compressed events stats:"
> + $perf_tool report -i $trace_file --header --stats | \
> + grep -E "(# compressed : Zstd,)|(COMPRESSED events:)" > $output 2>&1
> +}
so perf inject can decompress the perf.data and store
uncompressed one, right? would be great to 'uncompress'
the perf data and compare 'perf report --stdio > out.X'
outputs
thanks a lot for adding tests!
jirka
^ permalink raw reply [flat|nested] 52+ messages in thread
* Re: [PATCH v5 10/10] perf tests: implement Zstd comp/decomp integration test
2019-03-05 12:25 ` Jiri Olsa
@ 2019-03-07 8:29 ` Alexey Budankov
0 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-07 8:29 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Peter Zijlstra, Ingo Molnar, Andi Kleen, linux-kernel
On 05.03.2019 15:25, Jiri Olsa wrote:
> On Fri, Mar 01, 2019 at 07:09:26PM +0300, Alexey Budankov wrote:
>>
>> Implemented basic integration test for Zstd based trace
>> compression/decompression in record and report modes.
>>
>> Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
>> ---
>> .../tests/shell/record+zstd_comp_decomp.sh | 28 +++++++++++++++++++
>> 1 file changed, 28 insertions(+)
>> create mode 100755 tools/perf/tests/shell/record+zstd_comp_decomp.sh
>>
>> diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
>> new file mode 100755
>> index 000000000000..5cb2ab8e2112
>> --- /dev/null
>> +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
>> @@ -0,0 +1,28 @@
>> +#!/bin/sh
>> +# record trace Zstd compression/decompression
>> +
>> +trace_file=$(mktemp /tmp/perf.data.XXX)
>> +perf_tool=perf
>> +output=/dev/null
>> +
>> +skip_if_no_z_record() {
>> + $perf_tool record -h 2>&1 | grep '\-z, \-\-compression\-level <n>'
>> +}
>> +
>> +collect_z_record() {
>> + echo "Collecting compressed record trace file:"
>> + $perf_tool record -o $trace_file -a -z 1 -F 25000 -e cycles -- \
>> + dd count=2000 if=/dev/random of=/dev/null > $output 2>&1
>> +}
>> +
>> +check_compressed_stats() {
>> + echo "Checking compressed events stats:"
>> + $perf_tool report -i $trace_file --header --stats | \
>> + grep -E "(# compressed : Zstd,)|(COMPRESSED events:)" > $output 2>&1
>> +}
>
> so perf inject can decompress the perf.data and store
> uncompressed one, right? would be great to 'uncompress'
Right.
> the perf data and compare 'perf report --stdio > out.X'
> outputs
>
> thanks a lot for adding tests!
You are welcome.
Implemented test performs some level of sanity testing for the feature
and it is all that I can provide in this patch set.
~Alexey
>
> jirka
>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v5 05/10] perf mmap: implement dedicated memory buffer for data compression
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (7 preceding siblings ...)
2019-03-01 16:09 ` [PATCH v5 10/10] perf tests: implement Zstd comp/decomp integration test Alexey Budankov
@ 2019-03-01 16:37 ` Alexey Budankov
2019-03-01 16:38 ` [PATCH v5 01/10] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines Alexey Budankov
9 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 16:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implemented mmap data buffer that is used as the memory to operate
on when compressing sampling data in case of serial trace streaming.
In case of AIO trace streaming AIO buffers are used to implement
sampling data compression.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 6 +++++-
tools/perf/util/evlist.c | 8 +++++---
tools/perf/util/evlist.h | 2 +-
tools/perf/util/mmap.c | 30 ++++++++++++++++++++++++++++--
tools/perf/util/mmap.h | 4 +++-
5 files changed, 42 insertions(+), 8 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9eae28d77291..954141c491b0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -572,7 +572,7 @@ static int record__mmap_evlist(struct record *rec,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode,
opts->nr_cblocks, opts->affinity,
- opts->mmap_flush) < 0) {
+ opts->mmap_flush, opts->comp_level) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -2241,6 +2241,10 @@ int cmd_record(int argc, const char **argv)
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
+ if (rec->opts.comp_level > 22)
+ rec->opts.comp_level = 0;
+ pr_debug("comp level: %d\n", rec->opts.comp_level);
+
err = __cmd_record(&record, argc, argv);
out:
perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 937039faac59..a13458b43dc1 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1022,7 +1022,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
+ bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
+ int comp_level)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1032,7 +1033,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
+ struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
+ .comp_level = comp_level };
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1064,7 +1066,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index edf18811e39f..77c11dac4a63 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -166,7 +166,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
bool auxtrace_overwrite, int nr_cblocks,
- int affinity, int flush);
+ int affinity, int flush, int comp_level);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ef3d79b2c90b..d85e73fc82e2 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
}
#ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_enabled(struct perf_mmap *map)
+{
+ return map->aio.nr_cblocks > 0;
+}
#ifdef HAVE_LIBNUMA_SUPPORT
static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
@@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
return 0;
}
-#else
+#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
{
map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
@@ -359,7 +363,12 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
return rc;
}
-#else
+#else /* !HAVE_AIO_SUPPORT */
+static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
+{
+ return 0;
+}
+
static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
struct mmap_params *mp __maybe_unused)
{
@@ -374,6 +383,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
void perf_mmap__munmap(struct perf_mmap *map)
{
perf_mmap__aio_munmap(map);
+ if (map->data != NULL) {
+ munmap(map->data, perf_mmap__mmap_len(map));
+ map->data = NULL;
+ }
if (map->base != NULL) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
@@ -442,6 +455,19 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
map->flush = mp->flush;
+ map->comp_level = mp->comp_level;
+
+ if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+ map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (map->data == MAP_FAILED) {
+ pr_debug2("failed to mmap data buffer, error %d\n",
+ errno);
+ map->data = NULL;
+ return -1;
+ }
+ }
+
if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
&mp->auxtrace_mp, map->base, fd))
return -1;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index b82f8c2d55c4..a02427d609c0 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -40,6 +40,8 @@ struct perf_mmap {
#endif
cpu_set_t affinity_mask;
u64 flush;
+ void *data;
+ int comp_level;
};
/*
@@ -71,7 +73,7 @@ enum bkw_mmap_state {
};
struct mmap_params {
- int prot, mask, nr_cblocks, affinity, flush;
+ int prot, mask, nr_cblocks, affinity, flush, comp_level;
struct auxtrace_mmap_params auxtrace_mp;
};
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v5 01/10] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines
2019-03-01 15:27 [PATCH v5 00/10] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (8 preceding siblings ...)
2019-03-01 16:37 ` [PATCH v5 05/10] perf mmap: implement dedicated memory buffer for data compression Alexey Budankov
@ 2019-03-01 16:38 ` Alexey Budankov
9 siblings, 0 replies; 52+ messages in thread
From: Alexey Budankov @ 2019-03-01 16:38 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Peter Zijlstra,
Ingo Molnar, Andi Kleen, linux-kernel
Implement libzstd feature check, NO_LIBZSTD and LIBZSTD_DIR defines
to override Zstd library sources or disable the feature from the
command line:
$ make -C tools/perf LIBZSTD_DIR=/path/to/zstd/sources/ clean all
$ make -C tools/perf NO_LIBZSTD=1 clean all
Auto detection feature status is reported just before compilation starts.
If your system has some version of the zstd library preinstalled then
the build system finds and uses it during the build.
If you still prefer to compile with some other version of zstd library
that is not preinstalled you have capability to refer the compilation
to that version using LIBZSTD_DIR define.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
Changes in v5:
- implemented perf version --build-options extension for aio and zstd
---
tools/build/Makefile.feature | 6 ++++--
tools/build/feature/Makefile | 6 +++++-
tools/build/feature/test-all.c | 5 +++++
tools/build/feature/test-libzstd.c | 12 ++++++++++++
tools/perf/Makefile.config | 20 ++++++++++++++++++++
tools/perf/Makefile.perf | 3 +++
tools/perf/builtin-version.c | 2 ++
7 files changed, 51 insertions(+), 3 deletions(-)
create mode 100644 tools/build/feature/test-libzstd.c
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 61e46d54a67c..adf791cbd726 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC := \
sched_getcpu \
sdt \
setns \
- libaio
+ libaio \
+ libzstd
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
@@ -118,7 +119,8 @@ FEATURE_DISPLAY ?= \
lzma \
get_cpuid \
bpf \
- libaio
+ libaio \
+ libzstd
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ceb4441b627..4b8244ee65ce 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -62,7 +62,8 @@ FILES= \
test-clang.bin \
test-llvm.bin \
test-llvm-version.bin \
- test-libaio.bin
+ test-libaio.bin \
+ test-libzstd.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -301,6 +302,9 @@ $(OUTPUT)test-clang.bin:
$(OUTPUT)test-libaio.bin:
$(BUILD) -lrt
+$(OUTPUT)test-libzstd.bin:
+ $(BUILD) -lzstd
+
###############################
clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index e903b86b742f..b0dda7db2a17 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -178,6 +178,10 @@
# include "test-reallocarray.c"
#undef main
+#define main main_test_zstd
+# include "test-libzstd.c"
+#undef main
+
int main(int argc, char *argv[])
{
main_test_libpython();
@@ -219,6 +223,7 @@ int main(int argc, char *argv[])
main_test_setns();
main_test_libaio();
main_test_reallocarray();
+ main_test_libzstd();
return 0;
}
diff --git a/tools/build/feature/test-libzstd.c b/tools/build/feature/test-libzstd.c
new file mode 100644
index 000000000000..55268c01b84d
--- /dev/null
+++ b/tools/build/feature/test-libzstd.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <zstd.h>
+
+int main(void)
+{
+ ZSTD_CStream *cstream;
+
+ cstream = ZSTD_createCStream();
+ ZSTD_freeCStream(cstream);
+
+ return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0f11d5891301..4949bdb16a66 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -152,6 +152,13 @@ endif
FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+ifdef LIBZSTD_DIR
+ LIBZSTD_CFLAGS := -I$(LIBZSTD_DIR)/lib
+ LIBZSTD_LDFLAGS := -L$(LIBZSTD_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS)
+
FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
-include $(src-perf)/arch/$(SRCARCH)/Makefile
@@ -782,6 +789,19 @@ ifndef NO_LZMA
endif
endif
+ifndef NO_LIBZSTD
+ ifeq ($(feature-libzstd), 1)
+ CFLAGS += -DHAVE_ZSTD_SUPPORT
+ CFLAGS += $(LIBZSTD_CFLAGS)
+ LDFLAGS += $(LIBZSTD_LDFLAGS)
+ EXTLIBS += -lzstd
+ $(call detected,CONFIG_ZSTD)
+ else
+ msg := $(warning No libzstd found, disables trace compression, please install libzstd-dev[el] and/or set LIBZSTD_DIR);
+ NO_LIBZSTD := 1
+ endif
+endif
+
ifndef NO_BACKTRACE
ifeq ($(feature-backtrace), 1)
CFLAGS += -DHAVE_BACKTRACE_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 01f7555fd933..06b927ee6ee3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -108,6 +108,9 @@ include ../scripts/utilities.mak
# streaming for record mode. Currently Posix AIO trace streaming is
# supported only when linking with glibc.
#
+# Define NO_LIBZSTD if you do not want support of Zstandard based runtime
+# trace compression in record mode.
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 50df168be326..f470144d1a70 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -78,6 +78,8 @@ static void library_status(void)
STATUS(HAVE_LZMA_SUPPORT, lzma);
STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+ STATUS(HAVE_AIO_SUPPORT, aio);
+ STATUS(HAVE_ZSTD_SUPPORT, zstd);
}
int cmd_version(int argc, const char **argv)
--
2.20.1
^ permalink raw reply related [flat|nested] 52+ messages in thread