* [PATCH v8 01/12] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
@ 2019-03-14 11:28 ` Alexey Budankov
2019-03-14 11:29 ` [PATCH v8 02/12] perf record: implement --mmap-flush=<threshold> option Alexey Budankov
` (11 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:28 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implement libzstd feature check, NO_LIBZSTD and LIBZSTD_DIR defines
to override Zstd library sources or disable the feature from the
command line:
$ make -C tools/perf LIBZSTD_DIR=/path/to/zstd/sources/ clean all
$ make -C tools/perf NO_LIBZSTD=1 clean all
Auto detection feature status is reported just before compilation starts.
If your system has some version of the zstd library preinstalled then
the build system finds and uses it during the build.
If you still prefer to compile with some other version of zstd library
you have capability to refer the compilation to that version using
LIBZSTD_DIR define.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/build/Makefile.feature | 6 ++++--
tools/build/feature/Makefile | 6 +++++-
tools/build/feature/test-all.c | 5 +++++
tools/build/feature/test-libzstd.c | 12 ++++++++++++
tools/perf/Makefile.config | 20 ++++++++++++++++++++
tools/perf/Makefile.perf | 3 +++
tools/perf/builtin-version.c | 2 ++
7 files changed, 51 insertions(+), 3 deletions(-)
create mode 100644 tools/build/feature/test-libzstd.c
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 61e46d54a67c..adf791cbd726 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC := \
sched_getcpu \
sdt \
setns \
- libaio
+ libaio \
+ libzstd
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
@@ -118,7 +119,8 @@ FEATURE_DISPLAY ?= \
lzma \
get_cpuid \
bpf \
- libaio
+ libaio \
+ libzstd
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ceb4441b627..4b8244ee65ce 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -62,7 +62,8 @@ FILES= \
test-clang.bin \
test-llvm.bin \
test-llvm-version.bin \
- test-libaio.bin
+ test-libaio.bin \
+ test-libzstd.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -301,6 +302,9 @@ $(OUTPUT)test-clang.bin:
$(OUTPUT)test-libaio.bin:
$(BUILD) -lrt
+$(OUTPUT)test-libzstd.bin:
+ $(BUILD) -lzstd
+
###############################
clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index e903b86b742f..b0dda7db2a17 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -178,6 +178,10 @@
# include "test-reallocarray.c"
#undef main
+#define main main_test_zstd
+# include "test-libzstd.c"
+#undef main
+
int main(int argc, char *argv[])
{
main_test_libpython();
@@ -219,6 +223,7 @@ int main(int argc, char *argv[])
main_test_setns();
main_test_libaio();
main_test_reallocarray();
+ main_test_libzstd();
return 0;
}
diff --git a/tools/build/feature/test-libzstd.c b/tools/build/feature/test-libzstd.c
new file mode 100644
index 000000000000..55268c01b84d
--- /dev/null
+++ b/tools/build/feature/test-libzstd.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <zstd.h>
+
+int main(void)
+{
+ ZSTD_CStream *cstream;
+
+ cstream = ZSTD_createCStream();
+ ZSTD_freeCStream(cstream);
+
+ return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0f11d5891301..4949bdb16a66 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -152,6 +152,13 @@ endif
FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+ifdef LIBZSTD_DIR
+ LIBZSTD_CFLAGS := -I$(LIBZSTD_DIR)/lib
+ LIBZSTD_LDFLAGS := -L$(LIBZSTD_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS)
+
FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
# include ARCH specific config
-include $(src-perf)/arch/$(SRCARCH)/Makefile
@@ -782,6 +789,19 @@ ifndef NO_LZMA
endif
endif
+ifndef NO_LIBZSTD
+ ifeq ($(feature-libzstd), 1)
+ CFLAGS += -DHAVE_ZSTD_SUPPORT
+ CFLAGS += $(LIBZSTD_CFLAGS)
+ LDFLAGS += $(LIBZSTD_LDFLAGS)
+ EXTLIBS += -lzstd
+ $(call detected,CONFIG_ZSTD)
+ else
+ msg := $(warning No libzstd found, disables trace compression, please install libzstd-dev[el] and/or set LIBZSTD_DIR);
+ NO_LIBZSTD := 1
+ endif
+endif
+
ifndef NO_BACKTRACE
ifeq ($(feature-backtrace), 1)
CFLAGS += -DHAVE_BACKTRACE_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 01f7555fd933..06b927ee6ee3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -108,6 +108,9 @@ include ../scripts/utilities.mak
# streaming for record mode. Currently Posix AIO trace streaming is
# supported only when linking with glibc.
#
+# Define NO_LIBZSTD if you do not want support of Zstandard based runtime
+# trace compression in record mode.
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 50df168be326..f470144d1a70 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -78,6 +78,8 @@ static void library_status(void)
STATUS(HAVE_LZMA_SUPPORT, lzma);
STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+ STATUS(HAVE_AIO_SUPPORT, aio);
+ STATUS(HAVE_ZSTD_SUPPORT, zstd);
}
int cmd_version(int argc, const char **argv)
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 02/12] perf record: implement --mmap-flush=<threshold> option
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
2019-03-14 11:28 ` [PATCH v8 01/12] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines Alexey Budankov
@ 2019-03-14 11:29 ` Alexey Budankov
2019-03-14 11:30 ` [PATCH v8 03/12] perf session: define bytes_transferred and bytes_compressed metrics Alexey Budankov
` (10 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:29 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implemented --mmap-flush option that specifies minimal size of data
chunk that is extracted from mmaped kernel buffer to store into a trace.
The default option value is 1 byte what means every time trace writing
thread finds some new data in the mmaped buffer the data is extracted,
possibly compressed and written to a trace.
$ tools/perf/perf record --mmap-flush 1024 -e cycles -- matrix.gcc
$ tools/perf/perf record --aio --mmap-flush 1K -e cycles -- matrix.gcc
The option is independent from -z setting, doesn't vary with compression
level and can serve two purposes.
The first purpose is to increase the compression ratio of a trace data.
Larger data chunks are compressed more effectively so the implemented
option allows specifying data chunk size to compress. Also at some cases
executing more write syscalls with smaller data size can take longer
than executing less write syscalls with bigger data size due to syscall
overhead so extracting bigger data chunks specified by the option value
could additionally decrease runtime overhead.
The second purpose is to avoid self monitoring live-lock issue in system
wide (-a) profiling mode. Profiling in system wide mode with compression
(-a -z) can additionally induce data into the kernel buffers along with
the data from monitored processes. If performance data rate and volume
from the monitored processes is high then trace streaming and compression
activity in the tool is also high. High tool process activity can lead
to subtle live-lock effect when compression of single new byte from some
of mmaped kernel buffer leads to generation of the next single byte at
some mmaped buffer. So perf tool process ends up in endless self monitoring.
Implemented sync param is the mean to force data move independently from
the specified flush threshold value. Despite the provided flush value the
tool needs capability to unconditionally drain memory buffers, at least
in the end of the collection.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/Documentation/perf-record.txt | 12 +++++
tools/perf/builtin-record.c | 65 +++++++++++++++++++++---
tools/perf/perf.h | 1 +
tools/perf/util/evlist.c | 6 +--
tools/perf/util/evlist.h | 3 +-
tools/perf/util/mmap.c | 4 +-
tools/perf/util/mmap.h | 3 +-
7 files changed, 82 insertions(+), 12 deletions(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 8f0c2be34848..8eb3f5b57202 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -459,6 +459,18 @@ Set affinity mask of trace reading thread according to the policy defined by 'mo
node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
cpu - thread affinity mask is set to cpu of the processed mmap buffer
+--mmap-flush=n::
+Specify minimal number of bytes that is extracted from mmap data pages and stored
+into a trace. The number specification is possible using B/K/M/G suffixes. Maximal allowed
+value is a quarter of the size of mmaped data pages. The default option value is 1 byte
+what means that every time trace writing thread finds some new data in the mmaped buffer
+the data is extracted, possibly compressed (-z) and written to a trace. Larger data chunks
+are compressed more effectively in comparison to smaller chunks so extraction of larger
+chunks from the mmap data pages is preferable from perspective of trace size reduction.
+Also at some cases executing less trace write syscalls with bigger data size can take
+shorter than executing more trace write syscalls with smaller data size thus lowering
+runtime profiling overhead.
+
--all-kernel::
Configure all used events to run in kernel space.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a468d882e74f..414dd4cef680 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -334,6 +334,41 @@ static int record__aio_enabled(struct record *rec)
return rec->opts.nr_cblocks > 0;
}
+#define MMAP_FLUSH_DEFAULT 1
+static int record__mmap_flush_parse(const struct option *opt,
+ const char *str,
+ int unset)
+{
+ int flush_max;
+ struct record_opts *opts = (struct record_opts *)opt->value;
+ static struct parse_tag tags[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+
+ if (unset)
+ return 0;
+
+ if (str) {
+ opts->mmap_flush = parse_tag_value(str, tags);
+ if (opts->mmap_flush == (int)-1)
+ opts->mmap_flush = strtol(str, NULL, 0);
+ }
+
+ if (!opts->mmap_flush)
+ opts->mmap_flush = MMAP_FLUSH_DEFAULT;
+
+ flush_max = perf_evlist__mmap_size(opts->mmap_pages);
+ flush_max /= 4;
+ if (opts->mmap_flush > flush_max)
+ opts->mmap_flush = flush_max;
+
+ return 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -543,7 +578,8 @@ static int record__mmap_evlist(struct record *rec,
if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode,
- opts->nr_cblocks, opts->affinity) < 0) {
+ opts->nr_cblocks, opts->affinity,
+ opts->mmap_flush) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -733,7 +769,7 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
}
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
- bool overwrite)
+ bool overwrite, bool sync)
{
u64 bytes_written = rec->bytes_written;
int i;
@@ -756,12 +792,19 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
off = record__aio_get_pos(trace_fd);
for (i = 0; i < evlist->nr_mmaps; i++) {
+ u64 flush = 0;
struct perf_mmap *map = &maps[i];
if (map->base) {
record__adjust_affinity(rec, map);
+ if (sync) {
+ flush = map->flush;
+ map->flush = 1;
+ }
if (!record__aio_enabled(rec)) {
if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ if (sync)
+ map->flush = flush;
rc = -1;
goto out;
}
@@ -774,10 +817,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
idx = record__aio_sync(map, false);
if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
record__aio_set_pos(trace_fd, off);
+ if (sync)
+ map->flush = flush;
rc = -1;
goto out;
}
}
+ if (sync)
+ map->flush = flush;
}
if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
@@ -803,15 +850,15 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
return rc;
}
-static int record__mmap_read_all(struct record *rec)
+static int record__mmap_read_all(struct record *rec, bool sync)
{
int err;
- err = record__mmap_read_evlist(rec, rec->evlist, false);
+ err = record__mmap_read_evlist(rec, rec->evlist, false, sync);
if (err)
return err;
- return record__mmap_read_evlist(rec, rec->evlist, true);
+ return record__mmap_read_evlist(rec, rec->evlist, true, sync);
}
static void record__init_features(struct record *rec)
@@ -1312,7 +1359,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (trigger_is_hit(&switch_output_trigger) || done || draining)
perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
- if (record__mmap_read_all(rec) < 0) {
+ if (record__mmap_read_all(rec, false) < 0) {
trigger_error(&auxtrace_snapshot_trigger);
trigger_error(&switch_output_trigger);
err = -1;
@@ -1413,6 +1460,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
if (forks) {
@@ -1815,6 +1863,7 @@ static struct record record = {
.uses_mmap = true,
.default_per_cpu = true,
},
+ .mmap_flush = MMAP_FLUSH_DEFAULT,
},
.tool = {
.sample = process_sample_event,
@@ -1881,6 +1930,9 @@ static struct option __record_options[] = {
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
"number of mmap data pages and AUX area tracing mmap pages",
record__parse_mmap_pages),
+ OPT_CALLBACK(0, "mmap-flush", &record.opts, "bytes",
+ "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
+ record__mmap_flush_parse),
OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
@@ -2184,6 +2236,7 @@ int cmd_record(int argc, const char **argv)
pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
+ pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
err = __cmd_record(&record, argc, argv);
out:
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index b120e547ddc7..7886cc9771cf 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -85,6 +85,7 @@ struct record_opts {
u64 clockid_res_ns;
int nr_cblocks;
int affinity;
+ int mmap_flush;
};
enum perf_affinity {
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ed20f4379956..8858d829983b 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1037,7 +1037,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite, int nr_cblocks, int affinity)
+ bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1047,7 +1047,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
+ struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1079,7 +1079,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 744906dd4887..edf18811e39f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -165,7 +165,8 @@ unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite, int nr_cblocks, int affinity);
+ bool auxtrace_overwrite, int nr_cblocks,
+ int affinity, int flush);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdc7740fc181..ef3d79b2c90b 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -440,6 +440,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
perf_mmap__setup_affinity_mask(map, mp);
+ map->flush = mp->flush;
+
if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
&mp->auxtrace_mp, map->base, fd))
return -1;
@@ -492,7 +494,7 @@ static int __perf_mmap__read_init(struct perf_mmap *md)
md->start = md->overwrite ? head : old;
md->end = md->overwrite ? old : head;
- if (md->start == md->end)
+ if ((md->end - md->start) < md->flush)
return -EAGAIN;
size = md->end - md->start;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index e566c19b242b..b82f8c2d55c4 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -39,6 +39,7 @@ struct perf_mmap {
} aio;
#endif
cpu_set_t affinity_mask;
+ u64 flush;
};
/*
@@ -70,7 +71,7 @@ enum bkw_mmap_state {
};
struct mmap_params {
- int prot, mask, nr_cblocks, affinity;
+ int prot, mask, nr_cblocks, affinity, flush;
struct auxtrace_mmap_params auxtrace_mp;
};
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 03/12] perf session: define bytes_transferred and bytes_compressed metrics
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
2019-03-14 11:28 ` [PATCH v8 01/12] feature: implement libzstd check, LIBZSTD_DIR and NO_LIBZSTD defines Alexey Budankov
2019-03-14 11:29 ` [PATCH v8 02/12] perf record: implement --mmap-flush=<threshold> option Alexey Budankov
@ 2019-03-14 11:30 ` Alexey Budankov
2019-03-14 11:30 ` [PATCH v8 04/12] perf record: implement COMPRESSED event record and its attributes Alexey Budankov
` (9 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:30 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Define bytes_transferred and bytes_compressed metrics to calculate ratio
in the end of the data collection:
compression ratio = bytes_transferred / bytes_compressed
bytes_transferred accumulates the amount of bytes that was extracted from
the mmaped kernel buffers for compression. bytes_compressed accumulates
the amount of bytes that was received after applying compression.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 14 +++++++++++++-
tools/perf/util/env.h | 1 +
tools/perf/util/session.h | 2 ++
3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 414dd4cef680..8e6e0c7a89b9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1502,6 +1502,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
char samples[128];
const char *postfix = rec->timestamp_filename ?
".<timestamp>" : "";
+ float ratio = 0;
if (rec->samples && !rec->opts.full_auxtrace)
scnprintf(samples, sizeof(samples),
@@ -1509,9 +1510,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
else
samples[0] = '\0';
- fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
+ if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
+ ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
+ session->header.env.comp_ratio = ratio + 0.5;
+ }
+
+ fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
perf_data__size(data) / 1024.0 / 1024.0,
data->path, postfix, samples);
+ if (ratio) {
+ fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
+ rec->session->bytes_transferred / 1024.0 / 1024.0,
+ ratio);
+ }
+ fprintf(stderr, " ]\n");
}
out_delete_session:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index d01b8355f4ca..fb39e9af128f 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -64,6 +64,7 @@ struct perf_env {
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
u64 clockid_res_ns;
+ u32 comp_ratio;
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d96eccd7d27f..0e14884f28b2 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -35,6 +35,8 @@ struct perf_session {
struct ordered_events ordered_events;
struct perf_data *data;
struct perf_tool *tool;
+ u64 bytes_transferred;
+ u64 bytes_compressed;
};
struct perf_tool;
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 04/12] perf record: implement COMPRESSED event record and its attributes
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (2 preceding siblings ...)
2019-03-14 11:30 ` [PATCH v8 03/12] perf session: define bytes_transferred and bytes_compressed metrics Alexey Budankov
@ 2019-03-14 11:30 ` Alexey Budankov
2019-03-14 11:31 ` [PATCH v8 05/12] perf mmap: implement dedicated memory buffer for data compression Alexey Budankov
` (8 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:30 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implemented PERF_RECORD_COMPRESSED event, related data types, header
feature and functions to write, read and print feature attributes
from the trace header section.
comp_mmap_len preserves the size of mmaped kernel buffer that was used
during collection. comp_mmap_len size is used on loading stage as the
size of decomp buffer for decompression of COMPRESSED events content.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
.../Documentation/perf.data-file-format.txt | 24 ++++++++
tools/perf/builtin-record.c | 8 +++
tools/perf/perf.h | 1 +
tools/perf/util/env.h | 10 ++++
tools/perf/util/event.c | 1 +
tools/perf/util/event.h | 7 +++
| 55 ++++++++++++++++++-
| 1 +
8 files changed, 106 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 593ef49b273c..418fa0bce52e 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -272,6 +272,19 @@ struct {
Two uint64_t for the time of first sample and the time of last sample.
+ HEADER_COMPRESSED = 24,
+
+struct {
+ u32 version;
+ u32 type;
+ u32 level;
+ u32 ratio;
+ u32 mmap_len;
+};
+
+Indicates that trace contains records of PERF_RECORD_COMPRESSED type
+that have perf_events records in compressed form.
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
@@ -437,6 +450,17 @@ struct auxtrace_error_event {
Describes a header feature. These are records used in pipe-mode that
contain information that otherwise would be in perf.data file's header.
+ PERF_RECORD_COMPRESSED = 81,
+
+struct compressed_event {
+ struct perf_event_header header;
+ char data[];
+};
+
+The header is followed by compressed data frame that can be decompressed
+into array of perf trace records. The size of the entire compressed event
+record including the header is limited by the max value of header.size.
+
Event types
Define the event attributes with their IDs.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8e6e0c7a89b9..5bba860d5376 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -369,6 +369,11 @@ static int record__mmap_flush_parse(const struct option *opt,
return 0;
}
+static int record__comp_enabled(struct record *rec)
+{
+ return rec->opts.comp_level > 0;
+}
+
static int process_synthesized_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -885,6 +890,8 @@ static void record__init_features(struct record *rec)
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
+ if (!record__comp_enabled(rec))
+ perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
perf_header__clear_feat(&session->header, HEADER_STAT);
}
@@ -1224,6 +1231,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = -1;
goto out_child;
}
+ session->header.env.comp_mmap_len = session->evlist->mmap_len;
err = bpf__apply_obj_config();
if (err) {
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 7886cc9771cf..2c6caad45b10 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -86,6 +86,7 @@ struct record_opts {
int nr_cblocks;
int affinity;
int mmap_flush;
+ unsigned int comp_level;
};
enum perf_affinity {
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index fb39e9af128f..7990d63ab764 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -65,6 +65,16 @@ struct perf_env {
unsigned long long memory_bsize;
u64 clockid_res_ns;
u32 comp_ratio;
+ u32 comp_ver;
+ u32 comp_type;
+ u32 comp_level;
+ u32 comp_mmap_len;
+};
+
+enum perf_compress_type {
+ PERF_COMP_NONE = 0,
+ PERF_COMP_ZSTD,
+ PERF_COMP_MAX
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ba7be74fad6e..d1ad6c419724 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -68,6 +68,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
[PERF_RECORD_TIME_CONV] = "TIME_CONV",
[PERF_RECORD_HEADER_FEATURE] = "FEATURE",
+ [PERF_RECORD_COMPRESSED] = "COMPRESSED",
};
static const char *perf_ns__names[] = {
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 36ae7e92dab1..8a13aefe734e 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -254,6 +254,7 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_EVENT_UPDATE = 78,
PERF_RECORD_TIME_CONV = 79,
PERF_RECORD_HEADER_FEATURE = 80,
+ PERF_RECORD_COMPRESSED = 81,
PERF_RECORD_HEADER_MAX
};
@@ -626,6 +627,11 @@ struct feature_event {
char data[];
};
+struct compressed_event {
+ struct perf_event_header header;
+ char data[];
+};
+
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
@@ -659,6 +665,7 @@ union perf_event {
struct feature_event feat;
struct ksymbol_event ksymbol_event;
struct bpf_event bpf_event;
+ struct compressed_event pack;
};
void perf_event__print_totals(void);
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b0683bf4d9f3..ee5dd3befa4b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1259,6 +1259,30 @@ static int write_mem_topology(struct feat_fd *ff __maybe_unused,
return ret;
}
+static int write_compressed(struct feat_fd *ff __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ int ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level));
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio));
+ if (ret)
+ return ret;
+
+ return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1557,6 +1581,13 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
}
}
+static void print_compressed(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
+ ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown",
+ ff->ph->env.comp_level, ff->ph->env.comp_ratio);
+}
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -2414,6 +2445,27 @@ static int process_dir_format(struct feat_fd *ff,
return do_read_u64(ff, &data->dir.version);
}
+static int process_compressed(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_type)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_level)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
+ return -1;
+
+ if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
+ return -1;
+
+ return 0;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2474,7 +2526,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
FEAT_OPR(CLOCKID, clockid, false),
- FEAT_OPN(DIR_FORMAT, dir_format, false)
+ FEAT_OPN(DIR_FORMAT, dir_format, false),
+ FEAT_OPR(COMPRESSED, compressed, false)
};
struct header_print_data {
--git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 6a231340238d..9ccfb204bd2c 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -40,6 +40,7 @@ enum {
HEADER_MEM_TOPOLOGY,
HEADER_CLOCKID,
HEADER_DIR_FORMAT,
+ HEADER_COMPRESSED,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 05/12] perf mmap: implement dedicated memory buffer for data compression
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (3 preceding siblings ...)
2019-03-14 11:30 ` [PATCH v8 04/12] perf record: implement COMPRESSED event record and its attributes Alexey Budankov
@ 2019-03-14 11:31 ` Alexey Budankov
2019-03-14 11:32 ` [PATCH v8 06/12] perf util: introduce Zstd streaming based compression API Alexey Budankov
` (7 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:31 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implemented mmap data buffer that is used as the memory to operate
on when compressing data in case of serial trace streaming.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 8 +++++++-
tools/perf/util/evlist.c | 8 +++++---
tools/perf/util/evlist.h | 2 +-
tools/perf/util/mmap.c | 30 ++++++++++++++++++++++++++++--
tools/perf/util/mmap.h | 4 +++-
5 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5bba860d5376..e2d9347c08b3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -369,6 +369,8 @@ static int record__mmap_flush_parse(const struct option *opt,
return 0;
}
+static unsigned int comp_level_max = 22;
+
static int record__comp_enabled(struct record *rec)
{
return rec->opts.comp_level > 0;
@@ -584,7 +586,7 @@ static int record__mmap_evlist(struct record *rec,
opts->auxtrace_mmap_pages,
opts->auxtrace_snapshot_mode,
opts->nr_cblocks, opts->affinity,
- opts->mmap_flush) < 0) {
+ opts->mmap_flush, opts->comp_level) < 0) {
if (errno == EPERM) {
pr_err("Permission error mapping pages.\n"
"Consider increasing "
@@ -2258,6 +2260,10 @@ int cmd_record(int argc, const char **argv)
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
+ if (rec->opts.comp_level > comp_level_max)
+ rec->opts.comp_level = comp_level_max;
+ pr_debug("comp level: %d\n", rec->opts.comp_level);
+
err = __cmd_record(&record, argc, argv);
out:
perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8858d829983b..4d8a25a12430 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1037,7 +1037,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
+ bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
+ int comp_level)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1047,7 +1048,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
+ struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
+ .comp_level = comp_level };
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1079,7 +1081,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index edf18811e39f..77c11dac4a63 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -166,7 +166,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
bool auxtrace_overwrite, int nr_cblocks,
- int affinity, int flush);
+ int affinity, int flush, int comp_level);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ef3d79b2c90b..d85e73fc82e2 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
}
#ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_enabled(struct perf_mmap *map)
+{
+ return map->aio.nr_cblocks > 0;
+}
#ifdef HAVE_LIBNUMA_SUPPORT
static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
@@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
return 0;
}
-#else
+#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
{
map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
@@ -359,7 +363,12 @@ int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
return rc;
}
-#else
+#else /* !HAVE_AIO_SUPPORT */
+static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
+{
+ return 0;
+}
+
static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
struct mmap_params *mp __maybe_unused)
{
@@ -374,6 +383,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
void perf_mmap__munmap(struct perf_mmap *map)
{
perf_mmap__aio_munmap(map);
+ if (map->data != NULL) {
+ munmap(map->data, perf_mmap__mmap_len(map));
+ map->data = NULL;
+ }
if (map->base != NULL) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
@@ -442,6 +455,19 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
map->flush = mp->flush;
+ map->comp_level = mp->comp_level;
+
+ if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+ map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (map->data == MAP_FAILED) {
+ pr_debug2("failed to mmap data buffer, error %d\n",
+ errno);
+ map->data = NULL;
+ return -1;
+ }
+ }
+
if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
&mp->auxtrace_mp, map->base, fd))
return -1;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index b82f8c2d55c4..a02427d609c0 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -40,6 +40,8 @@ struct perf_mmap {
#endif
cpu_set_t affinity_mask;
u64 flush;
+ void *data;
+ int comp_level;
};
/*
@@ -71,7 +73,7 @@ enum bkw_mmap_state {
};
struct mmap_params {
- int prot, mask, nr_cblocks, affinity, flush;
+ int prot, mask, nr_cblocks, affinity, flush, comp_level;
struct auxtrace_mmap_params auxtrace_mp;
};
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 06/12] perf util: introduce Zstd streaming based compression API
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (4 preceding siblings ...)
2019-03-14 11:31 ` [PATCH v8 05/12] perf mmap: implement dedicated memory buffer for data compression Alexey Budankov
@ 2019-03-14 11:32 ` Alexey Budankov
2019-03-14 11:34 ` [PATCH v8 07/12] perf record: implement compression for serial trace streaming Alexey Budankov
` (6 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:32 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implemented functions are based on Zstd streaming compression API.
The functions are used in runtime to compress data that come from
mmaped kernel buffer. zstd_init(), zstd_fini() are used for
initialization and finalization to allocate and deallocate internal
zstd objects. zstd_compress_stream_to_records() is used to convert
parts of mmaped kernel buffer into an array of PERF_RECORD_COMPRESSED
records.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/util/Build | 2 ++
tools/perf/util/compress.h | 43 +++++++++++++++++++++++
tools/perf/util/zstd.c | 71 ++++++++++++++++++++++++++++++++++++++
3 files changed, 116 insertions(+)
create mode 100644 tools/perf/util/zstd.c
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8dd3102301ea..6d5bbc8b589b 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,6 +145,8 @@ perf-y += scripting-engines/
perf-$(CONFIG_ZLIB) += zlib.o
perf-$(CONFIG_LZMA) += lzma.o
+perf-$(CONFIG_ZSTD) += zstd.o
+
perf-y += demangle-java.o
perf-y += demangle-rust.o
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index 892e92e7e7fc..d00d7cb095aa 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -2,6 +2,11 @@
#ifndef PERF_COMPRESS_H
#define PERF_COMPRESS_H
+#include <stdbool.h>
+#ifdef HAVE_ZSTD_SUPPORT
+#include <zstd.h>
+#endif
+
#ifdef HAVE_ZLIB_SUPPORT
int gzip_decompress_to_file(const char *input, int output_fd);
bool gzip_is_compressed(const char *input);
@@ -12,4 +17,42 @@ int lzma_decompress_to_file(const char *input, int output_fd);
bool lzma_is_compressed(const char *input);
#endif
+struct zstd_data {
+#ifdef HAVE_ZSTD_SUPPORT
+ ZSTD_CStream *cstream;
+#endif
+};
+
+#ifdef HAVE_ZSTD_SUPPORT
+
+int zstd_init(struct zstd_data *data, int level);
+int zstd_fini(struct zstd_data *data);
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data,
+ void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
+ size_t process_header(void *record, size_t increment));
+
+#else /* !HAVE_ZSTD_SUPPORT */
+
+static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
+{
+ return 0;
+}
+
+static inline int zstd_fini(struct zstd_data *data __maybe_unused)
+{
+ return 0;
+}
+
+static inline size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+ void *dst __maybe_unused, size_t dst_size __maybe_unused,
+ void *src __maybe_unused, size_t src_size __maybe_unused,
+ size_t max_record_size __maybe_unused,
+ size_t process_header(void *record, size_t increment) __maybe_unused)
+{
+ return 0;
+}
+
+#endif
+
#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000000..6d4f69d57567
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "util/compress.h"
+#include "util/debug.h"
+
+int zstd_init(struct zstd_data *data, int level)
+{
+ size_t ret;
+
+ data->cstream = ZSTD_createCStream();
+ if (data->cstream == NULL) {
+ pr_err("Couldn't create compression stream.\n");
+ return -1;
+ }
+
+ ret = ZSTD_initCStream(data->cstream, level);
+ if (ZSTD_isError(ret)) {
+ pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
+ return -1;
+ }
+
+ return 0;
+}
+
+int zstd_fini(struct zstd_data *data)
+{
+ if (data->cstream) {
+ ZSTD_freeCStream(data->cstream);
+ data->cstream = NULL;
+ }
+
+ return 0;
+}
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data,
+ void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
+ size_t process_header(void *record, size_t increment))
+{
+ size_t ret, size, compressed = 0;
+ ZSTD_inBuffer input = { src, src_size, 0 };
+ ZSTD_outBuffer output;
+ void *record;
+
+ while (input.pos < input.size) {
+ record = dst;
+ size = process_header(record, 0);
+ compressed += size;
+ dst += size;
+ dst_size -= size;
+ output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
+ max_record_size : dst_size, 0 };
+ ret = ZSTD_compressStream(data->cstream, &output, &input);
+ ZSTD_flushStream(data->cstream, &output);
+ if (ZSTD_isError(ret)) {
+ pr_err("failed to compress %ld bytes: %s\n",
+ (long)src_size, ZSTD_getErrorName(ret));
+ memcpy(dst, src, src_size);
+ return src_size;
+ }
+ size = output.pos;
+ size = process_header(record, size);
+ compressed += size;
+ dst += size;
+ dst_size -= size;
+ }
+
+ return compressed;
+}
+
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 07/12] perf record: implement compression for serial trace streaming
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (5 preceding siblings ...)
2019-03-14 11:32 ` [PATCH v8 06/12] perf util: introduce Zstd streaming based compression API Alexey Budankov
@ 2019-03-14 11:34 ` Alexey Budankov
2019-03-14 11:35 ` [PATCH v8 08/12] perf record: implement compression for AIO " Alexey Budankov
` (5 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:34 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Compression is implemented using the functions from zstd.c. As the memory
to operate on the compression uses mmap->data buffer. If Zstd streaming
compression API fails for some reason the data to be compressed are just
copied into the memory buffers using plain memcpy().
Compressed trace frame consists of an array of PERF_RECORD_COMPRESSED
records. Each element of the array is not longer that PERF_SAMPLE_MAX_SIZE
and consists of perf_event_header followed by the compressed chunk
that is decompressed on the loading stage.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 52 +++++++++++++++++++++++++++++++++++--
tools/perf/util/session.h | 2 ++
2 files changed, 52 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e2d9347c08b3..c90824172beb 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -130,6 +130,9 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
return 0;
}
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+ void *src, size_t src_size);
+
#ifdef HAVE_AIO_SUPPORT
static int record__aio_write(struct aiocb *cblock, int trace_fd,
void *buf, size_t size, off_t off)
@@ -389,6 +392,12 @@ static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size
{
struct record *rec = to;
+ if (record__comp_enabled(rec)) {
+ size = zstd_compress(rec->session, map->data,
+ perf_mmap__mmap_len(map), bf, size);
+ bf = map->data;
+ }
+
rec->samples++;
return record__write(rec, map, bf, size);
}
@@ -775,6 +784,37 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
}
}
+static size_t process_comp_header(void *record, size_t increment)
+{
+ struct compressed_event *event = record;
+ size_t size = sizeof(struct compressed_event);
+
+ if (increment) {
+ event->header.size += increment;
+ return increment;
+ } else {
+ event->header.type = PERF_RECORD_COMPRESSED;
+ event->header.size = size;
+ return size;
+ }
+}
+
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+ void *src, size_t src_size)
+{
+ size_t compressed;
+ size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event);
+
+ compressed = zstd_compress_stream_to_records(&(session->zstd_data),
+ dst, dst_size, src, src_size, max_record_size,
+ process_comp_header);
+
+ session->bytes_transferred += src_size;
+ session->bytes_compressed += compressed;
+
+ return compressed;
+}
+
static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
bool overwrite, bool sync)
{
@@ -1204,6 +1244,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
fd = perf_data__fd(data);
rec->session = session;
+ if (zstd_init(&(session->zstd_data), rec->opts.comp_level) < 0) {
+ pr_err("Compression initialization failed.\n");
+ return -1;
+ }
+
+ session->header.env.comp_type = PERF_COMP_ZSTD;
+ session->header.env.comp_level = rec->opts.comp_level;
+
record__init_features(rec);
if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@@ -1537,6 +1585,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
}
out_delete_session:
+ zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return status;
}
@@ -2254,8 +2303,7 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.nr_cblocks > nr_cblocks_max)
rec->opts.nr_cblocks = nr_cblocks_max;
- if (verbose > 0)
- pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+ pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 0e14884f28b2..6c984c895924 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -8,6 +8,7 @@
#include "machine.h"
#include "data.h"
#include "ordered-events.h"
+#include "util/compress.h"
#include <linux/kernel.h>
#include <linux/rbtree.h>
#include <linux/perf_event.h>
@@ -37,6 +38,7 @@ struct perf_session {
struct perf_tool *tool;
u64 bytes_transferred;
u64 bytes_compressed;
+ struct zstd_data zstd_data;
};
struct perf_tool;
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 08/12] perf record: implement compression for AIO trace streaming
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (6 preceding siblings ...)
2019-03-14 11:34 ` [PATCH v8 07/12] perf record: implement compression for serial trace streaming Alexey Budankov
@ 2019-03-14 11:35 ` Alexey Budankov
2019-03-14 11:36 ` [PATCH v8 09/12] perf record: implement -z,--compression_level=n option Alexey Budankov
` (4 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:35 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Compression is implemented using the functions from zstd.c. As the memory
to operate on the compression uses mmap->aio.data[] buffers. If Zstd
streaming compression API fails for some reason the data to be compressed
are just copied into the memory buffers using plain memcpy().
Compressed trace frame consists of an array of PERF_RECORD_COMPRESSED
records. Each element of the array is not longer that PERF_SAMPLE_MAX_SIZE
and consists of perf_event_header followed by the compressed chunk
that is decompressed on the loading stage.
perf_mmap__aio_push() is replaced by perf_mmap__push() which is now used
in the both serial and AIO streaming cases. perf_mmap__push() is extended
with positive return values to signify absence of data ready for processing.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-record.c | 112 ++++++++++++++++++++++++++++--------
tools/perf/util/mmap.c | 76 +-----------------------
tools/perf/util/mmap.h | 12 ----
3 files changed, 88 insertions(+), 112 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c90824172beb..d23be2a82cf3 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -130,6 +130,8 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
return 0;
}
+static int record__aio_enabled(struct record *rec);
+static int record__comp_enabled(struct record *rec);
static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
void *src, size_t src_size);
@@ -183,9 +185,9 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
if (rem_size == 0) {
cblock->aio_fildes = -1;
/*
- * md->refcount is incremented in perf_mmap__push() for
- * every enqueued aio write request so decrement it because
- * the request is now complete.
+ * md->refcount is incremented in record__aio_pushfn() for
+ * every aio write request started in record__aio_push() so
+ * decrement it because the request is now complete.
*/
perf_mmap__put(md);
rc = 1;
@@ -240,18 +242,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all)
} while (1);
}
-static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+struct record_aio {
+ struct record *rec;
+ void *data;
+ size_t size;
+};
+
+static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
{
- struct record *rec = to;
- int ret, trace_fd = rec->session->data->file.fd;
+ struct record_aio *aio = to;
- rec->samples++;
+ /*
+ * map->base data pointed by buf is copied into free map->aio.data[] buffer
+ * to release space in the kernel buffer as fast as possible, calling
+ * perf_mmap__consume() from perf_mmap__push() function.
+ *
+ * That lets the kernel to proceed with storing more profiling data into
+ * the kernel buffer earlier than other per-cpu kernel buffers are handled.
+ *
+ * Coping can be done in two steps in case the chunk of profiling data
+ * crosses the upper bound of the kernel buffer. In this case we first move
+ * part of data from map->start till the upper bound and then the reminder
+ * from the beginning of the kernel buffer till the end of the data chunk.
+ */
+
+ if (record__comp_enabled(aio->rec)) {
+ size = zstd_compress(aio->rec->session, aio->data + aio->size,
+ perf_mmap__mmap_len(map) - aio->size,
+ buf, size);
+ } else {
+ memcpy(aio->data + aio->size, buf, size);
+ }
+
+ if (!aio->size) {
+ /*
+ * Increment map->refcount to guard map->aio.data[] buffer
+ * from premature deallocation because map object can be
+ * released earlier than aio write request started on
+ * map->aio.data[] buffer is complete.
+ *
+ * perf_mmap__put() is done at record__aio_complete()
+ * after started aio request completion or at record__aio_push()
+ * if the request failed to start.
+ */
+ perf_mmap__get(map);
+ }
+
+ aio->size += size;
+
+ return size;
+}
+
+static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
+{
+ int ret, idx;
+ int trace_fd = rec->session->data->file.fd;
+ struct record_aio aio = { .rec = rec, .size = 0 };
- ret = record__aio_write(cblock, trace_fd, bf, size, off);
+ /*
+ * Call record__aio_sync() to wait till map->aio.data[] buffer
+ * becomes available after previous aio write operation.
+ */
+
+ idx = record__aio_sync(map, false);
+ aio.data = map->aio.data[idx];
+ ret = perf_mmap__push(map, &aio, record__aio_pushfn);
+ if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
+ return ret;
+
+ rec->samples++;
+ ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
if (!ret) {
- rec->bytes_written += size;
+ *off += aio.size;
+ rec->bytes_written += aio.size;
if (switch_output_size(rec))
trigger_hit(&switch_output_trigger);
+ } else {
+ /*
+ * Decrement map->refcount incremented in record__aio_pushfn()
+ * back if record__aio_write() operation failed to start, otherwise
+ * map->refcount is decremented in record__aio_complete() after
+ * aio write operation finishes successfully.
+ */
+ perf_mmap__put(map);
}
return ret;
@@ -307,13 +380,8 @@ static int record__aio_parse(const struct option *opt,
#else /* HAVE_AIO_SUPPORT */
static int nr_cblocks_max = 0;
-static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
-{
- return -1;
-}
-
-static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
- void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
+ off_t *off __maybe_unused)
{
return -1;
}
@@ -823,7 +891,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
int rc = 0;
struct perf_mmap *maps;
int trace_fd = rec->data.file.fd;
- off_t off;
+ off_t off = 0;
if (!evlist)
return 0;
@@ -849,20 +917,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
map->flush = 1;
}
if (!record__aio_enabled(rec)) {
- if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+ if (perf_mmap__push(map, rec, record__pushfn) < 0) {
if (sync)
map->flush = flush;
rc = -1;
goto out;
}
} else {
- int idx;
- /*
- * Call record__aio_sync() to wait till map->data buffer
- * becomes available after previous aio write request.
- */
- idx = record__aio_sync(map, false);
- if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
+ if (record__aio_push(rec, map, &off) < 0) {
record__aio_set_pos(trace_fd, off);
if (sync)
map->flush = flush;
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index d85e73fc82e2..868c0b0e909c 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -289,80 +289,6 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
zfree(&map->aio.cblocks);
zfree(&map->aio.aiocb);
}
-
-int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
- int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
- off_t *off)
-{
- u64 head = perf_mmap__read_head(md);
- unsigned char *data = md->base + page_size;
- unsigned long size, size0 = 0;
- void *buf;
- int rc = 0;
-
- rc = perf_mmap__read_init(md);
- if (rc < 0)
- return (rc == -EAGAIN) ? 0 : -1;
-
- /*
- * md->base data is copied into md->data[idx] buffer to
- * release space in the kernel buffer as fast as possible,
- * thru perf_mmap__consume() below.
- *
- * That lets the kernel to proceed with storing more
- * profiling data into the kernel buffer earlier than other
- * per-cpu kernel buffers are handled.
- *
- * Coping can be done in two steps in case the chunk of
- * profiling data crosses the upper bound of the kernel buffer.
- * In this case we first move part of data from md->start
- * till the upper bound and then the reminder from the
- * beginning of the kernel buffer till the end of
- * the data chunk.
- */
-
- size = md->end - md->start;
-
- if ((md->start & md->mask) + size != (md->end & md->mask)) {
- buf = &data[md->start & md->mask];
- size = md->mask + 1 - (md->start & md->mask);
- md->start += size;
- memcpy(md->aio.data[idx], buf, size);
- size0 = size;
- }
-
- buf = &data[md->start & md->mask];
- size = md->end - md->start;
- md->start += size;
- memcpy(md->aio.data[idx] + size0, buf, size);
-
- /*
- * Increment md->refcount to guard md->data[idx] buffer
- * from premature deallocation because md object can be
- * released earlier than aio write request started
- * on mmap->data[idx] is complete.
- *
- * perf_mmap__put() is done at record__aio_complete()
- * after started request completion.
- */
- perf_mmap__get(md);
-
- md->prev = head;
- perf_mmap__consume(md);
-
- rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
- if (!rc) {
- *off += size0 + size;
- } else {
- /*
- * Decrement md->refcount back if aio write
- * operation failed to start.
- */
- perf_mmap__put(md);
- }
-
- return rc;
-}
#else /* !HAVE_AIO_SUPPORT */
static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
{
@@ -566,7 +492,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
rc = perf_mmap__read_init(md);
if (rc < 0)
- return (rc == -EAGAIN) ? 0 : -1;
+ return (rc == -EAGAIN) ? 1 : -1;
size = md->end - md->start;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index a02427d609c0..e8a34e0bb10e 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -101,18 +101,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map);
int perf_mmap__push(struct perf_mmap *md, void *to,
int push(struct perf_mmap *map, void *to, void *buf, size_t size));
-#ifdef HAVE_AIO_SUPPORT
-int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
- int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
- off_t *off);
-#else
-static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
- int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
- off_t *off __maybe_unused)
-{
- return 0;
-}
-#endif
size_t perf_mmap__mmap_len(struct perf_mmap *map);
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 09/12] perf record: implement -z,--compression_level=n option
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (7 preceding siblings ...)
2019-03-14 11:35 ` [PATCH v8 08/12] perf record: implement compression for AIO " Alexey Budankov
@ 2019-03-14 11:36 ` Alexey Budankov
2019-03-14 11:37 ` [PATCH v8 10/12] perf report: implement record trace decompression Alexey Budankov
` (3 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:36 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implemented -z,--compression_level=n option that enables compression
of mmaped kernel data buffers content in runtime during perf record
mode collection. Default option value is 1 (fastest compression).
Compression overhead has been measured for serial and AIO streaming
when profiling matrix multiplication workload:
-------------------------------------------------------------
| SERIAL | AIO-1 |
----------------------------------------------------------------|
|-z | OVH(x) | ratio(x) size(MiB) | OVH(x) | ratio(x) size(MiB) |
|---------------------------------------------------------------|
| 0 | 1,00 | 1,000 179,424 | 1,00 | 1,000 187,527 |
| 1 | 1,04 | 8,427 181,148 | 1,01 | 8,474 188,562 |
| 2 | 1,07 | 8,055 186,953 | 1,03 | 7,912 191,773 |
| 3 | 1,04 | 8,283 181,908 | 1,03 | 8,220 191,078 |
| 5 | 1,09 | 8,101 187,705 | 1,05 | 7,780 190,065 |
| 8 | 1,05 | 9,217 179,191 | 1,12 | 6,111 193,024 |
-----------------------------------------------------------------
OVH = (Execution time with -z N) / (Execution time with -z 0)
ratio - compression ratio
size - number of bytes that was compressed
size ~= trace size x ratio
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/Documentation/perf-record.txt | 5 +++++
tools/perf/builtin-record.c | 27 +++++++++++++++++++++++-
2 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 8eb3f5b57202..f5c886fde243 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -471,6 +471,11 @@ Also at some cases executing less trace write syscalls with bigger data size can
shorter than executing more trace write syscalls with smaller data size thus lowering
runtime profiling overhead.
+-z::
+--compression-level=n::
+Produce compressed trace using specified level n (default: 1 - fastest compression,
+22 - smallest trace)
+
--all-kernel::
Configure all used events to run in kernel space.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d23be2a82cf3..e0da9100e52f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -346,7 +346,7 @@ static void record__aio_mmap_read_sync(struct record *rec)
struct perf_evlist *evlist = rec->evlist;
struct perf_mmap *maps = evlist->mmap;
- if (!rec->opts.nr_cblocks)
+ if (!record__aio_enabled(rec))
return;
for (i = 0; i < evlist->nr_mmaps; i++) {
@@ -440,6 +440,26 @@ static int record__mmap_flush_parse(const struct option *opt,
return 0;
}
+#ifdef HAVE_ZSTD_SUPPORT
+static unsigned int comp_level_default = 1;
+static int record__parse_comp_level(const struct option *opt,
+ const char *str,
+ int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+
+ if (unset) {
+ opts->comp_level = 0;
+ } else {
+ if (str)
+ opts->comp_level = strtol(str, NULL, 0);
+ if (!opts->comp_level)
+ opts->comp_level = comp_level_default;
+ }
+
+ return 0;
+}
+#endif
static unsigned int comp_level_max = 22;
static int record__comp_enabled(struct record *rec)
@@ -2168,6 +2188,11 @@ static struct option __record_options[] = {
OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
"Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
record__parse_affinity),
+#ifdef HAVE_ZSTD_SUPPORT
+ OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts,
+ &comp_level_default, "n", "Produce compressed trace using specified level (default: 1 - fastest, 22 - smallest trace)",
+ record__parse_comp_level),
+#endif
OPT_END()
};
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 10/12] perf report: implement record trace decompression
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (8 preceding siblings ...)
2019-03-14 11:36 ` [PATCH v8 09/12] perf record: implement -z,--compression_level=n option Alexey Budankov
@ 2019-03-14 11:37 ` Alexey Budankov
2019-03-14 11:38 ` [PATCH v8 11/12] perf inject: enable COMPRESSED records decompression Alexey Budankov
` (2 subsequent siblings)
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:37 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
zstd_init(, comp_level = 0) initializes decompression part of API only
that now consists of zstd_decompress_stream() function.
Trace frames containing PERF_RECORD_COMPRESSED records are decompressed
using zstd_decompress_stream() function into a linked list of mmaped
memory regions of mmap_comp_len size (struct decomp).
After decompression of one COMPRESSED record its content is iterated and
fetched for usual processing. The mmaped memory regions with decompressed
events are kept in the linked list till the tool process termination.
When dumping raw trace (e.g., perf report -D --header) file offsets of
events from compressed records are printed as zero.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-report.c | 5 +-
tools/perf/util/compress.h | 11 ++++
tools/perf/util/session.c | 124 +++++++++++++++++++++++++++++++++++-
tools/perf/util/session.h | 10 +++
tools/perf/util/tool.h | 2 +
tools/perf/util/zstd.c | 40 ++++++++++++
6 files changed, 190 insertions(+), 2 deletions(-)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1921aaa9cece..f8f899245289 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1260,6 +1260,9 @@ int cmd_report(int argc, const char **argv)
if (session == NULL)
return -1;
+ if (zstd_init(&(session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
if (report.queue_size) {
ordered_events__set_alloc_size(&session->ordered_events,
report.queue_size);
@@ -1450,7 +1453,7 @@ int cmd_report(int argc, const char **argv)
error:
if (report.ptime_range)
zfree(&report.ptime_range);
-
+ zstd_fini(&(session->zstd_data));
perf_session__delete(session);
return ret;
}
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index d00d7cb095aa..46127f7e4563 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -20,6 +20,7 @@ bool lzma_is_compressed(const char *input);
struct zstd_data {
#ifdef HAVE_ZSTD_SUPPORT
ZSTD_CStream *cstream;
+ ZSTD_DStream *dstream;
#endif
};
@@ -32,6 +33,9 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data,
void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size,
size_t process_header(void *record, size_t increment));
+size_t zstd_decompress_stream(struct zstd_data *data,
+ void *src, size_t src_size, void *dst, size_t dst_size);
+
#else /* !HAVE_ZSTD_SUPPORT */
static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
@@ -53,6 +57,13 @@ static inline size_t zstd_compress_stream_to_records(struct zstd_data *data __ma
return 0;
}
+static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused,
+ size_t src_size __maybe_unused, void *dst __maybe_unused,
+ size_t dst_size __maybe_unused)
+{
+ return 0;
+}
+
#endif
#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0ec34227bd60..a2e4f1202fe4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -29,6 +29,67 @@
#include "stat.h"
#include "arch/common.h"
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+ union perf_event *event, u64 file_offset)
+{
+ void *src;
+ size_t decomp_size, src_size;
+ u64 decomp_last_rem = 0;
+ size_t decomp_len = session->header.env.comp_mmap_len;
+ struct decomp *decomp, *decomp_last = session->decomp_last;
+
+ decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (decomp == MAP_FAILED) {
+ pr_err("Couldn't allocate memory for decompression\n");
+ return -1;
+ }
+
+ decomp->file_pos = file_offset;
+ decomp->head = 0;
+
+ if (decomp_last) {
+ decomp_last_rem = decomp_last->size - decomp_last->head;
+ memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+ decomp->size = decomp_last_rem;
+ }
+
+ src = (void *)event + sizeof(struct compressed_event);
+ src_size = event->pack.header.size - sizeof(struct compressed_event);
+
+ decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
+ &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+ if (!decomp_size) {
+ munmap(decomp, sizeof(struct decomp) + decomp_len);
+ pr_err("Couldn't decompress data\n");
+ return -1;
+ }
+
+ decomp->size += decomp_size;
+
+ if (session->decomp == NULL) {
+ session->decomp = decomp;
+ session->decomp_last = decomp;
+ } else {
+ session->decomp_last->next = decomp;
+ session->decomp_last = decomp;
+ }
+
+ pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+
+ return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+static int perf_session__process_compressed_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ u64 file_offset __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+#endif
+
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool,
@@ -198,12 +259,23 @@ static void perf_session__delete_threads(struct perf_session *session)
void perf_session__delete(struct perf_session *session)
{
+ struct decomp *next, *decomp;
+ size_t decomp_len;
if (session == NULL)
return;
auxtrace__free(session);
auxtrace_index__free(&session->auxtrace_index);
perf_session__destroy_kernel_maps(session);
perf_session__delete_threads(session);
+ next = session->decomp;
+ decomp_len = session->header.env.comp_mmap_len;
+ do {
+ decomp = next;
+ if (decomp == NULL)
+ break;
+ next = decomp->next;
+ munmap(decomp, decomp_len + sizeof(struct decomp));
+ } while (1);
perf_env__exit(&session->header.env);
machines__exit(&session->machines);
if (session->data)
@@ -429,6 +501,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->time_conv = process_event_op2_stub;
if (tool->feature == NULL)
tool->feature = process_event_op2_stub;
+ if (tool->compressed == NULL)
+ tool->compressed = perf_session__process_compressed_event;
}
static void swap_sample_id_all(union perf_event *event, void *data)
@@ -1372,7 +1446,8 @@ static s64 perf_session__process_user_event(struct perf_session *session,
int fd = perf_data__fd(session->data);
int err;
- dump_event(session->evlist, event, file_offset, &sample);
+ if (event->header.type != PERF_RECORD_COMPRESSED)
+ dump_event(session->evlist, event, file_offset, &sample);
/* These events are processed right away */
switch (event->header.type) {
@@ -1425,6 +1500,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
return tool->time_conv(session, event);
case PERF_RECORD_HEADER_FEATURE:
return tool->feature(session, event);
+ case PERF_RECORD_COMPRESSED:
+ err = tool->compressed(session, event, file_offset);
+ if (err)
+ dump_event(session->evlist, event, file_offset, &sample);
+ return 0;
default:
return -EINVAL;
}
@@ -1707,6 +1787,8 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
volatile int session_done;
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
static int __perf_session__process_pipe_events(struct perf_session *session)
{
struct ordered_events *oe = &session->ordered_events;
@@ -1787,6 +1869,10 @@ static int __perf_session__process_pipe_events(struct perf_session *session)
if (skip > 0)
head += skip;
+ err = __perf_session__process_decomp_events(session);
+ if (err)
+ goto out_err;
+
if (!session_done())
goto more;
done:
@@ -1835,6 +1921,38 @@ fetch_mmaped_event(struct perf_session *session,
return event;
}
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+ s64 skip;
+ u64 size, file_pos = 0;
+ union perf_event *event;
+ struct decomp *decomp = session->decomp_last;
+
+ if (!decomp)
+ return 0;
+
+ while (decomp->head < decomp->size && !session_done()) {
+ event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);
+ if (!event)
+ break;
+
+ size = event->header.size;
+ if (size < sizeof(struct perf_event_header) ||
+ (skip = perf_session__process_event(session, event, file_pos)) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ decomp->file_pos + decomp->head, event->header.size, event->header.type);
+ return -EINVAL;
+ }
+
+ if (skip)
+ size += skip;
+
+ decomp->head += size;
+ }
+
+ return 0;
+}
+
/*
* On 64bit we can mmap the data file in one go. No need for tiny mmap
* slices. On 32bit we use 32MB.
@@ -1942,6 +2060,10 @@ reader__process_events(struct reader *rd, struct perf_session *session,
head += size;
file_pos += size;
+ err = __perf_session__process_decomp_events(session);
+ if (err)
+ goto out;
+
ui_progress__update(prog, size);
if (session_done())
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 6c984c895924..dd8920b745bc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -39,6 +39,16 @@ struct perf_session {
u64 bytes_transferred;
u64 bytes_compressed;
struct zstd_data zstd_data;
+ struct decomp *decomp;
+ struct decomp *decomp_last;
+};
+
+struct decomp {
+ struct decomp *next;
+ u64 file_pos;
+ u64 head;
+ size_t size;
+ char data[];
};
struct perf_tool;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 250391672f9f..9096a6e3de59 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
+typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data);
typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
struct ordered_events *oe);
@@ -72,6 +73,7 @@ struct perf_tool {
stat,
stat_round,
feature;
+ event_op4 compressed;
event_op3 auxtrace;
bool ordered_events;
bool ordering_requires_timestamps;
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index 6d4f69d57567..15aa02c933ef 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -9,6 +9,21 @@ int zstd_init(struct zstd_data *data, int level)
{
size_t ret;
+ data->dstream = ZSTD_createDStream();
+ if (data->dstream == NULL) {
+ pr_err("Couldn't create decompression stream.\n");
+ return -1;
+ }
+
+ ret = ZSTD_initDStream(data->dstream);
+ if (ZSTD_isError(ret)) {
+ pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
+ return -1;
+ }
+
+ if (!level)
+ return 0;
+
data->cstream = ZSTD_createCStream();
if (data->cstream == NULL) {
pr_err("Couldn't create compression stream.\n");
@@ -26,6 +41,11 @@ int zstd_init(struct zstd_data *data, int level)
int zstd_fini(struct zstd_data *data)
{
+ if (data->dstream) {
+ ZSTD_freeDStream(data->dstream);
+ data->dstream = NULL;
+ }
+
if (data->cstream) {
ZSTD_freeCStream(data->cstream);
data->cstream = NULL;
@@ -69,3 +89,23 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data,
return compressed;
}
+size_t zstd_decompress_stream(struct zstd_data *data,
+ void *src, size_t src_size, void *dst, size_t dst_size)
+{
+ size_t ret;
+ ZSTD_inBuffer input = { src, src_size, 0 };
+ ZSTD_outBuffer output = { dst, dst_size, 0 };
+
+ while (input.pos < input.size) {
+ ret = ZSTD_decompressStream(data->dstream, &output, &input);
+ if (ZSTD_isError(ret)) {
+ pr_err("failed to decompress (B): %ld -> %ld : %s\n",
+ src_size, output.size, ZSTD_getErrorName(ret));
+ break;
+ }
+ output.dst = dst + output.pos;
+ output.size = dst_size - output.pos;
+ }
+
+ return output.pos;
+}
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 11/12] perf inject: enable COMPRESSED records decompression
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (9 preceding siblings ...)
2019-03-14 11:37 ` [PATCH v8 10/12] perf report: implement record trace decompression Alexey Budankov
@ 2019-03-14 11:38 ` Alexey Budankov
2019-03-14 11:38 ` [PATCH v8 12/12] perf tests: implement Zstd comp/decomp integration test Alexey Budankov
2019-03-15 12:28 ` [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Jiri Olsa
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:38 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Initialized decompression part of Zstd based API so COMPRESSED records
would be decompressed into the resulting output data file.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
tools/perf/builtin-inject.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 24086b7f1b14..8e0e06d3edfc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -837,6 +837,9 @@ int cmd_inject(int argc, const char **argv)
if (inject.session == NULL)
return -1;
+ if (zstd_init(&(inject.session->zstd_data), 0) < 0)
+ pr_warning("Decompression initialization failed.\n");
+
if (inject.build_ids) {
/*
* to make sure the mmap records are ordered correctly
@@ -867,6 +870,7 @@ int cmd_inject(int argc, const char **argv)
ret = __cmd_inject(&inject);
out_delete:
+ zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
return ret;
}
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v8 12/12] perf tests: implement Zstd comp/decomp integration test
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (10 preceding siblings ...)
2019-03-14 11:38 ` [PATCH v8 11/12] perf inject: enable COMPRESSED records decompression Alexey Budankov
@ 2019-03-14 11:38 ` Alexey Budankov
2019-03-15 12:28 ` [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Jiri Olsa
12 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-14 11:38 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Jiri Olsa, Namhyung Kim, Alexander Shishkin, Ingo Molnar,
Peter Zijlstra, Andi Kleen, linux-kernel
Implemented basic integration test for Zstd based trace
compression/decompression in record and report modes.
Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
---
.../tests/shell/record+zstd_comp_decomp.sh | 28 +++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100755 tools/perf/tests/shell/record+zstd_comp_decomp.sh
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
new file mode 100755
index 000000000000..bc195874fc27
--- /dev/null
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# record trace Zstd compression/decompression
+
+trace_file=$(mktemp /tmp/perf.data.XXX)
+perf_tool=perf
+output=/dev/null
+
+skip_if_no_z_record() {
+ $perf_tool record -h 2>&1 | grep '\-z, \-\-compression\-level'
+}
+
+collect_z_record() {
+ echo "Collecting compressed record trace file:"
+ $perf_tool record -o $trace_file -a -z -F 25000 -e cycles -- \
+ dd count=2000 if=/dev/random of=/dev/null > $output 2>&1
+}
+
+check_compressed_stats() {
+ echo "Checking compressed events stats:"
+ $perf_tool report -i $trace_file --header --stats | \
+ grep -E "(# compressed : Zstd,)|(COMPRESSED events:)" > $output 2>&1
+}
+
+skip_if_no_z_record || exit 2
+collect_z_record && check_compressed_stats
+err=$?
+rm -f $trace_file
+exit $err
--
2.20.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space
2019-03-14 11:26 [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Alexey Budankov
` (11 preceding siblings ...)
2019-03-14 11:38 ` [PATCH v8 12/12] perf tests: implement Zstd comp/decomp integration test Alexey Budankov
@ 2019-03-15 12:28 ` Jiri Olsa
2019-03-15 13:43 ` Alexey Budankov
12 siblings, 1 reply; 16+ messages in thread
From: Jiri Olsa @ 2019-03-15 12:28 UTC (permalink / raw)
To: Alexey Budankov
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Ingo Molnar, Peter Zijlstra, Andi Kleen, linux-kernel
On Thu, Mar 14, 2019 at 02:26:23PM +0300, Alexey Budankov wrote:
>
> The patch set implements runtime trace compression (-z option) in
> record mode and trace auto decompression in report and inject modes.
> Streaming Zstd API [1] is used for compression and decompression of
> data that come from kernel mmaped data buffers.
>
> Usage of implemented -z,--compression_level=n option provides ~3-5x
> avg. trace file size reduction on variety of tested workloads what
> saves storage space on larger server systems where trace file size
> can easily reach several tens or even hundreds of GiBs, especially
> when profiling with dwarf-based stacks and tracing of context switches.
> Default option value is 1 (fastest compression).
>
> Implemented --mmap-flush option can be used to specify minimal size
> of data chunk that is extracted from mmaped kernel buffer to store
> into a trace. The option is independent from -z setting and doesn't
> vary with compression level. The default option value is 1 byte what
> means every time trace writing thread finds some new data in the
> mmaped buffer the data is extracted, possibly compressed and written
> to a trace. The option serves two purposes the first one is to increase
> the compression ratio of trace data and the second one is to avoid
> live-lock self tool process monitoring in system wide (-a) profiling
> mode. Profiling in system wide mode with compression (-a -z) can
> additionally induce data into the kernel buffers along with the data
> from monitored processes. If performance data rate and volume from
> the monitored processes is high then trace streaming and compression
> activity in the tool is also high. It can lead to subtle live-lock
> effect of endless activity when compression of single new byte from
> some of mmaped kernel buffer induces the next single byte at some
> mmaped buffer. So perf tool thread never stops on polling event file
> descriptors. Varying data chunk size to be extracted from mmap buffers
> allows avoiding live-locking self monitoring in system wide mode and
> makes mmap buffers polling loop configurable.
>
> $ tools/perf/perf record -z -e cycles -- matrix.gcc
> $ tools/perf/perf record --aio -z -e cycles -- matrix.gcc
> $ tools/perf/perf record -z --mmap-flush 1024 -e cycles -- matrix.gcc
> $ tools/perf/perf record --aio -z --mmap-flush 1K -e cycles -- matrix.gcc
hi,
I'm getting error with -z:
[root@krava perf]# ./perf record -z ./perf bench sched messaging -l 10000
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 10 groups == 400 processes run
Total time: 18.775 [sec]
[ perf record: Woken up 57 times to write data ]
0x5228 [0]: failed to process type: 81
[ perf record: Captured and wrote 6.453 MB perf.data, compressed (original 21.486 MB, ratio is 3.340) ]
jirka
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space
2019-03-15 12:28 ` [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space Jiri Olsa
@ 2019-03-15 13:43 ` Alexey Budankov
2019-03-15 15:18 ` Alexey Budankov
0 siblings, 1 reply; 16+ messages in thread
From: Alexey Budankov @ 2019-03-15 13:43 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Ingo Molnar, Peter Zijlstra, Andi Kleen, linux-kernel
On 15.03.2019 15:28, Jiri Olsa wrote:
> On Thu, Mar 14, 2019 at 02:26:23PM +0300, Alexey Budankov wrote:
>>
<SNIP>
>> The patch set implements runtime trace compression (-z option) in
>> record mode and trace auto decompression in report and inject modes.
>> Streaming Zstd API [1] is used for compression and decompression of
>> data that come from kernel mmaped data buffers.
>>
<SNIP>
>> $ tools/perf/perf record -z -e cycles -- matrix.gcc
>> $ tools/perf/perf record --aio -z -e cycles -- matrix.gcc
>> $ tools/perf/perf record -z --mmap-flush 1024 -e cycles -- matrix.gcc
>> $ tools/perf/perf record --aio -z --mmap-flush 1K -e cycles -- matrix.gcc
>
> hi,
> I'm getting error with -z:
>
> [root@krava perf]# ./perf record -z ./perf bench sched messaging -l 10000
> # Running 'sched/messaging' benchmark:
> # 20 sender and receiver processes per group
> # 10 groups == 400 processes run
>
> Total time: 18.775 [sec]
> [ perf record: Woken up 57 times to write data ]
> 0x5228 [0]: failed to process type: 81
> [ perf record: Captured and wrote 6.453 MB perf.data, compressed (original 21.486 MB, ratio is 3.340) ]
Reproduced locally. Investigating right now.
tools/perf/perf record -z tools/perf/perf bench sched messaging -l 10000
# Running 'sched/messaging' benchmark:
# 20 sender and receiver processes per group
# 10 groups == 400 processes run
Total time: 8.799 [sec]
[ perf record: Woken up 35 times to write data ]
0x2e48 [0]: failed to process type: 81
[ perf record: Captured and wrote 3.142 MB perf.data, compressed (original 10.241 MB, ratio is 3.272) ]
However it is not observed in my tests on 8 cores Skylake.
tools/perf/perf record -z ../../matrix/linux/matrix.gcc
Addr of buf1 = 0x7f2eca1ab010
Offs of buf1 = 0x7f2eca1ab180
Addr of buf2 = 0x7f2ec81aa010
Offs of buf2 = 0x7f2ec81aa1c0
Addr of buf3 = 0x7f2ec61a9010
Offs of buf3 = 0x7f2ec61a9100
Addr of buf4 = 0x7f2ec41a8010
Offs of buf4 = 0x7f2ec41a8140
Threads #: 8 Pthreads
Matrix size: 2048
Using multiply kernel: multiply1
Execution time = 30.075 seconds
[ perf record: Woken up 127 times to write data ]
[ perf record: Captured and wrote 6.820 MB perf.data (953438 samples), compressed (original 36.372 MB, ratio is 5.344) ]
Thanks,
Alexey
>
>
> jirka
>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v8 00/12] perf: enable compression of record mode trace to save storage space
2019-03-15 13:43 ` Alexey Budankov
@ 2019-03-15 15:18 ` Alexey Budankov
0 siblings, 0 replies; 16+ messages in thread
From: Alexey Budankov @ 2019-03-15 15:18 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Namhyung Kim, Alexander Shishkin,
Ingo Molnar, Peter Zijlstra, Andi Kleen, linux-kernel
On 15.03.2019 16:43, Alexey Budankov wrote:
> On 15.03.2019 15:28, Jiri Olsa wrote:
>> On Thu, Mar 14, 2019 at 02:26:23PM +0300, Alexey Budankov wrote:
>>>
> <SNIP>
>>> The patch set implements runtime trace compression (-z option) in
>>> record mode and trace auto decompression in report and inject modes.
>>> Streaming Zstd API [1] is used for compression and decompression of
>>> data that come from kernel mmaped data buffers.
>>>
> <SNIP>
>>> $ tools/perf/perf record -z -e cycles -- matrix.gcc
>>> $ tools/perf/perf record --aio -z -e cycles -- matrix.gcc
>>> $ tools/perf/perf record -z --mmap-flush 1024 -e cycles -- matrix.gcc
>>> $ tools/perf/perf record --aio -z --mmap-flush 1K -e cycles -- matrix.gcc
>>
>> hi,
>> I'm getting error with -z:
>>
>> [root@krava perf]# ./perf record -z ./perf bench sched messaging -l 10000
>> # Running 'sched/messaging' benchmark:
>> # 20 sender and receiver processes per group
>> # 10 groups == 400 processes run
>>
>> Total time: 18.775 [sec]
>> [ perf record: Woken up 57 times to write data ]
>> 0x5228 [0]: failed to process type: 81
>> [ perf record: Captured and wrote 6.453 MB perf.data, compressed (original 21.486 MB, ratio is 3.340) ]
>
> Reproduced locally. Investigating right now.
>
> tools/perf/perf record -z tools/perf/perf bench sched messaging -l 10000
> # Running 'sched/messaging' benchmark:
> # 20 sender and receiver processes per group
> # 10 groups == 400 processes run
>
> Total time: 8.799 [sec]
> [ perf record: Woken up 35 times to write data ]
> 0x2e48 [0]: failed to process type: 81
> [ perf record: Captured and wrote 3.142 MB perf.data, compressed (original 10.241 MB, ratio is 3.272) ]
Here it is:
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4c1d4c4f7b33..e69e50db53b0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -891,7 +891,7 @@ static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_
void *src, size_t src_size)
{
size_t compressed;
- size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event);
+ size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1;
compressed = zstd_compress_stream_to_records(&(session->zstd_data),
dst, dst_size, src, src_size, max_record_size,
~Alexey
>
> However it is not observed in my tests on 8 cores Skylake.
>
> tools/perf/perf record -z ../../matrix/linux/matrix.gcc
> Addr of buf1 = 0x7f2eca1ab010
> Offs of buf1 = 0x7f2eca1ab180
> Addr of buf2 = 0x7f2ec81aa010
> Offs of buf2 = 0x7f2ec81aa1c0
> Addr of buf3 = 0x7f2ec61a9010
> Offs of buf3 = 0x7f2ec61a9100
> Addr of buf4 = 0x7f2ec41a8010
> Offs of buf4 = 0x7f2ec41a8140
> Threads #: 8 Pthreads
> Matrix size: 2048
> Using multiply kernel: multiply1
> Execution time = 30.075 seconds
> [ perf record: Woken up 127 times to write data ]
> [ perf record: Captured and wrote 6.820 MB perf.data (953438 samples), compressed (original 36.372 MB, ratio is 5.344) ]
>
> Thanks,
> Alexey
>
>>
>>
>> jirka
>>
>
^ permalink raw reply related [flat|nested] 16+ messages in thread