All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com>
To: rostedt@goodmis.org
Cc: linux-trace-devel@vger.kernel.org
Subject: [PATCH v3 14/20] trace-cmd library: Add logic for in-memory decompression
Date: Fri,  8 Oct 2021 07:22:08 +0300	[thread overview]
Message-ID: <20211008042214.977193-15-tz.stoyanov@gmail.com> (raw)
In-Reply-To: <20211008042214.977193-1-tz.stoyanov@gmail.com>

There are two approaches to read compressed trace data:
 - use a temporary file to decompress entire trace data before reading
 - use in-memory decompression of requested trace data chunk only
In-memory decompression seems to be more efficient, but selecting which
approach to use depends in the use case.
A compression chunk consists of multiple trace pages, that's why a small
cache with uncompressed chunks is implemented. The chunk stays in the
cache until there are pages which have reference to it.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 lib/trace-cmd/trace-input.c | 110 ++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
index 91114483..5b9ff041 100644
--- a/lib/trace-cmd/trace-input.c
+++ b/lib/trace-cmd/trace-input.c
@@ -29,6 +29,9 @@
 
 #define COMMIT_MASK ((1 << 27) - 1)
 
+/* force uncompressing in memory */
+#define INMEMORY_DECOMPRESS
+
 /* for debugging read instead of mmap */
 static int force_read = 0;
 
@@ -1249,6 +1252,105 @@ static void free_page_map(struct page_map *page_map)
 	free(page_map);
 }
 
+#define CHUNK_CHECK_OFFSET(C, O)	((O) >= (C)->offset && (O) < ((C)->offset + (C)->size))
+static struct tracecmd_compress_chunk *get_zchunk(struct cpu_data *cpu, off64_t offset)
+{
+	struct cpu_zdata *cpuz = &cpu->compress;
+	int min, mid, max;
+
+	if (!cpuz->chunks)
+		return NULL;
+	if (offset > (cpuz->chunks[cpuz->count - 1].offset + cpuz->chunks[cpuz->count - 1].size))
+		return NULL;
+
+	/* check if the requested offset is in the last requested chunk or in the next chunk */
+	if (CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset))
+		return cpuz->chunks + cpuz->last_chunk;
+	cpuz->last_chunk++;
+	if (cpuz->last_chunk < cpuz->count &&
+	    CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset))
+		return cpuz->chunks + cpuz->last_chunk;
+
+	/* do a binary search to find the chunk holding the given offset */
+	min = 0;
+	max = cpuz->count - 1;
+	mid = (min + max)/2;
+	while (min <= max) {
+		if (offset < cpuz->chunks[mid].offset)
+			max = mid - 1;
+		else if (offset > (cpuz->chunks[mid].offset + cpuz->chunks[mid].size))
+			min = mid + 1;
+		else
+			break;
+		mid = (min + max)/2;
+	}
+	cpuz->last_chunk = mid;
+	return cpuz->chunks + mid;
+}
+
+static void free_zpage(struct cpu_data *cpu_data, void *map)
+{
+	struct zchunk_cache *cache;
+
+	list_for_each_entry(cache, &cpu_data->compress.cache, list) {
+		if (map <= cache->map && map > (cache->map + cache->chunk->size))
+			goto found;
+	}
+	return;
+
+found:
+	cache->ref--;
+	if (cache->ref)
+		return;
+	list_del(&cache->list);
+	free(cache->map);
+	free(cache);
+}
+
+static void *read_zpage(struct tracecmd_input *handle, int cpu, off64_t offset)
+{
+	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
+	struct tracecmd_compress_chunk *chunk;
+	struct zchunk_cache *cache;
+	void *map = NULL;
+	int pindex;
+	int size;
+
+	/* Look in the cache of already loaded chunks */
+	list_for_each_entry(cache, &cpu_data->compress.cache, list) {
+		if (CHUNK_CHECK_OFFSET(cache->chunk, offset)) {
+			cache->ref++;
+			goto out;
+		}
+	}
+
+	chunk =  get_zchunk(cpu_data, offset);
+	if (!chunk)
+		return NULL;
+	size = handle->page_size > chunk->size ? handle->page_size : chunk->size;
+	map = malloc(size);
+	if (!map)
+		return NULL;
+	if (tracecmd_uncompress_chunk(handle->compress, chunk, map) < 0)
+		goto error;
+
+	cache = calloc(1, sizeof(struct zchunk_cache));
+	if (!cache)
+		goto error;
+	cache->ref = 1;
+	cache->chunk = chunk;
+	cache->map = map;
+	list_add(&cache->list, &cpu_data->compress.cache);
+
+	/* a chunk can hold multiple pages, get the requested one */
+out:
+	pindex = (offset - cache->chunk->offset) / handle->page_size;
+	return cache->map + (pindex * handle->page_size);
+error:
+	free(map);
+	return NULL;
+}
+
 static void *allocate_page_map(struct tracecmd_input *handle,
 			       struct page *page, int cpu, off64_t offset)
 {
@@ -1260,6 +1362,9 @@ static void *allocate_page_map(struct tracecmd_input *handle,
 	int ret;
 	int fd;
 
+	if (handle->cpu_compressed && handle->read_zpage)
+		return read_zpage(handle, cpu, offset);
+
 	if (handle->read_page) {
 		map = malloc(handle->page_size);
 		if (!map)
@@ -1402,6 +1507,8 @@ static void __free_page(struct tracecmd_input *handle, struct page *page)
 
 	if (handle->read_page)
 		free(page->map);
+	else if (handle->read_zpage)
+		free_zpage(cpu_data, page->map);
 	else
 		free_page_map(page->page_map);
 
@@ -3890,6 +3997,9 @@ struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags)
 	/* By default, use usecs, unless told otherwise */
 	handle->flags |= TRACECMD_FL_IN_USECS;
 
+#ifdef INMEMORY_DECOMPRESS
+	handle->read_zpage = 1;
+#endif
 	if (do_read_check(handle, buf, 3))
 		goto failed_read;
 
-- 
2.31.1


  parent reply	other threads:[~2021-10-08  4:22 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-08  4:21 [PATCH v3 00/20] Trace file version 7 - compression Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 01/20] trace-cmd library: Add support for compression algorithms Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 02/20] trace-cmd library: Internal helpers for compressing data Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 03/20] trace-cmd library: Internal helpers for uncompressing data Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 04/20] trace-cmd library: Inherit compression algorithm from input file Tzvetomir Stoyanov (VMware)
2021-10-08  4:21 ` [PATCH v3 05/20] trace-cmd library: New API to configure compression on an output handler Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 06/20] trace-cmd library: Write compression header in the trace file Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 07/20] trace-cmd library: Compress part of " Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 08/20] trace-cmd library: Add local helper function for data compression Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 09/20] trace-cmd library: Compress the trace data Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 10/20] trace-cmd library: Decompress the options section, if it is compressed Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 11/20] trace-cmd library: Read compression header Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 12/20] trace-cmd library: Extend the input handler with trace data decompression context Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 13/20] trace-cmd library: Initialize CPU data decompression logic Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` Tzvetomir Stoyanov (VMware) [this message]
2021-10-08  4:22 ` [PATCH v3 15/20] trace-cmd library: Read compressed latency data Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 16/20] trace-cmd library: Decompress file sections on reading Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 17/20] trace-cmd library: Add zlib compression algorithm Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 18/20] trace-cmd list: Show supported compression algorithms Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 19/20] trace-cmd record: Add compression to the trace context Tzvetomir Stoyanov (VMware)
2021-10-08  4:22 ` [PATCH v3 20/20] trace-cmd report: Add new parameter for trace file compression Tzvetomir Stoyanov (VMware)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211008042214.977193-15-tz.stoyanov@gmail.com \
    --to=tz.stoyanov@gmail.com \
    --cc=linux-trace-devel@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.