linux-trace-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] Fix overflow when applying tsc2nsec calculations
@ 2021-04-16 10:34 Tzvetomir Stoyanov (VMware)
  2021-04-16 10:34 ` [PATCH v2 1/3] trace-cmd library: Add new trace-cmd library APIs for guest ts corrections Tzvetomir Stoyanov (VMware)
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2021-04-16 10:34 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

Verify that applying tsc2nsec corrections on the recorded timestamps will not
cause an overflow. When recording the trace, in case of tsc2nsec clock, get
the first TSC timestamp as offset to be used in tsc2nsec conversion.

v2 changes:
 - Optimize the logic for getting the timestamp of the first recorded event.

Tzvetomir Stoyanov (VMware) (3):
  trace-cmd library: Add new trace-cmd library APIs for guest ts
    corrections
  trace-cmd library: Add check before applying tsc2nsec offset
  trace-cmd: Get the timestamp of the first recorded event as TSC offset

 .../include/private/trace-cmd-private.h       |  18 ++-
 lib/trace-cmd/trace-input.c                   | 116 ++-----------
 lib/trace-cmd/trace-timesync.c                | 113 ++++++++++---
 tracecmd/include/trace-local.h                |   1 +
 tracecmd/trace-record.c                       | 152 ++++++++++++++----
 5 files changed, 246 insertions(+), 154 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/3] trace-cmd library: Add new trace-cmd library APIs for guest ts corrections
  2021-04-16 10:34 [PATCH v2 0/3] Fix overflow when applying tsc2nsec calculations Tzvetomir Stoyanov (VMware)
@ 2021-04-16 10:34 ` Tzvetomir Stoyanov (VMware)
  2021-04-16 10:34 ` [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset Tzvetomir Stoyanov (VMware)
  2021-04-16 10:34 ` [PATCH v2 3/3] trace-cmd: Get the timestamp of the first recorded event as TSC offset Tzvetomir Stoyanov (VMware)
  2 siblings, 0 replies; 10+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2021-04-16 10:34 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

The logic for converting guest to host timestamps is local to the trace-cmd
library and is used only when a trace file is opened. In order to reuse
that logic, a new APIs are added:
 tracecmd_tsync_get_proto_flags()
 tracecmd_tsync_get_offsets()
 tracecmd_guest_ts_calc()
 struct ts_offset_sample
 struct tracecmd_ts_corrections

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 .../include/private/trace-cmd-private.h       |  18 ++-
 lib/trace-cmd/trace-input.c                   | 106 ++--------------
 lib/trace-cmd/trace-timesync.c                | 113 ++++++++++++++----
 3 files changed, 116 insertions(+), 121 deletions(-)

diff --git a/lib/trace-cmd/include/private/trace-cmd-private.h b/lib/trace-cmd/include/private/trace-cmd-private.h
index 42e739fa..56f82244 100644
--- a/lib/trace-cmd/include/private/trace-cmd-private.h
+++ b/lib/trace-cmd/include/private/trace-cmd-private.h
@@ -443,6 +443,17 @@ enum tracecmd_time_sync_role {
 /* Timestamp synchronization flags */
 #define TRACECMD_TSYNC_FLAG_INTERPOLATE	0x1
 
+struct ts_offset_sample {
+	long long	time;
+	long long	offset;
+	long long	scaling;
+};
+
+struct tracecmd_ts_corrections {
+	int	ts_samples_count;
+	struct ts_offset_sample	*ts_samples;
+};
+
 void tracecmd_tsync_init(void);
 int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role);
 bool tsync_proto_is_supported(const char *proto_name);
@@ -456,8 +467,8 @@ tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval,
 			  int guest_cpus, const char *proto_name, const char *clock);
 int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync);
 int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
-				int *count, long long **ts,
-				long long **offsets, long long **scalings);
+			       struct tracecmd_ts_corrections *offsets);
+int tracecmd_tsync_get_proto_flags(struct tracecmd_time_sync *tsync, unsigned int *flags);
 int tracecmd_tsync_get_session_params(struct tracecmd_time_sync *tsync,
 				      char **selected_proto,
 				      unsigned int *tsync_port);
@@ -465,6 +476,9 @@ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync);
 int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 				    struct tracecmd_time_sync *tsync);
 
+unsigned long long tracecmd_guest_ts_calc(unsigned long long ts,
+					  struct tracecmd_ts_corrections *tsync, int flags);
+
 /* --- Plugin handling --- */
 extern struct tep_plugin_option trace_ftrace_options[];
 
diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
index b17b36e0..974879e8 100644
--- a/lib/trace-cmd/trace-input.c
+++ b/lib/trace-cmd/trace-input.c
@@ -78,12 +78,6 @@ struct input_buffer_instance {
 	size_t			offset;
 };
 
-struct ts_offset_sample {
-	long long	time;
-	long long	offset;
-	long long	scaling;
-};
-
 struct guest_trace_info {
 	struct guest_trace_info	*next;
 	char			*name;
@@ -92,19 +86,12 @@ struct guest_trace_info {
 	int			*cpu_pid;
 };
 
-struct timesync_offsets {
-	int	ts_samples_count;
-	struct ts_offset_sample	*ts_samples;
-};
-
 struct host_trace_info {
-	unsigned long long	peer_trace_id;
-	unsigned int		flags;
-	bool			sync_enable;
-	int			ts_samples_count;
-	struct ts_offset_sample	*ts_samples;
-	int			cpu_count;
-	struct timesync_offsets	*ts_offsets;
+	unsigned long long			peer_trace_id;
+	unsigned int				flags;
+	bool					sync_enable;
+	int					cpu_count;
+	struct tracecmd_ts_corrections		*ts_offsets;
 };
 
 struct tsc2nsec {
@@ -1227,81 +1214,6 @@ static unsigned long long mul_u64_u32_shr(unsigned long long a,
 	return ret;
 }
 
-static inline unsigned long long
-timestamp_correction_calc(unsigned long long ts, unsigned int flags,
-			  struct ts_offset_sample *min,
-			  struct ts_offset_sample *max)
-{
-	long long scaling;
-	long long tscor;
-
-	if (flags & TRACECMD_TSYNC_FLAG_INTERPOLATE) {
-		long long delta = max->time - min->time;
-		long long offset = ((long long)ts - min->time) *
-				   (max->offset - min->offset);
-
-		scaling = (min->scaling + max->scaling) / 2;
-		tscor = min->offset + (offset + delta / 2) / delta;
-
-	} else {
-		scaling = min->scaling;
-		tscor = min->offset;
-	}
-
-	ts *= scaling;
-	if (tscor < 0)
-		return ts - llabs(tscor);
-
-	return ts + tscor;
-}
-
-static unsigned long long timestamp_host_sync(unsigned long long ts, int cpu,
-					      struct tracecmd_input *handle)
-{
-	struct timesync_offsets *tsync;
-	int min, mid, max;
-
-	if (cpu >= handle->host.cpu_count)
-		return ts;
-	tsync = &handle->host.ts_offsets[cpu];
-
-	/* We have one sample, nothing to calc here */
-	if (tsync->ts_samples_count == 1)
-		return ts + tsync->ts_samples[0].offset;
-
-	/* We have two samples, nothing to search here */
-	if (tsync->ts_samples_count == 2)
-		return timestamp_correction_calc(ts, handle->host.flags,
-						 &tsync->ts_samples[0],
-						 &tsync->ts_samples[1]);
-
-	/* We have more than two samples */
-	if (ts <= tsync->ts_samples[0].time)
-		return timestamp_correction_calc(ts, handle->host.flags,
-						 &tsync->ts_samples[0],
-						 &tsync->ts_samples[1]);
-	else if (ts >= tsync->ts_samples[tsync->ts_samples_count-1].time)
-		return timestamp_correction_calc(ts, handle->host.flags,
-						 &tsync->ts_samples[tsync->ts_samples_count-2],
-						 &tsync->ts_samples[tsync->ts_samples_count-1]);
-	min = 0;
-	max = tsync->ts_samples_count-1;
-	mid = (min + max)/2;
-	while (min <= max) {
-		if (ts < tsync->ts_samples[mid].time)
-			max = mid - 1;
-		else if (ts > tsync->ts_samples[mid].time)
-			min = mid + 1;
-		else
-			break;
-		mid = (min + max)/2;
-	}
-
-	return timestamp_correction_calc(ts, handle->host.flags,
-					 &tsync->ts_samples[mid],
-					 &tsync->ts_samples[mid+1]);
-}
-
 static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
 					 struct tracecmd_input *handle)
 {
@@ -1310,8 +1222,8 @@ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
 		return ts;
 
 	/* Guest trace file, sync with host timestamps */
-	if (handle->host.sync_enable)
-		ts = timestamp_host_sync(ts, cpu, handle);
+	if (handle->host.sync_enable && cpu < handle->host.cpu_count)
+		ts = tracecmd_guest_ts_calc(ts, &handle->host.ts_offsets[cpu], handle->host.flags);
 
 	if (handle->ts2secs) {
 		/* user specified clock frequency */
@@ -2329,7 +2241,7 @@ static int tsync_offset_cmp(const void *a, const void *b)
 	} while (0)
 
 static int tsync_offset_load(struct tep_handle	*tep,
-			     struct timesync_offsets *ts_offsets, char *buf, int size)
+			     struct tracecmd_ts_corrections *ts_offsets, char *buf, int size)
 {
 	int start_size = size;
 	int i, j;
@@ -2357,7 +2269,7 @@ static int tsync_cpu_offsets_load(struct tracecmd_input *handle, char *buf, int
 
 	safe_read(handle->host.cpu_count, 4);
 	handle->host.ts_offsets = calloc(handle->host.cpu_count,
-					 sizeof(struct timesync_offsets));
+					 sizeof(struct tracecmd_ts_corrections));
 	if (!handle->host.ts_offsets)
 		return -ENOMEM;
 	for (i = 0; i < handle->host.cpu_count; i++) {
diff --git a/lib/trace-cmd/trace-timesync.c b/lib/trace-cmd/trace-timesync.c
index 19ca19d7..791a0964 100644
--- a/lib/trace-cmd/trace-timesync.c
+++ b/lib/trace-cmd/trace-timesync.c
@@ -133,36 +133,101 @@ bool __hidden tsync_proto_is_supported(const char *proto_name)
  *
  * @tsync: Pointer to time sync context
  * @cpu: CPU for which to get the calculated offsets
- * @count: Returns the number of calculated time offsets
- * @ts: Array of size @count containing timestamps of callculated offsets
- * @offsets: array of size @count, containing offsets for each timestamp
- * @scalings: array of size @count, containing scaling ratios for each timestamp
+ * @offsets: Returns the calculated timestamp offsets for the given @cpu
  *
  * Retuns -1 in case of an error, or 0 otherwise
  */
 int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
-			       int *count, long long **ts,
-			       long long **offsets, long long **scalings)
+			       struct tracecmd_ts_corrections *offsets)
 {
 	struct clock_sync_context *tsync_context;
+	int i;
 
 	if (!tsync || !tsync->context)
 		return -1;
 	tsync_context = (struct clock_sync_context *)tsync->context;
 	if (cpu >= tsync_context->cpu_count || !tsync_context->offsets)
 		return -1;
-	if (count)
-		*count = tsync_context->offsets[cpu].sync_count;
-	if (ts)
-		*ts = tsync_context->offsets[cpu].sync_ts;
-	if (offsets)
-		*offsets = tsync_context->offsets[cpu].sync_offsets;
-	if (scalings)
-		*scalings = tsync_context->offsets[cpu].sync_scalings;
+	if (offsets) {
+		offsets->ts_samples = calloc(tsync_context->offsets[cpu].sync_count,
+					     sizeof(struct ts_offset_sample));
+		if (!offsets->ts_samples)
+			return -1;
+		offsets->ts_samples_count = tsync_context->offsets[cpu].sync_count;
+		for (i = 0; i < offsets->ts_samples_count; i++) {
+			offsets->ts_samples[i].time = tsync_context->offsets[cpu].sync_ts[i];
+			offsets->ts_samples[i].offset = tsync_context->offsets[cpu].sync_offsets[i];
+			offsets->ts_samples[i].scaling = tsync_context->offsets[cpu].sync_scalings[i];
+		}
+	}
 
 	return 0;
 }
 
+static inline unsigned long long correction_calc(unsigned long long ts, unsigned int flags,
+						struct ts_offset_sample *min,
+						struct ts_offset_sample *max)
+{
+	long long scaling;
+	long long tscor;
+
+	if (flags & TRACECMD_TSYNC_FLAG_INTERPOLATE) {
+		long long delta = max->time - min->time;
+		long long offset = ((long long)ts - min->time) *
+				   (max->offset - min->offset);
+
+		scaling = (min->scaling + max->scaling) / 2;
+		tscor = min->offset + (offset + delta / 2) / delta;
+
+	} else {
+		scaling = min->scaling;
+		tscor = min->offset;
+	}
+
+	ts *= scaling;
+	if (tscor < 0)
+		return ts - llabs(tscor);
+
+	return ts + tscor;
+}
+
+
+unsigned long long tracecmd_guest_ts_calc(unsigned long long ts,
+					  struct tracecmd_ts_corrections *tsync, int flags)
+{
+	int min, mid, max;
+
+	/* We have one sample, nothing to calc here */
+	if (tsync->ts_samples_count == 1)
+		return ts + tsync->ts_samples[0].offset;
+
+	/* We have two samples, nothing to search here */
+	if (tsync->ts_samples_count == 2)
+		return correction_calc(ts, flags, &tsync->ts_samples[0], &tsync->ts_samples[1]);
+
+	/* We have more than two samples */
+	if (ts <= tsync->ts_samples[0].time)
+		return correction_calc(ts, flags, &tsync->ts_samples[0], &tsync->ts_samples[1]);
+	else if (ts >= tsync->ts_samples[tsync->ts_samples_count-1].time)
+		return correction_calc(ts, flags,
+				       &tsync->ts_samples[tsync->ts_samples_count-2],
+				       &tsync->ts_samples[tsync->ts_samples_count-1]);
+	min = 0;
+	max = tsync->ts_samples_count-1;
+	mid = (min + max)/2;
+	while (min <= max) {
+		if (ts < tsync->ts_samples[mid].time)
+			max = mid - 1;
+		else if (ts > tsync->ts_samples[mid].time)
+			min = mid + 1;
+		else
+			break;
+		mid = (min + max)/2;
+	}
+
+	return correction_calc(ts, flags, &tsync->ts_samples[mid], &tsync->ts_samples[mid+1]);
+}
+
 /**
  * tsync_get_proto_flags - Get protocol flags
  *
@@ -171,8 +236,7 @@ int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu,
  *
  * Retuns -1 in case of an error, or 0 otherwise
  */
-static int tsync_get_proto_flags(struct tracecmd_time_sync *tsync,
-				 unsigned int *flags)
+int tracecmd_tsync_get_proto_flags(struct tracecmd_time_sync *tsync, unsigned int *flags)
 {
 	struct tsync_proto *protocol;
 
@@ -924,6 +988,7 @@ error:
 int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 				    struct tracecmd_time_sync *tsync)
 {
+	struct clock_sync_context *tsync_context;
 	struct iovec *vector = NULL;
 	unsigned int flags;
 	long long *scalings = NULL;
@@ -934,13 +999,15 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 	int i, j;
 	int ret = -1;
 
-	if (!tsync->vcpu_count)
+	if (!tsync || !tsync->context || !tsync->vcpu_count)
 		return -1;
+	tsync_context = (struct clock_sync_context *)tsync->context;
+
 	vcount = 3 + (4 * tsync->vcpu_count);
 	vector = calloc(vcount, sizeof(struct iovec));
 	if (!vector)
 		return -1;
-	ret = tsync_get_proto_flags(tsync, &flags);
+	ret = tracecmd_tsync_get_proto_flags(tsync, &flags);
 	if (ret < 0)
 		goto out;
 
@@ -952,11 +1019,13 @@ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle,
 	vector[j].iov_len = 4;
 	vector[j++].iov_base = &tsync->vcpu_count;
 	for (i = 0; i < tsync->vcpu_count; i++) {
-		if (j >= vcount)
+		if (j >= vcount || i >= tsync_context->cpu_count)
 			break;
-		ret = tracecmd_tsync_get_offsets(tsync, i, &count,
-						 &ts, &offsets, &scalings);
-		if (ret < 0 || !count || !ts || !offsets || !scalings)
+		count = tsync_context->offsets[i].sync_count;
+		ts = tsync_context->offsets[i].sync_ts;
+		offsets = tsync_context->offsets[i].sync_offsets;
+		scalings = tsync_context->offsets[i].sync_scalings;
+		if (!count || !ts || !offsets || !scalings)
 			break;
 		vector[j].iov_len = 4;
 		vector[j++].iov_base = &count;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset
  2021-04-16 10:34 [PATCH v2 0/3] Fix overflow when applying tsc2nsec calculations Tzvetomir Stoyanov (VMware)
  2021-04-16 10:34 ` [PATCH v2 1/3] trace-cmd library: Add new trace-cmd library APIs for guest ts corrections Tzvetomir Stoyanov (VMware)
@ 2021-04-16 10:34 ` Tzvetomir Stoyanov (VMware)
  2021-04-16 20:12   ` Steven Rostedt
  2021-04-16 10:34 ` [PATCH v2 3/3] trace-cmd: Get the timestamp of the first recorded event as TSC offset Tzvetomir Stoyanov (VMware)
  2 siblings, 1 reply; 10+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2021-04-16 10:34 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

When converting TSC timestamps to nanoseconds, an offset is used. That
offset is subtracted from each TSC timestamp, before the conversion.
However, if the timestamp is lower that the offset this causes an
overflow, as both timestamp and offset are unsigned long integers. A
check is added to verify the subtraction will not cause an overflow. In
case of an overflow, the timestamp is set to 0 and a warning message is
printed.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 lib/trace-cmd/trace-input.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
index 974879e8..991abd5f 100644
--- a/lib/trace-cmd/trace-input.c
+++ b/lib/trace-cmd/trace-input.c
@@ -1230,8 +1230,14 @@ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
 		ts *= handle->ts2secs;
 	} else if (handle->tsc_calc.mult) {
 		/* auto calculated TSC clock frequency */
-		ts -= handle->tsc_calc.offset;
-		ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
+		if (ts > handle->tsc_calc.offset) {
+			ts -= handle->tsc_calc.offset;
+			ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
+		} else {
+			tracecmd_warning("Timestamp $llu is before the initial offset %llu, set it to 0",
+					 ts, handle->tsc_calc.offset);
+			ts = 0;
+		}
 	}
 
 	/* User specified time offset with --ts-offset or --date options */
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 3/3] trace-cmd: Get the timestamp of the first recorded event as TSC offset
  2021-04-16 10:34 [PATCH v2 0/3] Fix overflow when applying tsc2nsec calculations Tzvetomir Stoyanov (VMware)
  2021-04-16 10:34 ` [PATCH v2 1/3] trace-cmd library: Add new trace-cmd library APIs for guest ts corrections Tzvetomir Stoyanov (VMware)
  2021-04-16 10:34 ` [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset Tzvetomir Stoyanov (VMware)
@ 2021-04-16 10:34 ` Tzvetomir Stoyanov (VMware)
  2021-04-16 19:28   ` Steven Rostedt
  2 siblings, 1 reply; 10+ messages in thread
From: Tzvetomir Stoyanov (VMware) @ 2021-04-16 10:34 UTC (permalink / raw)
  To: rostedt; +Cc: linux-trace-devel

When converting TSC timestamps to nanoseconds, an offset is used. That
offset is subtracted from each TSC timestamp, before the conversion. At
the end of the trace, the lowest recorded TSC timestamp is selected as
TSC offset, that will be used for tsc2nsec conversion.

Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
---
 tracecmd/include/trace-local.h |   1 +
 tracecmd/trace-record.c        | 152 ++++++++++++++++++++++++++-------
 2 files changed, 122 insertions(+), 31 deletions(-)

diff --git a/tracecmd/include/trace-local.h b/tracecmd/include/trace-local.h
index 1218de12..d504ea14 100644
--- a/tracecmd/include/trace-local.h
+++ b/tracecmd/include/trace-local.h
@@ -213,6 +213,7 @@ struct buffer_instance {
 	char			*name;
 	struct tracefs_instance	*tracefs;
 	unsigned long long	trace_id;
+	unsigned long long	first_ts;
 	char			*cpumask;
 	char			*output_file;
 	struct event_list	*events;
diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index fd03a605..b78b8af9 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -40,6 +40,7 @@
 #ifdef VSOCK
 #include <linux/vm_sockets.h>
 #endif
+#include <traceevent/kbuffer.h>
 
 #include "tracefs.h"
 #include "version.h"
@@ -685,34 +686,6 @@ add_tsc2nsec(struct tracecmd_output *handle, struct tsc_nsec *tsc2nsec)
 	tracecmd_add_option_v(handle, TRACECMD_OPTION_TSC2NSEC, vector, 3);
 }
 
-static void host_tsync_complete(struct common_record_context *ctx,
-				struct buffer_instance *instance)
-{
-	struct tracecmd_output *handle = NULL;
-	int fd = -1;
-	int ret;
-
-	ret = tracecmd_tsync_with_guest_stop(instance->tsync);
-	if (!ret) {
-		fd = open(instance->output_file, O_RDWR);
-		if (fd < 0)
-			die("error opening %s", instance->output_file);
-		handle = tracecmd_get_output_handle_fd(fd);
-		if (!handle)
-			die("cannot create output handle");
-
-		if (ctx->tsc2nsec.mult)
-			add_tsc2nsec(handle, &ctx->tsc2nsec);
-
-		tracecmd_write_guest_time_shift(handle, instance->tsync);
-		tracecmd_append_options(handle);
-		tracecmd_output_close(handle);
-	}
-
-	tracecmd_tsync_free(instance->tsync);
-	instance->tsync = NULL;
-}
-
 static void tell_guests_to_stop(struct common_record_context *ctx)
 {
 	struct buffer_instance *instance;
@@ -725,7 +698,7 @@ static void tell_guests_to_stop(struct common_record_context *ctx)
 
 	for_all_instances(instance) {
 		if (is_guest(instance))
-			host_tsync_complete(ctx, instance);
+			tracecmd_tsync_with_guest_stop(instance->tsync);
 	}
 
 	/* Wait for guests to acknowledge */
@@ -4183,7 +4156,105 @@ static void add_options(struct tracecmd_output *handle, struct common_record_con
 	free(clocks);
 }
 
-static void write_guest_file(struct buffer_instance *instance)
+int get_first_ts(struct kbuffer *kbuf, char *page, int psize, const char *file, unsigned long long *ts)
+{
+	int rsize;
+	int ret = -1;
+	char *data;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	rsize = read(fd, page, psize);
+	if (rsize <= 0)
+		goto error;
+
+	kbuffer_load_subbuffer(kbuf, page);
+	if (kbuffer_subbuffer_size(kbuf) > rsize)
+		goto error;
+
+	data = kbuffer_read_event(kbuf, ts);
+	if (!data)
+		goto error;
+
+	ret = 0;
+error:
+	close(fd);
+	return ret;
+}
+
+int get_first_ts_instance(struct buffer_instance *instance)
+{
+	struct tracecmd_ts_corrections corrections;
+	unsigned long long first_ts = 0;
+	enum kbuffer_long_size long_size;
+	enum kbuffer_endian endian;
+	struct kbuffer *kbuf = NULL;
+	unsigned long long ts;
+	unsigned int flags;
+	int first_ts_cpu;
+	bool first = true;
+	char *page;
+	char *file;
+	int psize;
+	int ret;
+	int i;
+
+	psize = getpagesize();
+	page = calloc(1, psize);
+	if (!page)
+		return -1;
+
+	if (tracecmd_host_bigendian())
+		endian = KBUFFER_ENDIAN_BIG;
+	else
+		endian = KBUFFER_ENDIAN_LITTLE;
+	if (sizeof(long) == 8)
+		long_size = KBUFFER_LSIZE_8;
+	else
+		long_size = KBUFFER_LSIZE_4;
+
+	kbuf = kbuffer_alloc(long_size, endian);
+	if (!kbuf)
+		goto out;
+
+	for (i = 0; i < instance->cpu_count; i++) {
+		file = get_temp_file(instance, i);
+		if (!file)
+			continue;
+		if (!get_first_ts(kbuf, page, psize, file, &ts)) {
+			if (first || ts < first_ts) {
+				first_ts = ts;
+				first_ts_cpu = i;
+				first = false;
+			}
+		}
+		put_temp_file(file);
+	}
+	if (!first) {
+		if (is_guest(instance)) {
+			ret = tracecmd_tsync_get_proto_flags(instance->tsync, &flags);
+			if (ret)
+				goto out;
+			ret =  tracecmd_tsync_get_offsets(instance->tsync, first_ts_cpu, &corrections);
+			if (ret)
+				goto out;
+			instance->first_ts = tracecmd_guest_ts_calc(first_ts, &corrections, flags);
+			free(corrections.ts_samples);
+		} else  {
+			instance->first_ts = first_ts;
+		}
+	}
+
+out:
+	kbuffer_free(kbuf);
+	free(page);
+	return first ? -1 : 0;
+}
+
+static void write_guest_file(struct common_record_context *ctx, struct buffer_instance *instance)
 {
 	struct tracecmd_output *handle;
 	int cpu_count = instance->cpu_count;
@@ -4201,6 +4272,12 @@ static void write_guest_file(struct buffer_instance *instance)
 		die("error writing to %s", file);
 	if (instance->flags & BUFFER_FL_TSC2NSEC)
 		tracecmd_set_out_clock(handle, TSCNSEC_CLOCK);
+
+	if (ctx->tsc2nsec.mult)
+		add_tsc2nsec(handle, &ctx->tsc2nsec);
+	tracecmd_write_guest_time_shift(handle, instance->tsync);
+	tracecmd_append_options(handle);
+
 	temp_files = malloc(sizeof(*temp_files) * cpu_count);
 	if (!temp_files)
 		die("failed to allocate temp_files for %d cpus",
@@ -4219,6 +4296,9 @@ static void write_guest_file(struct buffer_instance *instance)
 	for (i = 0; i < cpu_count; i++)
 		put_temp_file(temp_files[i]);
 	free(temp_files);
+
+	tracecmd_tsync_free(instance->tsync);
+	instance->tsync = NULL;
 }
 
 static void record_data(struct common_record_context *ctx)
@@ -4229,11 +4309,21 @@ static void record_data(struct common_record_context *ctx)
 	bool local = false;
 	int max_cpu_count = local_cpu_count;
 	char **temp_files;
+	int ret;
 	int i;
 
+	if (ctx->tsc2nsec.mult) {
+		for_all_instances(instance) {
+			ret = get_first_ts_instance(instance);
+			if (!ret &&
+			    (!ctx->tsc2nsec.offset || ctx->tsc2nsec.offset > instance->first_ts))
+				ctx->tsc2nsec.offset = instance->first_ts;
+		}
+	}
+
 	for_all_instances(instance) {
 		if (is_guest(instance))
-			write_guest_file(instance);
+			write_guest_file(ctx, instance);
 		else if (host && instance->msg_handle)
 			finish_network(instance->msg_handle);
 		else
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] trace-cmd: Get the timestamp of the first recorded event as TSC offset
  2021-04-16 10:34 ` [PATCH v2 3/3] trace-cmd: Get the timestamp of the first recorded event as TSC offset Tzvetomir Stoyanov (VMware)
@ 2021-04-16 19:28   ` Steven Rostedt
  0 siblings, 0 replies; 10+ messages in thread
From: Steven Rostedt @ 2021-04-16 19:28 UTC (permalink / raw)
  To: Tzvetomir Stoyanov (VMware); +Cc: linux-trace-devel

On Fri, 16 Apr 2021 13:34:09 +0300
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:

> +int get_first_ts_instance(struct buffer_instance *instance)
> +{
> +	struct tracecmd_ts_corrections corrections;
> +	unsigned long long first_ts = 0;
> +	enum kbuffer_long_size long_size;
> +	enum kbuffer_endian endian;
> +	struct kbuffer *kbuf = NULL;

Small nit, but I'll leave it as it doesn't really hurt anything. But
assigning kbuf to NULL isn't needed. It gets assigned in the
kbuffer_alloc() before it is ever referenced, and the only error path
before that is a return. But if we ever add another error path, it's safe
to have it NULL.

-- Steve


> +	unsigned long long ts;
> +	unsigned int flags;
> +	int first_ts_cpu;
> +	bool first = true;
> +	char *page;
> +	char *file;
> +	int psize;
> +	int ret;
> +	int i;
> +
> +	psize = getpagesize();
> +	page = calloc(1, psize);
> +	if (!page)
> +		return -1;
> +
> +	if (tracecmd_host_bigendian())
> +		endian = KBUFFER_ENDIAN_BIG;
> +	else
> +		endian = KBUFFER_ENDIAN_LITTLE;
> +	if (sizeof(long) == 8)
> +		long_size = KBUFFER_LSIZE_8;
> +	else
> +		long_size = KBUFFER_LSIZE_4;
> +
> +	kbuf = kbuffer_alloc(long_size, endian);
> +	if (!kbuf)
> +		goto out;
> +

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset
  2021-04-16 10:34 ` [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset Tzvetomir Stoyanov (VMware)
@ 2021-04-16 20:12   ` Steven Rostedt
  2021-04-19  8:08     ` Tzvetomir Stoyanov
  0 siblings, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2021-04-16 20:12 UTC (permalink / raw)
  To: Tzvetomir Stoyanov (VMware); +Cc: linux-trace-devel

On Fri, 16 Apr 2021 13:34:08 +0300
"Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:

> When converting TSC timestamps to nanoseconds, an offset is used. That
> offset is subtracted from each TSC timestamp, before the conversion.
> However, if the timestamp is lower that the offset this causes an
> overflow, as both timestamp and offset are unsigned long integers. A
> check is added to verify the subtraction will not cause an overflow. In
> case of an overflow, the timestamp is set to 0 and a warning message is
> printed.
> 
> Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> ---
>  lib/trace-cmd/trace-input.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
> index 974879e8..991abd5f 100644
> --- a/lib/trace-cmd/trace-input.c
> +++ b/lib/trace-cmd/trace-input.c
> @@ -1230,8 +1230,14 @@ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
>  		ts *= handle->ts2secs;
>  	} else if (handle->tsc_calc.mult) {
>  		/* auto calculated TSC clock frequency */
> -		ts -= handle->tsc_calc.offset;
> -		ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
> +		if (ts > handle->tsc_calc.offset) {
> +			ts -= handle->tsc_calc.offset;
> +			ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
> +		} else {
> +			tracecmd_warning("Timestamp $llu is before the initial offset %llu, set it to 0",

   $llu  ??

Are you testing these? The bad trace files that I supplied triggers this.


> +					 ts, handle->tsc_calc.offset);
> +			ts = 0;

We should turn off the offset and not just zero out the timestamps,
otherwise we just have useless data:

  Timestamp $llu is before the initial offset 20094822433105, set it to 0
  Timestamp $llu is before the initial offset 20094822433105, set it to 0
  Timestamp $llu is before the initial offset 20094822460994, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094822961507, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094822967292, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094823302351, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094823306954, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094824963751, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094824965723, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094824972124, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094824972697, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094828868305, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094828871264, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094828881104, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094828882014, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094831874377, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094831876734, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094831881252, set it to 0
[..]
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094832060068, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094835262757, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094835265223, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094835270122, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094835273058, set it to 0
libtracecmd: File exists
  Timestamp $llu is before the initial offset 20094835273614, set it to 0
cpus=2
       trace-cmd-3196  [000]     0.000000: sched_stat_runtime:   comm=trace-cmd pid=3196 runtime=85576 [ns] vruntime=12853032342 [ns]
       trace-cmd-3196  [000]     0.000000: sched_switch:         trace-cmd:3196 [120] S ==> trace-cmd:3199 [120]
       trace-cmd-3199  [000]     0.000000: sched_stat_runtime:   comm=trace-cmd pid=3199 runtime=156103 [ns] vruntime=12857102867 [ns]
       trace-cmd-3199  [000]     0.000000: sched_switch:         trace-cmd:3199 [120] S ==> trace-cmd:3198 [120]
       trace-cmd-3198  [000]     0.000000: sched_stat_runtime:   comm=trace-cmd pid=3198 runtime=100540 [ns] vruntime=12858890364 [ns]
       trace-cmd-3198  [000]     0.000000: sched_switch:         trace-cmd:3198 [120] S ==> swapper/0:0 [120]
          <idle>-0     [000]     0.000000: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]     0.000000: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444054745103 function=tick_sched_timer/0x0
          <idle>-0     [000]     0.000000: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]     0.000000: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444055726117 softexpires=1444055726117
          <idle>-0     [000]     0.000000: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]     0.000000: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444055895819 function=tick_sched_timer/0x0
          <idle>-0     [000]     0.000000: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]     0.000000: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444056726117 softexpires=1444056726117
          <idle>-0     [000]     0.000000: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]     0.000000: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444056782207 function=tick_sched_timer/0x0
          <idle>-0     [000]     0.000000: softirq_raise:        vec=1 [action=TIMER]
          <idle>-0     [000]     0.000000: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]     0.000000: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444057726117 softexpires=1444057726117
          <idle>-0     [000]     0.000000: softirq_entry:        vec=1 [action=TIMER]
          <idle>-0     [000]     0.000000: timer_cancel:         timer=0xffffa61a0006be40
          <idle>-0     [000]     0.000000: timer_expire_entry:   timer=0xffffa61a0006be40 function=process_timeout now=4296111312 baseclk=4296111312
          <idle>-0     [000]     0.000000: sched_waking:         comm=rcu_sched pid=12 prio=120 target_cpu=000

We don't want all timestamps zero. We want to disable the starting event.

By having this instead:

		} else {
			tracecmd_warning("Timestamp %llu is before the initial offset %llu\n"
					 "\tSetting offset to 0",
					 ts, handle->tsc_calc.offset);
			handle->tsc_calc.offset = 0;
			ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
		}

We get this as output:

  Timestamp 20094822433105 is before the initial offset 20159736262966
        Setting offset to 0
cpus=2
       trace-cmd-3196  [000]  5923.651995: sched_stat_runtime:   comm=trace-cmd pid=3196 runtime=85576 [ns] vruntime=12853032342 [ns]
       trace-cmd-3196  [000]  5923.652004: sched_switch:         trace-cmd:3196 [120] S ==> trace-cmd:3199 [120]
       trace-cmd-3199  [000]  5923.652151: sched_stat_runtime:   comm=trace-cmd pid=3199 runtime=156103 [ns] vruntime=12857102867 [ns]
       trace-cmd-3199  [000]  5923.652153: sched_switch:         trace-cmd:3199 [120] S ==> trace-cmd:3198 [120]
       trace-cmd-3198  [000]  5923.652252: sched_stat_runtime:   comm=trace-cmd pid=3198 runtime=100540 [ns] vruntime=12858890364 [ns]
       trace-cmd-3198  [000]  5923.652253: sched_switch:         trace-cmd:3198 [120] S ==> swapper/0:0 [120]
          <idle>-0     [000]  5923.652741: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]  5923.652742: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444054745103 function=tick_sched_timer/0x0
          <idle>-0     [000]  5923.652744: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
          <idle>-0     [000]  5923.652744: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444055726117 softexpires=1444055726117
          <idle>-0     [000]  5923.653892: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780

Much more usable.

-- Steve


> +		}
>  	}
>  
>  	/* User specified time offset with --ts-offset or --date options */


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset
  2021-04-16 20:12   ` Steven Rostedt
@ 2021-04-19  8:08     ` Tzvetomir Stoyanov
  2021-04-19 13:45       ` Steven Rostedt
  0 siblings, 1 reply; 10+ messages in thread
From: Tzvetomir Stoyanov @ 2021-04-19  8:08 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Linux Trace Devel

On Fri, Apr 16, 2021 at 11:12 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 16 Apr 2021 13:34:08 +0300
> "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com> wrote:
>
> > When converting TSC timestamps to nanoseconds, an offset is used. That
> > offset is subtracted from each TSC timestamp, before the conversion.
> > However, if the timestamp is lower that the offset this causes an
> > overflow, as both timestamp and offset are unsigned long integers. A
> > check is added to verify the subtraction will not cause an overflow. In
> > case of an overflow, the timestamp is set to 0 and a warning message is
> > printed.
> >
> > Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@gmail.com>
> > ---
> >  lib/trace-cmd/trace-input.c | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
> > index 974879e8..991abd5f 100644
> > --- a/lib/trace-cmd/trace-input.c
> > +++ b/lib/trace-cmd/trace-input.c
> > @@ -1230,8 +1230,14 @@ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
> >               ts *= handle->ts2secs;
> >       } else if (handle->tsc_calc.mult) {
> >               /* auto calculated TSC clock frequency */
> > -             ts -= handle->tsc_calc.offset;
> > -             ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
> > +             if (ts > handle->tsc_calc.offset) {
> > +                     ts -= handle->tsc_calc.offset;
> > +                     ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
> > +             } else {
> > +                     tracecmd_warning("Timestamp $llu is before the initial offset %llu, set it to 0",
>
>    $llu  ??
>
> Are you testing these? The bad trace files that I supplied triggers this.
>
>
> > +                                      ts, handle->tsc_calc.offset);
> > +                     ts = 0;
>
> We should turn off the offset and not just zero out the timestamps,
> otherwise we just have useless data:
>
>   Timestamp $llu is before the initial offset 20094822433105, set it to 0
>   Timestamp $llu is before the initial offset 20094822433105, set it to 0
>   Timestamp $llu is before the initial offset 20094822460994, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094822961507, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094822967292, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094823302351, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094823306954, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094824963751, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094824965723, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094824972124, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094824972697, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094828868305, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094828871264, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094828881104, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094828882014, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094831874377, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094831876734, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094831881252, set it to 0
> [..]
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094832060068, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094835262757, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094835265223, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094835270122, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094835273058, set it to 0
> libtracecmd: File exists
>   Timestamp $llu is before the initial offset 20094835273614, set it to 0
> cpus=2
>        trace-cmd-3196  [000]     0.000000: sched_stat_runtime:   comm=trace-cmd pid=3196 runtime=85576 [ns] vruntime=12853032342 [ns]
>        trace-cmd-3196  [000]     0.000000: sched_switch:         trace-cmd:3196 [120] S ==> trace-cmd:3199 [120]
>        trace-cmd-3199  [000]     0.000000: sched_stat_runtime:   comm=trace-cmd pid=3199 runtime=156103 [ns] vruntime=12857102867 [ns]
>        trace-cmd-3199  [000]     0.000000: sched_switch:         trace-cmd:3199 [120] S ==> trace-cmd:3198 [120]
>        trace-cmd-3198  [000]     0.000000: sched_stat_runtime:   comm=trace-cmd pid=3198 runtime=100540 [ns] vruntime=12858890364 [ns]
>        trace-cmd-3198  [000]     0.000000: sched_switch:         trace-cmd:3198 [120] S ==> swapper/0:0 [120]
>           <idle>-0     [000]     0.000000: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]     0.000000: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444054745103 function=tick_sched_timer/0x0
>           <idle>-0     [000]     0.000000: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]     0.000000: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444055726117 softexpires=1444055726117
>           <idle>-0     [000]     0.000000: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]     0.000000: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444055895819 function=tick_sched_timer/0x0
>           <idle>-0     [000]     0.000000: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]     0.000000: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444056726117 softexpires=1444056726117
>           <idle>-0     [000]     0.000000: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]     0.000000: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444056782207 function=tick_sched_timer/0x0
>           <idle>-0     [000]     0.000000: softirq_raise:        vec=1 [action=TIMER]
>           <idle>-0     [000]     0.000000: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]     0.000000: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444057726117 softexpires=1444057726117
>           <idle>-0     [000]     0.000000: softirq_entry:        vec=1 [action=TIMER]
>           <idle>-0     [000]     0.000000: timer_cancel:         timer=0xffffa61a0006be40
>           <idle>-0     [000]     0.000000: timer_expire_entry:   timer=0xffffa61a0006be40 function=process_timeout now=4296111312 baseclk=4296111312
>           <idle>-0     [000]     0.000000: sched_waking:         comm=rcu_sched pid=12 prio=120 target_cpu=000
>
> We don't want all timestamps zero. We want to disable the starting event.
>
> By having this instead:
>
>                 } else {
>                         tracecmd_warning("Timestamp %llu is before the initial offset %llu\n"
>                                          "\tSetting offset to 0",
>                                          ts, handle->tsc_calc.offset);
>                         handle->tsc_calc.offset = 0;
>                         ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
>                 }
>

This will set the guest offset to 0, will not change the offsets of
the other files. The result is that the files will not be displayed in
sync, because of these few broken events. As this is only a sanity
check, and should not happen if the patch "trace-cmd: Get the
timestamp of the first recorded event as TSC offset" is applied, I
would suggest to zero the broken timestamps and to advise the user to
use "--raw-ts" option instead:
 "Timestamp %llu is before the initial offset %llu, set it to 0.
Display the files with --raw-ts option to see the original
timestamps.\n"


> We get this as output:
>
>   Timestamp 20094822433105 is before the initial offset 20159736262966
>         Setting offset to 0
> cpus=2
>        trace-cmd-3196  [000]  5923.651995: sched_stat_runtime:   comm=trace-cmd pid=3196 runtime=85576 [ns] vruntime=12853032342 [ns]
>        trace-cmd-3196  [000]  5923.652004: sched_switch:         trace-cmd:3196 [120] S ==> trace-cmd:3199 [120]
>        trace-cmd-3199  [000]  5923.652151: sched_stat_runtime:   comm=trace-cmd pid=3199 runtime=156103 [ns] vruntime=12857102867 [ns]
>        trace-cmd-3199  [000]  5923.652153: sched_switch:         trace-cmd:3199 [120] S ==> trace-cmd:3198 [120]
>        trace-cmd-3198  [000]  5923.652252: sched_stat_runtime:   comm=trace-cmd pid=3198 runtime=100540 [ns] vruntime=12858890364 [ns]
>        trace-cmd-3198  [000]  5923.652253: sched_switch:         trace-cmd:3198 [120] S ==> swapper/0:0 [120]
>           <idle>-0     [000]  5923.652741: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]  5923.652742: hrtimer_expire_entry: hrtimer=0xffff8cc5bd41f780 now=1444054745103 function=tick_sched_timer/0x0
>           <idle>-0     [000]  5923.652744: hrtimer_expire_exit:  hrtimer=0xffff8cc5bd41f780
>           <idle>-0     [000]  5923.652744: hrtimer_start:        hrtimer=0xffff8cc5bd41f780 function=tick_sched_timer/0x0 expires=1444055726117 softexpires=1444055726117
>           <idle>-0     [000]  5923.653892: hrtimer_cancel:       hrtimer=0xffff8cc5bd41f780
>
> Much more usable.
>
> -- Steve
>
>
> > +             }
> >       }
> >
> >       /* User specified time offset with --ts-offset or --date options */
>


-- 
Tzvetomir (Ceco) Stoyanov
VMware Open Source Technology Center

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset
  2021-04-19  8:08     ` Tzvetomir Stoyanov
@ 2021-04-19 13:45       ` Steven Rostedt
  2021-04-19 15:14         ` Steven Rostedt
  0 siblings, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2021-04-19 13:45 UTC (permalink / raw)
  To: Tzvetomir Stoyanov; +Cc: Linux Trace Devel

On Mon, 19 Apr 2021 11:08:04 +0300
Tzvetomir Stoyanov <tz.stoyanov@gmail.com> wrote:

> > We don't want all timestamps zero. We want to disable the starting event.
> >
> > By having this instead:
> >
> >                 } else {
> >                         tracecmd_warning("Timestamp %llu is before the initial offset %llu\n"
> >                                          "\tSetting offset to 0",
> >                                          ts, handle->tsc_calc.offset);
> >                         handle->tsc_calc.offset = 0;
> >                         ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
> >                 }
> >  
> 
> This will set the guest offset to 0, will not change the offsets of
> the other files. The result is that the files will not be displayed in
> sync, because of these few broken events. As this is only a sanity
> check, and should not happen if the patch "trace-cmd: Get the
> timestamp of the first recorded event as TSC offset" is applied, I
> would suggest to zero the broken timestamps and to advise the user to
> use "--raw-ts" option instead:
>  "Timestamp %llu is before the initial offset %llu, set it to 0.
> Display the files with --raw-ts option to see the original
> timestamps.\n"

I still don't like it, because a thousand events all with the same
timestamp will still screw up everything (including the output of
kernelshark, as you'll see a thousand events at time zero, and then the
rest of the events at some way off in the future time).

And as this is in the library, having a warning is only applicable to
trace-cmd, and not any other user case.

Actually, I think we are looking at the wrong place to do the check. All
event streams (CPUs) need to be in monotonic order, or it's just corrupted
to begin with.

The library should supply a check to see if each stream starts after the
offset, and if not, give a report back to the user (trace-cmd in this case,
or kernelsharks ftrace plugin). Then give the user a way to give another
offset to tell all the other streams to convert to.

That is:

	max = 0;
	for each stream:
		ret = read first event, compared to offset
		if (!ret) {
			/* ret will be how much off by offset */
			if (ret > max)
				max = ret;
		}
	if (max) {
		for each stream:
			Update offset by subtracting max
	}

Look at each stream, and have some callback give you how much ahead the
first event is from its given offset. Take the biggest value from reading
all the streams, then tell all the streams to subtract its offset by that
max value. The end result is that all streams now start at a positive value.

-- Steve



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset
  2021-04-19 13:45       ` Steven Rostedt
@ 2021-04-19 15:14         ` Steven Rostedt
  2021-04-28 12:31           ` Tzvetomir Stoyanov
  0 siblings, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2021-04-19 15:14 UTC (permalink / raw)
  To: Tzvetomir Stoyanov; +Cc: Linux Trace Devel

On Mon, 19 Apr 2021 09:45:43 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:


> That is:
> 
> 	max = 0;
> 	for each stream:
> 		ret = read first event, compared to offset
> 		if (!ret) {
> 			/* ret will be how much off by offset */
> 			if (ret > max)
> 				max = ret;
> 		}
> 	if (max) {
> 		for each stream:
> 			Update offset by subtracting max
> 	}
> 
> Look at each stream, and have some callback give you how much ahead the
> first event is from its given offset. Take the biggest value from reading
> all the streams, then tell all the streams to subtract its offset by that
> max value. The end result is that all streams now start at a positive value.

From our call, here's the pseudo code that I was talking about:

> diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
> index b17b36e0..f03dadd3 100644
> --- a/lib/trace-cmd/trace-input.c
> +++ b/lib/trace-cmd/trace-input.c
> @@ -1302,7 +1302,7 @@ static unsigned long long timestamp_host_sync(unsigned long long ts, int cpu,
>  					 &tsync->ts_samples[mid+1]);
>  }
>  
> -static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
> +static unsigned long long pre_timestamp_calc(unsigned long long ts, int cpu,
>  					 struct tracecmd_input *handle)

I pulled out the timestamp_calc into a helper function.

>  {
>  	/* do not modify raw timestamps */
> @@ -1318,17 +1318,44 @@ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
>  		ts *= handle->ts2secs;
>  	} else if (handle->tsc_calc.mult) {
>  		/* auto calculated TSC clock frequency */
> -		ts -= handle->tsc_calc.offset;

And removed the calc offset.

>  		ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
>  	}
>  
>  	/* User specified time offset with --ts-offset or --date options */
> -	if (handle->ts_offset)
> -		ts += handle->ts_offset;
> +	ts += handle->ts_offset;

As we mentioned (and this can be a separate patch), the if statement is
useless.

>  
>  	return ts;
>  }
>  
> +static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
> +					 struct tracecmd_input *handle)
> +{
> +	static int once;
> +
> +	ts = pre_timestamp_calc(ts, cpu, handle);
> +	if (!once && ts > handle->start_ts_offset) {
> +		once++;
> +		tracecmd_warning();
> +	}
> +	ts -= handle->start_ts_offset;

After looking at this more, I think we should just have the ts_offset and
start_ts_offset be the same. And remove the ts += handle->ts_offset, from
the pre_timestamp_calc() above, and have this check test just ts_offset.

So now the timestamp_calc() will get the timestamp and then apply the
ts_offset separately (and warn if the offset is greater than the ts).

> +}
> +
> +
> +
> +long long tracecmd_cpu_first_ts_offset(struct tracecmd_input *handle, int cpu)
> +}
> +	struct tep_record rec;
> +
> +	rec = first_event(handle, cpu);
> +	return pre_timestamp_calc(rec->ts, cpu, handle) - handle->ts_offset;
> +}

Add an API that shows the difference between the first stream event
timestamp against the user supplied (or file supplied) ts_offset.


> +
> +int tracecmd_modify_ts_offset(struct tracecmd_input *handle, long long offset)
> +{
> +	handle->ts_offset += offset;
> +}

Allow the user to tweak that offset. As we already have:

tracecmd_set_ts_offset(handle, offset) to set ts_offset, if the user found
that the offset was before, it could tweak it.

> +
> +
>  /*
>   * Page is mapped, now read in the page header info.
>   */
> 


That is, in the options, we would need to have the calc offset from the
file (doing the sync), do:

	handle->ts_offset -= start_offset.

Or something like that. I'll let you look at this code and see what you
come up with, and we can discuss this further.

-- Steve

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset
  2021-04-19 15:14         ` Steven Rostedt
@ 2021-04-28 12:31           ` Tzvetomir Stoyanov
  0 siblings, 0 replies; 10+ messages in thread
From: Tzvetomir Stoyanov @ 2021-04-28 12:31 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Linux Trace Devel

On Mon, Apr 19, 2021 at 6:14 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Mon, 19 Apr 2021 09:45:43 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
>
>
> > That is:
> >
> >       max = 0;
> >       for each stream:
> >               ret = read first event, compared to offset
> >               if (!ret) {
> >                       /* ret will be how much off by offset */
> >                       if (ret > max)
> >                               max = ret;
> >               }
> >       if (max) {
> >               for each stream:
> >                       Update offset by subtracting max
> >       }
> >
> > Look at each stream, and have some callback give you how much ahead the
> > first event is from its given offset. Take the biggest value from reading
> > all the streams, then tell all the streams to subtract its offset by that
> > max value. The end result is that all streams now start at a positive value.
>
> From our call, here's the pseudo code that I was talking about:
>
> > diff --git a/lib/trace-cmd/trace-input.c b/lib/trace-cmd/trace-input.c
> > index b17b36e0..f03dadd3 100644
> > --- a/lib/trace-cmd/trace-input.c
> > +++ b/lib/trace-cmd/trace-input.c
> > @@ -1302,7 +1302,7 @@ static unsigned long long timestamp_host_sync(unsigned long long ts, int cpu,
> >                                        &tsync->ts_samples[mid+1]);
> >  }
> >
> > -static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
> > +static unsigned long long pre_timestamp_calc(unsigned long long ts, int cpu,
> >                                        struct tracecmd_input *handle)
>
> I pulled out the timestamp_calc into a helper function.
>
> >  {
> >       /* do not modify raw timestamps */
> > @@ -1318,17 +1318,44 @@ static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
> >               ts *= handle->ts2secs;
> >       } else if (handle->tsc_calc.mult) {
> >               /* auto calculated TSC clock frequency */
> > -             ts -= handle->tsc_calc.offset;
>
> And removed the calc offset.
>
> >               ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift);
> >       }
> >
> >       /* User specified time offset with --ts-offset or --date options */
> > -     if (handle->ts_offset)
> > -             ts += handle->ts_offset;
> > +     ts += handle->ts_offset;
>
> As we mentioned (and this can be a separate patch), the if statement is
> useless.
>
> >
> >       return ts;
> >  }
> >
> > +static unsigned long long timestamp_calc(unsigned long long ts, int cpu,
> > +                                      struct tracecmd_input *handle)
> > +{
> > +     static int once;
> > +
> > +     ts = pre_timestamp_calc(ts, cpu, handle);
> > +     if (!once && ts > handle->start_ts_offset) {
> > +             once++;
> > +             tracecmd_warning();
> > +     }
> > +     ts -= handle->start_ts_offset;
>
> After looking at this more, I think we should just have the ts_offset and
> start_ts_offset be the same. And remove the ts += handle->ts_offset, from
> the pre_timestamp_calc() above, and have this check test just ts_offset.
>
> So now the timestamp_calc() will get the timestamp and then apply the
> ts_offset separately (and warn if the offset is greater than the ts).
>
> > +}
> > +
> > +
> > +
> > +long long tracecmd_cpu_first_ts_offset(struct tracecmd_input *handle, int cpu)
> > +}
> > +     struct tep_record rec;
> > +
> > +     rec = first_event(handle, cpu);
> > +     return pre_timestamp_calc(rec->ts, cpu, handle) - handle->ts_offset;
> > +}
>
> Add an API that shows the difference between the first stream event
> timestamp against the user supplied (or file supplied) ts_offset.
>
>
> > +
> > +int tracecmd_modify_ts_offset(struct tracecmd_input *handle, long long offset)
> > +{
> > +     handle->ts_offset += offset;
> > +}
>
> Allow the user to tweak that offset. As we already have:
>
> tracecmd_set_ts_offset(handle, offset) to set ts_offset, if the user found
> that the offset was before, it could tweak it.
>
> > +
> > +
> >  /*
> >   * Page is mapped, now read in the page header info.
> >   */
> >
>
>
> That is, in the options, we would need to have the calc offset from the
> file (doing the sync), do:
>
>         handle->ts_offset -= start_offset.
>
> Or something like that. I'll let you look at this code and see what you
> come up with, and we can discuss this further.
>
> -- Steve

On Thu, Apr 22, 2021 at 11:25 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 16 Apr 2021 10:01:18 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
>
> > On Fri, 16 Apr 2021 09:59:08 -0400
> > Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > >
> > > As for pr_stat(), I think we should rename it to tep_info() and tep_vinfo()
> > > that acts just like tep_warning(), except it is for informational output
> > > (stdout instead of stderr). This is similar to what the kernel has.
> > >
> > > Since tep_vwarning() takes a name, so can tep_vinfo(), and I was thinking
> > > that we should expose this string to the application.
> > >
> >
> > Oh, and we could add a tep_critical() and tep_vcritical() which means that
> > the error is something really bad happened, (like failed to allocate).
>
> Any thoughts on this?

These changes are superseded by "RFC [PATCH 0/5] tsc2nsec fixes",
where some of these suggestions are implemented.
https://lore.kernel.org/linux-trace-devel/20210428122839.805296-1-tz.stoyanov@gmail.com/




--
Tzvetomir (Ceco) Stoyanov
VMware Open Source Technology Center

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-04-28 12:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-16 10:34 [PATCH v2 0/3] Fix overflow when applying tsc2nsec calculations Tzvetomir Stoyanov (VMware)
2021-04-16 10:34 ` [PATCH v2 1/3] trace-cmd library: Add new trace-cmd library APIs for guest ts corrections Tzvetomir Stoyanov (VMware)
2021-04-16 10:34 ` [PATCH v2 2/3] trace-cmd library: Add check before applying tsc2nsec offset Tzvetomir Stoyanov (VMware)
2021-04-16 20:12   ` Steven Rostedt
2021-04-19  8:08     ` Tzvetomir Stoyanov
2021-04-19 13:45       ` Steven Rostedt
2021-04-19 15:14         ` Steven Rostedt
2021-04-28 12:31           ` Tzvetomir Stoyanov
2021-04-16 10:34 ` [PATCH v2 3/3] trace-cmd: Get the timestamp of the first recorded event as TSC offset Tzvetomir Stoyanov (VMware)
2021-04-16 19:28   ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).