All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
@ 2017-12-19 11:15 Tvrtko Ursulin
  2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
                   ` (8 more replies)
  0 siblings, 9 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-19 11:15 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A couple of small optimizations which altogether bring around 30%
improvement in my testing.

1. Do less string processing on tracepoints names and push more of the
   check into the if-ladder.

2. Pull out common db key and ctx processing and cache common values in
   local vars.

3. Key value pair parsing is faster with a regexp.

4. Avoid sorting the db hash multiple times if possible.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
 scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
 1 file changed, 49 insertions(+), 65 deletions(-)
---
John please check if this helps with your monster traces before we decide
if it is worth putting this in. Well, there isn't much of a downside
except touching something which works and risking breaking it.

diff --git a/scripts/trace.pl b/scripts/trace.pl
index 98e4a9843a43..5463e1fd1954 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,91 @@ my $prev_freq = 0;
 my $prev_freq_ts = 0;
 while (<>) {
 	my @fields;
-	my @tmp;
 	my $tp_name;
-	my $time;
 	my %tp;
-	my $key;
+	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
 
 	chomp;
 	@fields = split ' ';
 
+	chop $fields[3];
+	$time = int($fields[3] * 1000000.0);
+
 	$tp_name = $fields[4];
-	@tmp = split ':', $tp_name, 2;
-	next unless $tmp[0] eq 'i915';
-	$tp_name = $tmp[1];
-	chop $tp_name;
 
-	chop $fields[3];
-	$time = $fields[3] * 1000000.0;
 	splice @fields, 0, 5;
 
 	foreach my $f (@fields) {
-		my @kv = split '=|,', $f;
-
-		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
+		my ($k, $v);
 
-		$tp{$kv[0]} = $kv[1];
+		($k, $v) = ($f =~ /^(\S+)=(\d+)/);
+		next unless defined $k;
+		$k = 'global' if $k eq 'global_seqno';
+		$tp{$k} = $v;
 	}
 
 	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
 
-	if ($tp_name eq 'i915_gem_request_wait_begin') {
-		my %rw;
+	if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
+		$ring = $tp{'ring'};
+		$ctx = $tp{'ctx'};
+		$seqno = $tp{'seqno'};
+
+		$orig_ctx = $ctx;
+		$ctx = sanitize_ctx($ctx, $ring);
+		$key = db_key($ring, $ctx, $seqno);
+	}
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+		my %rw;
 
 		next if exists $reqwait{$key};
 
 		$rw{'key'} = $key;
-		$rw{'ring'} = $tp{'ring'};
-		$rw{'seqno'} = $tp{'seqno'};
-		$rw{'ctx'} = $tp{'ctx'};
+		$rw{'ring'} = $ring;
+		$rw{'seqno'} = $seqno;
+		$rw{'ctx'} = $ctx;
 		$rw{'start'} = $time;
 		$reqwait{$key} = \%rw;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
 		next unless exists $reqwait{$key};
 
 		$reqwait{$key}->{'end'} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_add') {
-		my $orig_ctx = $tp{'ctx'};
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
 		if (exists $queue{$key}) {
 			$ctxdb{$orig_ctx}++;
-			$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+			$ctx = sanitize_ctx($ctx, $ring);
+			$key = db_key($ring, $ctx, $seqno);
 		}
 
 		$queue{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_submit') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
 		die if exists $submit{$key};
 		die unless exists $queue{$key};
 
 		$submit{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_in') {
+	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
 		my %req;
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
 		die if exists $db{$key};
 		die unless exists $queue{$key};
 		die unless exists $submit{$key};
 
 		$req{'start'} = $time;
-		$req{'ring'} = $tp{'ring'};
-		$req{'seqno'} = $tp{'seqno'};
-		$req{'ctx'} = $tp{'ctx'};
-		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+		$req{'ring'} = $ring;
+		$req{'seqno'} = $seqno;
+		$req{'ctx'} = $ctx;
+		$req{'name'} = $ctx . '/' . $seqno;
 		$req{'global'} = $tp{'global'};
 		$req{'port'} = $tp{'port'};
 		$req{'queue'} = $queue{$key};
 		$req{'submit-delay'} = $submit{$key} - $queue{$key};
 		$req{'execute-delay'} = $req{'start'} - $submit{$key};
-		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
-		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+		$rings{$ring} = $gid++ unless exists $rings{$ring};
+		$ringmap{$rings{$ring}} = $ring;
 		$db{$key} = \%req;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_out') {
-		my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+		my $gkey = global_key($ring, $tp{'global'});
 
 		die unless exists $db{$key};
 		die unless exists $db{$key}->{'start'};
@@ -476,15 +457,12 @@ while (<>) {
 		}
 		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
 		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
-		next;
-	} elsif ($tp_name eq 'intel_engine_notify') {
-		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
-		next;
-	} elsif ($tp_name eq 'intel_gpu_freq_change') {
+	} elsif ($tp_name eq 'intel_engine_notify:') {
+		$notify{global_key($ring, $seqno)} = $time;
+	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
 		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
 		$prev_freq_ts = $time;
 		$prev_freq = $tp{'new_freq'};
-		next;
 	}
 }
 
@@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
 my $last_ts = 0;
 my $first_ts;
 
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
 	my $ring = $db{$key}->{'ring'};
 	my $end = $db{$key}->{'end'};
 
@@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 
 		# 20us tolerance
 		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+			$re_sort = 1;
 			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
 			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
 			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
 }
 
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
 foreach my $ring (keys %batch_avg) {
 	$batch_avg{$ring} /= $batch_count{$ring};
 	$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
 	my (@s_, @e_);
 
 	# Extract all GPU busy intervals and sort them.
-	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+	foreach my $key (@sorted_keys) {
 		next unless $db{$key}->{'ring'} == $ring;
 		push @s_, $db{$key}->{'start'};
 		push @e_, $db{$key}->{'end'};
@@ -660,7 +644,7 @@ my @gpu_intervals;
 my (@s_, @e_);
 
 # Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
 	push @s_, $db{$key}->{'start'};
 	push @e_, $db{$key}->{'end'};
 	die if $db{$key}->{'start'} > $db{$key}->{'end'};
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
@ 2017-12-19 11:39 ` Patchwork
  2017-12-19 13:23 ` ✓ Fi.CI.IGT: " Patchwork
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-19 11:39 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing
URL   : https://patchwork.freedesktop.org/series/35569/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
d3d04e5f74f05850f595c729fb0c00eb7dd9c688 scripts/trace.pl: Fix ring ignore

with latest DRM-Tip kernel build CI_DRM_3545
3e151c0e41e6 drm-tip: 2017y-12m-18d-22h-45m-04s UTC integration manifest

No testlist changes.

Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-a:
                dmesg-warn -> PASS       (fi-kbl-r) fdo#104172
Test kms_psr_sink_crc:
        Subgroup psr_basic:
                dmesg-warn -> PASS       (fi-skl-6700hq) fdo#101144

fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172
fdo#101144 https://bugs.freedesktop.org/show_bug.cgi?id=101144

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:434s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:444s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:391s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:495s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:278s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:498s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:495s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:487s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:466s
fi-elk-e7500     total:224  pass:163  dwarn:15  dfail:0   fail:0   skip:45 
fi-gdg-551       total:288  pass:178  dwarn:1   dfail:0   fail:1   skip:108 time:262s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:538s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:406s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:417s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:389s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:469s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:429s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:480s
fi-kbl-7560u     total:288  pass:268  dwarn:1   dfail:0   fail:0   skip:19  time:519s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:471s
fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:523s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:585s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:445s
fi-skl-6600u     total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:534s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:564s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:510s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:509s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:446s
fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33 
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:412s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:591s
fi-glk-dsi       total:288  pass:175  dwarn:1   dfail:4   fail:0   skip:108 time:333s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_701/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✓ Fi.CI.IGT: success for scripts/trace.pl: Optimize event parsing and processing
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
  2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-12-19 13:23 ` Patchwork
  2017-12-19 15:02 ` [PATCH i-g-t] " Chris Wilson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-19 13:23 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing
URL   : https://patchwork.freedesktop.org/series/35569/
State : success

== Summary ==

Test kms_cursor_crc:
        Subgroup cursor-256x85-sliding:
                skip       -> PASS       (shard-hsw)
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
                fail       -> PASS       (shard-snb) fdo#101623 +2
Test kms_cursor_legacy:
        Subgroup basic-busy-flip-before-cursor-atomic:
                skip       -> PASS       (shard-hsw)
Test gem_tiled_swapping:
        Subgroup non-threaded:
                pass       -> INCOMPLETE (shard-snb) fdo#104218
Test pm_rpm:
        Subgroup universal-planes:
                skip       -> PASS       (shard-hsw)

fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218

shard-hsw        total:2712 pass:1537 dwarn:1   dfail:0   fail:10  skip:1164 time:9487s
shard-snb        total:2691 pass:1299 dwarn:1   dfail:0   fail:12  skip:1378 time:7821s
Blacklisted hosts:
shard-apl        total:2712 pass:1688 dwarn:1   dfail:0   fail:22  skip:1001 time:13732s
shard-kbl        total:2691 pass:1794 dwarn:1   dfail:0   fail:23  skip:872 time:10773s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_701/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
  2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
  2017-12-19 13:23 ` ✓ Fi.CI.IGT: " Patchwork
@ 2017-12-19 15:02 ` Chris Wilson
  2017-12-19 16:23 ` John Harrison
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2017-12-19 15:02 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

Quoting Tvrtko Ursulin (2017-12-19 11:15:39)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
> 
> 1. Do less string processing on tracepoints names and push more of the
>    check into the if-ladder.
> 
> 2. Pull out common db key and ctx processing and cache common values in
>    local vars.
> 
> 3. Key value pair parsing is faster with a regexp.
> 
> 4. Avoid sorting the db hash multiple times if possible.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
>  scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
>  1 file changed, 49 insertions(+), 65 deletions(-)
> ---
> John please check if this helps with your monster traces before we decide
> if it is worth putting this in. Well, there isn't much of a downside
> except touching something which works and risking breaking it.

Could you run the same trace and do something like
	time trace1 < input > output1.html
	time trace2 < input > output2.html
	cmp output1.html output2.html
?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2017-12-19 15:02 ` [PATCH i-g-t] " Chris Wilson
@ 2017-12-19 16:23 ` John Harrison
  2017-12-20  9:41   ` [PATCH i-g-t v2] " Tvrtko Ursulin
  2017-12-20  9:54   ` [PATCH i-g-t] " Tvrtko Ursulin
  2017-12-20 10:33 ` ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2) Patchwork
                   ` (4 subsequent siblings)
  8 siblings, 2 replies; 18+ messages in thread
From: John Harrison @ 2017-12-19 16:23 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx

On 12/19/2017 3:15 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
>
> 1. Do less string processing on tracepoints names and push more of the
>     check into the if-ladder.
>
> 2. Pull out common db key and ctx processing and cache common values in
>     local vars.
>
> 3. Key value pair parsing is faster with a regexp.
>
> 4. Avoid sorting the db hash multiple times if possible.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
>   scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
>   1 file changed, 49 insertions(+), 65 deletions(-)
> ---
> John please check if this helps with your monster traces before we decide
> if it is worth putting this in. Well, there isn't much of a downside
> except touching something which works and risking breaking it.
>
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index 98e4a9843a43..5463e1fd1954 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -356,110 +356,91 @@ my $prev_freq = 0;
>   my $prev_freq_ts = 0;
>   while (<>) {
>   	my @fields;
> -	my @tmp;
>   	my $tp_name;
> -	my $time;
>   	my %tp;
> -	my $key;
> +	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>   
>   	chomp;
>   	@fields = split ' ';
>   
> +	chop $fields[3];
> +	$time = int($fields[3] * 1000000.0);
> +
>   	$tp_name = $fields[4];
> -	@tmp = split ':', $tp_name, 2;
> -	next unless $tmp[0] eq 'i915';
> -	$tp_name = $tmp[1];
> -	chop $tp_name;
>   
> -	chop $fields[3];
> -	$time = $fields[3] * 1000000.0;
>   	splice @fields, 0, 5;
>   
>   	foreach my $f (@fields) {
> -		my @kv = split '=|,', $f;
> -
> -		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
> +		my ($k, $v);
>   
> -		$tp{$kv[0]} = $kv[1];
> +		($k, $v) = ($f =~ /^(\S+)=(\d+)/);
I recall something about using () selection is slower than using 
pre-defined values. E.g.
   $f =~ m/=/; ($k, $v) = ($`, $');

Or do you specifically need the filtering of only matching numeric values?

> +		next unless defined $k;
> +		$k = 'global' if $k eq 'global_seqno';
> +		$tp{$k} = $v;
>   	}
>   
>   	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>   
> -	if ($tp_name eq 'i915_gem_request_wait_begin') {
> -		my %rw;
> +	if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
> +		$ring = $tp{'ring'};
> +		$ctx = $tp{'ctx'};
> +		$seqno = $tp{'seqno'};
> +
> +		$orig_ctx = $ctx;
> +		$ctx = sanitize_ctx($ctx, $ring);
> +		$key = db_key($ring, $ctx, $seqno);
> +	}
>   
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
> +		my %rw;
>   
>   		next if exists $reqwait{$key};
>   
>   		$rw{'key'} = $key;
> -		$rw{'ring'} = $tp{'ring'};
> -		$rw{'seqno'} = $tp{'seqno'};
> -		$rw{'ctx'} = $tp{'ctx'};
> +		$rw{'ring'} = $ring;
> +		$rw{'seqno'} = $seqno;
> +		$rw{'ctx'} = $ctx;
>   		$rw{'start'} = $time;
>   		$reqwait{$key} = \%rw;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
>   		next unless exists $reqwait{$key};
>   
>   		$reqwait{$key}->{'end'} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_add') {
> -		my $orig_ctx = $tp{'ctx'};
> -
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
>   		if (exists $queue{$key}) {
>   			$ctxdb{$orig_ctx}++;
> -			$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +			$ctx = sanitize_ctx($ctx, $ring);
This bit originally worked on $orig_ctx not $ctx (i.e., before your 
previous refactoring patch). Which should it be? Will a double 
sanitisation step work as intended? I don't actually hit this line in 
any of the runs I've done.


> +			$key = db_key($ring, $ctx, $seqno);
>   		}
>   
>   		$queue{$key} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_submit') {
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
>   		die if exists $submit{$key};
>   		die unless exists $queue{$key};
>   
>   		$submit{$key} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_in') {
> +	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
>   		my %req;
>   
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
>   		die if exists $db{$key};
>   		die unless exists $queue{$key};
>   		die unless exists $submit{$key};
>   
>   		$req{'start'} = $time;
> -		$req{'ring'} = $tp{'ring'};
> -		$req{'seqno'} = $tp{'seqno'};
> -		$req{'ctx'} = $tp{'ctx'};
> -		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
> +		$req{'ring'} = $ring;
> +		$req{'seqno'} = $seqno;
> +		$req{'ctx'} = $ctx;
> +		$req{'name'} = $ctx . '/' . $seqno;
>   		$req{'global'} = $tp{'global'};
>   		$req{'port'} = $tp{'port'};
>   		$req{'queue'} = $queue{$key};
>   		$req{'submit-delay'} = $submit{$key} - $queue{$key};
>   		$req{'execute-delay'} = $req{'start'} - $submit{$key};
> -		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
> -		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
> +		$rings{$ring} = $gid++ unless exists $rings{$ring};
> +		$ringmap{$rings{$ring}} = $ring;
>   		$db{$key} = \%req;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_out') {
> -		my $gkey = global_key($tp{'ring'}, $tp{'global'});
> -
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
> +		my $gkey = global_key($ring, $tp{'global'});
>   
>   		die unless exists $db{$key};
>   		die unless exists $db{$key}->{'start'};
> @@ -476,15 +457,12 @@ while (<>) {
>   		}
>   		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
>   		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
> -		next;
> -	} elsif ($tp_name eq 'intel_engine_notify') {
> -		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
> -		next;
> -	} elsif ($tp_name eq 'intel_gpu_freq_change') {
> +	} elsif ($tp_name eq 'intel_engine_notify:') {
> +		$notify{global_key($ring, $seqno)} = $time;
> +	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
>   		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
>   		$prev_freq_ts = $time;
>   		$prev_freq = $tp{'new_freq'};
> -		next;
>   	}
>   }
>   
> @@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
>   my $last_ts = 0;
>   my $first_ts;
>   
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
> +my $re_sort = 0;
> +
> +foreach my $key (@sorted_keys) {
>   	my $ring = $db{$key}->{'ring'};
>   	my $end = $db{$key}->{'end'};
>   
> @@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>   
>   		# 20us tolerance
>   		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
> +			$re_sort = 1;
>   			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
>   			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
>   			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
> @@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>   	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
>   }
>   
> +@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
> +
>   foreach my $ring (keys %batch_avg) {
>   	$batch_avg{$ring} /= $batch_count{$ring};
>   	$batch_total_avg{$ring} /= $batch_count{$ring};
> @@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
>   	my (@s_, @e_);
>   
>   	# Extract all GPU busy intervals and sort them.
> -	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +	foreach my $key (@sorted_keys) {
>   		next unless $db{$key}->{'ring'} == $ring;
>   		push @s_, $db{$key}->{'start'};
>   		push @e_, $db{$key}->{'end'};
> @@ -660,7 +644,7 @@ my @gpu_intervals;
>   my (@s_, @e_);
>   
>   # Extract all GPU busy intervals and sort them.
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +foreach my $key (@sorted_keys) {
>   	push @s_, $db{$key}->{'start'};
>   	push @e_, $db{$key}->{'end'};
>   	die if $db{$key}->{'start'} > $db{$key}->{'end'};

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH i-g-t v2] scripts/trace.pl: Optimize event parsing and processing
  2017-12-19 16:23 ` John Harrison
@ 2017-12-20  9:41   ` Tvrtko Ursulin
  2017-12-21  9:37     ` [PATCH i-g-t v3] " Tvrtko Ursulin
  2017-12-20  9:54   ` [PATCH i-g-t] " Tvrtko Ursulin
  1 sibling, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-20  9:41 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A couple of small optimizations which altogether bring around 30%
improvement in my testing.

1. Do less string processing on tracepoints names and push more of the
   check into the if-ladder.

2. Pull out common db key and ctx processing and cache common values in
   local vars.

3. Key value pair parsing is faster with a regexp.

4. Avoid sorting the db hash multiple times if possible.

v2:
 * Use faster key-value splitting method. (John Harrison)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
 scripts/trace.pl | 115 ++++++++++++++++++++++++-------------------------------
 1 file changed, 50 insertions(+), 65 deletions(-)

diff --git a/scripts/trace.pl b/scripts/trace.pl
index ade0a9154bd7..2628a41abfc4 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,92 @@ my $prev_freq = 0;
 my $prev_freq_ts = 0;
 while (<>) {
 	my @fields;
-	my @tmp;
 	my $tp_name;
-	my $time;
 	my %tp;
-	my $key;
+	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
 
 	chomp;
 	@fields = split ' ';
 
+	chop $fields[3];
+	$time = int($fields[3] * 1000000.0);
+
 	$tp_name = $fields[4];
-	@tmp = split ':', $tp_name, 2;
-	next unless $tmp[0] eq 'i915';
-	$tp_name = $tmp[1];
-	chop $tp_name;
 
-	chop $fields[3];
-	$time = $fields[3] * 1000000.0;
 	splice @fields, 0, 5;
 
 	foreach my $f (@fields) {
-		my @kv = split '=|,', $f;
-
-		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
+		my ($k, $v);
 
-		$tp{$kv[0]} = $kv[1];
+		next unless $f =~ m/=/;
+		($k, $v) = ($`, $');
+		$k = 'global' if $k eq 'global_seqno';
+		chop $v if substr($v, -1, 1) eq ',';
+		$tp{$k} = $v;
 	}
 
 	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
 
-	if ($tp_name eq 'i915_gem_request_wait_begin') {
-		my %rw;
+	if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
+		$ring = $tp{'ring'};
+		$ctx = $tp{'ctx'};
+		$seqno = $tp{'seqno'};
+
+		$orig_ctx = $ctx;
+		$ctx = sanitize_ctx($ctx, $ring);
+		$key = db_key($ring, $ctx, $seqno);
+	}
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+		my %rw;
 
 		next if exists $reqwait{$key};
 
 		$rw{'key'} = $key;
-		$rw{'ring'} = $tp{'ring'};
-		$rw{'seqno'} = $tp{'seqno'};
-		$rw{'ctx'} = $tp{'ctx'};
+		$rw{'ring'} = $ring;
+		$rw{'seqno'} = $seqno;
+		$rw{'ctx'} = $ctx;
 		$rw{'start'} = $time;
 		$reqwait{$key} = \%rw;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
 		next unless exists $reqwait{$key};
 
 		$reqwait{$key}->{'end'} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_add') {
-		my $orig_ctx = $tp{'ctx'};
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
 		if (exists $queue{$key}) {
 			$ctxdb{$orig_ctx}++;
-			$tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
-			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+			$ctx = sanitize_ctx($orig_ctx, $ring);
+			$key = db_key($ring, $ctx, $seqno);
 		}
 
 		$queue{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_submit') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
 		die if exists $submit{$key};
 		die unless exists $queue{$key};
 
 		$submit{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_in') {
+	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
 		my %req;
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
 		die if exists $db{$key};
 		die unless exists $queue{$key};
 		die unless exists $submit{$key};
 
 		$req{'start'} = $time;
-		$req{'ring'} = $tp{'ring'};
-		$req{'seqno'} = $tp{'seqno'};
-		$req{'ctx'} = $tp{'ctx'};
-		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+		$req{'ring'} = $ring;
+		$req{'seqno'} = $seqno;
+		$req{'ctx'} = $ctx;
+		$req{'name'} = $ctx . '/' . $seqno;
 		$req{'global'} = $tp{'global'};
 		$req{'port'} = $tp{'port'};
 		$req{'queue'} = $queue{$key};
 		$req{'submit-delay'} = $submit{$key} - $queue{$key};
 		$req{'execute-delay'} = $req{'start'} - $submit{$key};
-		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
-		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+		$rings{$ring} = $gid++ unless exists $rings{$ring};
+		$ringmap{$rings{$ring}} = $ring;
 		$db{$key} = \%req;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_out') {
-		my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+		my $gkey = global_key($ring, $tp{'global'});
 
 		die unless exists $db{$key};
 		die unless exists $db{$key}->{'start'};
@@ -476,15 +458,12 @@ while (<>) {
 		}
 		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
 		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
-		next;
-	} elsif ($tp_name eq 'intel_engine_notify') {
-		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
-		next;
-	} elsif ($tp_name eq 'intel_gpu_freq_change') {
+	} elsif ($tp_name eq 'intel_engine_notify:') {
+		$notify{global_key($ring, $seqno)} = $time;
+	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
 		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
 		$prev_freq_ts = $time;
 		$prev_freq = $tp{'new_freq'};
-		next;
 	}
 }
 
@@ -557,7 +536,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
 my $last_ts = 0;
 my $first_ts;
 
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
 	my $ring = $db{$key}->{'ring'};
 	my $end = $db{$key}->{'end'};
 
@@ -585,6 +567,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 
 		# 20us tolerance
 		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+			$re_sort = 1;
 			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
 			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
 			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +584,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
 }
 
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
 foreach my $ring (keys %batch_avg) {
 	$batch_avg{$ring} /= $batch_count{$ring};
 	$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +601,7 @@ foreach my $gid (sort keys %rings) {
 	my (@s_, @e_);
 
 	# Extract all GPU busy intervals and sort them.
-	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+	foreach my $key (@sorted_keys) {
 		next unless $db{$key}->{'ring'} == $ring;
 		push @s_, $db{$key}->{'start'};
 		push @e_, $db{$key}->{'end'};
@@ -660,7 +645,7 @@ my @gpu_intervals;
 my (@s_, @e_);
 
 # Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
 	push @s_, $db{$key}->{'start'};
 	push @e_, $db{$key}->{'end'};
 	die if $db{$key}->{'start'} > $db{$key}->{'end'};
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
  2017-12-19 16:23 ` John Harrison
  2017-12-20  9:41   ` [PATCH i-g-t v2] " Tvrtko Ursulin
@ 2017-12-20  9:54   ` Tvrtko Ursulin
  2017-12-20 23:50     ` John Harrison
  1 sibling, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-20  9:54 UTC (permalink / raw)
  To: John Harrison, Tvrtko Ursulin, Intel-gfx


On 19/12/2017 16:23, John Harrison wrote:
> On 12/19/2017 3:15 AM, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> A couple of small optimizations which altogether bring around 30%
>> improvement in my testing.
>>
>> 1. Do less string processing on tracepoints names and push more of the
>>     check into the if-ladder.
>>
>> 2. Pull out common db key and ctx processing and cache common values in
>>     local vars.
>>
>> 3. Key value pair parsing is faster with a regexp.
>>
>> 4. Avoid sorting the db hash multiple times if possible.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: John Harrison <John.C.Harrison@intel.com>
>> ---
>>   scripts/trace.pl | 114 
>> ++++++++++++++++++++++++-------------------------------
>>   1 file changed, 49 insertions(+), 65 deletions(-)
>> ---
>> John please check if this helps with your monster traces before we decide
>> if it is worth putting this in. Well, there isn't much of a downside
>> except touching something which works and risking breaking it.
>>
>> diff --git a/scripts/trace.pl b/scripts/trace.pl
>> index 98e4a9843a43..5463e1fd1954 100755
>> --- a/scripts/trace.pl
>> +++ b/scripts/trace.pl
>> @@ -356,110 +356,91 @@ my $prev_freq = 0;
>>   my $prev_freq_ts = 0;
>>   while (<>) {
>>       my @fields;
>> -    my @tmp;
>>       my $tp_name;
>> -    my $time;
>>       my %tp;
>> -    my $key;
>> +    my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>>       chomp;
>>       @fields = split ' ';
>> +    chop $fields[3];
>> +    $time = int($fields[3] * 1000000.0);
>> +
>>       $tp_name = $fields[4];
>> -    @tmp = split ':', $tp_name, 2;
>> -    next unless $tmp[0] eq 'i915';
>> -    $tp_name = $tmp[1];
>> -    chop $tp_name;
>> -    chop $fields[3];
>> -    $time = $fields[3] * 1000000.0;
>>       splice @fields, 0, 5;
>>       foreach my $f (@fields) {
>> -        my @kv = split '=|,', $f;
>> -
>> -        $kv[0] = 'global' if $kv[0] eq 'global_seqno';
>> +        my ($k, $v);
>> -        $tp{$kv[0]} = $kv[1];
>> +        ($k, $v) = ($f =~ /^(\S+)=(\d+)/);
> I recall something about using () selection is slower than using 
> pre-defined values. E.g.
>    $f =~ m/=/; ($k, $v) = ($`, $');
> 
> Or do you specifically need the filtering of only matching numeric values?

It doesn't matter at the moment, all the script touches just happens to 
be numeric so either way works. I did not know of this trick and it is 
indeed marginally faster. So I've sent an update with it included.

What was the effect of all this on your big traces? I am only testing 
with a smaller one which goes from ~3.3s to ~2.2s. On a larger trace it 
might be non-linear gains due to double sort avoidance, unless there 
will be some other effects to cancel that out.

>> +        next unless defined $k;
>> +        $k = 'global' if $k eq 'global_seqno';
>> +        $tp{$k} = $v;
>>       }
>>       next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>> -    if ($tp_name eq 'i915_gem_request_wait_begin') {
>> -        my %rw;
>> +    if (exists $tp{'ring'} and exists $tp{'ctx'} and exists 
>> $tp{'seqno'}) {
>> +        $ring = $tp{'ring'};
>> +        $ctx = $tp{'ctx'};
>> +        $seqno = $tp{'seqno'};
>> +
>> +        $orig_ctx = $ctx;
>> +        $ctx = sanitize_ctx($ctx, $ring);
>> +        $key = db_key($ring, $ctx, $seqno);
>> +    }
>> -        $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -        $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> +    if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
>> +        my %rw;
>>           next if exists $reqwait{$key};
>>           $rw{'key'} = $key;
>> -        $rw{'ring'} = $tp{'ring'};
>> -        $rw{'seqno'} = $tp{'seqno'};
>> -        $rw{'ctx'} = $tp{'ctx'};
>> +        $rw{'ring'} = $ring;
>> +        $rw{'seqno'} = $seqno;
>> +        $rw{'ctx'} = $ctx;
>>           $rw{'start'} = $time;
>>           $reqwait{$key} = \%rw;
>> -        next;
>> -    } elsif ($tp_name eq 'i915_gem_request_wait_end') {
>> -        $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -        $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> +    } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
>>           next unless exists $reqwait{$key};
>>           $reqwait{$key}->{'end'} = $time;
>> -        next;
>> -    } elsif ($tp_name eq 'i915_gem_request_add') {
>> -        my $orig_ctx = $tp{'ctx'};
>> -
>> -        $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -        $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> +    } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
>>           if (exists $queue{$key}) {
>>               $ctxdb{$orig_ctx}++;
>> -            $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -            $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> +            $ctx = sanitize_ctx($ctx, $ring);
> This bit originally worked on $orig_ctx not $ctx (i.e., before your 
> previous refactoring patch). Which should it be? Will a double 
> sanitisation step work as intended? I don't actually hit this line in 
> any of the runs I've done.

Yeah well spotted, copy and paste error and fix sent. R-b would be cool 
on it.

This only triggers if you trace something which creates and destroys 
contexts due context id recycling. Here it auto-detects an impossible 
condition and deduces it must be a new context, so it assigns a new 
suffix to it.

Regards,

Tvrtko

> 
>> +            $key = db_key($ring, $ctx, $seqno);
>>           }
>>           $queue{$key} = $time;
>> -        next;
>> -    } elsif ($tp_name eq 'i915_gem_request_submit') {
>> -        $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -        $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> +    } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
>>           die if exists $submit{$key};
>>           die unless exists $queue{$key};
>>           $submit{$key} = $time;
>> -        next;
>> -    } elsif ($tp_name eq 'i915_gem_request_in') {
>> +    } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
>>           my %req;
>> -        $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -        $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>>           die if exists $db{$key};
>>           die unless exists $queue{$key};
>>           die unless exists $submit{$key};
>>           $req{'start'} = $time;
>> -        $req{'ring'} = $tp{'ring'};
>> -        $req{'seqno'} = $tp{'seqno'};
>> -        $req{'ctx'} = $tp{'ctx'};
>> -        $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
>> +        $req{'ring'} = $ring;
>> +        $req{'seqno'} = $seqno;
>> +        $req{'ctx'} = $ctx;
>> +        $req{'name'} = $ctx . '/' . $seqno;
>>           $req{'global'} = $tp{'global'};
>>           $req{'port'} = $tp{'port'};
>>           $req{'queue'} = $queue{$key};
>>           $req{'submit-delay'} = $submit{$key} - $queue{$key};
>>           $req{'execute-delay'} = $req{'start'} - $submit{$key};
>> -        $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
>> -        $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
>> +        $rings{$ring} = $gid++ unless exists $rings{$ring};
>> +        $ringmap{$rings{$ring}} = $ring;
>>           $db{$key} = \%req;
>> -        next;
>> -    } elsif ($tp_name eq 'i915_gem_request_out') {
>> -        my $gkey = global_key($tp{'ring'}, $tp{'global'});
>> -
>> -        $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> -        $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> +    } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
>> +        my $gkey = global_key($ring, $tp{'global'});
>>           die unless exists $db{$key};
>>           die unless exists $db{$key}->{'start'};
>> @@ -476,15 +457,12 @@ while (<>) {
>>           }
>>           $db{$key}->{'duration'} = $db{$key}->{'notify'} - 
>> $db{$key}->{'start'};
>>           $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - 
>> $db{$key}->{'notify'};
>> -        next;
>> -    } elsif ($tp_name eq 'intel_engine_notify') {
>> -        $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
>> -        next;
>> -    } elsif ($tp_name eq 'intel_gpu_freq_change') {
>> +    } elsif ($tp_name eq 'intel_engine_notify:') {
>> +        $notify{global_key($ring, $seqno)} = $time;
>> +    } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
>>           push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
>>           $prev_freq_ts = $time;
>>           $prev_freq = $tp{'new_freq'};
>> -        next;
>>       }
>>   }
>> @@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
>>   my $last_ts = 0;
>>   my $first_ts;
>> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} 
>> keys %db) {
>> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} 
>> keys %db;
>> +my $re_sort = 0;
>> +
>> +foreach my $key (@sorted_keys) {
>>       my $ring = $db{$key}->{'ring'};
>>       my $end = $db{$key}->{'end'};
>> @@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> 
>> $db{$b}->{'start'}} keys %db) {
>>           # 20us tolerance
>>           if (exists $db{$next_key} and $db{$next_key}->{'start'} < 
>> $start + 20) {
>> +            $re_sort = 1;
>>               $db{$next_key}->{'start'} = $start + 
>> $db{$key}->{'duration'};
>>               $db{$next_key}->{'start'} = $db{$next_key}->{'end'} if 
>> $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
>>               $db{$next_key}->{'duration'} = 
>> $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
>> @@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> 
>> $db{$b}->{'start'}} keys %db) {
>>       $ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
>>   }
>> +@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys 
>> %db if $re_sort;
>> +
>>   foreach my $ring (keys %batch_avg) {
>>       $batch_avg{$ring} /= $batch_count{$ring};
>>       $batch_total_avg{$ring} /= $batch_count{$ring};
>> @@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
>>       my (@s_, @e_);
>>       # Extract all GPU busy intervals and sort them.
>> -    foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} 
>> keys %db) {
>> +    foreach my $key (@sorted_keys) {
>>           next unless $db{$key}->{'ring'} == $ring;
>>           push @s_, $db{$key}->{'start'};
>>           push @e_, $db{$key}->{'end'};
>> @@ -660,7 +644,7 @@ my @gpu_intervals;
>>   my (@s_, @e_);
>>   # Extract all GPU busy intervals and sort them.
>> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} 
>> keys %db) {
>> +foreach my $key (@sorted_keys) {
>>       push @s_, $db{$key}->{'start'};
>>       push @e_, $db{$key}->{'end'};
>>       die if $db{$key}->{'start'} > $db{$key}->{'end'};
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2)
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
                   ` (3 preceding siblings ...)
  2017-12-19 16:23 ` John Harrison
@ 2017-12-20 10:33 ` Patchwork
  2017-12-21 11:11 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3) Patchwork
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-20 10:33 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing (rev2)
URL   : https://patchwork.freedesktop.org/series/35569/
State : failure

== Summary ==

Applying: scripts/trace.pl: Optimize event parsing and processing
Using index info to reconstruct a base tree...
M	scripts/trace.pl
Falling back to patching base and 3-way merge...
Auto-merging scripts/trace.pl
CONFLICT (content): Merge conflict in scripts/trace.pl
Patch failed at 0001 scripts/trace.pl: Optimize event parsing and processing
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
  2017-12-20  9:54   ` [PATCH i-g-t] " Tvrtko Ursulin
@ 2017-12-20 23:50     ` John Harrison
  2017-12-21  9:34       ` Tvrtko Ursulin
  0 siblings, 1 reply; 18+ messages in thread
From: John Harrison @ 2017-12-20 23:50 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

On 12/20/2017 1:54 AM, Tvrtko Ursulin wrote:
> What was the effect of all this on your big traces? I am only testing 
> with a smaller one which goes from ~3.3s to ~2.2s. On a larger trace 
> it might be non-linear gains due to double sort avoidance, unless 
> there will be some other effects to cancel that out.
>

So with a trace of a shortened gem_exec_nop/basic_sequential, the 'perf 
script' output is 439MB and the original trace.pl before any of the 
changes took ~180s. After the 'auto-detect field order' patch, it went 
up to ~201s. With the optimisation patch it is down to ~129s.

However, I am also seeing differences in the HTML output since the 
optimisation patch. The differences aren't massive, just slight 
variations in the times. The structure is all the same, its just that 
the accounting and/or time stamps are out.

For example:
     {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32% 
busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18% 
wait<br><br>200931 batches<br>331.28us avg batch<br>331.38us avg engine 
batch<br></small>'},
vs
     {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32% 
busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18% 
wait<br><br>200931 batches<br>338.56us avg batch<br>338.56us avg engine 
batch<br></small>'},

Or:
     {id: 58, key: -210383407,  type: 'range', group: 4, subgroup: 2, 
subgroupOrder: 3, content: '428/3 <small>0</small> 
<small><i>???</i></small> <small><i>++</i></small> <br>142us 
<small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end: 
'2017-01-05 21:27:45.353110', style: 'color: white; background-color: 
red;'},
vs
     {id: 58, key: -210383407,  type: 'range', group: 4, subgroup: 2, 
subgroupOrder: 3, content: '428/3 <small>0</small> 
<small><i>???</i></small> <small><i>++</i></small> <br>159us 
<small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end: 
'2017-01-05 21:27:45.353127', style: 'color: white; background-color: 
red;'},

I can send you the full output if it is useful and the source logs too. 
The HTML output is about 840KB but as noted, the perf logs are hundreds 
of MBs.

John.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
  2017-12-20 23:50     ` John Harrison
@ 2017-12-21  9:34       ` Tvrtko Ursulin
  2017-12-21 22:44         ` John Harrison
  0 siblings, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-21  9:34 UTC (permalink / raw)
  To: John Harrison, Tvrtko Ursulin, Intel-gfx


On 20/12/2017 23:50, John Harrison wrote:
> On 12/20/2017 1:54 AM, Tvrtko Ursulin wrote:
>> What was the effect of all this on your big traces? I am only testing 
>> with a smaller one which goes from ~3.3s to ~2.2s. On a larger trace 
>> it might be non-linear gains due to double sort avoidance, unless 
>> there will be some other effects to cancel that out.
>>
> 
> So with a trace of a shortened gem_exec_nop/basic_sequential, the 'perf 
> script' output is 439MB and the original trace.pl before any of the 
> changes took ~180s. After the 'auto-detect field order' patch, it went 
> up to ~201s. With the optimisation patch it is down to ~129s.
> 
> However, I am also seeing differences in the HTML output since the 
> optimisation patch. The differences aren't massive, just slight 
> variations in the times. The structure is all the same, its just that 
> the accounting and/or time stamps are out.
> 
> For example:
>      {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32% 
> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18% 
> wait<br><br>200931 batches<br>331.28us avg batch<br>331.38us avg engine 
> batch<br></small>'},
> vs
>      {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32% 
> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18% 
> wait<br><br>200931 batches<br>338.56us avg batch<br>338.56us avg engine 
> batch<br></small>'},
> 
> Or:
>      {id: 58, key: -210383407,  type: 'range', group: 4, subgroup: 2, 
> subgroupOrder: 3, content: '428/3 <small>0</small> 
> <small><i>???</i></small> <small><i>++</i></small> <br>142us 
> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end: 
> '2017-01-05 21:27:45.353110', style: 'color: white; background-color: 
> red;'},
> vs
>      {id: 58, key: -210383407,  type: 'range', group: 4, subgroup: 2, 
> subgroupOrder: 3, content: '428/3 <small>0</small> 
> <small><i>???</i></small> <small><i>++</i></small> <br>159us 
> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end: 
> '2017-01-05 21:27:45.353127', style: 'color: white; background-color: 
> red;'},
> 
> I can send you the full output if it is useful and the source logs too. 
> The HTML output is about 840KB but as noted, the perf logs are hundreds 
> of MBs.

I was able to reproduce it. I think it's down to floating point 
mishandling. Neither the original nor the optimized version were correct 
and both are accumulating error. I'll send an updated patch shortly.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH i-g-t v3] scripts/trace.pl: Optimize event parsing and processing
  2017-12-20  9:41   ` [PATCH i-g-t v2] " Tvrtko Ursulin
@ 2017-12-21  9:37     ` Tvrtko Ursulin
  0 siblings, 0 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-21  9:37 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A couple of small optimizations which altogether bring around 30%
improvement in my testing.

1. Do less string processing on tracepoints names and push more of the
   check into the if-ladder.

2. Pull out common db key and ctx processing and cache common values in
   local vars.

3. Key value pair parsing is faster with a regexp.

4. Avoid sorting the db hash multiple times if possible.

v2:
 * Use faster key-value splitting method. (John Harrison)

v3:
 * Fix floating-point to int time conversion.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
 scripts/trace.pl | 115 ++++++++++++++++++++++++-------------------------------
 1 file changed, 50 insertions(+), 65 deletions(-)

diff --git a/scripts/trace.pl b/scripts/trace.pl
index ade0a9154bd7..539024a4dbbd 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,92 @@ my $prev_freq = 0;
 my $prev_freq_ts = 0;
 while (<>) {
 	my @fields;
-	my @tmp;
 	my $tp_name;
-	my $time;
 	my %tp;
-	my $key;
+	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
 
 	chomp;
 	@fields = split ' ';
 
+	chop $fields[3];
+	$time = int($fields[3] * 1000000.0 + 0.5);
+
 	$tp_name = $fields[4];
-	@tmp = split ':', $tp_name, 2;
-	next unless $tmp[0] eq 'i915';
-	$tp_name = $tmp[1];
-	chop $tp_name;
 
-	chop $fields[3];
-	$time = $fields[3] * 1000000.0;
 	splice @fields, 0, 5;
 
 	foreach my $f (@fields) {
-		my @kv = split '=|,', $f;
-
-		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
+		my ($k, $v);
 
-		$tp{$kv[0]} = $kv[1];
+		next unless $f =~ m/=/;
+		($k, $v) = ($`, $');
+		$k = 'global' if $k eq 'global_seqno';
+		chop $v if substr($v, -1, 1) eq ',';
+		$tp{$k} = $v;
 	}
 
 	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
 
-	if ($tp_name eq 'i915_gem_request_wait_begin') {
-		my %rw;
+	if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
+		$ring = $tp{'ring'};
+		$ctx = $tp{'ctx'};
+		$seqno = $tp{'seqno'};
+
+		$orig_ctx = $ctx;
+		$ctx = sanitize_ctx($ctx, $ring);
+		$key = db_key($ring, $ctx, $seqno);
+	}
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+		my %rw;
 
 		next if exists $reqwait{$key};
 
 		$rw{'key'} = $key;
-		$rw{'ring'} = $tp{'ring'};
-		$rw{'seqno'} = $tp{'seqno'};
-		$rw{'ctx'} = $tp{'ctx'};
+		$rw{'ring'} = $ring;
+		$rw{'seqno'} = $seqno;
+		$rw{'ctx'} = $ctx;
 		$rw{'start'} = $time;
 		$reqwait{$key} = \%rw;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
 		next unless exists $reqwait{$key};
 
 		$reqwait{$key}->{'end'} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_add') {
-		my $orig_ctx = $tp{'ctx'};
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
 		if (exists $queue{$key}) {
 			$ctxdb{$orig_ctx}++;
-			$tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
-			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+			$ctx = sanitize_ctx($orig_ctx, $ring);
+			$key = db_key($ring, $ctx, $seqno);
 		}
 
 		$queue{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_submit') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
 		die if exists $submit{$key};
 		die unless exists $queue{$key};
 
 		$submit{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_in') {
+	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
 		my %req;
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
 		die if exists $db{$key};
 		die unless exists $queue{$key};
 		die unless exists $submit{$key};
 
 		$req{'start'} = $time;
-		$req{'ring'} = $tp{'ring'};
-		$req{'seqno'} = $tp{'seqno'};
-		$req{'ctx'} = $tp{'ctx'};
-		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+		$req{'ring'} = $ring;
+		$req{'seqno'} = $seqno;
+		$req{'ctx'} = $ctx;
+		$req{'name'} = $ctx . '/' . $seqno;
 		$req{'global'} = $tp{'global'};
 		$req{'port'} = $tp{'port'};
 		$req{'queue'} = $queue{$key};
 		$req{'submit-delay'} = $submit{$key} - $queue{$key};
 		$req{'execute-delay'} = $req{'start'} - $submit{$key};
-		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
-		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+		$rings{$ring} = $gid++ unless exists $rings{$ring};
+		$ringmap{$rings{$ring}} = $ring;
 		$db{$key} = \%req;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_out') {
-		my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+		my $gkey = global_key($ring, $tp{'global'});
 
 		die unless exists $db{$key};
 		die unless exists $db{$key}->{'start'};
@@ -476,15 +458,12 @@ while (<>) {
 		}
 		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
 		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
-		next;
-	} elsif ($tp_name eq 'intel_engine_notify') {
-		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
-		next;
-	} elsif ($tp_name eq 'intel_gpu_freq_change') {
+	} elsif ($tp_name eq 'intel_engine_notify:') {
+		$notify{global_key($ring, $seqno)} = $time;
+	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
 		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
 		$prev_freq_ts = $time;
 		$prev_freq = $tp{'new_freq'};
-		next;
 	}
 }
 
@@ -557,7 +536,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
 my $last_ts = 0;
 my $first_ts;
 
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
 	my $ring = $db{$key}->{'ring'};
 	my $end = $db{$key}->{'end'};
 
@@ -585,6 +567,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 
 		# 20us tolerance
 		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+			$re_sort = 1;
 			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
 			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
 			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +584,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
 }
 
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
 foreach my $ring (keys %batch_avg) {
 	$batch_avg{$ring} /= $batch_count{$ring};
 	$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +601,7 @@ foreach my $gid (sort keys %rings) {
 	my (@s_, @e_);
 
 	# Extract all GPU busy intervals and sort them.
-	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+	foreach my $key (@sorted_keys) {
 		next unless $db{$key}->{'ring'} == $ring;
 		push @s_, $db{$key}->{'start'};
 		push @e_, $db{$key}->{'end'};
@@ -660,7 +645,7 @@ my @gpu_intervals;
 my (@s_, @e_);
 
 # Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
 	push @s_, $db{$key}->{'start'};
 	push @e_, $db{$key}->{'end'};
 	die if $db{$key}->{'start'} > $db{$key}->{'end'};
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3)
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
                   ` (4 preceding siblings ...)
  2017-12-20 10:33 ` ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2) Patchwork
@ 2017-12-21 11:11 ` Patchwork
  2017-12-21 13:27 ` ✗ Fi.CI.IGT: warning " Patchwork
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-21 11:11 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing (rev3)
URL   : https://patchwork.freedesktop.org/series/35569/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
beb26d89ff5c5621c1e6b6ac2a45439507af86b7 meson: Install .testlist files and README from tests/intel-ci

with latest DRM-Tip kernel build CI_DRM_3563
c21bf92fbe84 drm-tip: 2017y-12m-21d-09h-56m-40s UTC integration manifest

No testlist changes.

Test debugfs_test:
        Subgroup read_all_entries:
                pass       -> DMESG-FAIL (fi-elk-e7500) fdo#103989 +1
Test kms_pipe_crc_basic:
        Subgroup suspend-read-crc-pipe-b:
                incomplete -> PASS       (fi-snb-2520m) fdo#103713
Test kms_psr_sink_crc:
        Subgroup psr_basic:
                dmesg-warn -> PASS       (fi-skl-6700hq) fdo#101144

fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#101144 https://bugs.freedesktop.org/show_bug.cgi?id=101144

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:442s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:441s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:383s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:509s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:277s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:489s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:497s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:484s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:472s
fi-elk-e7500     total:224  pass:163  dwarn:14  dfail:1   fail:0   skip:45 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:263s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:533s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:406s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:426s
fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:420s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:467s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:426s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:475s
fi-kbl-7560u     total:288  pass:268  dwarn:1   dfail:0   fail:0   skip:19  time:522s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:469s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:523s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:581s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:455s
fi-skl-6600u     total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:531s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:558s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:510s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:491s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:447s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:556s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:426s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:589s
fi-cnl-y         total:247  pass:222  dwarn:0   dfail:0   fail:0   skip:24 

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_718/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✗ Fi.CI.IGT: warning for scripts/trace.pl: Optimize event parsing and processing (rev3)
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
                   ` (5 preceding siblings ...)
  2017-12-21 11:11 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3) Patchwork
@ 2017-12-21 13:27 ` Patchwork
  2017-12-22  9:34 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5) Patchwork
  2017-12-22 10:21 ` ✓ Fi.CI.IGT: " Patchwork
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-21 13:27 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing (rev3)
URL   : https://patchwork.freedesktop.org/series/35569/
State : warning

== Summary ==

Test gem_tiled_swapping:
        Subgroup non-threaded:
                incomplete -> PASS       (shard-snb) fdo#104218
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-blt:
                pass       -> FAIL       (shard-snb) fdo#101623 +1
Test kms_cursor_legacy:
        Subgroup cursor-vs-flip-atomic-transitions-varying-size:
                skip       -> PASS       (shard-hsw) fdo#103172
Test kms_setmode:
        Subgroup basic:
                fail       -> PASS       (shard-hsw) fdo#99912
Test kms_chv_cursor_fail:
        Subgroup pipe-a-64x64-top-edge:
                pass       -> SKIP       (shard-hsw)
Test kms_flip:
        Subgroup vblank-vs-suspend:
                incomplete -> PASS       (shard-snb) fdo#103375
Test gem_eio:
        Subgroup in-flight-contexts:
                dmesg-warn -> PASS       (shard-snb) fdo#104058

fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#103172 https://bugs.freedesktop.org/show_bug.cgi?id=103172
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058

shard-hsw        total:2712 pass:1537 dwarn:1   dfail:0   fail:9   skip:1165 time:9402s
shard-snb        total:2712 pass:1309 dwarn:1   dfail:0   fail:11  skip:1391 time:8130s
Blacklisted hosts:
shard-apl        total:2712 pass:1687 dwarn:1   dfail:0   fail:22  skip:1001 time:13954s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_718/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
  2017-12-21  9:34       ` Tvrtko Ursulin
@ 2017-12-21 22:44         ` John Harrison
  2017-12-22  9:16           ` [PATCH i-g-t v4] " Tvrtko Ursulin
  0 siblings, 1 reply; 18+ messages in thread
From: John Harrison @ 2017-12-21 22:44 UTC (permalink / raw)
  To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx

On 12/21/2017 1:34 AM, Tvrtko Ursulin wrote:
>
> On 20/12/2017 23:50, John Harrison wrote:
>> On 12/20/2017 1:54 AM, Tvrtko Ursulin wrote:
>>> What was the effect of all this on your big traces? I am only 
>>> testing with a smaller one which goes from ~3.3s to ~2.2s. On a 
>>> larger trace it might be non-linear gains due to double sort 
>>> avoidance, unless there will be some other effects to cancel that out.
>>>
>>
>> So with a trace of a shortened gem_exec_nop/basic_sequential, the 
>> 'perf script' output is 439MB and the original trace.pl before any of 
>> the changes took ~180s. After the 'auto-detect field order' patch, it 
>> went up to ~201s. With the optimisation patch it is down to ~129s.
>>
>> However, I am also seeing differences in the HTML output since the 
>> optimisation patch. The differences aren't massive, just slight 
>> variations in the times. The structure is all the same, its just that 
>> the accounting and/or time stamps are out.
>>
>> For example:
>>      {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32% 
>> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18% 
>> wait<br><br>200931 batches<br>331.28us avg batch<br>331.38us avg 
>> engine batch<br></small>'},
>> vs
>>      {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32% 
>> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18% 
>> wait<br><br>200931 batches<br>338.56us avg batch<br>338.56us avg 
>> engine batch<br></small>'},
>>
>> Or:
>>      {id: 58, key: -210383407,  type: 'range', group: 4, subgroup: 2, 
>> subgroupOrder: 3, content: '428/3 <small>0</small> 
>> <small><i>???</i></small> <small><i>++</i></small> <br>142us 
>> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end: 
>> '2017-01-05 21:27:45.353110', style: 'color: white; background-color: 
>> red;'},
>> vs
>>      {id: 58, key: -210383407,  type: 'range', group: 4, subgroup: 2, 
>> subgroupOrder: 3, content: '428/3 <small>0</small> 
>> <small><i>???</i></small> <small><i>++</i></small> <br>159us 
>> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end: 
>> '2017-01-05 21:27:45.353127', style: 'color: white; background-color: 
>> red;'},
>>
>> I can send you the full output if it is useful and the source logs 
>> too. The HTML output is about 840KB but as noted, the perf logs are 
>> hundreds of MBs.
>
> I was able to reproduce it. I think it's down to floating point 
> mishandling. Neither the original nor the optimized version were 
> correct and both are accumulating error. I'll send an updated patch 
> shortly.
>
> Regards,
>
> Tvrtko

The new version with +0.5 gives me exactly the same results as the 
previous edition without the rounding. If I get rid of the int() 
conversion too, I still get the same results.

What does make a difference is if I fix up the broken processing of 
notify traces:
diff --git a/scripts/trace.pl b/scripts/trace.pl
index 8dbd497b..20ae3c78 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -382,14 +382,16 @@ while (<>) {

         next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};

-       if (exists $tp{'ring'} and exists $tp{'ctx'} and exists 
$tp{'seqno'}) {
+       if (exists $tp{'ring'} and exists $tp{'seqno'}) {
                 $ring = $tp{'ring'};
-               $ctx = $tp{'ctx'};
                 $seqno = $tp{'seqno'};

-               $orig_ctx = $ctx;
-               $ctx = sanitize_ctx($ctx, $ring);
-               $key = db_key($ring, $ctx, $seqno);
+               if (exists $tp{'ctx'}) {
+                       $ctx = $tp{'ctx'};
+                       $orig_ctx = $ctx;
+                       $ctx = sanitize_ctx($ctx, $ring);
+                       $key = db_key($ring, $ctx, $seqno);
+               }
         }

         if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
@@ -461,7 +463,7 @@ while (<>) {
                 }
                 $db{$key}->{'duration'} = $db{$key}->{'notify'} - 
$db{$key}->{'start'};
                 $db{$key}->{'context-complete-delay'} = 
$db{$key}->{'end'} - $db{$key}->{'notify'};
-       } elsif ($tp_name eq 'intel_engine_notify:') {
+       } elsif ($tp_name eq 'i915:intel_engine_notify:') {
                 $notify{global_key($ring, $seqno)} = $time;
         } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
                 push @freqs, [$prev_freq_ts, $time, $prev_freq] if 
$prev_freq;

With that, I get exactly the same HTML page as before the optimisation 
patch.

John.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH i-g-t v4] scripts/trace.pl: Optimize event parsing and processing
  2017-12-21 22:44         ` John Harrison
@ 2017-12-22  9:16           ` Tvrtko Ursulin
  2017-12-22 20:38             ` John Harrison
  0 siblings, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-22  9:16 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A couple of small optimizations which altogether bring around 30%
improvement in my testing.

1. Do less string processing on tracepoints names and push more of the
   check into the if-ladder.

2. Pull out common db key and ctx processing and cache common values in
   local vars.

3. Key value pair parsing is faster with a regexp.

4. Avoid sorting the db hash multiple times if possible.

v2:
 * Use faster key-value splitting method. (John Harrison)

v3:
 * Fix floating-point to int time conversion.

v4:
 * Fix refactoring and logic fails. (John Harrison)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
 scripts/trace.pl | 117 +++++++++++++++++++++++++------------------------------
 1 file changed, 52 insertions(+), 65 deletions(-)

diff --git a/scripts/trace.pl b/scripts/trace.pl
index ade0a9154bd7..cb93900dd620 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,94 @@ my $prev_freq = 0;
 my $prev_freq_ts = 0;
 while (<>) {
 	my @fields;
-	my @tmp;
 	my $tp_name;
-	my $time;
 	my %tp;
-	my $key;
+	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
 
 	chomp;
 	@fields = split ' ';
 
+	chop $fields[3];
+	$time = int($fields[3] * 1000000.0 + 0.5);
+
 	$tp_name = $fields[4];
-	@tmp = split ':', $tp_name, 2;
-	next unless $tmp[0] eq 'i915';
-	$tp_name = $tmp[1];
-	chop $tp_name;
 
-	chop $fields[3];
-	$time = $fields[3] * 1000000.0;
 	splice @fields, 0, 5;
 
 	foreach my $f (@fields) {
-		my @kv = split '=|,', $f;
+		my ($k, $v);
 
-		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
-
-		$tp{$kv[0]} = $kv[1];
+		next unless $f =~ m/=/;
+		($k, $v) = ($`, $');
+		$k = 'global' if $k eq 'global_seqno';
+		chop $v if substr($v, -1, 1) eq ',';
+		$tp{$k} = $v;
 	}
 
 	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
 
-	if ($tp_name eq 'i915_gem_request_wait_begin') {
-		my %rw;
+	if (exists $tp{'ring'} and exists $tp{'seqno'}) {
+		$ring = $tp{'ring'};
+		$seqno = $tp{'seqno'};
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+		if (exists $tp{'ctx'}) {
+			$ctx = $tp{'ctx'};
+			$orig_ctx = $ctx;
+			$ctx = sanitize_ctx($ctx, $ring);
+			$key = db_key($ring, $ctx, $seqno);
+		}
+	}
+
+	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+		my %rw;
 
 		next if exists $reqwait{$key};
 
 		$rw{'key'} = $key;
-		$rw{'ring'} = $tp{'ring'};
-		$rw{'seqno'} = $tp{'seqno'};
-		$rw{'ctx'} = $tp{'ctx'};
+		$rw{'ring'} = $ring;
+		$rw{'seqno'} = $seqno;
+		$rw{'ctx'} = $ctx;
 		$rw{'start'} = $time;
 		$reqwait{$key} = \%rw;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
 		next unless exists $reqwait{$key};
 
 		$reqwait{$key}->{'end'} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_add') {
-		my $orig_ctx = $tp{'ctx'};
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
 		if (exists $queue{$key}) {
 			$ctxdb{$orig_ctx}++;
-			$tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
-			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+			$ctx = sanitize_ctx($orig_ctx, $ring);
+			$key = db_key($ring, $ctx, $seqno);
 		}
 
 		$queue{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_submit') {
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
 		die if exists $submit{$key};
 		die unless exists $queue{$key};
 
 		$submit{$key} = $time;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_in') {
+	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
 		my %req;
 
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
 		die if exists $db{$key};
 		die unless exists $queue{$key};
 		die unless exists $submit{$key};
 
 		$req{'start'} = $time;
-		$req{'ring'} = $tp{'ring'};
-		$req{'seqno'} = $tp{'seqno'};
-		$req{'ctx'} = $tp{'ctx'};
-		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+		$req{'ring'} = $ring;
+		$req{'seqno'} = $seqno;
+		$req{'ctx'} = $ctx;
+		$req{'name'} = $ctx . '/' . $seqno;
 		$req{'global'} = $tp{'global'};
 		$req{'port'} = $tp{'port'};
 		$req{'queue'} = $queue{$key};
 		$req{'submit-delay'} = $submit{$key} - $queue{$key};
 		$req{'execute-delay'} = $req{'start'} - $submit{$key};
-		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
-		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+		$rings{$ring} = $gid++ unless exists $rings{$ring};
+		$ringmap{$rings{$ring}} = $ring;
 		$db{$key} = \%req;
-		next;
-	} elsif ($tp_name eq 'i915_gem_request_out') {
-		my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
-		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
-		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+		my $gkey = global_key($ring, $tp{'global'});
 
 		die unless exists $db{$key};
 		die unless exists $db{$key}->{'start'};
@@ -476,15 +460,12 @@ while (<>) {
 		}
 		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
 		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
-		next;
-	} elsif ($tp_name eq 'intel_engine_notify') {
-		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
-		next;
-	} elsif ($tp_name eq 'intel_gpu_freq_change') {
+	} elsif ($tp_name eq 'i915:intel_engine_notify:') {
+		$notify{global_key($ring, $seqno)} = $time;
+	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
 		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
 		$prev_freq_ts = $time;
 		$prev_freq = $tp{'new_freq'};
-		next;
 	}
 }
 
@@ -557,7 +538,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
 my $last_ts = 0;
 my $first_ts;
 
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
 	my $ring = $db{$key}->{'ring'};
 	my $end = $db{$key}->{'end'};
 
@@ -585,6 +569,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 
 		# 20us tolerance
 		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+			$re_sort = 1;
 			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
 			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
 			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +586,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
 	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
 }
 
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
 foreach my $ring (keys %batch_avg) {
 	$batch_avg{$ring} /= $batch_count{$ring};
 	$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +603,7 @@ foreach my $gid (sort keys %rings) {
 	my (@s_, @e_);
 
 	# Extract all GPU busy intervals and sort them.
-	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+	foreach my $key (@sorted_keys) {
 		next unless $db{$key}->{'ring'} == $ring;
 		push @s_, $db{$key}->{'start'};
 		push @e_, $db{$key}->{'end'};
@@ -660,7 +647,7 @@ my @gpu_intervals;
 my (@s_, @e_);
 
 # Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
 	push @s_, $db{$key}->{'start'};
 	push @e_, $db{$key}->{'end'};
 	die if $db{$key}->{'start'} > $db{$key}->{'end'};
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5)
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
                   ` (6 preceding siblings ...)
  2017-12-21 13:27 ` ✗ Fi.CI.IGT: warning " Patchwork
@ 2017-12-22  9:34 ` Patchwork
  2017-12-22 10:21 ` ✓ Fi.CI.IGT: " Patchwork
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-22  9:34 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing (rev5)
URL   : https://patchwork.freedesktop.org/series/35569/
State : success

== Summary ==

IGT patchset tested on top of latest successful build
beb26d89ff5c5621c1e6b6ac2a45439507af86b7 meson: Install .testlist files and README from tests/intel-ci

with latest DRM-Tip kernel build CI_DRM_3566
c0a64101df89 drm-tip: 2017y-12m-21d-18h-02m-56s UTC integration manifest

No testlist changes.

Test debugfs_test:
        Subgroup read_all_entries:
                dmesg-warn -> DMESG-FAIL (fi-elk-e7500) fdo#103989
                incomplete -> PASS       (fi-snb-2520m) fdo#103713

fdo#103989 
fdo#103713 

fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:436s
fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:443s
fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:383s
fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:500s
fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:277s
fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:493s
fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:503s
fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:480s
fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:473s
fi-elk-e7500     total:224  pass:163  dwarn:14  dfail:1   fail:0   skip:45 
fi-gdg-551       total:288  pass:179  dwarn:0   dfail:0   fail:1   skip:108 time:264s
fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:534s
fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:408s
fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:413s
fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:479s
fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:429s
fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:484s
fi-kbl-7560u     total:288  pass:268  dwarn:1   dfail:0   fail:0   skip:19  time:526s
fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:472s
fi-kbl-r         total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:526s
fi-pnv-d510      total:288  pass:222  dwarn:1   dfail:0   fail:0   skip:65  time:584s
fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:443s
fi-skl-6600u     total:288  pass:260  dwarn:1   dfail:0   fail:0   skip:27  time:531s
fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:555s
fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:507s
fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:495s
fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:445s
fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:554s
fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:418s
Blacklisted hosts:
fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:587s
fi-cnl-y         total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:609s
fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:485s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_724/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* ✓ Fi.CI.IGT: success for scripts/trace.pl: Optimize event parsing and processing (rev5)
  2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
                   ` (7 preceding siblings ...)
  2017-12-22  9:34 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5) Patchwork
@ 2017-12-22 10:21 ` Patchwork
  8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-22 10:21 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

== Series Details ==

Series: scripts/trace.pl: Optimize event parsing and processing (rev5)
URL   : https://patchwork.freedesktop.org/series/35569/
State : success

== Summary ==

Test gem_tiled_swapping:
        Subgroup non-threaded:
                incomplete -> PASS       (shard-hsw) fdo#104218
Test drv_suspend:
        Subgroup fence-restore-tiled2untiled:
                skip       -> PASS       (shard-snb)
                skip       -> PASS       (shard-hsw)
        Subgroup sysfs-reader:
                skip       -> PASS       (shard-hsw)
Test pm_rpm:
        Subgroup system-suspend:
                skip       -> PASS       (shard-hsw) fdo#103375 +1
Test kms_frontbuffer_tracking:
        Subgroup fbc-1p-offscren-pri-shrfb-draw-blt:
                pass       -> FAIL       (shard-snb) fdo#101623

fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623

shard-hsw        total:2702 pass:1532 dwarn:1   dfail:0   fail:10  skip:1158 time:8997s
shard-snb        total:2712 pass:1309 dwarn:1   dfail:0   fail:11  skip:1391 time:8098s
Blacklisted hosts:
shard-apl        total:2635 pass:1636 dwarn:1   dfail:0   fail:23  skip:974 time:13422s
shard-kbl        total:2703 pass:1798 dwarn:4   dfail:0   fail:23  skip:877 time:10623s

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_724/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH i-g-t v4] scripts/trace.pl: Optimize event parsing and processing
  2017-12-22  9:16           ` [PATCH i-g-t v4] " Tvrtko Ursulin
@ 2017-12-22 20:38             ` John Harrison
  0 siblings, 0 replies; 18+ messages in thread
From: John Harrison @ 2017-12-22 20:38 UTC (permalink / raw)
  To: Tvrtko Ursulin, Intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 8722 bytes --]

This one works for me. Loads faster but identical output to the original 
version.

Reviewed-by:  John Harrison <John.C.Harrison@Intel.com>


On 12/22/2017 1:16 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
>
> 1. Do less string processing on tracepoints names and push more of the
>     check into the if-ladder.
>
> 2. Pull out common db key and ctx processing and cache common values in
>     local vars.
>
> 3. Key value pair parsing is faster with a regexp.
>
> 4. Avoid sorting the db hash multiple times if possible.
>
> v2:
>   * Use faster key-value splitting method. (John Harrison)
>
> v3:
>   * Fix floating-point to int time conversion.
>
> v4:
>   * Fix refactoring and logic fails. (John Harrison)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
>   scripts/trace.pl | 117 +++++++++++++++++++++++++------------------------------
>   1 file changed, 52 insertions(+), 65 deletions(-)
>
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index ade0a9154bd7..cb93900dd620 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -356,110 +356,94 @@ my $prev_freq = 0;
>   my $prev_freq_ts = 0;
>   while (<>) {
>   	my @fields;
> -	my @tmp;
>   	my $tp_name;
> -	my $time;
>   	my %tp;
> -	my $key;
> +	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>   
>   	chomp;
>   	@fields = split ' ';
>   
> +	chop $fields[3];
> +	$time = int($fields[3] * 1000000.0 + 0.5);
> +
>   	$tp_name = $fields[4];
> -	@tmp = split ':', $tp_name, 2;
> -	next unless $tmp[0] eq 'i915';
> -	$tp_name = $tmp[1];
> -	chop $tp_name;
>   
> -	chop $fields[3];
> -	$time = $fields[3] * 1000000.0;
>   	splice @fields, 0, 5;
>   
>   	foreach my $f (@fields) {
> -		my @kv = split '=|,', $f;
> +		my ($k, $v);
>   
> -		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
> -
> -		$tp{$kv[0]} = $kv[1];
> +		next unless $f =~ m/=/;
> +		($k, $v) = ($`, $');
> +		$k = 'global' if $k eq 'global_seqno';
> +		chop $v if substr($v, -1, 1) eq ',';
> +		$tp{$k} = $v;
>   	}
>   
>   	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>   
> -	if ($tp_name eq 'i915_gem_request_wait_begin') {
> -		my %rw;
> +	if (exists $tp{'ring'} and exists $tp{'seqno'}) {
> +		$ring = $tp{'ring'};
> +		$seqno = $tp{'seqno'};
>   
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +		if (exists $tp{'ctx'}) {
> +			$ctx = $tp{'ctx'};
> +			$orig_ctx = $ctx;
> +			$ctx = sanitize_ctx($ctx, $ring);
> +			$key = db_key($ring, $ctx, $seqno);
> +		}
> +	}
> +
> +	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
> +		my %rw;
>   
>   		next if exists $reqwait{$key};
>   
>   		$rw{'key'} = $key;
> -		$rw{'ring'} = $tp{'ring'};
> -		$rw{'seqno'} = $tp{'seqno'};
> -		$rw{'ctx'} = $tp{'ctx'};
> +		$rw{'ring'} = $ring;
> +		$rw{'seqno'} = $seqno;
> +		$rw{'ctx'} = $ctx;
>   		$rw{'start'} = $time;
>   		$reqwait{$key} = \%rw;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
>   		next unless exists $reqwait{$key};
>   
>   		$reqwait{$key}->{'end'} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_add') {
> -		my $orig_ctx = $tp{'ctx'};
> -
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
>   		if (exists $queue{$key}) {
>   			$ctxdb{$orig_ctx}++;
> -			$tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
> -			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +			$ctx = sanitize_ctx($orig_ctx, $ring);
> +			$key = db_key($ring, $ctx, $seqno);
>   		}
>   
>   		$queue{$key} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_submit') {
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
>   		die if exists $submit{$key};
>   		die unless exists $queue{$key};
>   
>   		$submit{$key} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_in') {
> +	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
>   		my %req;
>   
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
>   		die if exists $db{$key};
>   		die unless exists $queue{$key};
>   		die unless exists $submit{$key};
>   
>   		$req{'start'} = $time;
> -		$req{'ring'} = $tp{'ring'};
> -		$req{'seqno'} = $tp{'seqno'};
> -		$req{'ctx'} = $tp{'ctx'};
> -		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
> +		$req{'ring'} = $ring;
> +		$req{'seqno'} = $seqno;
> +		$req{'ctx'} = $ctx;
> +		$req{'name'} = $ctx . '/' . $seqno;
>   		$req{'global'} = $tp{'global'};
>   		$req{'port'} = $tp{'port'};
>   		$req{'queue'} = $queue{$key};
>   		$req{'submit-delay'} = $submit{$key} - $queue{$key};
>   		$req{'execute-delay'} = $req{'start'} - $submit{$key};
> -		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
> -		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
> +		$rings{$ring} = $gid++ unless exists $rings{$ring};
> +		$ringmap{$rings{$ring}} = $ring;
>   		$db{$key} = \%req;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_out') {
> -		my $gkey = global_key($tp{'ring'}, $tp{'global'});
> -
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
> +		my $gkey = global_key($ring, $tp{'global'});
>   
>   		die unless exists $db{$key};
>   		die unless exists $db{$key}->{'start'};
> @@ -476,15 +460,12 @@ while (<>) {
>   		}
>   		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
>   		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
> -		next;
> -	} elsif ($tp_name eq 'intel_engine_notify') {
> -		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
> -		next;
> -	} elsif ($tp_name eq 'intel_gpu_freq_change') {
> +	} elsif ($tp_name eq 'i915:intel_engine_notify:') {
> +		$notify{global_key($ring, $seqno)} = $time;
> +	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
>   		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
>   		$prev_freq_ts = $time;
>   		$prev_freq = $tp{'new_freq'};
> -		next;
>   	}
>   }
>   
> @@ -557,7 +538,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
>   my $last_ts = 0;
>   my $first_ts;
>   
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
> +my $re_sort = 0;
> +
> +foreach my $key (@sorted_keys) {
>   	my $ring = $db{$key}->{'ring'};
>   	my $end = $db{$key}->{'end'};
>   
> @@ -585,6 +569,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>   
>   		# 20us tolerance
>   		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
> +			$re_sort = 1;
>   			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
>   			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
>   			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
> @@ -601,6 +586,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>   	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
>   }
>   
> +@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
> +
>   foreach my $ring (keys %batch_avg) {
>   	$batch_avg{$ring} /= $batch_count{$ring};
>   	$batch_total_avg{$ring} /= $batch_count{$ring};
> @@ -616,7 +603,7 @@ foreach my $gid (sort keys %rings) {
>   	my (@s_, @e_);
>   
>   	# Extract all GPU busy intervals and sort them.
> -	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +	foreach my $key (@sorted_keys) {
>   		next unless $db{$key}->{'ring'} == $ring;
>   		push @s_, $db{$key}->{'start'};
>   		push @e_, $db{$key}->{'end'};
> @@ -660,7 +647,7 @@ my @gpu_intervals;
>   my (@s_, @e_);
>   
>   # Extract all GPU busy intervals and sort them.
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +foreach my $key (@sorted_keys) {
>   	push @s_, $db{$key}->{'start'};
>   	push @e_, $db{$key}->{'end'};
>   	die if $db{$key}->{'start'} > $db{$key}->{'end'};


[-- Attachment #1.2: Type: text/html, Size: 9019 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-12-22 20:38 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-12-19 13:23 ` ✓ Fi.CI.IGT: " Patchwork
2017-12-19 15:02 ` [PATCH i-g-t] " Chris Wilson
2017-12-19 16:23 ` John Harrison
2017-12-20  9:41   ` [PATCH i-g-t v2] " Tvrtko Ursulin
2017-12-21  9:37     ` [PATCH i-g-t v3] " Tvrtko Ursulin
2017-12-20  9:54   ` [PATCH i-g-t] " Tvrtko Ursulin
2017-12-20 23:50     ` John Harrison
2017-12-21  9:34       ` Tvrtko Ursulin
2017-12-21 22:44         ` John Harrison
2017-12-22  9:16           ` [PATCH i-g-t v4] " Tvrtko Ursulin
2017-12-22 20:38             ` John Harrison
2017-12-20 10:33 ` ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2) Patchwork
2017-12-21 11:11 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3) Patchwork
2017-12-21 13:27 ` ✗ Fi.CI.IGT: warning " Patchwork
2017-12-22  9:34 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5) Patchwork
2017-12-22 10:21 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.