* [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
@ 2017-12-19 11:15 Tvrtko Ursulin
2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
` (8 more replies)
0 siblings, 9 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-19 11:15 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
A couple of small optimizations which altogether bring around 30%
improvement in my testing.
1. Do less string processing on tracepoints names and push more of the
check into the if-ladder.
2. Pull out common db key and ctx processing and cache common values in
local vars.
3. Key value pair parsing is faster with a regexp.
4. Avoid sorting the db hash multiple times if possible.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
1 file changed, 49 insertions(+), 65 deletions(-)
---
John please check if this helps with your monster traces before we decide
if it is worth putting this in. Well, there isn't much of a downside
except touching something which works and risking breaking it.
diff --git a/scripts/trace.pl b/scripts/trace.pl
index 98e4a9843a43..5463e1fd1954 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,91 @@ my $prev_freq = 0;
my $prev_freq_ts = 0;
while (<>) {
my @fields;
- my @tmp;
my $tp_name;
- my $time;
my %tp;
- my $key;
+ my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
chomp;
@fields = split ' ';
+ chop $fields[3];
+ $time = int($fields[3] * 1000000.0);
+
$tp_name = $fields[4];
- @tmp = split ':', $tp_name, 2;
- next unless $tmp[0] eq 'i915';
- $tp_name = $tmp[1];
- chop $tp_name;
- chop $fields[3];
- $time = $fields[3] * 1000000.0;
splice @fields, 0, 5;
foreach my $f (@fields) {
- my @kv = split '=|,', $f;
-
- $kv[0] = 'global' if $kv[0] eq 'global_seqno';
+ my ($k, $v);
- $tp{$kv[0]} = $kv[1];
+ ($k, $v) = ($f =~ /^(\S+)=(\d+)/);
+ next unless defined $k;
+ $k = 'global' if $k eq 'global_seqno';
+ $tp{$k} = $v;
}
next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
- if ($tp_name eq 'i915_gem_request_wait_begin') {
- my %rw;
+ if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
+ $ring = $tp{'ring'};
+ $ctx = $tp{'ctx'};
+ $seqno = $tp{'seqno'};
+
+ $orig_ctx = $ctx;
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+ my %rw;
next if exists $reqwait{$key};
$rw{'key'} = $key;
- $rw{'ring'} = $tp{'ring'};
- $rw{'seqno'} = $tp{'seqno'};
- $rw{'ctx'} = $tp{'ctx'};
+ $rw{'ring'} = $ring;
+ $rw{'seqno'} = $seqno;
+ $rw{'ctx'} = $ctx;
$rw{'start'} = $time;
$reqwait{$key} = \%rw;
- next;
- } elsif ($tp_name eq 'i915_gem_request_wait_end') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
next unless exists $reqwait{$key};
$reqwait{$key}->{'end'} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_add') {
- my $orig_ctx = $tp{'ctx'};
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
if (exists $queue{$key}) {
$ctxdb{$orig_ctx}++;
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
}
$queue{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_submit') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
die if exists $submit{$key};
die unless exists $queue{$key};
$submit{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_in') {
+ } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
my %req;
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
die if exists $db{$key};
die unless exists $queue{$key};
die unless exists $submit{$key};
$req{'start'} = $time;
- $req{'ring'} = $tp{'ring'};
- $req{'seqno'} = $tp{'seqno'};
- $req{'ctx'} = $tp{'ctx'};
- $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+ $req{'ring'} = $ring;
+ $req{'seqno'} = $seqno;
+ $req{'ctx'} = $ctx;
+ $req{'name'} = $ctx . '/' . $seqno;
$req{'global'} = $tp{'global'};
$req{'port'} = $tp{'port'};
$req{'queue'} = $queue{$key};
$req{'submit-delay'} = $submit{$key} - $queue{$key};
$req{'execute-delay'} = $req{'start'} - $submit{$key};
- $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
- $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+ $rings{$ring} = $gid++ unless exists $rings{$ring};
+ $ringmap{$rings{$ring}} = $ring;
$db{$key} = \%req;
- next;
- } elsif ($tp_name eq 'i915_gem_request_out') {
- my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+ my $gkey = global_key($ring, $tp{'global'});
die unless exists $db{$key};
die unless exists $db{$key}->{'start'};
@@ -476,15 +457,12 @@ while (<>) {
}
$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
- next;
- } elsif ($tp_name eq 'intel_engine_notify') {
- $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
- next;
- } elsif ($tp_name eq 'intel_gpu_freq_change') {
+ } elsif ($tp_name eq 'intel_engine_notify:') {
+ $notify{global_key($ring, $seqno)} = $time;
+ } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
$prev_freq_ts = $time;
$prev_freq = $tp{'new_freq'};
- next;
}
}
@@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
my $last_ts = 0;
my $first_ts;
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
my $ring = $db{$key}->{'ring'};
my $end = $db{$key}->{'end'};
@@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
# 20us tolerance
if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+ $re_sort = 1;
$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
}
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
foreach my $ring (keys %batch_avg) {
$batch_avg{$ring} /= $batch_count{$ring};
$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
- foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ foreach my $key (@sorted_keys) {
next unless $db{$key}->{'ring'} == $ring;
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
@@ -660,7 +644,7 @@ my @gpu_intervals;
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
die if $db{$key}->{'start'} > $db{$key}->{'end'};
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 18+ messages in thread
* ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
@ 2017-12-19 11:39 ` Patchwork
2017-12-19 13:23 ` ✓ Fi.CI.IGT: " Patchwork
` (7 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-19 11:39 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing
URL : https://patchwork.freedesktop.org/series/35569/
State : success
== Summary ==
IGT patchset tested on top of latest successful build
d3d04e5f74f05850f595c729fb0c00eb7dd9c688 scripts/trace.pl: Fix ring ignore
with latest DRM-Tip kernel build CI_DRM_3545
3e151c0e41e6 drm-tip: 2017y-12m-18d-22h-45m-04s UTC integration manifest
No testlist changes.
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-a:
dmesg-warn -> PASS (fi-kbl-r) fdo#104172
Test kms_psr_sink_crc:
Subgroup psr_basic:
dmesg-warn -> PASS (fi-skl-6700hq) fdo#101144
fdo#104172 https://bugs.freedesktop.org/show_bug.cgi?id=104172
fdo#101144 https://bugs.freedesktop.org/show_bug.cgi?id=101144
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:434s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:444s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:391s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:495s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:278s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:498s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:495s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:487s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:466s
fi-elk-e7500 total:224 pass:163 dwarn:15 dfail:0 fail:0 skip:45
fi-gdg-551 total:288 pass:178 dwarn:1 dfail:0 fail:1 skip:108 time:262s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:538s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:406s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:417s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:389s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:469s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:429s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:480s
fi-kbl-7560u total:288 pass:268 dwarn:1 dfail:0 fail:0 skip:19 time:519s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:471s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:523s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:585s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:445s
fi-skl-6600u total:288 pass:260 dwarn:1 dfail:0 fail:0 skip:27 time:534s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:564s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:510s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:509s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:446s
fi-snb-2520m total:245 pass:211 dwarn:0 dfail:0 fail:0 skip:33
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:412s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:591s
fi-glk-dsi total:288 pass:175 dwarn:1 dfail:4 fail:0 skip:108 time:333s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_701/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* ✓ Fi.CI.IGT: success for scripts/trace.pl: Optimize event parsing and processing
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2017-12-19 13:23 ` Patchwork
2017-12-19 15:02 ` [PATCH i-g-t] " Chris Wilson
` (6 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-19 13:23 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing
URL : https://patchwork.freedesktop.org/series/35569/
State : success
== Summary ==
Test kms_cursor_crc:
Subgroup cursor-256x85-sliding:
skip -> PASS (shard-hsw)
Test kms_frontbuffer_tracking:
Subgroup fbc-1p-offscren-pri-shrfb-draw-render:
fail -> PASS (shard-snb) fdo#101623 +2
Test kms_cursor_legacy:
Subgroup basic-busy-flip-before-cursor-atomic:
skip -> PASS (shard-hsw)
Test gem_tiled_swapping:
Subgroup non-threaded:
pass -> INCOMPLETE (shard-snb) fdo#104218
Test pm_rpm:
Subgroup universal-planes:
skip -> PASS (shard-hsw)
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218
shard-hsw total:2712 pass:1537 dwarn:1 dfail:0 fail:10 skip:1164 time:9487s
shard-snb total:2691 pass:1299 dwarn:1 dfail:0 fail:12 skip:1378 time:7821s
Blacklisted hosts:
shard-apl total:2712 pass:1688 dwarn:1 dfail:0 fail:22 skip:1001 time:13732s
shard-kbl total:2691 pass:1794 dwarn:1 dfail:0 fail:23 skip:872 time:10773s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_701/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-12-19 13:23 ` ✓ Fi.CI.IGT: " Patchwork
@ 2017-12-19 15:02 ` Chris Wilson
2017-12-19 16:23 ` John Harrison
` (5 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Chris Wilson @ 2017-12-19 15:02 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
Quoting Tvrtko Ursulin (2017-12-19 11:15:39)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
>
> 1. Do less string processing on tracepoints names and push more of the
> check into the if-ladder.
>
> 2. Pull out common db key and ctx processing and cache common values in
> local vars.
>
> 3. Key value pair parsing is faster with a regexp.
>
> 4. Avoid sorting the db hash multiple times if possible.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
> scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
> 1 file changed, 49 insertions(+), 65 deletions(-)
> ---
> John please check if this helps with your monster traces before we decide
> if it is worth putting this in. Well, there isn't much of a downside
> except touching something which works and risking breaking it.
Could you run the same trace and do something like
time trace1 < input > output1.html
time trace2 < input > output2.html
cmp output1.html output2.html
?
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
` (2 preceding siblings ...)
2017-12-19 15:02 ` [PATCH i-g-t] " Chris Wilson
@ 2017-12-19 16:23 ` John Harrison
2017-12-20 9:41 ` [PATCH i-g-t v2] " Tvrtko Ursulin
2017-12-20 9:54 ` [PATCH i-g-t] " Tvrtko Ursulin
2017-12-20 10:33 ` ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2) Patchwork
` (4 subsequent siblings)
8 siblings, 2 replies; 18+ messages in thread
From: John Harrison @ 2017-12-19 16:23 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
On 12/19/2017 3:15 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
>
> 1. Do less string processing on tracepoints names and push more of the
> check into the if-ladder.
>
> 2. Pull out common db key and ctx processing and cache common values in
> local vars.
>
> 3. Key value pair parsing is faster with a regexp.
>
> 4. Avoid sorting the db hash multiple times if possible.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
> scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
> 1 file changed, 49 insertions(+), 65 deletions(-)
> ---
> John please check if this helps with your monster traces before we decide
> if it is worth putting this in. Well, there isn't much of a downside
> except touching something which works and risking breaking it.
>
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index 98e4a9843a43..5463e1fd1954 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -356,110 +356,91 @@ my $prev_freq = 0;
> my $prev_freq_ts = 0;
> while (<>) {
> my @fields;
> - my @tmp;
> my $tp_name;
> - my $time;
> my %tp;
> - my $key;
> + my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>
> chomp;
> @fields = split ' ';
>
> + chop $fields[3];
> + $time = int($fields[3] * 1000000.0);
> +
> $tp_name = $fields[4];
> - @tmp = split ':', $tp_name, 2;
> - next unless $tmp[0] eq 'i915';
> - $tp_name = $tmp[1];
> - chop $tp_name;
>
> - chop $fields[3];
> - $time = $fields[3] * 1000000.0;
> splice @fields, 0, 5;
>
> foreach my $f (@fields) {
> - my @kv = split '=|,', $f;
> -
> - $kv[0] = 'global' if $kv[0] eq 'global_seqno';
> + my ($k, $v);
>
> - $tp{$kv[0]} = $kv[1];
> + ($k, $v) = ($f =~ /^(\S+)=(\d+)/);
I recall something about using () selection is slower than using
pre-defined values. E.g.
$f =~ m/=/; ($k, $v) = ($`, $');
Or do you specifically need the filtering of only matching numeric values?
> + next unless defined $k;
> + $k = 'global' if $k eq 'global_seqno';
> + $tp{$k} = $v;
> }
>
> next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>
> - if ($tp_name eq 'i915_gem_request_wait_begin') {
> - my %rw;
> + if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
> + $ring = $tp{'ring'};
> + $ctx = $tp{'ctx'};
> + $seqno = $tp{'seqno'};
> +
> + $orig_ctx = $ctx;
> + $ctx = sanitize_ctx($ctx, $ring);
> + $key = db_key($ring, $ctx, $seqno);
> + }
>
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> + if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
> + my %rw;
>
> next if exists $reqwait{$key};
>
> $rw{'key'} = $key;
> - $rw{'ring'} = $tp{'ring'};
> - $rw{'seqno'} = $tp{'seqno'};
> - $rw{'ctx'} = $tp{'ctx'};
> + $rw{'ring'} = $ring;
> + $rw{'seqno'} = $seqno;
> + $rw{'ctx'} = $ctx;
> $rw{'start'} = $time;
> $reqwait{$key} = \%rw;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_wait_end') {
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> + } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
> next unless exists $reqwait{$key};
>
> $reqwait{$key}->{'end'} = $time;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_add') {
> - my $orig_ctx = $tp{'ctx'};
> -
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> + } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
> if (exists $queue{$key}) {
> $ctxdb{$orig_ctx}++;
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> + $ctx = sanitize_ctx($ctx, $ring);
This bit originally worked on $orig_ctx not $ctx (i.e., before your
previous refactoring patch). Which should it be? Will a double
sanitisation step work as intended? I don't actually hit this line in
any of the runs I've done.
> + $key = db_key($ring, $ctx, $seqno);
> }
>
> $queue{$key} = $time;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_submit') {
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> + } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
> die if exists $submit{$key};
> die unless exists $queue{$key};
>
> $submit{$key} = $time;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_in') {
> + } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
> my %req;
>
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> die if exists $db{$key};
> die unless exists $queue{$key};
> die unless exists $submit{$key};
>
> $req{'start'} = $time;
> - $req{'ring'} = $tp{'ring'};
> - $req{'seqno'} = $tp{'seqno'};
> - $req{'ctx'} = $tp{'ctx'};
> - $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
> + $req{'ring'} = $ring;
> + $req{'seqno'} = $seqno;
> + $req{'ctx'} = $ctx;
> + $req{'name'} = $ctx . '/' . $seqno;
> $req{'global'} = $tp{'global'};
> $req{'port'} = $tp{'port'};
> $req{'queue'} = $queue{$key};
> $req{'submit-delay'} = $submit{$key} - $queue{$key};
> $req{'execute-delay'} = $req{'start'} - $submit{$key};
> - $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
> - $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
> + $rings{$ring} = $gid++ unless exists $rings{$ring};
> + $ringmap{$rings{$ring}} = $ring;
> $db{$key} = \%req;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_out') {
> - my $gkey = global_key($tp{'ring'}, $tp{'global'});
> -
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> + } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
> + my $gkey = global_key($ring, $tp{'global'});
>
> die unless exists $db{$key};
> die unless exists $db{$key}->{'start'};
> @@ -476,15 +457,12 @@ while (<>) {
> }
> $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
> $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
> - next;
> - } elsif ($tp_name eq 'intel_engine_notify') {
> - $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
> - next;
> - } elsif ($tp_name eq 'intel_gpu_freq_change') {
> + } elsif ($tp_name eq 'intel_engine_notify:') {
> + $notify{global_key($ring, $seqno)} = $time;
> + } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
> push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
> $prev_freq_ts = $time;
> $prev_freq = $tp{'new_freq'};
> - next;
> }
> }
>
> @@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
> my $last_ts = 0;
> my $first_ts;
>
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
> +my $re_sort = 0;
> +
> +foreach my $key (@sorted_keys) {
> my $ring = $db{$key}->{'ring'};
> my $end = $db{$key}->{'end'};
>
> @@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>
> # 20us tolerance
> if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
> + $re_sort = 1;
> $db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
> $db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
> $db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
> @@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> $ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
> }
>
> +@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
> +
> foreach my $ring (keys %batch_avg) {
> $batch_avg{$ring} /= $batch_count{$ring};
> $batch_total_avg{$ring} /= $batch_count{$ring};
> @@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
> my (@s_, @e_);
>
> # Extract all GPU busy intervals and sort them.
> - foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> + foreach my $key (@sorted_keys) {
> next unless $db{$key}->{'ring'} == $ring;
> push @s_, $db{$key}->{'start'};
> push @e_, $db{$key}->{'end'};
> @@ -660,7 +644,7 @@ my @gpu_intervals;
> my (@s_, @e_);
>
> # Extract all GPU busy intervals and sort them.
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +foreach my $key (@sorted_keys) {
> push @s_, $db{$key}->{'start'};
> push @e_, $db{$key}->{'end'};
> die if $db{$key}->{'start'} > $db{$key}->{'end'};
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH i-g-t v2] scripts/trace.pl: Optimize event parsing and processing
2017-12-19 16:23 ` John Harrison
@ 2017-12-20 9:41 ` Tvrtko Ursulin
2017-12-21 9:37 ` [PATCH i-g-t v3] " Tvrtko Ursulin
2017-12-20 9:54 ` [PATCH i-g-t] " Tvrtko Ursulin
1 sibling, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-20 9:41 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
A couple of small optimizations which altogether bring around 30%
improvement in my testing.
1. Do less string processing on tracepoints names and push more of the
check into the if-ladder.
2. Pull out common db key and ctx processing and cache common values in
local vars.
3. Key value pair parsing is faster with a regexp.
4. Avoid sorting the db hash multiple times if possible.
v2:
* Use faster key-value splitting method. (John Harrison)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
scripts/trace.pl | 115 ++++++++++++++++++++++++-------------------------------
1 file changed, 50 insertions(+), 65 deletions(-)
diff --git a/scripts/trace.pl b/scripts/trace.pl
index ade0a9154bd7..2628a41abfc4 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,92 @@ my $prev_freq = 0;
my $prev_freq_ts = 0;
while (<>) {
my @fields;
- my @tmp;
my $tp_name;
- my $time;
my %tp;
- my $key;
+ my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
chomp;
@fields = split ' ';
+ chop $fields[3];
+ $time = int($fields[3] * 1000000.0);
+
$tp_name = $fields[4];
- @tmp = split ':', $tp_name, 2;
- next unless $tmp[0] eq 'i915';
- $tp_name = $tmp[1];
- chop $tp_name;
- chop $fields[3];
- $time = $fields[3] * 1000000.0;
splice @fields, 0, 5;
foreach my $f (@fields) {
- my @kv = split '=|,', $f;
-
- $kv[0] = 'global' if $kv[0] eq 'global_seqno';
+ my ($k, $v);
- $tp{$kv[0]} = $kv[1];
+ next unless $f =~ m/=/;
+ ($k, $v) = ($`, $');
+ $k = 'global' if $k eq 'global_seqno';
+ chop $v if substr($v, -1, 1) eq ',';
+ $tp{$k} = $v;
}
next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
- if ($tp_name eq 'i915_gem_request_wait_begin') {
- my %rw;
+ if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
+ $ring = $tp{'ring'};
+ $ctx = $tp{'ctx'};
+ $seqno = $tp{'seqno'};
+
+ $orig_ctx = $ctx;
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+ my %rw;
next if exists $reqwait{$key};
$rw{'key'} = $key;
- $rw{'ring'} = $tp{'ring'};
- $rw{'seqno'} = $tp{'seqno'};
- $rw{'ctx'} = $tp{'ctx'};
+ $rw{'ring'} = $ring;
+ $rw{'seqno'} = $seqno;
+ $rw{'ctx'} = $ctx;
$rw{'start'} = $time;
$reqwait{$key} = \%rw;
- next;
- } elsif ($tp_name eq 'i915_gem_request_wait_end') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
next unless exists $reqwait{$key};
$reqwait{$key}->{'end'} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_add') {
- my $orig_ctx = $tp{'ctx'};
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
if (exists $queue{$key}) {
$ctxdb{$orig_ctx}++;
- $tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ $ctx = sanitize_ctx($orig_ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
}
$queue{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_submit') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
die if exists $submit{$key};
die unless exists $queue{$key};
$submit{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_in') {
+ } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
my %req;
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
die if exists $db{$key};
die unless exists $queue{$key};
die unless exists $submit{$key};
$req{'start'} = $time;
- $req{'ring'} = $tp{'ring'};
- $req{'seqno'} = $tp{'seqno'};
- $req{'ctx'} = $tp{'ctx'};
- $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+ $req{'ring'} = $ring;
+ $req{'seqno'} = $seqno;
+ $req{'ctx'} = $ctx;
+ $req{'name'} = $ctx . '/' . $seqno;
$req{'global'} = $tp{'global'};
$req{'port'} = $tp{'port'};
$req{'queue'} = $queue{$key};
$req{'submit-delay'} = $submit{$key} - $queue{$key};
$req{'execute-delay'} = $req{'start'} - $submit{$key};
- $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
- $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+ $rings{$ring} = $gid++ unless exists $rings{$ring};
+ $ringmap{$rings{$ring}} = $ring;
$db{$key} = \%req;
- next;
- } elsif ($tp_name eq 'i915_gem_request_out') {
- my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+ my $gkey = global_key($ring, $tp{'global'});
die unless exists $db{$key};
die unless exists $db{$key}->{'start'};
@@ -476,15 +458,12 @@ while (<>) {
}
$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
- next;
- } elsif ($tp_name eq 'intel_engine_notify') {
- $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
- next;
- } elsif ($tp_name eq 'intel_gpu_freq_change') {
+ } elsif ($tp_name eq 'intel_engine_notify:') {
+ $notify{global_key($ring, $seqno)} = $time;
+ } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
$prev_freq_ts = $time;
$prev_freq = $tp{'new_freq'};
- next;
}
}
@@ -557,7 +536,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
my $last_ts = 0;
my $first_ts;
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
my $ring = $db{$key}->{'ring'};
my $end = $db{$key}->{'end'};
@@ -585,6 +567,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
# 20us tolerance
if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+ $re_sort = 1;
$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +584,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
}
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
foreach my $ring (keys %batch_avg) {
$batch_avg{$ring} /= $batch_count{$ring};
$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +601,7 @@ foreach my $gid (sort keys %rings) {
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
- foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ foreach my $key (@sorted_keys) {
next unless $db{$key}->{'ring'} == $ring;
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
@@ -660,7 +645,7 @@ my @gpu_intervals;
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
die if $db{$key}->{'start'} > $db{$key}->{'end'};
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
2017-12-19 16:23 ` John Harrison
2017-12-20 9:41 ` [PATCH i-g-t v2] " Tvrtko Ursulin
@ 2017-12-20 9:54 ` Tvrtko Ursulin
2017-12-20 23:50 ` John Harrison
1 sibling, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-20 9:54 UTC (permalink / raw)
To: John Harrison, Tvrtko Ursulin, Intel-gfx
On 19/12/2017 16:23, John Harrison wrote:
> On 12/19/2017 3:15 AM, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> A couple of small optimizations which altogether bring around 30%
>> improvement in my testing.
>>
>> 1. Do less string processing on tracepoints names and push more of the
>> check into the if-ladder.
>>
>> 2. Pull out common db key and ctx processing and cache common values in
>> local vars.
>>
>> 3. Key value pair parsing is faster with a regexp.
>>
>> 4. Avoid sorting the db hash multiple times if possible.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: John Harrison <John.C.Harrison@intel.com>
>> ---
>> scripts/trace.pl | 114
>> ++++++++++++++++++++++++-------------------------------
>> 1 file changed, 49 insertions(+), 65 deletions(-)
>> ---
>> John please check if this helps with your monster traces before we decide
>> if it is worth putting this in. Well, there isn't much of a downside
>> except touching something which works and risking breaking it.
>>
>> diff --git a/scripts/trace.pl b/scripts/trace.pl
>> index 98e4a9843a43..5463e1fd1954 100755
>> --- a/scripts/trace.pl
>> +++ b/scripts/trace.pl
>> @@ -356,110 +356,91 @@ my $prev_freq = 0;
>> my $prev_freq_ts = 0;
>> while (<>) {
>> my @fields;
>> - my @tmp;
>> my $tp_name;
>> - my $time;
>> my %tp;
>> - my $key;
>> + my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>> chomp;
>> @fields = split ' ';
>> + chop $fields[3];
>> + $time = int($fields[3] * 1000000.0);
>> +
>> $tp_name = $fields[4];
>> - @tmp = split ':', $tp_name, 2;
>> - next unless $tmp[0] eq 'i915';
>> - $tp_name = $tmp[1];
>> - chop $tp_name;
>> - chop $fields[3];
>> - $time = $fields[3] * 1000000.0;
>> splice @fields, 0, 5;
>> foreach my $f (@fields) {
>> - my @kv = split '=|,', $f;
>> -
>> - $kv[0] = 'global' if $kv[0] eq 'global_seqno';
>> + my ($k, $v);
>> - $tp{$kv[0]} = $kv[1];
>> + ($k, $v) = ($f =~ /^(\S+)=(\d+)/);
> I recall something about using () selection is slower than using
> pre-defined values. E.g.
> $f =~ m/=/; ($k, $v) = ($`, $');
>
> Or do you specifically need the filtering of only matching numeric values?
It doesn't matter at the moment, all the script touches just happens to
be numeric so either way works. I did not know of this trick and it is
indeed marginally faster. So I've sent an update with it included.
What was the effect of all this on your big traces? I am only testing
with a smaller one which goes from ~3.3s to ~2.2s. On a larger trace it
might be non-linear gains due to double sort avoidance, unless there
will be some other effects to cancel that out.
>> + next unless defined $k;
>> + $k = 'global' if $k eq 'global_seqno';
>> + $tp{$k} = $v;
>> }
>> next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>> - if ($tp_name eq 'i915_gem_request_wait_begin') {
>> - my %rw;
>> + if (exists $tp{'ring'} and exists $tp{'ctx'} and exists
>> $tp{'seqno'}) {
>> + $ring = $tp{'ring'};
>> + $ctx = $tp{'ctx'};
>> + $seqno = $tp{'seqno'};
>> +
>> + $orig_ctx = $ctx;
>> + $ctx = sanitize_ctx($ctx, $ring);
>> + $key = db_key($ring, $ctx, $seqno);
>> + }
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> + if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
>> + my %rw;
>> next if exists $reqwait{$key};
>> $rw{'key'} = $key;
>> - $rw{'ring'} = $tp{'ring'};
>> - $rw{'seqno'} = $tp{'seqno'};
>> - $rw{'ctx'} = $tp{'ctx'};
>> + $rw{'ring'} = $ring;
>> + $rw{'seqno'} = $seqno;
>> + $rw{'ctx'} = $ctx;
>> $rw{'start'} = $time;
>> $reqwait{$key} = \%rw;
>> - next;
>> - } elsif ($tp_name eq 'i915_gem_request_wait_end') {
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> + } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
>> next unless exists $reqwait{$key};
>> $reqwait{$key}->{'end'} = $time;
>> - next;
>> - } elsif ($tp_name eq 'i915_gem_request_add') {
>> - my $orig_ctx = $tp{'ctx'};
>> -
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> + } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
>> if (exists $queue{$key}) {
>> $ctxdb{$orig_ctx}++;
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> + $ctx = sanitize_ctx($ctx, $ring);
> This bit originally worked on $orig_ctx not $ctx (i.e., before your
> previous refactoring patch). Which should it be? Will a double
> sanitisation step work as intended? I don't actually hit this line in
> any of the runs I've done.
Yeah well spotted, copy and paste error and fix sent. R-b would be cool
on it.
This only triggers if you trace something which creates and destroys
contexts due context id recycling. Here it auto-detects an impossible
condition and deduces it must be a new context, so it assigns a new
suffix to it.
Regards,
Tvrtko
>
>> + $key = db_key($ring, $ctx, $seqno);
>> }
>> $queue{$key} = $time;
>> - next;
>> - } elsif ($tp_name eq 'i915_gem_request_submit') {
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> + } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
>> die if exists $submit{$key};
>> die unless exists $queue{$key};
>> $submit{$key} = $time;
>> - next;
>> - } elsif ($tp_name eq 'i915_gem_request_in') {
>> + } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
>> my %req;
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> -
>> die if exists $db{$key};
>> die unless exists $queue{$key};
>> die unless exists $submit{$key};
>> $req{'start'} = $time;
>> - $req{'ring'} = $tp{'ring'};
>> - $req{'seqno'} = $tp{'seqno'};
>> - $req{'ctx'} = $tp{'ctx'};
>> - $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
>> + $req{'ring'} = $ring;
>> + $req{'seqno'} = $seqno;
>> + $req{'ctx'} = $ctx;
>> + $req{'name'} = $ctx . '/' . $seqno;
>> $req{'global'} = $tp{'global'};
>> $req{'port'} = $tp{'port'};
>> $req{'queue'} = $queue{$key};
>> $req{'submit-delay'} = $submit{$key} - $queue{$key};
>> $req{'execute-delay'} = $req{'start'} - $submit{$key};
>> - $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
>> - $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
>> + $rings{$ring} = $gid++ unless exists $rings{$ring};
>> + $ringmap{$rings{$ring}} = $ring;
>> $db{$key} = \%req;
>> - next;
>> - } elsif ($tp_name eq 'i915_gem_request_out') {
>> - my $gkey = global_key($tp{'ring'}, $tp{'global'});
>> -
>> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
>> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
>> + } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
>> + my $gkey = global_key($ring, $tp{'global'});
>> die unless exists $db{$key};
>> die unless exists $db{$key}->{'start'};
>> @@ -476,15 +457,12 @@ while (<>) {
>> }
>> $db{$key}->{'duration'} = $db{$key}->{'notify'} -
>> $db{$key}->{'start'};
>> $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} -
>> $db{$key}->{'notify'};
>> - next;
>> - } elsif ($tp_name eq 'intel_engine_notify') {
>> - $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
>> - next;
>> - } elsif ($tp_name eq 'intel_gpu_freq_change') {
>> + } elsif ($tp_name eq 'intel_engine_notify:') {
>> + $notify{global_key($ring, $seqno)} = $time;
>> + } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
>> push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
>> $prev_freq_ts = $time;
>> $prev_freq = $tp{'new_freq'};
>> - next;
>> }
>> }
>> @@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
>> my $last_ts = 0;
>> my $first_ts;
>> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}}
>> keys %db) {
>> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}}
>> keys %db;
>> +my $re_sort = 0;
>> +
>> +foreach my $key (@sorted_keys) {
>> my $ring = $db{$key}->{'ring'};
>> my $end = $db{$key}->{'end'};
>> @@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=>
>> $db{$b}->{'start'}} keys %db) {
>> # 20us tolerance
>> if (exists $db{$next_key} and $db{$next_key}->{'start'} <
>> $start + 20) {
>> + $re_sort = 1;
>> $db{$next_key}->{'start'} = $start +
>> $db{$key}->{'duration'};
>> $db{$next_key}->{'start'} = $db{$next_key}->{'end'} if
>> $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
>> $db{$next_key}->{'duration'} =
>> $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
>> @@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=>
>> $db{$b}->{'start'}} keys %db) {
>> $ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
>> }
>> +@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys
>> %db if $re_sort;
>> +
>> foreach my $ring (keys %batch_avg) {
>> $batch_avg{$ring} /= $batch_count{$ring};
>> $batch_total_avg{$ring} /= $batch_count{$ring};
>> @@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
>> my (@s_, @e_);
>> # Extract all GPU busy intervals and sort them.
>> - foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}}
>> keys %db) {
>> + foreach my $key (@sorted_keys) {
>> next unless $db{$key}->{'ring'} == $ring;
>> push @s_, $db{$key}->{'start'};
>> push @e_, $db{$key}->{'end'};
>> @@ -660,7 +644,7 @@ my @gpu_intervals;
>> my (@s_, @e_);
>> # Extract all GPU busy intervals and sort them.
>> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}}
>> keys %db) {
>> +foreach my $key (@sorted_keys) {
>> push @s_, $db{$key}->{'start'};
>> push @e_, $db{$key}->{'end'};
>> die if $db{$key}->{'start'} > $db{$key}->{'end'};
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2)
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
` (3 preceding siblings ...)
2017-12-19 16:23 ` John Harrison
@ 2017-12-20 10:33 ` Patchwork
2017-12-21 11:11 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3) Patchwork
` (3 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-20 10:33 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing (rev2)
URL : https://patchwork.freedesktop.org/series/35569/
State : failure
== Summary ==
Applying: scripts/trace.pl: Optimize event parsing and processing
Using index info to reconstruct a base tree...
M scripts/trace.pl
Falling back to patching base and 3-way merge...
Auto-merging scripts/trace.pl
CONFLICT (content): Merge conflict in scripts/trace.pl
Patch failed at 0001 scripts/trace.pl: Optimize event parsing and processing
The copy of the patch that failed is found in: .git/rebase-apply/patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
2017-12-20 9:54 ` [PATCH i-g-t] " Tvrtko Ursulin
@ 2017-12-20 23:50 ` John Harrison
2017-12-21 9:34 ` Tvrtko Ursulin
0 siblings, 1 reply; 18+ messages in thread
From: John Harrison @ 2017-12-20 23:50 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx
On 12/20/2017 1:54 AM, Tvrtko Ursulin wrote:
> What was the effect of all this on your big traces? I am only testing
> with a smaller one which goes from ~3.3s to ~2.2s. On a larger trace
> it might be non-linear gains due to double sort avoidance, unless
> there will be some other effects to cancel that out.
>
So with a trace of a shortened gem_exec_nop/basic_sequential, the 'perf
script' output is 439MB and the original trace.pl before any of the
changes took ~180s. After the 'auto-detect field order' patch, it went
up to ~201s. With the optimisation patch it is down to ~129s.
However, I am also seeing differences in the HTML output since the
optimisation patch. The differences aren't massive, just slight
variations in the times. The structure is all the same, its just that
the accounting and/or time stamps are out.
For example:
{id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32%
busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18%
wait<br><br>200931 batches<br>331.28us avg batch<br>331.38us avg engine
batch<br></small>'},
vs
{id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32%
busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18%
wait<br><br>200931 batches<br>338.56us avg batch<br>338.56us avg engine
batch<br></small>'},
Or:
{id: 58, key: -210383407, type: 'range', group: 4, subgroup: 2,
subgroupOrder: 3, content: '428/3 <small>0</small>
<small><i>???</i></small> <small><i>++</i></small> <br>142us
<small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end:
'2017-01-05 21:27:45.353110', style: 'color: white; background-color:
red;'},
vs
{id: 58, key: -210383407, type: 'range', group: 4, subgroup: 2,
subgroupOrder: 3, content: '428/3 <small>0</small>
<small><i>???</i></small> <small><i>++</i></small> <br>159us
<small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end:
'2017-01-05 21:27:45.353127', style: 'color: white; background-color:
red;'},
I can send you the full output if it is useful and the source logs too.
The HTML output is about 840KB but as noted, the perf logs are hundreds
of MBs.
John.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
2017-12-20 23:50 ` John Harrison
@ 2017-12-21 9:34 ` Tvrtko Ursulin
2017-12-21 22:44 ` John Harrison
0 siblings, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-21 9:34 UTC (permalink / raw)
To: John Harrison, Tvrtko Ursulin, Intel-gfx
On 20/12/2017 23:50, John Harrison wrote:
> On 12/20/2017 1:54 AM, Tvrtko Ursulin wrote:
>> What was the effect of all this on your big traces? I am only testing
>> with a smaller one which goes from ~3.3s to ~2.2s. On a larger trace
>> it might be non-linear gains due to double sort avoidance, unless
>> there will be some other effects to cancel that out.
>>
>
> So with a trace of a shortened gem_exec_nop/basic_sequential, the 'perf
> script' output is 439MB and the original trace.pl before any of the
> changes took ~180s. After the 'auto-detect field order' patch, it went
> up to ~201s. With the optimisation patch it is down to ~129s.
>
> However, I am also seeing differences in the HTML output since the
> optimisation patch. The differences aren't massive, just slight
> variations in the times. The structure is all the same, its just that
> the accounting and/or time stamps are out.
>
> For example:
> {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32%
> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18%
> wait<br><br>200931 batches<br>331.28us avg batch<br>331.38us avg engine
> batch<br></small>'},
> vs
> {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32%
> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18%
> wait<br><br>200931 batches<br>338.56us avg batch<br>338.56us avg engine
> batch<br></small>'},
>
> Or:
> {id: 58, key: -210383407, type: 'range', group: 4, subgroup: 2,
> subgroupOrder: 3, content: '428/3 <small>0</small>
> <small><i>???</i></small> <small><i>++</i></small> <br>142us
> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end:
> '2017-01-05 21:27:45.353110', style: 'color: white; background-color:
> red;'},
> vs
> {id: 58, key: -210383407, type: 'range', group: 4, subgroup: 2,
> subgroupOrder: 3, content: '428/3 <small>0</small>
> <small><i>???</i></small> <small><i>++</i></small> <br>159us
> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end:
> '2017-01-05 21:27:45.353127', style: 'color: white; background-color:
> red;'},
>
> I can send you the full output if it is useful and the source logs too.
> The HTML output is about 840KB but as noted, the perf logs are hundreds
> of MBs.
I was able to reproduce it. I think it's down to floating point
mishandling. Neither the original nor the optimized version were correct
and both are accumulating error. I'll send an updated patch shortly.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH i-g-t v3] scripts/trace.pl: Optimize event parsing and processing
2017-12-20 9:41 ` [PATCH i-g-t v2] " Tvrtko Ursulin
@ 2017-12-21 9:37 ` Tvrtko Ursulin
0 siblings, 0 replies; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-21 9:37 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
A couple of small optimizations which altogether bring around 30%
improvement in my testing.
1. Do less string processing on tracepoints names and push more of the
check into the if-ladder.
2. Pull out common db key and ctx processing and cache common values in
local vars.
3. Key value pair parsing is faster with a regexp.
4. Avoid sorting the db hash multiple times if possible.
v2:
* Use faster key-value splitting method. (John Harrison)
v3:
* Fix floating-point to int time conversion.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
scripts/trace.pl | 115 ++++++++++++++++++++++++-------------------------------
1 file changed, 50 insertions(+), 65 deletions(-)
diff --git a/scripts/trace.pl b/scripts/trace.pl
index ade0a9154bd7..539024a4dbbd 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,92 @@ my $prev_freq = 0;
my $prev_freq_ts = 0;
while (<>) {
my @fields;
- my @tmp;
my $tp_name;
- my $time;
my %tp;
- my $key;
+ my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
chomp;
@fields = split ' ';
+ chop $fields[3];
+ $time = int($fields[3] * 1000000.0 + 0.5);
+
$tp_name = $fields[4];
- @tmp = split ':', $tp_name, 2;
- next unless $tmp[0] eq 'i915';
- $tp_name = $tmp[1];
- chop $tp_name;
- chop $fields[3];
- $time = $fields[3] * 1000000.0;
splice @fields, 0, 5;
foreach my $f (@fields) {
- my @kv = split '=|,', $f;
-
- $kv[0] = 'global' if $kv[0] eq 'global_seqno';
+ my ($k, $v);
- $tp{$kv[0]} = $kv[1];
+ next unless $f =~ m/=/;
+ ($k, $v) = ($`, $');
+ $k = 'global' if $k eq 'global_seqno';
+ chop $v if substr($v, -1, 1) eq ',';
+ $tp{$k} = $v;
}
next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
- if ($tp_name eq 'i915_gem_request_wait_begin') {
- my %rw;
+ if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
+ $ring = $tp{'ring'};
+ $ctx = $tp{'ctx'};
+ $seqno = $tp{'seqno'};
+
+ $orig_ctx = $ctx;
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+ my %rw;
next if exists $reqwait{$key};
$rw{'key'} = $key;
- $rw{'ring'} = $tp{'ring'};
- $rw{'seqno'} = $tp{'seqno'};
- $rw{'ctx'} = $tp{'ctx'};
+ $rw{'ring'} = $ring;
+ $rw{'seqno'} = $seqno;
+ $rw{'ctx'} = $ctx;
$rw{'start'} = $time;
$reqwait{$key} = \%rw;
- next;
- } elsif ($tp_name eq 'i915_gem_request_wait_end') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
next unless exists $reqwait{$key};
$reqwait{$key}->{'end'} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_add') {
- my $orig_ctx = $tp{'ctx'};
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
if (exists $queue{$key}) {
$ctxdb{$orig_ctx}++;
- $tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ $ctx = sanitize_ctx($orig_ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
}
$queue{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_submit') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
die if exists $submit{$key};
die unless exists $queue{$key};
$submit{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_in') {
+ } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
my %req;
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
die if exists $db{$key};
die unless exists $queue{$key};
die unless exists $submit{$key};
$req{'start'} = $time;
- $req{'ring'} = $tp{'ring'};
- $req{'seqno'} = $tp{'seqno'};
- $req{'ctx'} = $tp{'ctx'};
- $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+ $req{'ring'} = $ring;
+ $req{'seqno'} = $seqno;
+ $req{'ctx'} = $ctx;
+ $req{'name'} = $ctx . '/' . $seqno;
$req{'global'} = $tp{'global'};
$req{'port'} = $tp{'port'};
$req{'queue'} = $queue{$key};
$req{'submit-delay'} = $submit{$key} - $queue{$key};
$req{'execute-delay'} = $req{'start'} - $submit{$key};
- $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
- $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+ $rings{$ring} = $gid++ unless exists $rings{$ring};
+ $ringmap{$rings{$ring}} = $ring;
$db{$key} = \%req;
- next;
- } elsif ($tp_name eq 'i915_gem_request_out') {
- my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+ my $gkey = global_key($ring, $tp{'global'});
die unless exists $db{$key};
die unless exists $db{$key}->{'start'};
@@ -476,15 +458,12 @@ while (<>) {
}
$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
- next;
- } elsif ($tp_name eq 'intel_engine_notify') {
- $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
- next;
- } elsif ($tp_name eq 'intel_gpu_freq_change') {
+ } elsif ($tp_name eq 'intel_engine_notify:') {
+ $notify{global_key($ring, $seqno)} = $time;
+ } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
$prev_freq_ts = $time;
$prev_freq = $tp{'new_freq'};
- next;
}
}
@@ -557,7 +536,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
my $last_ts = 0;
my $first_ts;
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
my $ring = $db{$key}->{'ring'};
my $end = $db{$key}->{'end'};
@@ -585,6 +567,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
# 20us tolerance
if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+ $re_sort = 1;
$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +584,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
}
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
foreach my $ring (keys %batch_avg) {
$batch_avg{$ring} /= $batch_count{$ring};
$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +601,7 @@ foreach my $gid (sort keys %rings) {
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
- foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ foreach my $key (@sorted_keys) {
next unless $db{$key}->{'ring'} == $ring;
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
@@ -660,7 +645,7 @@ my @gpu_intervals;
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
die if $db{$key}->{'start'} > $db{$key}->{'end'};
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 18+ messages in thread
* ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3)
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
` (4 preceding siblings ...)
2017-12-20 10:33 ` ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2) Patchwork
@ 2017-12-21 11:11 ` Patchwork
2017-12-21 13:27 ` ✗ Fi.CI.IGT: warning " Patchwork
` (2 subsequent siblings)
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-21 11:11 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing (rev3)
URL : https://patchwork.freedesktop.org/series/35569/
State : success
== Summary ==
IGT patchset tested on top of latest successful build
beb26d89ff5c5621c1e6b6ac2a45439507af86b7 meson: Install .testlist files and README from tests/intel-ci
with latest DRM-Tip kernel build CI_DRM_3563
c21bf92fbe84 drm-tip: 2017y-12m-21d-09h-56m-40s UTC integration manifest
No testlist changes.
Test debugfs_test:
Subgroup read_all_entries:
pass -> DMESG-FAIL (fi-elk-e7500) fdo#103989 +1
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
incomplete -> PASS (fi-snb-2520m) fdo#103713
Test kms_psr_sink_crc:
Subgroup psr_basic:
dmesg-warn -> PASS (fi-skl-6700hq) fdo#101144
fdo#103989 https://bugs.freedesktop.org/show_bug.cgi?id=103989
fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
fdo#101144 https://bugs.freedesktop.org/show_bug.cgi?id=101144
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:442s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:441s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:383s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:509s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:277s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:489s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:497s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:484s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:472s
fi-elk-e7500 total:224 pass:163 dwarn:14 dfail:1 fail:0 skip:45
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:263s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:533s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:406s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:426s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:420s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:467s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:426s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:475s
fi-kbl-7560u total:288 pass:268 dwarn:1 dfail:0 fail:0 skip:19 time:522s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:469s
fi-kbl-r total:288 pass:260 dwarn:1 dfail:0 fail:0 skip:27 time:523s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:581s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:455s
fi-skl-6600u total:288 pass:260 dwarn:1 dfail:0 fail:0 skip:27 time:531s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:558s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:510s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:491s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:447s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:556s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:426s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:589s
fi-cnl-y total:247 pass:222 dwarn:0 dfail:0 fail:0 skip:24
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_718/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* ✗ Fi.CI.IGT: warning for scripts/trace.pl: Optimize event parsing and processing (rev3)
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
` (5 preceding siblings ...)
2017-12-21 11:11 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3) Patchwork
@ 2017-12-21 13:27 ` Patchwork
2017-12-22 9:34 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5) Patchwork
2017-12-22 10:21 ` ✓ Fi.CI.IGT: " Patchwork
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-21 13:27 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing (rev3)
URL : https://patchwork.freedesktop.org/series/35569/
State : warning
== Summary ==
Test gem_tiled_swapping:
Subgroup non-threaded:
incomplete -> PASS (shard-snb) fdo#104218
Test kms_frontbuffer_tracking:
Subgroup fbc-1p-offscren-pri-shrfb-draw-blt:
pass -> FAIL (shard-snb) fdo#101623 +1
Test kms_cursor_legacy:
Subgroup cursor-vs-flip-atomic-transitions-varying-size:
skip -> PASS (shard-hsw) fdo#103172
Test kms_setmode:
Subgroup basic:
fail -> PASS (shard-hsw) fdo#99912
Test kms_chv_cursor_fail:
Subgroup pipe-a-64x64-top-edge:
pass -> SKIP (shard-hsw)
Test kms_flip:
Subgroup vblank-vs-suspend:
incomplete -> PASS (shard-snb) fdo#103375
Test gem_eio:
Subgroup in-flight-contexts:
dmesg-warn -> PASS (shard-snb) fdo#104058
fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#103172 https://bugs.freedesktop.org/show_bug.cgi?id=103172
fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#104058 https://bugs.freedesktop.org/show_bug.cgi?id=104058
shard-hsw total:2712 pass:1537 dwarn:1 dfail:0 fail:9 skip:1165 time:9402s
shard-snb total:2712 pass:1309 dwarn:1 dfail:0 fail:11 skip:1391 time:8130s
Blacklisted hosts:
shard-apl total:2712 pass:1687 dwarn:1 dfail:0 fail:22 skip:1001 time:13954s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_718/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing
2017-12-21 9:34 ` Tvrtko Ursulin
@ 2017-12-21 22:44 ` John Harrison
2017-12-22 9:16 ` [PATCH i-g-t v4] " Tvrtko Ursulin
0 siblings, 1 reply; 18+ messages in thread
From: John Harrison @ 2017-12-21 22:44 UTC (permalink / raw)
To: Tvrtko Ursulin, Tvrtko Ursulin, Intel-gfx
On 12/21/2017 1:34 AM, Tvrtko Ursulin wrote:
>
> On 20/12/2017 23:50, John Harrison wrote:
>> On 12/20/2017 1:54 AM, Tvrtko Ursulin wrote:
>>> What was the effect of all this on your big traces? I am only
>>> testing with a smaller one which goes from ~3.3s to ~2.2s. On a
>>> larger trace it might be non-linear gains due to double sort
>>> avoidance, unless there will be some other effects to cancel that out.
>>>
>>
>> So with a trace of a shortened gem_exec_nop/basic_sequential, the
>> 'perf script' output is 439MB and the original trace.pl before any of
>> the changes took ~180s. After the 'auto-detect field order' patch, it
>> went up to ~201s. With the optimisation patch it is down to ~129s.
>>
>> However, I am also seeing differences in the HTML output since the
>> optimisation patch. The differences aren't massive, just slight
>> variations in the times. The structure is all the same, its just that
>> the accounting and/or time stamps are out.
>>
>> For example:
>> {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32%
>> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18%
>> wait<br><br>200931 batches<br>331.28us avg batch<br>331.38us avg
>> engine batch<br></small>'},
>> vs
>> {id: 1, content: 'Ring0<br><small><br>79.48% idle<br><br>34.32%
>> busy<br>584.97% runnable<br>2103.60% queued<br><br>16.18%
>> wait<br><br>200931 batches<br>338.56us avg batch<br>338.56us avg
>> engine batch<br></small>'},
>>
>> Or:
>> {id: 58, key: -210383407, type: 'range', group: 4, subgroup: 2,
>> subgroupOrder: 3, content: '428/3 <small>0</small>
>> <small><i>???</i></small> <small><i>++</i></small> <br>142us
>> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end:
>> '2017-01-05 21:27:45.353110', style: 'color: white; background-color:
>> red;'},
>> vs
>> {id: 58, key: -210383407, type: 'range', group: 4, subgroup: 2,
>> subgroupOrder: 3, content: '428/3 <small>0</small>
>> <small><i>???</i></small> <small><i>++</i></small> <br>159us
>> <small>(0us)</small>', start: '2017-01-05 21:27:45.352968', end:
>> '2017-01-05 21:27:45.353127', style: 'color: white; background-color:
>> red;'},
>>
>> I can send you the full output if it is useful and the source logs
>> too. The HTML output is about 840KB but as noted, the perf logs are
>> hundreds of MBs.
>
> I was able to reproduce it. I think it's down to floating point
> mishandling. Neither the original nor the optimized version were
> correct and both are accumulating error. I'll send an updated patch
> shortly.
>
> Regards,
>
> Tvrtko
The new version with +0.5 gives me exactly the same results as the
previous edition without the rounding. If I get rid of the int()
conversion too, I still get the same results.
What does make a difference is if I fix up the broken processing of
notify traces:
diff --git a/scripts/trace.pl b/scripts/trace.pl
index 8dbd497b..20ae3c78 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -382,14 +382,16 @@ while (<>) {
next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
- if (exists $tp{'ring'} and exists $tp{'ctx'} and exists
$tp{'seqno'}) {
+ if (exists $tp{'ring'} and exists $tp{'seqno'}) {
$ring = $tp{'ring'};
- $ctx = $tp{'ctx'};
$seqno = $tp{'seqno'};
- $orig_ctx = $ctx;
- $ctx = sanitize_ctx($ctx, $ring);
- $key = db_key($ring, $ctx, $seqno);
+ if (exists $tp{'ctx'}) {
+ $ctx = $tp{'ctx'};
+ $orig_ctx = $ctx;
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
}
if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
@@ -461,7 +463,7 @@ while (<>) {
}
$db{$key}->{'duration'} = $db{$key}->{'notify'} -
$db{$key}->{'start'};
$db{$key}->{'context-complete-delay'} =
$db{$key}->{'end'} - $db{$key}->{'notify'};
- } elsif ($tp_name eq 'intel_engine_notify:') {
+ } elsif ($tp_name eq 'i915:intel_engine_notify:') {
$notify{global_key($ring, $seqno)} = $time;
} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
push @freqs, [$prev_freq_ts, $time, $prev_freq] if
$prev_freq;
With that, I get exactly the same HTML page as before the optimisation
patch.
John.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH i-g-t v4] scripts/trace.pl: Optimize event parsing and processing
2017-12-21 22:44 ` John Harrison
@ 2017-12-22 9:16 ` Tvrtko Ursulin
2017-12-22 20:38 ` John Harrison
0 siblings, 1 reply; 18+ messages in thread
From: Tvrtko Ursulin @ 2017-12-22 9:16 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
A couple of small optimizations which altogether bring around 30%
improvement in my testing.
1. Do less string processing on tracepoints names and push more of the
check into the if-ladder.
2. Pull out common db key and ctx processing and cache common values in
local vars.
3. Key value pair parsing is faster with a regexp.
4. Avoid sorting the db hash multiple times if possible.
v2:
* Use faster key-value splitting method. (John Harrison)
v3:
* Fix floating-point to int time conversion.
v4:
* Fix refactoring and logic fails. (John Harrison)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
scripts/trace.pl | 117 +++++++++++++++++++++++++------------------------------
1 file changed, 52 insertions(+), 65 deletions(-)
diff --git a/scripts/trace.pl b/scripts/trace.pl
index ade0a9154bd7..cb93900dd620 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -356,110 +356,94 @@ my $prev_freq = 0;
my $prev_freq_ts = 0;
while (<>) {
my @fields;
- my @tmp;
my $tp_name;
- my $time;
my %tp;
- my $key;
+ my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
chomp;
@fields = split ' ';
+ chop $fields[3];
+ $time = int($fields[3] * 1000000.0 + 0.5);
+
$tp_name = $fields[4];
- @tmp = split ':', $tp_name, 2;
- next unless $tmp[0] eq 'i915';
- $tp_name = $tmp[1];
- chop $tp_name;
- chop $fields[3];
- $time = $fields[3] * 1000000.0;
splice @fields, 0, 5;
foreach my $f (@fields) {
- my @kv = split '=|,', $f;
+ my ($k, $v);
- $kv[0] = 'global' if $kv[0] eq 'global_seqno';
-
- $tp{$kv[0]} = $kv[1];
+ next unless $f =~ m/=/;
+ ($k, $v) = ($`, $');
+ $k = 'global' if $k eq 'global_seqno';
+ chop $v if substr($v, -1, 1) eq ',';
+ $tp{$k} = $v;
}
next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
- if ($tp_name eq 'i915_gem_request_wait_begin') {
- my %rw;
+ if (exists $tp{'ring'} and exists $tp{'seqno'}) {
+ $ring = $tp{'ring'};
+ $seqno = $tp{'seqno'};
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ if (exists $tp{'ctx'}) {
+ $ctx = $tp{'ctx'};
+ $orig_ctx = $ctx;
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
+ }
+
+ if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
+ my %rw;
next if exists $reqwait{$key};
$rw{'key'} = $key;
- $rw{'ring'} = $tp{'ring'};
- $rw{'seqno'} = $tp{'seqno'};
- $rw{'ctx'} = $tp{'ctx'};
+ $rw{'ring'} = $ring;
+ $rw{'seqno'} = $seqno;
+ $rw{'ctx'} = $ctx;
$rw{'start'} = $time;
$reqwait{$key} = \%rw;
- next;
- } elsif ($tp_name eq 'i915_gem_request_wait_end') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
next unless exists $reqwait{$key};
$reqwait{$key}->{'end'} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_add') {
- my $orig_ctx = $tp{'ctx'};
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
if (exists $queue{$key}) {
$ctxdb{$orig_ctx}++;
- $tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ $ctx = sanitize_ctx($orig_ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
}
$queue{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_submit') {
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
+ } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
die if exists $submit{$key};
die unless exists $queue{$key};
$submit{$key} = $time;
- next;
- } elsif ($tp_name eq 'i915_gem_request_in') {
+ } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
my %req;
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
-
die if exists $db{$key};
die unless exists $queue{$key};
die unless exists $submit{$key};
$req{'start'} = $time;
- $req{'ring'} = $tp{'ring'};
- $req{'seqno'} = $tp{'seqno'};
- $req{'ctx'} = $tp{'ctx'};
- $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
+ $req{'ring'} = $ring;
+ $req{'seqno'} = $seqno;
+ $req{'ctx'} = $ctx;
+ $req{'name'} = $ctx . '/' . $seqno;
$req{'global'} = $tp{'global'};
$req{'port'} = $tp{'port'};
$req{'queue'} = $queue{$key};
$req{'submit-delay'} = $submit{$key} - $queue{$key};
$req{'execute-delay'} = $req{'start'} - $submit{$key};
- $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
- $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
+ $rings{$ring} = $gid++ unless exists $rings{$ring};
+ $ringmap{$rings{$ring}} = $ring;
$db{$key} = \%req;
- next;
- } elsif ($tp_name eq 'i915_gem_request_out') {
- my $gkey = global_key($tp{'ring'}, $tp{'global'});
-
- $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
- $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
+ } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
+ my $gkey = global_key($ring, $tp{'global'});
die unless exists $db{$key};
die unless exists $db{$key}->{'start'};
@@ -476,15 +460,12 @@ while (<>) {
}
$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
- next;
- } elsif ($tp_name eq 'intel_engine_notify') {
- $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
- next;
- } elsif ($tp_name eq 'intel_gpu_freq_change') {
+ } elsif ($tp_name eq 'i915:intel_engine_notify:') {
+ $notify{global_key($ring, $seqno)} = $time;
+ } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
$prev_freq_ts = $time;
$prev_freq = $tp{'new_freq'};
- next;
}
}
@@ -557,7 +538,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
my $last_ts = 0;
my $first_ts;
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
+my $re_sort = 0;
+
+foreach my $key (@sorted_keys) {
my $ring = $db{$key}->{'ring'};
my $end = $db{$key}->{'end'};
@@ -585,6 +569,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
# 20us tolerance
if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+ $re_sort = 1;
$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
@@ -601,6 +586,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
}
+@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
+
foreach my $ring (keys %batch_avg) {
$batch_avg{$ring} /= $batch_count{$ring};
$batch_total_avg{$ring} /= $batch_count{$ring};
@@ -616,7 +603,7 @@ foreach my $gid (sort keys %rings) {
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
- foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ foreach my $key (@sorted_keys) {
next unless $db{$key}->{'ring'} == $ring;
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
@@ -660,7 +647,7 @@ my @gpu_intervals;
my (@s_, @e_);
# Extract all GPU busy intervals and sort them.
-foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+foreach my $key (@sorted_keys) {
push @s_, $db{$key}->{'start'};
push @e_, $db{$key}->{'end'};
die if $db{$key}->{'start'} > $db{$key}->{'end'};
--
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 18+ messages in thread
* ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5)
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
` (6 preceding siblings ...)
2017-12-21 13:27 ` ✗ Fi.CI.IGT: warning " Patchwork
@ 2017-12-22 9:34 ` Patchwork
2017-12-22 10:21 ` ✓ Fi.CI.IGT: " Patchwork
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-22 9:34 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing (rev5)
URL : https://patchwork.freedesktop.org/series/35569/
State : success
== Summary ==
IGT patchset tested on top of latest successful build
beb26d89ff5c5621c1e6b6ac2a45439507af86b7 meson: Install .testlist files and README from tests/intel-ci
with latest DRM-Tip kernel build CI_DRM_3566
c0a64101df89 drm-tip: 2017y-12m-21d-18h-02m-56s UTC integration manifest
No testlist changes.
Test debugfs_test:
Subgroup read_all_entries:
dmesg-warn -> DMESG-FAIL (fi-elk-e7500) fdo#103989
incomplete -> PASS (fi-snb-2520m) fdo#103713
fdo#103989
fdo#103713
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:436s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:443s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:383s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:500s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:277s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:493s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:503s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:480s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:473s
fi-elk-e7500 total:224 pass:163 dwarn:14 dfail:1 fail:0 skip:45
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:264s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:534s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:408s
fi-hsw-4770r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:413s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:479s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:429s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:484s
fi-kbl-7560u total:288 pass:268 dwarn:1 dfail:0 fail:0 skip:19 time:526s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:472s
fi-kbl-r total:288 pass:260 dwarn:1 dfail:0 fail:0 skip:27 time:526s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:584s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:443s
fi-skl-6600u total:288 pass:260 dwarn:1 dfail:0 fail:0 skip:27 time:531s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:555s
fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:507s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:495s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:445s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:554s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:418s
Blacklisted hosts:
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:587s
fi-cnl-y total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:609s
fi-glk-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:485s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_724/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* ✓ Fi.CI.IGT: success for scripts/trace.pl: Optimize event parsing and processing (rev5)
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
` (7 preceding siblings ...)
2017-12-22 9:34 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5) Patchwork
@ 2017-12-22 10:21 ` Patchwork
8 siblings, 0 replies; 18+ messages in thread
From: Patchwork @ 2017-12-22 10:21 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: scripts/trace.pl: Optimize event parsing and processing (rev5)
URL : https://patchwork.freedesktop.org/series/35569/
State : success
== Summary ==
Test gem_tiled_swapping:
Subgroup non-threaded:
incomplete -> PASS (shard-hsw) fdo#104218
Test drv_suspend:
Subgroup fence-restore-tiled2untiled:
skip -> PASS (shard-snb)
skip -> PASS (shard-hsw)
Subgroup sysfs-reader:
skip -> PASS (shard-hsw)
Test pm_rpm:
Subgroup system-suspend:
skip -> PASS (shard-hsw) fdo#103375 +1
Test kms_frontbuffer_tracking:
Subgroup fbc-1p-offscren-pri-shrfb-draw-blt:
pass -> FAIL (shard-snb) fdo#101623
fdo#104218 https://bugs.freedesktop.org/show_bug.cgi?id=104218
fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
shard-hsw total:2702 pass:1532 dwarn:1 dfail:0 fail:10 skip:1158 time:8997s
shard-snb total:2712 pass:1309 dwarn:1 dfail:0 fail:11 skip:1391 time:8098s
Blacklisted hosts:
shard-apl total:2635 pass:1636 dwarn:1 dfail:0 fail:23 skip:974 time:13422s
shard-kbl total:2703 pass:1798 dwarn:4 dfail:0 fail:23 skip:877 time:10623s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_724/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t v4] scripts/trace.pl: Optimize event parsing and processing
2017-12-22 9:16 ` [PATCH i-g-t v4] " Tvrtko Ursulin
@ 2017-12-22 20:38 ` John Harrison
0 siblings, 0 replies; 18+ messages in thread
From: John Harrison @ 2017-12-22 20:38 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 8722 bytes --]
This one works for me. Loads faster but identical output to the original
version.
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
On 12/22/2017 1:16 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
>
> 1. Do less string processing on tracepoints names and push more of the
> check into the if-ladder.
>
> 2. Pull out common db key and ctx processing and cache common values in
> local vars.
>
> 3. Key value pair parsing is faster with a regexp.
>
> 4. Avoid sorting the db hash multiple times if possible.
>
> v2:
> * Use faster key-value splitting method. (John Harrison)
>
> v3:
> * Fix floating-point to int time conversion.
>
> v4:
> * Fix refactoring and logic fails. (John Harrison)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
> scripts/trace.pl | 117 +++++++++++++++++++++++++------------------------------
> 1 file changed, 52 insertions(+), 65 deletions(-)
>
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index ade0a9154bd7..cb93900dd620 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -356,110 +356,94 @@ my $prev_freq = 0;
> my $prev_freq_ts = 0;
> while (<>) {
> my @fields;
> - my @tmp;
> my $tp_name;
> - my $time;
> my %tp;
> - my $key;
> + my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>
> chomp;
> @fields = split ' ';
>
> + chop $fields[3];
> + $time = int($fields[3] * 1000000.0 + 0.5);
> +
> $tp_name = $fields[4];
> - @tmp = split ':', $tp_name, 2;
> - next unless $tmp[0] eq 'i915';
> - $tp_name = $tmp[1];
> - chop $tp_name;
>
> - chop $fields[3];
> - $time = $fields[3] * 1000000.0;
> splice @fields, 0, 5;
>
> foreach my $f (@fields) {
> - my @kv = split '=|,', $f;
> + my ($k, $v);
>
> - $kv[0] = 'global' if $kv[0] eq 'global_seqno';
> -
> - $tp{$kv[0]} = $kv[1];
> + next unless $f =~ m/=/;
> + ($k, $v) = ($`, $');
> + $k = 'global' if $k eq 'global_seqno';
> + chop $v if substr($v, -1, 1) eq ',';
> + $tp{$k} = $v;
> }
>
> next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>
> - if ($tp_name eq 'i915_gem_request_wait_begin') {
> - my %rw;
> + if (exists $tp{'ring'} and exists $tp{'seqno'}) {
> + $ring = $tp{'ring'};
> + $seqno = $tp{'seqno'};
>
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> + if (exists $tp{'ctx'}) {
> + $ctx = $tp{'ctx'};
> + $orig_ctx = $ctx;
> + $ctx = sanitize_ctx($ctx, $ring);
> + $key = db_key($ring, $ctx, $seqno);
> + }
> + }
> +
> + if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
> + my %rw;
>
> next if exists $reqwait{$key};
>
> $rw{'key'} = $key;
> - $rw{'ring'} = $tp{'ring'};
> - $rw{'seqno'} = $tp{'seqno'};
> - $rw{'ctx'} = $tp{'ctx'};
> + $rw{'ring'} = $ring;
> + $rw{'seqno'} = $seqno;
> + $rw{'ctx'} = $ctx;
> $rw{'start'} = $time;
> $reqwait{$key} = \%rw;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_wait_end') {
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> + } elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
> next unless exists $reqwait{$key};
>
> $reqwait{$key}->{'end'} = $time;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_add') {
> - my $orig_ctx = $tp{'ctx'};
> -
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> + } elsif ($tp_name eq 'i915:i915_gem_request_add:') {
> if (exists $queue{$key}) {
> $ctxdb{$orig_ctx}++;
> - $tp{'ctx'} = sanitize_ctx($orig_ctx, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> + $ctx = sanitize_ctx($orig_ctx, $ring);
> + $key = db_key($ring, $ctx, $seqno);
> }
>
> $queue{$key} = $time;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_submit') {
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> + } elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
> die if exists $submit{$key};
> die unless exists $queue{$key};
>
> $submit{$key} = $time;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_in') {
> + } elsif ($tp_name eq 'i915:i915_gem_request_in:') {
> my %req;
>
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> die if exists $db{$key};
> die unless exists $queue{$key};
> die unless exists $submit{$key};
>
> $req{'start'} = $time;
> - $req{'ring'} = $tp{'ring'};
> - $req{'seqno'} = $tp{'seqno'};
> - $req{'ctx'} = $tp{'ctx'};
> - $req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
> + $req{'ring'} = $ring;
> + $req{'seqno'} = $seqno;
> + $req{'ctx'} = $ctx;
> + $req{'name'} = $ctx . '/' . $seqno;
> $req{'global'} = $tp{'global'};
> $req{'port'} = $tp{'port'};
> $req{'queue'} = $queue{$key};
> $req{'submit-delay'} = $submit{$key} - $queue{$key};
> $req{'execute-delay'} = $req{'start'} - $submit{$key};
> - $rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
> - $ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
> + $rings{$ring} = $gid++ unless exists $rings{$ring};
> + $ringmap{$rings{$ring}} = $ring;
> $db{$key} = \%req;
> - next;
> - } elsif ($tp_name eq 'i915_gem_request_out') {
> - my $gkey = global_key($tp{'ring'}, $tp{'global'});
> -
> - $tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> - $key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> + } elsif ($tp_name eq 'i915:i915_gem_request_out:') {
> + my $gkey = global_key($ring, $tp{'global'});
>
> die unless exists $db{$key};
> die unless exists $db{$key}->{'start'};
> @@ -476,15 +460,12 @@ while (<>) {
> }
> $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
> $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
> - next;
> - } elsif ($tp_name eq 'intel_engine_notify') {
> - $notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
> - next;
> - } elsif ($tp_name eq 'intel_gpu_freq_change') {
> + } elsif ($tp_name eq 'i915:intel_engine_notify:') {
> + $notify{global_key($ring, $seqno)} = $time;
> + } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
> push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
> $prev_freq_ts = $time;
> $prev_freq = $tp{'new_freq'};
> - next;
> }
> }
>
> @@ -557,7 +538,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
> my $last_ts = 0;
> my $first_ts;
>
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
> +my $re_sort = 0;
> +
> +foreach my $key (@sorted_keys) {
> my $ring = $db{$key}->{'ring'};
> my $end = $db{$key}->{'end'};
>
> @@ -585,6 +569,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>
> # 20us tolerance
> if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
> + $re_sort = 1;
> $db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
> $db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
> $db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
> @@ -601,6 +586,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> $ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
> }
>
> +@sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
> +
> foreach my $ring (keys %batch_avg) {
> $batch_avg{$ring} /= $batch_count{$ring};
> $batch_total_avg{$ring} /= $batch_count{$ring};
> @@ -616,7 +603,7 @@ foreach my $gid (sort keys %rings) {
> my (@s_, @e_);
>
> # Extract all GPU busy intervals and sort them.
> - foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> + foreach my $key (@sorted_keys) {
> next unless $db{$key}->{'ring'} == $ring;
> push @s_, $db{$key}->{'start'};
> push @e_, $db{$key}->{'end'};
> @@ -660,7 +647,7 @@ my @gpu_intervals;
> my (@s_, @e_);
>
> # Extract all GPU busy intervals and sort them.
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +foreach my $key (@sorted_keys) {
> push @s_, $db{$key}->{'start'};
> push @e_, $db{$key}->{'end'};
> die if $db{$key}->{'start'} > $db{$key}->{'end'};
[-- Attachment #1.2: Type: text/html, Size: 9019 bytes --]
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2017-12-22 20:38 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-19 11:15 [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing Tvrtko Ursulin
2017-12-19 11:39 ` ✓ Fi.CI.BAT: success for " Patchwork
2017-12-19 13:23 ` ✓ Fi.CI.IGT: " Patchwork
2017-12-19 15:02 ` [PATCH i-g-t] " Chris Wilson
2017-12-19 16:23 ` John Harrison
2017-12-20 9:41 ` [PATCH i-g-t v2] " Tvrtko Ursulin
2017-12-21 9:37 ` [PATCH i-g-t v3] " Tvrtko Ursulin
2017-12-20 9:54 ` [PATCH i-g-t] " Tvrtko Ursulin
2017-12-20 23:50 ` John Harrison
2017-12-21 9:34 ` Tvrtko Ursulin
2017-12-21 22:44 ` John Harrison
2017-12-22 9:16 ` [PATCH i-g-t v4] " Tvrtko Ursulin
2017-12-22 20:38 ` John Harrison
2017-12-20 10:33 ` ✗ Fi.CI.BAT: failure for scripts/trace.pl: Optimize event parsing and processing (rev2) Patchwork
2017-12-21 11:11 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev3) Patchwork
2017-12-21 13:27 ` ✗ Fi.CI.IGT: warning " Patchwork
2017-12-22 9:34 ` ✓ Fi.CI.BAT: success for scripts/trace.pl: Optimize event parsing and processing (rev5) Patchwork
2017-12-22 10:21 ` ✓ Fi.CI.IGT: " Patchwork
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.