From cebbd1b629bbe5f856ec5dc7591478c003f5a944 Mon Sep 17 00:00:00 2001 From: Kieran Bingham Date: Wed, 22 Mar 2017 09:54:11 +0000 Subject: [PATCH] uvcvideo: debug: Add statistics for measuring performance Measuring the performance of the uvcvideo driver requires introspection into the internal state. Extend the information provided at: /sys/kernel/debug/usb/uvcvideo/${bus}-${dev}/stats to track the overall data throughput as well as URB usage and processing speeds. This patch is not intended for integration into any release tree, but is provided as-is to assist in measuring the performance while fine tuning specific use-cases of the driver. Signed-off-by: Kieran Bingham --- v2: - uvcvideo: debug: Convert to using ktime_t - uvcvideo: debug: Track header processing time (H/T Keiichi Watanabe Signed-off-by: Kieran Bingham --- drivers/media/usb/uvc/uvc_video.c | 153 +++++++++++++++++++++++++++++- drivers/media/usb/uvc/uvcvideo.h | 20 ++++ 2 files changed, 171 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index a4222d8d09a5..40943890668b 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -863,12 +863,56 @@ static void uvc_video_stats_update(struct uvc_streaming *stream) memset(&stream->stats.frame, 0, sizeof(stream->stats.frame)); } +size_t uvc_video_dump_time_stats(char *buf, size_t size, + struct uvc_stats_time *stat, const char *pfx) +{ + unsigned int avg = 0; + + if (stat->qty) + avg = stat->duration / stat->qty; + + /* Stat durations are in nanoseconds, we present in micro-seconds */ + return scnprintf(buf, size, + "%s: %llu/%u uS/qty: %u.%03u avg %u.%03u min %u.%03u max (uS)\n", + pfx, stat->duration / 1000, stat->qty, + avg / 1000, avg % 1000, + stat->min / 1000, stat->min % 1000, + stat->max / 1000, stat->max % 1000); +} + +size_t uvc_video_dump_speed(char *buf, size_t size, const char *pfx, + u64 bytes, u64 milliseconds) +{ + unsigned int rate = 0; + bool gbit = false; + + if (milliseconds) + rate = bytes * 8 / milliseconds; + + if (rate >= 1000000) { + gbit = true; + rate /= 1000; + } + + /* + * bits/milliseconds == kilobits/seconds, + * presented here as Mbits/s (or Gbit/s) with 3 decimal places + */ + return scnprintf(buf, size, "%s: %d.%03d %sbits/s\n", + pfx, rate / 1000, rate % 1000, + gbit ? "G" : "M"); +} + size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf, size_t size) { + u64 bytes = stream->stats.stream.bytes; /* Single sample */ + unsigned int empty_ratio = 0; unsigned int scr_sof_freq; unsigned int duration; + unsigned int fps = 0; size_t count = 0; + u64 cpu = 0; /* Compute the SCR.SOF frequency estimate. At the nominal 1kHz SOF * frequency this will not overflow before more than 1h. @@ -881,12 +925,20 @@ size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf, else scr_sof_freq = 0; + if (stream->stats.stream.nb_packets) + empty_ratio = stream->stats.stream.nb_empty * 100 / + stream->stats.stream.nb_packets; + count += scnprintf(buf + count, size - count, - "frames: %u\npackets: %u\nempty: %u\n" - "errors: %u\ninvalid: %u\n", + "frames: %u\n" + "packets: %u\n" + "empty: %u (%u %%)\n" + "errors: %u\n" + "invalid: %u\n", stream->stats.stream.nb_frames, stream->stats.stream.nb_packets, stream->stats.stream.nb_empty, + empty_ratio, stream->stats.stream.nb_errors, stream->stats.stream.nb_invalid); count += scnprintf(buf + count, size - count, @@ -903,6 +955,53 @@ size_t uvc_video_stats_dump(struct uvc_streaming *stream, char *buf, stream->stats.stream.min_sof, stream->stats.stream.max_sof, scr_sof_freq / 1000, scr_sof_freq % 1000); + count += scnprintf(buf + count, size - count, + "bytes %lld : duration %d\n", + bytes, duration); + + if (duration != 0) { + /* Duration is in milliseconds, * 100 to gain 2 dp precision */ + fps = stream->stats.stream.nb_frames * 1000 * 100 / duration; + /* CPU usage as a % with 6 decimal places */ + cpu = stream->stats.urbstat.decode.duration / duration * 100; + } + + count += scnprintf(buf + count, size - count, + "FPS: %u.%02u\n", fps / 100, fps % 100); + + /* Processing Times */ + + count += uvc_video_dump_time_stats(buf + count, size - count, + &stream->stats.urbstat.urb, "URB"); + count += uvc_video_dump_time_stats(buf + count, size - count, + &stream->stats.urbstat.header, "header"); + count += uvc_video_dump_time_stats(buf + count, size - count, + &stream->stats.urbstat.latency, "latency"); + count += uvc_video_dump_time_stats(buf + count, size - count, + &stream->stats.urbstat.decode, "decode"); + + /* Processing Speeds */ + + /* This should be representative of the memory bus / cpu speed */ + count += uvc_video_dump_speed(buf + count, size - count, + "raw decode speed", + bytes, + stream->stats.urbstat.decode.duration / 1000000); + + /* Raw bus speed - scheduling latencies */ + count += uvc_video_dump_speed(buf + count, size - count, + "raw URB handling speed", + bytes, + stream->stats.urbstat.urb.duration / 1000000); + + /* Throughput against wall clock time, stream duration is in millis*/ + count += uvc_video_dump_speed(buf + count, size - count, + "throughput", bytes, duration); + + /* Determine the 'CPU Usage' of our URB processing */ + count += scnprintf(buf + count, size - count, + "URB decode CPU usage %llu.%06llu %%\n", + cpu / 1000000, cpu % 1000000); return count; } @@ -911,6 +1010,10 @@ static void uvc_video_stats_start(struct uvc_streaming *stream) { memset(&stream->stats, 0, sizeof(stream->stats)); stream->stats.stream.min_sof = 2048; + + stream->stats.urbstat.urb.min = -1; + stream->stats.urbstat.latency.min = -1; + stream->stats.urbstat.decode.min = -1; } static void uvc_video_stats_stop(struct uvc_streaming *stream) @@ -918,6 +1021,27 @@ static void uvc_video_stats_stop(struct uvc_streaming *stream) stream->stats.stream.stop_ts = ktime_get(); } +static s64 uvc_stats_add(struct uvc_stats_time *s, + const ktime_t a, const ktime_t b) +{ + ktime_t delta; + u64 duration; + + delta = ktime_sub(b, a); + duration = ktime_to_ns(delta); + + s->qty++; + s->duration += duration; + + if (duration < s->min) + s->min = duration; + + if (duration > s->max) + s->max = duration; + + return duration; +} + /* ------------------------------------------------------------------------ * Video codecs */ @@ -981,6 +1105,9 @@ static int uvc_video_decode_start(struct uvc_streaming *stream, stream->sequence++; if (stream->sequence) uvc_video_stats_update(stream); + + /* Update the stream timer each frame */ + stream->stats.stream.stop_ts = ktime_get(); } uvc_video_clock_decode(stream, buf, data, len); @@ -1063,18 +1190,34 @@ static int uvc_video_decode_start(struct uvc_streaming *stream, static void uvc_video_copy_data_work(struct work_struct *work) { struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work); + ktime_t now; unsigned int i; int ret; + /* Measure decode performance */ + uvc_urb->decode_start = ktime_get(); + /* Measure scheduling latency */ + uvc_stats_add(&uvc_urb->stream->stats.urbstat.latency, + uvc_urb->received, uvc_urb->decode_start); + for (i = 0; i < uvc_urb->async_operations; i++) { struct uvc_copy_op *op = &uvc_urb->copy_operations[i]; memcpy(op->dst, op->src, op->len); + uvc_urb->stream->stats.stream.bytes += op->len; /* Release reference taken on this buffer */ uvc_queue_buffer_release(op->buf); } + now = ktime_get(); + /* measure 'memcpy time' */ + uvc_stats_add(&uvc_urb->stream->stats.urbstat.decode, + uvc_urb->decode_start, now); + /* measure 'full urb processing time' */ + uvc_stats_add(&uvc_urb->stream->stats.urbstat.urb, + uvc_urb->received, now); + ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC); if (ret < 0) uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n", @@ -1461,6 +1604,9 @@ static void uvc_video_complete(struct urb *urb) unsigned long flags; int ret; + /* Track URB processing performance */ + uvc_urb->received = ktime_get(); + switch (urb->status) { case 0: break; @@ -1512,6 +1658,9 @@ static void uvc_video_complete(struct urb *urb) INIT_WORK(&uvc_urb->work, uvc_video_copy_data_work); queue_work(stream->async_wq, &uvc_urb->work); + + uvc_stats_add(&uvc_urb->stream->stats.urbstat.header, + uvc_urb->received, ktime_get()); } /* diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h index 7ebeb680710b..efaa55a24a68 100644 --- a/drivers/media/usb/uvc/uvcvideo.h +++ b/drivers/media/usb/uvc/uvcvideo.h @@ -462,6 +462,13 @@ struct uvc_stats_frame { u32 scr_stc; /* SCR.STC of the last packet */ }; +struct uvc_stats_time { + u64 duration; /* Cumulative total duration between two events */ + unsigned int qty; /* Number of events represented in the total */ + unsigned int min; /* Shortest duration */ + unsigned int max; /* Longest duration */ +}; + struct uvc_stats_stream { ktime_t start_ts; /* Stream start timestamp */ ktime_t stop_ts; /* Stream stop timestamp */ @@ -483,6 +490,8 @@ struct uvc_stats_stream { unsigned int scr_sof; /* STC.SOF of the last packet */ unsigned int min_sof; /* Minimum STC.SOF value */ unsigned int max_sof; /* Maximum STC.SOF value */ + + unsigned long bytes; /* Successfully transferred bytes */ }; #define UVC_METATADA_BUF_SIZE 1024 @@ -512,6 +521,8 @@ struct uvc_copy_op { * @async_operations: counter to indicate the number of copy operations * @copy_operations: work descriptors for asynchronous copy operations * @work: work queue entry for asynchronous decode + * @received: URB interrupt time stamp + * @decode_start: URB processing start time stamp */ struct uvc_urb { struct urb *urb; @@ -523,6 +534,9 @@ struct uvc_urb { unsigned int async_operations; struct uvc_copy_op copy_operations[UVC_MAX_PACKETS]; struct work_struct work; + + ktime_t received; + ktime_t decode_start; }; struct uvc_streaming { @@ -585,6 +599,12 @@ struct uvc_streaming { struct { struct uvc_stats_frame frame; struct uvc_stats_stream stream; + struct uvc_stats_urb { + struct uvc_stats_time header; + struct uvc_stats_time latency; + struct uvc_stats_time decode; + struct uvc_stats_time urb; + } urbstat; } stats; /* Timestamps support. */ -- 2.17.1