Re: [PATCH] Too slow libv4l MJPEG decoding with HD cameras

* Re: [PATCH] Too slow libv4l MJPEG decoding with HD cameras
@ 2010-10-26 23:51 Mitar
  2010-10-27  9:08 ` Hans de Goede
  0 siblings, 1 reply; 8+ messages in thread
From: Mitar @ 2010-10-26 23:51 UTC (permalink / raw)
  To: linux-media

[-- Attachment #1: Type: text/plain, Size: 1241 bytes --]

Hi!

On Sun, Oct 24, 2010 at 6:04 PM, Mitar <mmitar@gmail.com> wrote:
> Has anybody tried to improve MJPEG support in libv4l? With newer
> cameras this becomes important.

I have made a patch which makes libv4l use ffmpeg's avcodec library
for MJPEG decoding. The performance improvements are unbelievable.

I have been testing with a Logitech HD Pro Webcam C910 on Linux
2.6.36-rc6-amd64 with an Intel(R) Core(TM)2 Quad CPU Q9400 @ 2.66GHz.
The camera supports a 2592x1944 MJPEG stream at 10 FPS.

Using the original MJPEG code, it takes my computer on average 129.614
ms to decode a frame, which is 0.0257 us per pixel.

Using ffmpeg MJPEG decoding, it takes my computer on average
43.616 ms to decode a frame, which is 0.0087 us per pixel.

Compared with libv4l YUYV decoding, which takes 27.407 ms per frame
(0.0054 us per pixel), this is really amazing: MJPEG decoding is now in
the same time range as YUYV decoding! This raises the question of how
fast YUYV decoding would be if we used the swscale library for that.
(The code for that is already in my patch; I currently use it only to
convert the decoded MJPEG frame to the requested output format.)

This makes real-time decoding at 20 FPS possible on my computer.
This is really great. (Once I have such a camera.)


Mitar

[-- Attachment #2: v4l-utils-ffmpeg-mjpeg.patch --]
[-- Type: application/octet-stream, Size: 7992 bytes --]

diff --git a/Make.rules b/Make.rules
index 5799de4..cb60f45 100644
--- a/Make.rules
+++ b/Make.rules
@@ -2,7 +2,7 @@ V4L_UTILS_VERSION=0.8.2-test
 
 # These ones can be overriden from the cmdline
 
-CFLAGS := -g -O1
+CFLAGS := -O3 -ffast-math -frename-registers -fweb -mtune=native
 CFLAGS += -Wall -Wpointer-arith
 CXXFLAGS := $(CFLAGS)
 CFLAGS += -Wstrict-prototypes -Wmissing-prototypes
diff --git a/lib/libv4lconvert/Makefile b/lib/libv4lconvert/Makefile
index 93e5fe8..0e12130 100644
--- a/lib/libv4lconvert/Makefile
+++ b/lib/libv4lconvert/Makefile
@@ -21,6 +21,8 @@ INCLUDES      = ../include/libv4lconvert.h
 
 override CPPFLAGS += -DLIBDIR=\"$(LIBDIR)\" -DLIBSUBDIR=\"$(LIBSUBDIR)\"
 
+override LDFLAGS += -lavcodec -lswscale
+
 all: $(TARGETS)
 
 -include $(CONVERT_OBJS:.o=.d)
diff --git a/lib/libv4lconvert/libv4lconvert-priv.h b/lib/libv4lconvert/libv4lconvert-priv.h
index 61a8c39..604c061 100644
--- a/lib/libv4lconvert/libv4lconvert-priv.h
+++ b/lib/libv4lconvert/libv4lconvert-priv.h
@@ -26,6 +26,14 @@
 #include "processing/libv4lprocessing.h"
 #include "tinyjpeg.h"
 
+#ifdef HAVE_AV_CONFIG_H
+#undef HAVE_AV_CONFIG_H
+#endif
+
+#include <libavcodec/avcodec.h>
+#include <libswscale/swscale.h>
+#include <libavutil/mathematics.h>
+
 #define ARRAY_SIZE(x) ((int)sizeof(x)/(int)sizeof((x)[0]))
 
 #define V4LCONVERT_ERROR_MSG_SIZE 256
@@ -43,6 +51,14 @@
 #define V4LCONVERT_NEEDS_CONVERSION      0x02 /* Apps likely wont know this */
 #define V4LCONVERT_COMPRESSED_AND_NEEDS_CONVERSION 0x03
 
+struct v4lconvert_ffmpeg { /* ffmpeg state used for MJPEG/JPEG decoding; members are set up lazily on first use */
+	AVCodecContext *context; /* decoder context; allocated and opened in the MJPEG/JPEG path, closed and freed in v4lconvert_destroy() */
+	AVCodec *codec; /* result of avcodec_find_decoder(CODEC_ID_MJPEG); NULL until the first MJPEG/JPEG frame */
+	AVFrame *frame; /* decoder output frame, allocated once and reused for every decode call */
+	AVPacket packet; /* input packet; its data/size are pointed at the caller's source buffer before each decode */
+	struct SwsContext *sws_context; /* libswscale context created on first conversion for the requested destination format */
+};
+
 struct v4lconvert_data {
 	int fd;
 	int flags; /* bitfield */
@@ -51,6 +67,7 @@ struct v4lconvert_data {
 	unsigned int no_formats;
 	char error_msg[V4LCONVERT_ERROR_MSG_SIZE];
 	struct jdec_private *jdec;
+	struct v4lconvert_ffmpeg ffmpeg;
 	struct v4l2_frmsizeenum framesizes[V4LCONVERT_MAX_FRAMESIZES];
 	unsigned int no_framesizes;
 	int convert1_buf_size;
diff --git a/lib/libv4lconvert/libv4lconvert.c b/lib/libv4lconvert/libv4lconvert.c
index f08996a..44d9ae7 100644
--- a/lib/libv4lconvert/libv4lconvert.c
+++ b/lib/libv4lconvert/libv4lconvert.c
@@ -26,6 +26,14 @@
 #include "libv4lconvert-priv.h"
 #include "libv4lsyscall-priv.h"
 
+#ifdef HAVE_AV_CONFIG_H
+#undef HAVE_AV_CONFIG_H
+#endif
+
+#include <libavcodec/avcodec.h>
+#include <libswscale/swscale.h>
+#include <libavutil/mathematics.h>
+
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
 
 /* Note for proper functioning of v4lconvert_enum_fmt the first entries in
@@ -168,6 +176,16 @@ void v4lconvert_destroy(struct v4lconvert_data *data)
 		tinyjpeg_set_components(data->jdec, comps, 3);
 		tinyjpeg_free(data->jdec);
 	}
+	if (data->ffmpeg.context) {
+		avcodec_close(data->ffmpeg.context);
+		av_free(data->ffmpeg.context);
+	}
+	if (data->ffmpeg.frame) {
+		av_free(data->ffmpeg.frame);
+	}
+	if (data->ffmpeg.sws_context) {
+		sws_freeContext(data->ffmpeg.sws_context);
+	}
 	v4lconvert_helper_cleanup(data);
 	free(data->convert1_buf);
 	free(data->convert2_buf);
@@ -552,9 +570,6 @@ static int v4lconvert_convert_pixfmt(struct v4lconvert_data *data,
 	switch (src_pix_fmt) {
 	case V4L2_PIX_FMT_PJPG:
 		jpeg_flags |= TINYJPEG_FLAGS_PIXART_JPEG;
-		/* Fall through */
-	case V4L2_PIX_FMT_MJPEG:
-	case V4L2_PIX_FMT_JPEG:
 		if (!data->jdec) {
 			data->jdec = tinyjpeg_init();
 			if (!data->jdec)
@@ -639,6 +654,123 @@ static int v4lconvert_convert_pixfmt(struct v4lconvert_data *data,
 			result = -1;
 		}
 		break;
+		
+	case V4L2_PIX_FMT_MJPEG:
+	case V4L2_PIX_FMT_JPEG:
+		if (!data->ffmpeg.frame) {
+			if (!(data->ffmpeg.frame = avcodec_alloc_frame())) {
+				v4lconvert_oom_error(data);
+			}
+		}
+		if (!data->ffmpeg.context) {
+			if (!(data->ffmpeg.context = avcodec_alloc_context())) {
+				v4lconvert_oom_error(data);
+			}
+		}
+		if (!data->ffmpeg.codec) {
+			avcodec_init();
+			av_log_set_level(AV_LOG_ERROR);
+			avcodec_register_all();
+			if (!(data->ffmpeg.codec = avcodec_find_decoder(CODEC_ID_MJPEG))) {
+				V4LCONVERT_ERR("Codec not found\n");
+				errno = EINVAL;
+				return -1;
+			}
+			
+			data->ffmpeg.context->coded_width = width;
+			data->ffmpeg.context->coded_height = height;
+			
+			if (avcodec_open(data->ffmpeg.context, data->ffmpeg.codec) < 0) {
+				V4LCONVERT_ERR("Could not open codec\n");
+				errno = EINVAL;
+				return -1;
+			}
+			av_init_packet(&data->ffmpeg.packet);
+		}
+		
+		data->ffmpeg.packet.size = src_size;
+		data->ffmpeg.packet.data = src;
+		
+		int got_frame;
+		if (avcodec_decode_video2(data->ffmpeg.context, data->ffmpeg.frame, &got_frame, &data->ffmpeg.packet) < 0) {
+			V4LCONVERT_ERR("Error while decoding frame\n");
+			errno = EPIPE;
+			return -1;
+		}
+
+		if (!got_frame) {
+			V4LCONVERT_ERR("Could not decode frame\n");
+			errno = EPIPE;
+			return -1;
+		}
+
+		if (!data->ffmpeg.sws_context) {
+			switch (dest_pix_fmt) {
+				case V4L2_PIX_FMT_RGB24:
+					data->ffmpeg.sws_context = sws_getContext(data->ffmpeg.context->width, data->ffmpeg.context->height, data->ffmpeg.context->pix_fmt, width, height, PIX_FMT_RGB24, SWS_POINT, NULL, NULL, NULL);
+					break;
+				case V4L2_PIX_FMT_BGR24:
+					data->ffmpeg.sws_context = sws_getContext(data->ffmpeg.context->width, data->ffmpeg.context->height, data->ffmpeg.context->pix_fmt, width, height, PIX_FMT_BGR24, SWS_POINT, NULL, NULL, NULL);
+					break;
+				case V4L2_PIX_FMT_YUV420:
+					data->ffmpeg.sws_context = sws_getContext(data->ffmpeg.context->width, data->ffmpeg.context->height, data->ffmpeg.context->pix_fmt, width, height, PIX_FMT_YUV420P, SWS_POINT, NULL, NULL, NULL);
+					break;
+				case V4L2_PIX_FMT_YVU420:
+					data->ffmpeg.sws_context = sws_getContext(data->ffmpeg.context->width, data->ffmpeg.context->height, data->ffmpeg.context->pix_fmt, width, height, PIX_FMT_YUV420P, SWS_POINT, NULL, NULL, NULL);
+					break;
+			}
+			
+			if (!data->ffmpeg.sws_context) {
+				V4LCONVERT_ERR("Could not get libswscale context\n");
+				errno = EINVAL;
+				return -1;
+			}
+		}
+
+		AVPicture output;
+		switch (dest_pix_fmt) {
+			case V4L2_PIX_FMT_RGB24:
+				if (avpicture_fill(&output, dest, PIX_FMT_RGB24, width, height) > dest_size) {
+					V4LCONVERT_ERR("Destination buffer too small\n");
+					errno = EINVAL;
+					return -1;
+				}
+				break;
+			case V4L2_PIX_FMT_BGR24:
+				if (avpicture_fill(&output, dest, PIX_FMT_BGR24, width, height) > dest_size) {
+					V4LCONVERT_ERR("Destination buffer too small\n");
+					errno = EINVAL;
+					return -1;
+				}
+				break;
+			case V4L2_PIX_FMT_YUV420:
+				if (avpicture_fill(&output, dest, PIX_FMT_YUV420P, width, height) > dest_size) {
+					V4LCONVERT_ERR("Destination buffer too small\n");
+					errno = EINVAL;
+					return -1;
+				}
+				break;
+			case V4L2_PIX_FMT_YVU420:
+				if (avpicture_fill(&output, dest, PIX_FMT_YUV420P, width, height) > dest_size) {
+					V4LCONVERT_ERR("Destination buffer too small\n");
+					errno = EINVAL;
+					return -1;
+				}
+				
+				// U and V planes are swapped
+				uint8_t *temp = output.data[2];
+				output.data[2] = output.data[1];
+				output.data[1] = temp;
+				break;
+		}
+		
+		if (sws_scale(data->ffmpeg.sws_context, (const uint8_t **)data->ffmpeg.frame->data, data->ffmpeg.frame->linesize, 0, data->ffmpeg.context->height, output.data, output.linesize) != height) {
+			V4LCONVERT_ERR("Could not convert with libswscale\n");
+			errno = EINVAL;
+			return -1;				
+		}
+		
+		break;
 
 		/* Custom cam specific YUV formats */
 	case V4L2_PIX_FMT_SPCA501:
diff --git a/lib/libv4lconvert/tinyjpeg-internal.h b/lib/libv4lconvert/tinyjpeg-internal.h
index 702a2a2..fe55228 100644
--- a/lib/libv4lconvert/tinyjpeg-internal.h
+++ b/lib/libv4lconvert/tinyjpeg-internal.h
@@ -47,7 +47,7 @@ struct jdec_private;
 
 #define HUFFMAN_TABLES	   4
 #define COMPONENTS	   3
-#define JPEG_MAX_WIDTH	   2048
+#define JPEG_MAX_WIDTH	   4096
 #define JPEG_MAX_HEIGHT	   2048
 
 struct huffman_table {

^ permalink raw reply related	[flat|nested] 8+ messages in thread