linux-usb.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance
@ 2021-05-30 22:30 Michael Grzeschik
  2021-05-30 22:30 ` [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed Michael Grzeschik
                   ` (3 more replies)
  0 siblings, 4 replies; 17+ messages in thread
From: Michael Grzeschik @ 2021-05-30 22:30 UTC (permalink / raw)
  Cc: linux-usb, laurent.pinchart, caleb.connolly, paul.elder, balbi, kernel

This series improves the performance of the uvc video gadget by adding a
zero copy routine using the scatter list interface of the gadget. The
series also increases the amount of allocated requests depending of the
speed and it also reduces the interrupt load by only trigger on every
16th request in case of super-speed.

Michael Grzeschik (3):
  usb: gadget: uvc: make uvc_num_requests depend on gadget speed
  usb: gadget: uvc: add scatter gather support
  usb: gadget: uvc: decrease the interrupt load to a quarter

 drivers/usb/gadget/Kconfig              |   1 +
 drivers/usb/gadget/function/f_uvc.c     |   1 +
 drivers/usb/gadget/function/uvc.h       |  15 ++-
 drivers/usb/gadget/function/uvc_queue.c |  30 ++++-
 drivers/usb/gadget/function/uvc_queue.h |   5 +-
 drivers/usb/gadget/function/uvc_video.c | 150 +++++++++++++++++++-----
 6 files changed, 168 insertions(+), 34 deletions(-)

-- 
2.29.2


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed
  2021-05-30 22:30 [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Michael Grzeschik
@ 2021-05-30 22:30 ` Michael Grzeschik
  2021-06-14 10:34   ` paul.elder
  2021-06-15  1:28   ` Laurent Pinchart
  2021-05-30 22:30 ` [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support Michael Grzeschik
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 17+ messages in thread
From: Michael Grzeschik @ 2021-05-30 22:30 UTC (permalink / raw)
  Cc: linux-usb, laurent.pinchart, caleb.connolly, paul.elder, balbi, kernel

While sending bigger images is possible with USB_SPEED_SUPER it is
better to use more isochronous requests in flight. This patch makes the
number uvc_num_requests dynamic by changing it depending on the gadget
speed.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
---
v1 -> v2: - fixed null pointer dereference in uvc_video_free_requests

 drivers/usb/gadget/function/uvc.h       | 11 +++--
 drivers/usb/gadget/function/uvc_queue.c |  7 ++++
 drivers/usb/gadget/function/uvc_video.c | 56 +++++++++++++++----------
 3 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 23ee25383c1f7..83b9e945944e8 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -65,13 +65,17 @@ extern unsigned int uvc_gadget_trace_param;
  * Driver specific constants
  */
 
-#define UVC_NUM_REQUESTS			4
 #define UVC_MAX_REQUEST_SIZE			64
 #define UVC_MAX_EVENTS				4
 
 /* ------------------------------------------------------------------------
  * Structures
  */
+struct uvc_request {
+	struct usb_request *req;
+	__u8 *req_buffer;
+	struct uvc_video *video;
+};
 
 struct uvc_video {
 	struct uvc_device *uvc;
@@ -87,10 +91,11 @@ struct uvc_video {
 	unsigned int imagesize;
 	struct mutex mutex;	/* protects frame parameters */
 
+	int uvc_num_requests;
+
 	/* Requests */
 	unsigned int req_size;
-	struct usb_request *req[UVC_NUM_REQUESTS];
-	__u8 *req_buffer[UVC_NUM_REQUESTS];
+	struct uvc_request *ureq;
 	struct list_head req_free;
 	spinlock_t req_lock;
 
diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
index 61e2c94cc0b0c..dcd71304d521c 100644
--- a/drivers/usb/gadget/function/uvc_queue.c
+++ b/drivers/usb/gadget/function/uvc_queue.c
@@ -43,6 +43,8 @@ static int uvc_queue_setup(struct vb2_queue *vq,
 {
 	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
 	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
+	struct uvc_device *uvc = video->uvc;
+	struct usb_composite_dev *cdev = uvc->func.config->cdev;
 
 	if (*nbuffers > UVC_MAX_VIDEO_BUFFERS)
 		*nbuffers = UVC_MAX_VIDEO_BUFFERS;
@@ -51,6 +53,11 @@ static int uvc_queue_setup(struct vb2_queue *vq,
 
 	sizes[0] = video->imagesize;
 
+	if (cdev->gadget->speed < USB_SPEED_SUPER)
+		video->uvc_num_requests = 4;
+	else
+		video->uvc_num_requests = 64;
+
 	return 0;
 }
 
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 633e23d58d868..57840083dfdda 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/module.h>
 #include <linux/usb/video.h>
 
 #include <media/v4l2-dev.h>
@@ -145,7 +146,8 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 static void
 uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 {
-	struct uvc_video *video = req->context;
+	struct uvc_request *ureq = req->context;
+	struct uvc_video *video = ureq->video;
 	struct uvc_video_queue *queue = &video->queue;
 	unsigned long flags;
 
@@ -177,16 +179,19 @@ uvc_video_free_requests(struct uvc_video *video)
 {
 	unsigned int i;
 
-	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
-		if (video->req[i]) {
-			usb_ep_free_request(video->ep, video->req[i]);
-			video->req[i] = NULL;
-		}
-
-		if (video->req_buffer[i]) {
-			kfree(video->req_buffer[i]);
-			video->req_buffer[i] = NULL;
+	if (video->ureq) {
+		for (i = 0; i < video->uvc_num_requests; ++i) {
+			if (video->ureq[i].req) {
+				usb_ep_free_request(video->ep, video->ureq[i].req);
+				video->ureq[i].req = NULL;
+			}
+			if (video->ureq[i].req_buffer) {
+				kfree(video->ureq[i].req_buffer);
+				video->ureq[i].req_buffer = NULL;
+			}
 		}
+		kfree(video->ureq);
+		video->ureq = NULL;
 	}
 
 	INIT_LIST_HEAD(&video->req_free);
@@ -207,21 +212,26 @@ uvc_video_alloc_requests(struct uvc_video *video)
 		 * max_t(unsigned int, video->ep->maxburst, 1)
 		 * (video->ep->mult);
 
-	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
-		video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL);
-		if (video->req_buffer[i] == NULL)
+	video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL);
+	if (video->ureq == NULL)
+		return ret;
+
+	for (i = 0; i < video->uvc_num_requests; ++i) {
+		video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL);
+		if (video->ureq[i].req_buffer == NULL)
 			goto error;
 
-		video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL);
-		if (video->req[i] == NULL)
+		video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL);
+		if (video->ureq[i].req == NULL)
 			goto error;
 
-		video->req[i]->buf = video->req_buffer[i];
-		video->req[i]->length = 0;
-		video->req[i]->complete = uvc_video_complete;
-		video->req[i]->context = video;
+		video->ureq[i].req->buf = video->ureq[i].req_buffer;
+		video->ureq[i].req->length = 0;
+		video->ureq[i].req->complete = uvc_video_complete;
+		video->ureq[i].req->context = &video->ureq[i];
+		video->ureq[i].video = video;
 
-		list_add_tail(&video->req[i]->list, &video->req_free);
+		list_add_tail(&video->ureq[i].req->list, &video->req_free);
 	}
 
 	video->req_size = req_size;
@@ -312,9 +322,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 		cancel_work_sync(&video->pump);
 		uvcg_queue_cancel(&video->queue, 0);
 
-		for (i = 0; i < UVC_NUM_REQUESTS; ++i)
-			if (video->req[i])
-				usb_ep_dequeue(video->ep, video->req[i]);
+		for (i = 0; i < video->uvc_num_requests; ++i)
+			if (video->ureq && video->ureq[i].req)
+				usb_ep_dequeue(video->ep, video->ureq[i].req);
 
 		uvc_video_free_requests(video);
 		uvcg_queue_enable(&video->queue, 0);
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support
  2021-05-30 22:30 [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Michael Grzeschik
  2021-05-30 22:30 ` [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed Michael Grzeschik
@ 2021-05-30 22:30 ` Michael Grzeschik
  2021-05-31  1:33   ` kernel test robot
                     ` (2 more replies)
  2021-05-30 22:30 ` [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter Michael Grzeschik
  2021-06-09  8:17 ` [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Greg KH
  3 siblings, 3 replies; 17+ messages in thread
From: Michael Grzeschik @ 2021-05-30 22:30 UTC (permalink / raw)
  Cc: linux-usb, laurent.pinchart, caleb.connolly, paul.elder, balbi, kernel

This patch adds support for scatter gather transfers. If the underlying
gadgets sg_supported == true, then the videeobuf2-dma-sg is used and the
encode routine maps all scatter entries to separate scatterlists for the
usb gadget.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
---
v1 -> v2: -

 drivers/usb/gadget/Kconfig              |  1 +
 drivers/usb/gadget/function/f_uvc.c     |  1 +
 drivers/usb/gadget/function/uvc.h       |  2 +
 drivers/usb/gadget/function/uvc_queue.c | 23 ++++++-
 drivers/usb/gadget/function/uvc_queue.h |  5 +-
 drivers/usb/gadget/function/uvc_video.c | 80 ++++++++++++++++++++++++-
 6 files changed, 105 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 2d152571a7de8..dd58094f0b85b 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -450,6 +450,7 @@ config USB_CONFIGFS_F_UVC
 	depends on USB_CONFIGFS
 	depends on VIDEO_V4L2
 	depends on VIDEO_DEV
+	select VIDEOBUF2_DMA_SG
 	select VIDEOBUF2_VMALLOC
 	select USB_F_UVC
 	help
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index f48a00e497945..9d87c0fb8f92e 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -418,6 +418,7 @@ uvc_register_video(struct uvc_device *uvc)
 
 	/* TODO reference counting. */
 	uvc->vdev.v4l2_dev = &uvc->v4l2_dev;
+	uvc->vdev.v4l2_dev->dev = &cdev->gadget->dev;
 	uvc->vdev.fops = &uvc_v4l2_fops;
 	uvc->vdev.ioctl_ops = &uvc_v4l2_ioctl_ops;
 	uvc->vdev.release = video_device_release_empty;
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 83b9e945944e8..c1f06d9df5820 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -75,6 +75,8 @@ struct uvc_request {
 	struct usb_request *req;
 	__u8 *req_buffer;
 	struct uvc_video *video;
+	struct sg_table sgt;
+	u8 header[2];
 };
 
 struct uvc_video {
diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
index dcd71304d521c..e36a3506842b7 100644
--- a/drivers/usb/gadget/function/uvc_queue.c
+++ b/drivers/usb/gadget/function/uvc_queue.c
@@ -18,6 +18,7 @@
 
 #include <media/v4l2-common.h>
 #include <media/videobuf2-vmalloc.h>
+#include <media/videobuf2-dma-sg.h>
 
 #include "uvc.h"
 
@@ -52,6 +53,7 @@ static int uvc_queue_setup(struct vb2_queue *vq,
 	*nplanes = 1;
 
 	sizes[0] = video->imagesize;
+	alloc_devs[0] = video->uvc->v4l2_dev.dev->parent;
 
 	if (cdev->gadget->speed < USB_SPEED_SUPER)
 		video->uvc_num_requests = 4;
@@ -66,6 +68,9 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
 	struct uvc_video_queue *queue = vb2_get_drv_priv(vb->vb2_queue);
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
 	struct uvc_buffer *buf = container_of(vbuf, struct uvc_buffer, buf);
+	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
+	struct uvc_device *uvc = video->uvc;
+	struct usb_composite_dev *cdev = uvc->func.config->cdev;
 
 	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
 	    vb2_get_plane_payload(vb, 0) > vb2_plane_size(vb, 0)) {
@@ -77,7 +82,12 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
 		return -ENODEV;
 
 	buf->state = UVC_BUF_STATE_QUEUED;
-	buf->mem = vb2_plane_vaddr(vb, 0);
+	if (cdev->gadget->sg_supported) {
+		buf->sgt = vb2_dma_sg_plane_desc(vb, 0);
+		buf->sg = buf->sgt->sgl;
+	} else {
+		buf->mem = vb2_plane_vaddr(vb, 0);
+	}
 	buf->length = vb2_plane_size(vb, 0);
 	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
 		buf->bytesused = 0;
@@ -117,9 +127,11 @@ static const struct vb2_ops uvc_queue_qops = {
 	.wait_finish = vb2_ops_wait_finish,
 };
 
-int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
+int uvcg_queue_init(struct device *dev, struct uvc_video_queue *queue, enum v4l2_buf_type type,
 		    struct mutex *lock)
 {
+	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
+	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
 	int ret;
 
 	queue->queue.type = type;
@@ -128,9 +140,14 @@ int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
 	queue->queue.buf_struct_size = sizeof(struct uvc_buffer);
 	queue->queue.ops = &uvc_queue_qops;
 	queue->queue.lock = lock;
-	queue->queue.mem_ops = &vb2_vmalloc_memops;
+	if (cdev->gadget->sg_supported)
+		queue->queue.mem_ops = &vb2_dma_sg_memops;
+	else
+		queue->queue.mem_ops = &vb2_vmalloc_memops;
+
 	queue->queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC
 				     | V4L2_BUF_FLAG_TSTAMP_SRC_EOF;
+	queue->queue.dev = dev;
 	ret = vb2_queue_init(&queue->queue);
 	if (ret)
 		return ret;
diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h
index 2f0fff7698430..bb8753b26074f 100644
--- a/drivers/usb/gadget/function/uvc_queue.h
+++ b/drivers/usb/gadget/function/uvc_queue.h
@@ -34,6 +34,9 @@ struct uvc_buffer {
 
 	enum uvc_buffer_state state;
 	void *mem;
+	struct sg_table *sgt;
+	struct scatterlist *sg;
+	unsigned int offset;
 	unsigned int length;
 	unsigned int bytesused;
 };
@@ -59,7 +62,7 @@ static inline int uvc_queue_streaming(struct uvc_video_queue *queue)
 	return vb2_is_streaming(&queue->queue);
 }
 
-int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
+int uvcg_queue_init(struct device *d, struct uvc_video_queue *queue, enum v4l2_buf_type type,
 		    struct mutex *lock);
 
 void uvcg_free_buffers(struct uvc_video_queue *queue);
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 57840083dfdda..240d361a45a44 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -95,6 +95,71 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
 		video->payload_size = 0;
 }
 
+static void
+uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
+		struct uvc_buffer *buf)
+{
+	int pending = buf->bytesused - video->queue.buf_used;
+	struct uvc_request *ureq = req->context;
+	struct scatterlist *sg, *iter;
+	int len = video->req_size;
+	int sg_left, part = 0;
+	int ret;
+	int i;
+
+	sg = ureq->sgt.sgl;
+	sg_init_table(sg, ureq->sgt.nents);
+
+	/* Init the header. */
+	memset(ureq->header, 0, 2);
+	ret = uvc_video_encode_header(video, buf, ureq->header,
+				      video->req_size);
+	sg_set_buf(sg, ureq->header, 2);
+	len -= ret;
+
+	if (pending <= len)
+		len = pending;
+
+	req->length = (len == pending) ? len + 2 : video->req_size;
+
+	/* Init the pending sgs with payload */
+	sg = sg_next(sg);
+
+	for_each_sg(sg, iter, ureq->sgt.nents - 1, i) {
+		if (!len || !buf->sg)
+			break;
+
+		sg_left = sg_dma_len(buf->sg) - buf->offset;
+		part = min_t(unsigned int, len, sg_left);
+
+		sg_set_page(iter, sg_page(buf->sg), part, buf->offset);
+
+		if (part == sg_left) {
+			buf->offset = 0;
+			buf->sg = sg_next(buf->sg);
+		} else {
+			buf->offset += part;
+		}
+		len -= part;
+	}
+
+	/* Assign the video data with header. */
+	req->buf = NULL;
+	req->sg	= ureq->sgt.sgl;
+	req->num_sgs = i + 1;
+
+	req->length -= len;
+	video->queue.buf_used += req->length - 2;
+
+	if (buf->bytesused == video->queue.buf_used || !buf->sg) {
+		video->queue.buf_used = 0;
+		buf->state = UVC_BUF_STATE_DONE;
+		buf->offset = 0;
+		uvcg_queue_next_buffer(&video->queue, buf);
+		video->fid ^= UVC_STREAM_FID;
+	}
+}
+
 static void
 uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
 		struct uvc_buffer *buf)
@@ -232,6 +297,10 @@ uvc_video_alloc_requests(struct uvc_video *video)
 		video->ureq[i].video = video;
 
 		list_add_tail(&video->ureq[i].req->list, &video->req_free);
+		/* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
+		sg_alloc_table(&video->ureq[i].sgt,
+			       DIV_ROUND_UP(req_size - 2, PAGE_SIZE) + 2,
+			       GFP_KERNEL);
 	}
 
 	video->req_size = req_size;
@@ -309,6 +378,7 @@ static void uvcg_video_pump(struct work_struct *work)
  */
 int uvcg_video_enable(struct uvc_video *video, int enable)
 {
+	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
 	unsigned int i;
 	int ret;
 
@@ -340,8 +410,12 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 	if (video->max_payload_size) {
 		video->encode = uvc_video_encode_bulk;
 		video->payload_size = 0;
-	} else
-		video->encode = uvc_video_encode_isoc;
+	} else {
+		if (cdev->gadget->sg_supported)
+			video->encode = uvc_video_encode_isoc_sg;
+		else
+			video->encode = uvc_video_encode_isoc;
+	}
 
 	schedule_work(&video->pump);
 
@@ -365,7 +439,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->imagesize = 320 * 240 * 2;
 
 	/* Initialize the video buffers queue. */
-	uvcg_queue_init(&video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
+	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
 			&video->mutex);
 	return 0;
 }
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter
  2021-05-30 22:30 [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Michael Grzeschik
  2021-05-30 22:30 ` [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed Michael Grzeschik
  2021-05-30 22:30 ` [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support Michael Grzeschik
@ 2021-05-30 22:30 ` Michael Grzeschik
  2021-06-14 10:35   ` paul.elder
  2021-06-09  8:17 ` [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Greg KH
  3 siblings, 1 reply; 17+ messages in thread
From: Michael Grzeschik @ 2021-05-30 22:30 UTC (permalink / raw)
  Cc: linux-usb, laurent.pinchart, caleb.connolly, paul.elder, balbi, kernel

With usb3 we handle much more requests. It only enables the interrupt on
every quarter of the allocated requests. This patch decreases the
interrupt load.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
---
 drivers/usb/gadget/function/uvc.h       |  2 ++
 drivers/usb/gadget/function/uvc_video.c | 12 ++++++++++++
 2 files changed, 14 insertions(+)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index c1f06d9df5820..5a76e9351b530 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -101,6 +101,8 @@ struct uvc_video {
 	struct list_head req_free;
 	spinlock_t req_lock;
 
+	int req_int_count;
+
 	void (*encode) (struct usb_request *req, struct uvc_video *video,
 			struct uvc_buffer *buf);
 
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 240d361a45a44..66754687ce217 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -357,6 +357,16 @@ static void uvcg_video_pump(struct work_struct *work)
 
 		video->encode(req, video, buf);
 
+		if (list_empty(&video->req_free) ||
+		    (buf->state == UVC_BUF_STATE_DONE) ||
+		    (!(video->req_int_count %
+		       DIV_ROUND_UP(video->uvc_num_requests, 4)))) {
+			video->req_int_count = 0;
+			req->no_interrupt = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+
 		/* Queue the USB request */
 		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
@@ -365,6 +375,7 @@ static void uvcg_video_pump(struct work_struct *work)
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
+		video->req_int_count++;
 	}
 
 	spin_lock_irqsave(&video->req_lock, flags);
@@ -437,6 +448,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->width = 320;
 	video->height = 240;
 	video->imagesize = 320 * 240 * 2;
+	video->req_int_count = 0;
 
 	/* Initialize the video buffers queue. */
 	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support
  2021-05-30 22:30 ` [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support Michael Grzeschik
@ 2021-05-31  1:33   ` kernel test robot
  2021-05-31  6:30   ` kernel test robot
  2021-06-15  2:10   ` Laurent Pinchart
  2 siblings, 0 replies; 17+ messages in thread
From: kernel test robot @ 2021-05-31  1:33 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: kbuild-all, linux-usb, laurent.pinchart, caleb.connolly,
	paul.elder, balbi, kernel

[-- Attachment #1: Type: text/plain, Size: 1785 bytes --]

Hi Michael,

I love your patch! Yet something to improve:

[auto build test ERROR on usb/usb-testing]
[also build test ERROR on peter.chen-usb/for-usb-next balbi-usb/testing/next v5.13-rc4 next-20210528]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Michael-Grzeschik/usb-gadget-uvc-improve-uvc-gadget-performance/20210531-063238
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-testing
config: h8300-randconfig-r016-20210531 (attached as .config)
compiler: h8300-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/16929fc91c22102e12cbd1bfe0473bdc01d3172b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Michael-Grzeschik/usb-gadget-uvc-improve-uvc-gadget-performance/20210531-063238
        git checkout 16929fc91c22102e12cbd1bfe0473bdc01d3172b
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=h8300 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   h8300-linux-ld: drivers/usb/gadget/function/uvc_queue.o: in function `uvcg_queue_init':
>> uvc_queue.c:(.text+0x308): undefined reference to `vb2_dma_sg_memops'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 25589 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support
  2021-05-30 22:30 ` [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support Michael Grzeschik
  2021-05-31  1:33   ` kernel test robot
@ 2021-05-31  6:30   ` kernel test robot
  2021-06-15  2:10   ` Laurent Pinchart
  2 siblings, 0 replies; 17+ messages in thread
From: kernel test robot @ 2021-05-31  6:30 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: kbuild-all, clang-built-linux, linux-usb, laurent.pinchart,
	caleb.connolly, paul.elder, balbi, kernel

[-- Attachment #1: Type: text/plain, Size: 1922 bytes --]

Hi Michael,

I love your patch! Yet something to improve:

[auto build test ERROR on usb/usb-testing]
[also build test ERROR on peter.chen-usb/for-usb-next balbi-usb/testing/next v5.13-rc4 next-20210528]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Michael-Grzeschik/usb-gadget-uvc-improve-uvc-gadget-performance/20210531-063238
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-testing
config: arm64-randconfig-r014-20210531 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project bc6799f2f79f0ae87e9f1ebf9d25ba799fbd25a9)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # https://github.com/0day-ci/linux/commit/16929fc91c22102e12cbd1bfe0473bdc01d3172b
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Michael-Grzeschik/usb-gadget-uvc-improve-uvc-gadget-performance/20210531-063238
        git checkout 16929fc91c22102e12cbd1bfe0473bdc01d3172b
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "vb2_dma_sg_memops" [drivers/usb/gadget/function/usb_f_uvc.ko] undefined!

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 42704 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance
  2021-05-30 22:30 [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Michael Grzeschik
                   ` (2 preceding siblings ...)
  2021-05-30 22:30 ` [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter Michael Grzeschik
@ 2021-06-09  8:17 ` Greg KH
  3 siblings, 0 replies; 17+ messages in thread
From: Greg KH @ 2021-06-09  8:17 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: linux-usb, laurent.pinchart, caleb.connolly, paul.elder, balbi, kernel

<again, please fix your email headers, you didn't have anyone on To:>

On Mon, May 31, 2021 at 12:30:38AM +0200, Michael Grzeschik wrote:
> This series improves the performance of the uvc video gadget by adding a
> zero copy routine using the scatter list interface of the gadget. The
> series also increases the amount of allocated requests depending of the
> speed and it also reduces the interrupt load by only trigger on every
> 16th request in case of super-speed.
> 
> Michael Grzeschik (3):
>   usb: gadget: uvc: make uvc_num_requests depend on gadget speed
>   usb: gadget: uvc: add scatter gather support
>   usb: gadget: uvc: decrease the interrupt load to a quarter
> 
>  drivers/usb/gadget/Kconfig              |   1 +
>  drivers/usb/gadget/function/f_uvc.c     |   1 +
>  drivers/usb/gadget/function/uvc.h       |  15 ++-
>  drivers/usb/gadget/function/uvc_queue.c |  30 ++++-
>  drivers/usb/gadget/function/uvc_queue.h |   5 +-
>  drivers/usb/gadget/function/uvc_video.c | 150 +++++++++++++++++++-----
>  6 files changed, 168 insertions(+), 34 deletions(-)

Please fix up the kbuild issues before resending.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed
  2021-05-30 22:30 ` [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed Michael Grzeschik
@ 2021-06-14 10:34   ` paul.elder
  2021-06-15  1:28   ` Laurent Pinchart
  1 sibling, 0 replies; 17+ messages in thread
From: paul.elder @ 2021-06-14 10:34 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: linux-usb, laurent.pinchart, caleb.connolly, balbi, kernel

Hi Michael,

On Mon, May 31, 2021 at 12:30:39AM +0200, Michael Grzeschik wrote:
> While sending bigger images is possible with USB_SPEED_SUPER it is
> better to use more isochronous requests in flight. This patch makes the
> number uvc_num_requests dynamic by changing it depending on the gadget
> speed.
> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>

Looks good to me.

Reviewed-by: Paul Elder <paul.elder@ideasonboard.com>

> ---
> v1 -> v2: - fixed null pointer dereference in uvc_video_free_requests
> 
>  drivers/usb/gadget/function/uvc.h       | 11 +++--
>  drivers/usb/gadget/function/uvc_queue.c |  7 ++++
>  drivers/usb/gadget/function/uvc_video.c | 56 +++++++++++++++----------
>  3 files changed, 48 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index 23ee25383c1f7..83b9e945944e8 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -65,13 +65,17 @@ extern unsigned int uvc_gadget_trace_param;
>   * Driver specific constants
>   */
>  
> -#define UVC_NUM_REQUESTS			4
>  #define UVC_MAX_REQUEST_SIZE			64
>  #define UVC_MAX_EVENTS				4
>  
>  /* ------------------------------------------------------------------------
>   * Structures
>   */
> +struct uvc_request {
> +	struct usb_request *req;
> +	__u8 *req_buffer;
> +	struct uvc_video *video;
> +};
>  
>  struct uvc_video {
>  	struct uvc_device *uvc;
> @@ -87,10 +91,11 @@ struct uvc_video {
>  	unsigned int imagesize;
>  	struct mutex mutex;	/* protects frame parameters */
>  
> +	int uvc_num_requests;
> +
>  	/* Requests */
>  	unsigned int req_size;
> -	struct usb_request *req[UVC_NUM_REQUESTS];
> -	__u8 *req_buffer[UVC_NUM_REQUESTS];
> +	struct uvc_request *ureq;
>  	struct list_head req_free;
>  	spinlock_t req_lock;
>  
> diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
> index 61e2c94cc0b0c..dcd71304d521c 100644
> --- a/drivers/usb/gadget/function/uvc_queue.c
> +++ b/drivers/usb/gadget/function/uvc_queue.c
> @@ -43,6 +43,8 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>  {
>  	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
>  	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
> +	struct uvc_device *uvc = video->uvc;
> +	struct usb_composite_dev *cdev = uvc->func.config->cdev;
>  
>  	if (*nbuffers > UVC_MAX_VIDEO_BUFFERS)
>  		*nbuffers = UVC_MAX_VIDEO_BUFFERS;
> @@ -51,6 +53,11 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>  
>  	sizes[0] = video->imagesize;
>  
> +	if (cdev->gadget->speed < USB_SPEED_SUPER)
> +		video->uvc_num_requests = 4;
> +	else
> +		video->uvc_num_requests = 64;
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 633e23d58d868..57840083dfdda 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -11,6 +11,7 @@
>  #include <linux/errno.h>
>  #include <linux/usb/ch9.h>
>  #include <linux/usb/gadget.h>
> +#include <linux/module.h>
>  #include <linux/usb/video.h>
>  
>  #include <media/v4l2-dev.h>
> @@ -145,7 +146,8 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>  static void
>  uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>  {
> -	struct uvc_video *video = req->context;
> +	struct uvc_request *ureq = req->context;
> +	struct uvc_video *video = ureq->video;
>  	struct uvc_video_queue *queue = &video->queue;
>  	unsigned long flags;
>  
> @@ -177,16 +179,19 @@ uvc_video_free_requests(struct uvc_video *video)
>  {
>  	unsigned int i;
>  
> -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
> -		if (video->req[i]) {
> -			usb_ep_free_request(video->ep, video->req[i]);
> -			video->req[i] = NULL;
> -		}
> -
> -		if (video->req_buffer[i]) {
> -			kfree(video->req_buffer[i]);
> -			video->req_buffer[i] = NULL;
> +	if (video->ureq) {
> +		for (i = 0; i < video->uvc_num_requests; ++i) {
> +			if (video->ureq[i].req) {
> +				usb_ep_free_request(video->ep, video->ureq[i].req);
> +				video->ureq[i].req = NULL;
> +			}
> +			if (video->ureq[i].req_buffer) {
> +				kfree(video->ureq[i].req_buffer);
> +				video->ureq[i].req_buffer = NULL;
> +			}
>  		}
> +		kfree(video->ureq);
> +		video->ureq = NULL;
>  	}
>  
>  	INIT_LIST_HEAD(&video->req_free);
> @@ -207,21 +212,26 @@ uvc_video_alloc_requests(struct uvc_video *video)
>  		 * max_t(unsigned int, video->ep->maxburst, 1)
>  		 * (video->ep->mult);
>  
> -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
> -		video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL);
> -		if (video->req_buffer[i] == NULL)
> +	video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL);
> +	if (video->ureq == NULL)
> +		return ret;
> +
> +	for (i = 0; i < video->uvc_num_requests; ++i) {
> +		video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL);
> +		if (video->ureq[i].req_buffer == NULL)
>  			goto error;
>  
> -		video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL);
> -		if (video->req[i] == NULL)
> +		video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL);
> +		if (video->ureq[i].req == NULL)
>  			goto error;
>  
> -		video->req[i]->buf = video->req_buffer[i];
> -		video->req[i]->length = 0;
> -		video->req[i]->complete = uvc_video_complete;
> -		video->req[i]->context = video;
> +		video->ureq[i].req->buf = video->ureq[i].req_buffer;
> +		video->ureq[i].req->length = 0;
> +		video->ureq[i].req->complete = uvc_video_complete;
> +		video->ureq[i].req->context = &video->ureq[i];
> +		video->ureq[i].video = video;
>  
> -		list_add_tail(&video->req[i]->list, &video->req_free);
> +		list_add_tail(&video->ureq[i].req->list, &video->req_free);
>  	}
>  
>  	video->req_size = req_size;
> @@ -312,9 +322,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>  		cancel_work_sync(&video->pump);
>  		uvcg_queue_cancel(&video->queue, 0);
>  
> -		for (i = 0; i < UVC_NUM_REQUESTS; ++i)
> -			if (video->req[i])
> -				usb_ep_dequeue(video->ep, video->req[i]);
> +		for (i = 0; i < video->uvc_num_requests; ++i)
> +			if (video->ureq && video->ureq[i].req)
> +				usb_ep_dequeue(video->ep, video->ureq[i].req);
>  
>  		uvc_video_free_requests(video);
>  		uvcg_queue_enable(&video->queue, 0);
> -- 
> 2.29.2
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter
  2021-05-30 22:30 ` [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter Michael Grzeschik
@ 2021-06-14 10:35   ` paul.elder
  2021-06-15  1:36     ` Laurent Pinchart
  0 siblings, 1 reply; 17+ messages in thread
From: paul.elder @ 2021-06-14 10:35 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: linux-usb, laurent.pinchart, caleb.connolly, balbi, kernel

Hi Michael,

On Mon, May 31, 2021 at 12:30:41AM +0200, Michael Grzeschik wrote:
> With usb3 we handle much more requests. It only enables the interrupt on

s/much/many/

> every quarter of the allocated requests. This patch decreases the
> interrupt load.

The last two sentences might be better combined, like:

"Decrease the interrupt load by only enabling the interrupt every
quarter of the allocated requests."

> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>

Other than that, looks good to me.

Reviewed-by: Paul Elder <paul.elder@ideasonboard.com>

> ---
>  drivers/usb/gadget/function/uvc.h       |  2 ++
>  drivers/usb/gadget/function/uvc_video.c | 12 ++++++++++++
>  2 files changed, 14 insertions(+)
> 
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index c1f06d9df5820..5a76e9351b530 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -101,6 +101,8 @@ struct uvc_video {
>  	struct list_head req_free;
>  	spinlock_t req_lock;
>  
> +	int req_int_count;
> +
>  	void (*encode) (struct usb_request *req, struct uvc_video *video,
>  			struct uvc_buffer *buf);
>  
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 240d361a45a44..66754687ce217 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -357,6 +357,16 @@ static void uvcg_video_pump(struct work_struct *work)
>  
>  		video->encode(req, video, buf);
>  
> +		if (list_empty(&video->req_free) ||
> +		    (buf->state == UVC_BUF_STATE_DONE) ||
> +		    (!(video->req_int_count %
> +		       DIV_ROUND_UP(video->uvc_num_requests, 4)))) {
> +			video->req_int_count = 0;
> +			req->no_interrupt = 0;
> +		} else {
> +			req->no_interrupt = 1;
> +		}
> +
>  		/* Queue the USB request */
>  		ret = uvcg_video_ep_queue(video, req);
>  		spin_unlock_irqrestore(&queue->irqlock, flags);
> @@ -365,6 +375,7 @@ static void uvcg_video_pump(struct work_struct *work)
>  			uvcg_queue_cancel(queue, 0);
>  			break;
>  		}
> +		video->req_int_count++;
>  	}
>  
>  	spin_lock_irqsave(&video->req_lock, flags);
> @@ -437,6 +448,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>  	video->width = 320;
>  	video->height = 240;
>  	video->imagesize = 320 * 240 * 2;
> +	video->req_int_count = 0;
>  
>  	/* Initialize the video buffers queue. */
>  	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
> -- 
> 2.29.2
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed
  2021-05-30 22:30 ` [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed Michael Grzeschik
  2021-06-14 10:34   ` paul.elder
@ 2021-06-15  1:28   ` Laurent Pinchart
  2021-06-15  1:38     ` Laurent Pinchart
  1 sibling, 1 reply; 17+ messages in thread
From: Laurent Pinchart @ 2021-06-15  1:28 UTC (permalink / raw)
  To: Michael Grzeschik; +Cc: linux-usb, caleb.connolly, paul.elder, balbi, kernel

Hi Michael,

Thank you for the patch.

On Mon, May 31, 2021 at 12:30:39AM +0200, Michael Grzeschik wrote:
> While sending bigger images is possible with USB_SPEED_SUPER it is
> better to use more isochronous requests in flight. This patch makes the
> number uvc_num_requests dynamic by changing it depending on the gadget
> speed.
> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> ---
> v1 -> v2: - fixed null pointer dereference in uvc_video_free_requests
> 
>  drivers/usb/gadget/function/uvc.h       | 11 +++--
>  drivers/usb/gadget/function/uvc_queue.c |  7 ++++
>  drivers/usb/gadget/function/uvc_video.c | 56 +++++++++++++++----------
>  3 files changed, 48 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index 23ee25383c1f7..83b9e945944e8 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -65,13 +65,17 @@ extern unsigned int uvc_gadget_trace_param;
>   * Driver specific constants
>   */
>  
> -#define UVC_NUM_REQUESTS			4
>  #define UVC_MAX_REQUEST_SIZE			64
>  #define UVC_MAX_EVENTS				4
>  
>  /* ------------------------------------------------------------------------
>   * Structures
>   */
> +struct uvc_request {
> +	struct usb_request *req;
> +	__u8 *req_buffer;

While at it, you could s/__u8/u8/ as this header isn't used by
userspace.

> +	struct uvc_video *video;
> +};
>  
>  struct uvc_video {
>  	struct uvc_device *uvc;
> @@ -87,10 +91,11 @@ struct uvc_video {
>  	unsigned int imagesize;
>  	struct mutex mutex;	/* protects frame parameters */
>  
> +	int uvc_num_requests;

Never negative, you can make it an unsigned int.

> +
>  	/* Requests */
>  	unsigned int req_size;
> -	struct usb_request *req[UVC_NUM_REQUESTS];
> -	__u8 *req_buffer[UVC_NUM_REQUESTS];
> +	struct uvc_request *ureq;
>  	struct list_head req_free;
>  	spinlock_t req_lock;
>  
> diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
> index 61e2c94cc0b0c..dcd71304d521c 100644
> --- a/drivers/usb/gadget/function/uvc_queue.c
> +++ b/drivers/usb/gadget/function/uvc_queue.c
> @@ -43,6 +43,8 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>  {
>  	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
>  	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
> +	struct uvc_device *uvc = video->uvc;
> +	struct usb_composite_dev *cdev = uvc->func.config->cdev;

	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;

>  
>  	if (*nbuffers > UVC_MAX_VIDEO_BUFFERS)
>  		*nbuffers = UVC_MAX_VIDEO_BUFFERS;
> @@ -51,6 +53,11 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>  
>  	sizes[0] = video->imagesize;
>  
> +	if (cdev->gadget->speed < USB_SPEED_SUPER)
> +		video->uvc_num_requests = 4;
> +	else
> +		video->uvc_num_requests = 64;
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 633e23d58d868..57840083dfdda 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -11,6 +11,7 @@
>  #include <linux/errno.h>
>  #include <linux/usb/ch9.h>
>  #include <linux/usb/gadget.h>
> +#include <linux/module.h>

Alphabetical order please.

>  #include <linux/usb/video.h>
>  
>  #include <media/v4l2-dev.h>
> @@ -145,7 +146,8 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>  static void
>  uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>  {
> -	struct uvc_video *video = req->context;
> +	struct uvc_request *ureq = req->context;
> +	struct uvc_video *video = ureq->video;
>  	struct uvc_video_queue *queue = &video->queue;
>  	unsigned long flags;
>  
> @@ -177,16 +179,19 @@ uvc_video_free_requests(struct uvc_video *video)
>  {
>  	unsigned int i;
>  
> -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
> -		if (video->req[i]) {
> -			usb_ep_free_request(video->ep, video->req[i]);
> -			video->req[i] = NULL;
> -		}
> -
> -		if (video->req_buffer[i]) {
> -			kfree(video->req_buffer[i]);
> -			video->req_buffer[i] = NULL;
> +	if (video->ureq) {
> +		for (i = 0; i < video->uvc_num_requests; ++i) {
> +			if (video->ureq[i].req) {
> +				usb_ep_free_request(video->ep, video->ureq[i].req);
> +				video->ureq[i].req = NULL;
> +			}

Blank line here please.

> +			if (video->ureq[i].req_buffer) {
> +				kfree(video->ureq[i].req_buffer);
> +				video->ureq[i].req_buffer = NULL;
> +			}
>  		}

Here too.

> +		kfree(video->ureq);
> +		video->ureq = NULL;
>  	}
>  
>  	INIT_LIST_HEAD(&video->req_free);
> @@ -207,21 +212,26 @@ uvc_video_alloc_requests(struct uvc_video *video)
>  		 * max_t(unsigned int, video->ep->maxburst, 1)
>  		 * (video->ep->mult);
>  
> -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
> -		video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL);
> -		if (video->req_buffer[i] == NULL)
> +	video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL);
> +	if (video->ureq == NULL)
> +		return ret;

		return -ENOMEM;

would be more readable (I had to check the value of ret to review this
patch).

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

> +
> +	for (i = 0; i < video->uvc_num_requests; ++i) {
> +		video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL);
> +		if (video->ureq[i].req_buffer == NULL)
>  			goto error;
>  
> -		video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL);
> -		if (video->req[i] == NULL)
> +		video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL);
> +		if (video->ureq[i].req == NULL)
>  			goto error;
>  
> -		video->req[i]->buf = video->req_buffer[i];
> -		video->req[i]->length = 0;
> -		video->req[i]->complete = uvc_video_complete;
> -		video->req[i]->context = video;
> +		video->ureq[i].req->buf = video->ureq[i].req_buffer;
> +		video->ureq[i].req->length = 0;
> +		video->ureq[i].req->complete = uvc_video_complete;
> +		video->ureq[i].req->context = &video->ureq[i];
> +		video->ureq[i].video = video;
>  
> -		list_add_tail(&video->req[i]->list, &video->req_free);
> +		list_add_tail(&video->ureq[i].req->list, &video->req_free);
>  	}
>  
>  	video->req_size = req_size;
> @@ -312,9 +322,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>  		cancel_work_sync(&video->pump);
>  		uvcg_queue_cancel(&video->queue, 0);
>  
> -		for (i = 0; i < UVC_NUM_REQUESTS; ++i)
> -			if (video->req[i])
> -				usb_ep_dequeue(video->ep, video->req[i]);
> +		for (i = 0; i < video->uvc_num_requests; ++i)
> +			if (video->ureq && video->ureq[i].req)
> +				usb_ep_dequeue(video->ep, video->ureq[i].req);
>  
>  		uvc_video_free_requests(video);
>  		uvcg_queue_enable(&video->queue, 0);

-- 
Regards,

Laurent Pinchart

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter
  2021-06-14 10:35   ` paul.elder
@ 2021-06-15  1:36     ` Laurent Pinchart
  2021-06-22 15:02       ` Michael Grzeschik
  0 siblings, 1 reply; 17+ messages in thread
From: Laurent Pinchart @ 2021-06-15  1:36 UTC (permalink / raw)
  To: paul.elder; +Cc: Michael Grzeschik, linux-usb, caleb.connolly, balbi, kernel

Hi Michael,

Thank you for the patch.

On Mon, Jun 14, 2021 at 07:35:58PM +0900, paul.elder@ideasonboard.com wrote:
> On Mon, May 31, 2021 at 12:30:41AM +0200, Michael Grzeschik wrote:
> > With usb3 we handle much more requests. It only enables the interrupt on
> 
> s/much/many/
> 
> > every quarter of the allocated requests. This patch decreases the
> > interrupt load.
> 
> The last two sentences might be better combined, like:
> 
> "Decrease the interrupt load by only enabling the interrupt every
> quarter of the allocated requests."
> 
> > Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> 
> Other than that, looks good to me.
> 
> Reviewed-by: Paul Elder <paul.elder@ideasonboard.com>
> 
> > ---
> >  drivers/usb/gadget/function/uvc.h       |  2 ++
> >  drivers/usb/gadget/function/uvc_video.c | 12 ++++++++++++
> >  2 files changed, 14 insertions(+)
> > 
> > diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> > index c1f06d9df5820..5a76e9351b530 100644
> > --- a/drivers/usb/gadget/function/uvc.h
> > +++ b/drivers/usb/gadget/function/uvc.h
> > @@ -101,6 +101,8 @@ struct uvc_video {
> >  	struct list_head req_free;
> >  	spinlock_t req_lock;
> >  
> > +	int req_int_count;

unsigned int.

> > +
> >  	void (*encode) (struct usb_request *req, struct uvc_video *video,
> >  			struct uvc_buffer *buf);
> >  
> > diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> > index 240d361a45a44..66754687ce217 100644
> > --- a/drivers/usb/gadget/function/uvc_video.c
> > +++ b/drivers/usb/gadget/function/uvc_video.c
> > @@ -357,6 +357,16 @@ static void uvcg_video_pump(struct work_struct *work)
> >  
> >  		video->encode(req, video, buf);
> >  

A comment to explain the logic would be useful.

> > +		if (list_empty(&video->req_free) ||
> > +		    (buf->state == UVC_BUF_STATE_DONE) ||

No need for parentheses here.

> > +		    (!(video->req_int_count %
> > +		       DIV_ROUND_UP(video->uvc_num_requests, 4)))) {
> > +			video->req_int_count = 0;
> > +			req->no_interrupt = 0;
> > +		} else {
> > +			req->no_interrupt = 1;
> > +		}
> > +
> >  		/* Queue the USB request */
> >  		ret = uvcg_video_ep_queue(video, req);
> >  		spin_unlock_irqrestore(&queue->irqlock, flags);
> > @@ -365,6 +375,7 @@ static void uvcg_video_pump(struct work_struct *work)
> >  			uvcg_queue_cancel(queue, 0);
> >  			break;
> >  		}
> > +		video->req_int_count++;
> >  	}
> >  
> >  	spin_lock_irqsave(&video->req_lock, flags);
> > @@ -437,6 +448,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
> >  	video->width = 320;
> >  	video->height = 240;
> >  	video->imagesize = 320 * 240 * 2;
> > +	video->req_int_count = 0;

Should this be initialized to 0 in uvcg_video_enable() instead of
uvcg_video_init(), to ensure that stop/start cycles will operate in a
predictable way ?

> >  
> >  	/* Initialize the video buffers queue. */
> >  	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,

-- 
Regards,

Laurent Pinchart

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed
  2021-06-15  1:28   ` Laurent Pinchart
@ 2021-06-15  1:38     ` Laurent Pinchart
  2021-06-22 15:00       ` Michael Grzeschik
  0 siblings, 1 reply; 17+ messages in thread
From: Laurent Pinchart @ 2021-06-15  1:38 UTC (permalink / raw)
  To: Michael Grzeschik; +Cc: linux-usb, caleb.connolly, paul.elder, balbi, kernel

Hi Michael,

Another comment.

On Tue, Jun 15, 2021 at 04:28:05AM +0300, Laurent Pinchart wrote:
> On Mon, May 31, 2021 at 12:30:39AM +0200, Michael Grzeschik wrote:
> > While sending bigger images is possible with USB_SPEED_SUPER it is
> > better to use more isochronous requests in flight. This patch makes the
> > number uvc_num_requests dynamic by changing it depending on the gadget
> > speed.
> > 
> > Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> > ---
> > v1 -> v2: - fixed null pointer dereference in uvc_video_free_requests
> > 
> >  drivers/usb/gadget/function/uvc.h       | 11 +++--
> >  drivers/usb/gadget/function/uvc_queue.c |  7 ++++
> >  drivers/usb/gadget/function/uvc_video.c | 56 +++++++++++++++----------
> >  3 files changed, 48 insertions(+), 26 deletions(-)
> > 
> > diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> > index 23ee25383c1f7..83b9e945944e8 100644
> > --- a/drivers/usb/gadget/function/uvc.h
> > +++ b/drivers/usb/gadget/function/uvc.h
> > @@ -65,13 +65,17 @@ extern unsigned int uvc_gadget_trace_param;
> >   * Driver specific constants
> >   */
> >  
> > -#define UVC_NUM_REQUESTS			4
> >  #define UVC_MAX_REQUEST_SIZE			64
> >  #define UVC_MAX_EVENTS				4
> >  
> >  /* ------------------------------------------------------------------------
> >   * Structures
> >   */
> > +struct uvc_request {
> > +	struct usb_request *req;
> > +	__u8 *req_buffer;
> 
> While at it, you could s/__u8/u8/ as this header isn't used by
> userspace.
> 
> > +	struct uvc_video *video;
> > +};
> >  
> >  struct uvc_video {
> >  	struct uvc_device *uvc;
> > @@ -87,10 +91,11 @@ struct uvc_video {
> >  	unsigned int imagesize;
> >  	struct mutex mutex;	/* protects frame parameters */
> >  
> > +	int uvc_num_requests;
> 
> Never negative, you can make it an unsigned int.
> 
> > +
> >  	/* Requests */
> >  	unsigned int req_size;
> > -	struct usb_request *req[UVC_NUM_REQUESTS];
> > -	__u8 *req_buffer[UVC_NUM_REQUESTS];
> > +	struct uvc_request *ureq;
> >  	struct list_head req_free;
> >  	spinlock_t req_lock;
> >  
> > diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
> > index 61e2c94cc0b0c..dcd71304d521c 100644
> > --- a/drivers/usb/gadget/function/uvc_queue.c
> > +++ b/drivers/usb/gadget/function/uvc_queue.c
> > @@ -43,6 +43,8 @@ static int uvc_queue_setup(struct vb2_queue *vq,
> >  {
> >  	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
> >  	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
> > +	struct uvc_device *uvc = video->uvc;
> > +	struct usb_composite_dev *cdev = uvc->func.config->cdev;
> 
> 	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
> 
> >  
> >  	if (*nbuffers > UVC_MAX_VIDEO_BUFFERS)
> >  		*nbuffers = UVC_MAX_VIDEO_BUFFERS;
> > @@ -51,6 +53,11 @@ static int uvc_queue_setup(struct vb2_queue *vq,
> >  
> >  	sizes[0] = video->imagesize;
> >  
> > +	if (cdev->gadget->speed < USB_SPEED_SUPER)
> > +		video->uvc_num_requests = 4;
> > +	else
> > +		video->uvc_num_requests = 64;
> > +
> >  	return 0;
> >  }
> >  
> > diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> > index 633e23d58d868..57840083dfdda 100644
> > --- a/drivers/usb/gadget/function/uvc_video.c
> > +++ b/drivers/usb/gadget/function/uvc_video.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/errno.h>
> >  #include <linux/usb/ch9.h>
> >  #include <linux/usb/gadget.h>
> > +#include <linux/module.h>
> 
> Alphabetical order please.

Why is this header needed ? I can't see anything below related to it.

> >  #include <linux/usb/video.h>
> >  
> >  #include <media/v4l2-dev.h>
> > @@ -145,7 +146,8 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
> >  static void
> >  uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
> >  {
> > -	struct uvc_video *video = req->context;
> > +	struct uvc_request *ureq = req->context;
> > +	struct uvc_video *video = ureq->video;
> >  	struct uvc_video_queue *queue = &video->queue;
> >  	unsigned long flags;
> >  
> > @@ -177,16 +179,19 @@ uvc_video_free_requests(struct uvc_video *video)
> >  {
> >  	unsigned int i;
> >  
> > -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
> > -		if (video->req[i]) {
> > -			usb_ep_free_request(video->ep, video->req[i]);
> > -			video->req[i] = NULL;
> > -		}
> > -
> > -		if (video->req_buffer[i]) {
> > -			kfree(video->req_buffer[i]);
> > -			video->req_buffer[i] = NULL;
> > +	if (video->ureq) {
> > +		for (i = 0; i < video->uvc_num_requests; ++i) {
> > +			if (video->ureq[i].req) {
> > +				usb_ep_free_request(video->ep, video->ureq[i].req);
> > +				video->ureq[i].req = NULL;
> > +			}
> 
> Blank line here please.
> 
> > +			if (video->ureq[i].req_buffer) {
> > +				kfree(video->ureq[i].req_buffer);
> > +				video->ureq[i].req_buffer = NULL;
> > +			}
> >  		}
> 
> Here too.
> 
> > +		kfree(video->ureq);
> > +		video->ureq = NULL;
> >  	}
> >  
> >  	INIT_LIST_HEAD(&video->req_free);
> > @@ -207,21 +212,26 @@ uvc_video_alloc_requests(struct uvc_video *video)
> >  		 * max_t(unsigned int, video->ep->maxburst, 1)
> >  		 * (video->ep->mult);
> >  
> > -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
> > -		video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL);
> > -		if (video->req_buffer[i] == NULL)
> > +	video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL);
> > +	if (video->ureq == NULL)
> > +		return ret;
> 
> 		return -ENOMEM;
> 
> would be more readable (I had to check the value of ret to review this
> patch).
> 
> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> 
> > +
> > +	for (i = 0; i < video->uvc_num_requests; ++i) {
> > +		video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL);
> > +		if (video->ureq[i].req_buffer == NULL)
> >  			goto error;
> >  
> > -		video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL);
> > -		if (video->req[i] == NULL)
> > +		video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL);
> > +		if (video->ureq[i].req == NULL)
> >  			goto error;
> >  
> > -		video->req[i]->buf = video->req_buffer[i];
> > -		video->req[i]->length = 0;
> > -		video->req[i]->complete = uvc_video_complete;
> > -		video->req[i]->context = video;
> > +		video->ureq[i].req->buf = video->ureq[i].req_buffer;
> > +		video->ureq[i].req->length = 0;
> > +		video->ureq[i].req->complete = uvc_video_complete;
> > +		video->ureq[i].req->context = &video->ureq[i];
> > +		video->ureq[i].video = video;
> >  
> > -		list_add_tail(&video->req[i]->list, &video->req_free);
> > +		list_add_tail(&video->ureq[i].req->list, &video->req_free);
> >  	}
> >  
> >  	video->req_size = req_size;
> > @@ -312,9 +322,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
> >  		cancel_work_sync(&video->pump);
> >  		uvcg_queue_cancel(&video->queue, 0);
> >  
> > -		for (i = 0; i < UVC_NUM_REQUESTS; ++i)
> > -			if (video->req[i])
> > -				usb_ep_dequeue(video->ep, video->req[i]);
> > +		for (i = 0; i < video->uvc_num_requests; ++i)
> > +			if (video->ureq && video->ureq[i].req)
> > +				usb_ep_dequeue(video->ep, video->ureq[i].req);
> >  
> >  		uvc_video_free_requests(video);
> >  		uvcg_queue_enable(&video->queue, 0);

-- 
Regards,

Laurent Pinchart

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support
  2021-05-30 22:30 ` [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support Michael Grzeschik
  2021-05-31  1:33   ` kernel test robot
  2021-05-31  6:30   ` kernel test robot
@ 2021-06-15  2:10   ` Laurent Pinchart
  2021-06-25  9:12     ` Michael Grzeschik
  2 siblings, 1 reply; 17+ messages in thread
From: Laurent Pinchart @ 2021-06-15  2:10 UTC (permalink / raw)
  To: Michael Grzeschik; +Cc: linux-usb, caleb.connolly, paul.elder, balbi, kernel

Hi Michael,

Thank you for the patch.

On Mon, May 31, 2021 at 12:30:40AM +0200, Michael Grzeschik wrote:
> This patch adds support for scatter gather transfers. If the underlying
> gadgets sg_supported == true, then the videeobuf2-dma-sg is used and the
> encode routine maps all scatter entries to separate scatterlists for the
> usb gadget.

This is interesting. Could you please share measurements that show how
much CPU time is saved by this patch in typical use cases ?

> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> ---
> v1 -> v2: -
> 
>  drivers/usb/gadget/Kconfig              |  1 +
>  drivers/usb/gadget/function/f_uvc.c     |  1 +
>  drivers/usb/gadget/function/uvc.h       |  2 +
>  drivers/usb/gadget/function/uvc_queue.c | 23 ++++++-
>  drivers/usb/gadget/function/uvc_queue.h |  5 +-
>  drivers/usb/gadget/function/uvc_video.c | 80 ++++++++++++++++++++++++-
>  6 files changed, 105 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
> index 2d152571a7de8..dd58094f0b85b 100644
> --- a/drivers/usb/gadget/Kconfig
> +++ b/drivers/usb/gadget/Kconfig
> @@ -450,6 +450,7 @@ config USB_CONFIGFS_F_UVC
>  	depends on USB_CONFIGFS
>  	depends on VIDEO_V4L2
>  	depends on VIDEO_DEV
> +	select VIDEOBUF2_DMA_SG
>  	select VIDEOBUF2_VMALLOC
>  	select USB_F_UVC
>  	help
> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
> index f48a00e497945..9d87c0fb8f92e 100644
> --- a/drivers/usb/gadget/function/f_uvc.c
> +++ b/drivers/usb/gadget/function/f_uvc.c
> @@ -418,6 +418,7 @@ uvc_register_video(struct uvc_device *uvc)
>  
>  	/* TODO reference counting. */
>  	uvc->vdev.v4l2_dev = &uvc->v4l2_dev;
> +	uvc->vdev.v4l2_dev->dev = &cdev->gadget->dev;

A good change, which could possibly be split to its own patch.

>  	uvc->vdev.fops = &uvc_v4l2_fops;
>  	uvc->vdev.ioctl_ops = &uvc_v4l2_ioctl_ops;
>  	uvc->vdev.release = video_device_release_empty;
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index 83b9e945944e8..c1f06d9df5820 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -75,6 +75,8 @@ struct uvc_request {
>  	struct usb_request *req;
>  	__u8 *req_buffer;
>  	struct uvc_video *video;
> +	struct sg_table sgt;
> +	u8 header[2];
>  };
>  
>  struct uvc_video {
> diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
> index dcd71304d521c..e36a3506842b7 100644
> --- a/drivers/usb/gadget/function/uvc_queue.c
> +++ b/drivers/usb/gadget/function/uvc_queue.c
> @@ -18,6 +18,7 @@
>  
>  #include <media/v4l2-common.h>
>  #include <media/videobuf2-vmalloc.h>
> +#include <media/videobuf2-dma-sg.h>

Alphabetical order please.

>  
>  #include "uvc.h"
>  
> @@ -52,6 +53,7 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>  	*nplanes = 1;
>  
>  	sizes[0] = video->imagesize;
> +	alloc_devs[0] = video->uvc->v4l2_dev.dev->parent;

Is there a guarantee that the gadget's parent is always the UDC ?

>  
>  	if (cdev->gadget->speed < USB_SPEED_SUPER)
>  		video->uvc_num_requests = 4;
> @@ -66,6 +68,9 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
>  	struct uvc_video_queue *queue = vb2_get_drv_priv(vb->vb2_queue);
>  	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>  	struct uvc_buffer *buf = container_of(vbuf, struct uvc_buffer, buf);
> +	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
> +	struct uvc_device *uvc = video->uvc;
> +	struct usb_composite_dev *cdev = uvc->func.config->cdev;
>  
>  	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
>  	    vb2_get_plane_payload(vb, 0) > vb2_plane_size(vb, 0)) {
> @@ -77,7 +82,12 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
>  		return -ENODEV;
>  
>  	buf->state = UVC_BUF_STATE_QUEUED;
> -	buf->mem = vb2_plane_vaddr(vb, 0);
> +	if (cdev->gadget->sg_supported) {

How about storing a use_sg flag in uvc_video_queue, to avoid poking
through layers ? The flag can also be used in uvcg_queue_init().

> +		buf->sgt = vb2_dma_sg_plane_desc(vb, 0);
> +		buf->sg = buf->sgt->sgl;
> +	} else {
> +		buf->mem = vb2_plane_vaddr(vb, 0);
> +	}
>  	buf->length = vb2_plane_size(vb, 0);
>  	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
>  		buf->bytesused = 0;
> @@ -117,9 +127,11 @@ static const struct vb2_ops uvc_queue_qops = {
>  	.wait_finish = vb2_ops_wait_finish,
>  };
>  
> -int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
> +int uvcg_queue_init(struct device *dev, struct uvc_video_queue *queue, enum v4l2_buf_type type,

Please move the dev parameter after queue, to pass as the first
parameter the object that the function operates on.

>  		    struct mutex *lock)
>  {
> +	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
> +	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>  	int ret;
>  
>  	queue->queue.type = type;
> @@ -128,9 +140,14 @@ int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>  	queue->queue.buf_struct_size = sizeof(struct uvc_buffer);
>  	queue->queue.ops = &uvc_queue_qops;
>  	queue->queue.lock = lock;
> -	queue->queue.mem_ops = &vb2_vmalloc_memops;
> +	if (cdev->gadget->sg_supported)
> +		queue->queue.mem_ops = &vb2_dma_sg_memops;
> +	else
> +		queue->queue.mem_ops = &vb2_vmalloc_memops;
> +
>  	queue->queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC
>  				     | V4L2_BUF_FLAG_TSTAMP_SRC_EOF;
> +	queue->queue.dev = dev;
>  	ret = vb2_queue_init(&queue->queue);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h
> index 2f0fff7698430..bb8753b26074f 100644
> --- a/drivers/usb/gadget/function/uvc_queue.h
> +++ b/drivers/usb/gadget/function/uvc_queue.h
> @@ -34,6 +34,9 @@ struct uvc_buffer {
>  
>  	enum uvc_buffer_state state;
>  	void *mem;
> +	struct sg_table *sgt;
> +	struct scatterlist *sg;
> +	unsigned int offset;
>  	unsigned int length;
>  	unsigned int bytesused;
>  };
> @@ -59,7 +62,7 @@ static inline int uvc_queue_streaming(struct uvc_video_queue *queue)
>  	return vb2_is_streaming(&queue->queue);
>  }
>  
> -int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
> +int uvcg_queue_init(struct device *d, struct uvc_video_queue *queue, enum v4l2_buf_type type,
>  		    struct mutex *lock);
>  
>  void uvcg_free_buffers(struct uvc_video_queue *queue);
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 57840083dfdda..240d361a45a44 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -95,6 +95,71 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
>  		video->payload_size = 0;
>  }
>  
> +static void
> +uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
> +		struct uvc_buffer *buf)
> +{
> +	int pending = buf->bytesused - video->queue.buf_used;
> +	struct uvc_request *ureq = req->context;
> +	struct scatterlist *sg, *iter;
> +	int len = video->req_size;
> +	int sg_left, part = 0;
> +	int ret;
> +	int i;

unsigned int for pending, len, sg_left, part and i.

> +
> +	sg = ureq->sgt.sgl;
> +	sg_init_table(sg, ureq->sgt.nents);
> +
> +	/* Init the header. */
> +	memset(ureq->header, 0, 2);

Can you add

#define UVCG_REQUEST_HEADER_LEN		2

somewhere, and use it here, in the uvc_request structure definition, and
in uvc_video_encode_header() ? Otherwise I fear we'll forget to update
one of the locations when we'll add support for longer headers.

Is the memset() needed though ?

> +	ret = uvc_video_encode_header(video, buf, ureq->header,
> +				      video->req_size);
> +	sg_set_buf(sg, ureq->header, 2);
> +	len -= ret;
> +
> +	if (pending <= len)
> +		len = pending;
> +
> +	req->length = (len == pending) ? len + 2 : video->req_size;
> +
> +	/* Init the pending sgs with payload */
> +	sg = sg_next(sg);
> +
> +	for_each_sg(sg, iter, ureq->sgt.nents - 1, i) {
> +		if (!len || !buf->sg)
> +			break;
> +
> +		sg_left = sg_dma_len(buf->sg) - buf->offset;
> +		part = min_t(unsigned int, len, sg_left);
> +
> +		sg_set_page(iter, sg_page(buf->sg), part, buf->offset);
> +
> +		if (part == sg_left) {
> +			buf->offset = 0;
> +			buf->sg = sg_next(buf->sg);
> +		} else {
> +			buf->offset += part;
> +		}
> +		len -= part;
> +	}
> +
> +	/* Assign the video data with header. */
> +	req->buf = NULL;
> +	req->sg	= ureq->sgt.sgl;
> +	req->num_sgs = i + 1;

Given that you construct the request scatterlist manually anyway,
wouldn't it be much simpler to use the vb2 dma contig allocator for the
V4L2 buffer ? Or do you expect that the device would run out of dma
contig space (which I expect to be provided by CMA or an IOMMU) ? It
would also likely help to fill every sg entry as much as possible, while
the above code potentially creates smaller entries in the request sgt
when reaching the boundary between two entries in the V4L2 buffer sgt.

I also wonder if this couldn't be further optimized by creating an sgt
with two entries, one for the header and one for the data,
unconditionally. What's the maximum possible request size, could it be
larger than what an sgt entry can support ?

> +
> +	req->length -= len;
> +	video->queue.buf_used += req->length - 2;
> +
> +	if (buf->bytesused == video->queue.buf_used || !buf->sg) {
> +		video->queue.buf_used = 0;
> +		buf->state = UVC_BUF_STATE_DONE;
> +		buf->offset = 0;
> +		uvcg_queue_next_buffer(&video->queue, buf);
> +		video->fid ^= UVC_STREAM_FID;
> +	}
> +}
> +
>  static void
>  uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
>  		struct uvc_buffer *buf)
> @@ -232,6 +297,10 @@ uvc_video_alloc_requests(struct uvc_video *video)
>  		video->ureq[i].video = video;
>  
>  		list_add_tail(&video->ureq[i].req->list, &video->req_free);
> +		/* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
> +		sg_alloc_table(&video->ureq[i].sgt,
> +			       DIV_ROUND_UP(req_size - 2, PAGE_SIZE) + 2,
> +			       GFP_KERNEL);

It looks like this is leaked.

>  	}
>  
>  	video->req_size = req_size;
> @@ -309,6 +378,7 @@ static void uvcg_video_pump(struct work_struct *work)
>   */
>  int uvcg_video_enable(struct uvc_video *video, int enable)
>  {
> +	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>  	unsigned int i;
>  	int ret;
>  
> @@ -340,8 +410,12 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>  	if (video->max_payload_size) {
>  		video->encode = uvc_video_encode_bulk;
>  		video->payload_size = 0;
> -	} else
> -		video->encode = uvc_video_encode_isoc;
> +	} else {
> +		if (cdev->gadget->sg_supported)
> +			video->encode = uvc_video_encode_isoc_sg;
> +		else
> +			video->encode = uvc_video_encode_isoc;
> +	}
>  
>  	schedule_work(&video->pump);
>  
> @@ -365,7 +439,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>  	video->imagesize = 320 * 240 * 2;
>  
>  	/* Initialize the video buffers queue. */
> -	uvcg_queue_init(&video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
> +	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
>  			&video->mutex);
>  	return 0;
>  }

-- 
Regards,

Laurent Pinchart

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed
  2021-06-15  1:38     ` Laurent Pinchart
@ 2021-06-22 15:00       ` Michael Grzeschik
  0 siblings, 0 replies; 17+ messages in thread
From: Michael Grzeschik @ 2021-06-22 15:00 UTC (permalink / raw)
  To: Laurent Pinchart; +Cc: linux-usb, caleb.connolly, paul.elder, balbi, kernel

[-- Attachment #1: Type: text/plain, Size: 8204 bytes --]

Hi Laurent!

On Tue, Jun 15, 2021 at 04:38:29AM +0300, Laurent Pinchart wrote:
>Hi Michael,
>
>Another comment.
>
>On Tue, Jun 15, 2021 at 04:28:05AM +0300, Laurent Pinchart wrote:
>> On Mon, May 31, 2021 at 12:30:39AM +0200, Michael Grzeschik wrote:
>> > While sending bigger images is possible with USB_SPEED_SUPER it is
>> > better to use more isochronous requests in flight. This patch makes the
>> > number uvc_num_requests dynamic by changing it depending on the gadget
>> > speed.
>> >
>> > Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> > ---
>> > v1 -> v2: - fixed null pointer dereference in uvc_video_free_requests
>> >
>> >  drivers/usb/gadget/function/uvc.h       | 11 +++--
>> >  drivers/usb/gadget/function/uvc_queue.c |  7 ++++
>> >  drivers/usb/gadget/function/uvc_video.c | 56 +++++++++++++++----------
>> >  3 files changed, 48 insertions(+), 26 deletions(-)
>> >
>> > diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> > index 23ee25383c1f7..83b9e945944e8 100644
>> > --- a/drivers/usb/gadget/function/uvc.h
>> > +++ b/drivers/usb/gadget/function/uvc.h
>> > @@ -65,13 +65,17 @@ extern unsigned int uvc_gadget_trace_param;
>> >   * Driver specific constants
>> >   */
>> >
>> > -#define UVC_NUM_REQUESTS			4
>> >  #define UVC_MAX_REQUEST_SIZE			64
>> >  #define UVC_MAX_EVENTS				4
>> >
>> >  /* ------------------------------------------------------------------------
>> >   * Structures
>> >   */
>> > +struct uvc_request {
>> > +	struct usb_request *req;
>> > +	__u8 *req_buffer;
>>
>> While at it, you could s/__u8/u8/ as this header isn't used by
>> userspace.
>>
>> > +	struct uvc_video *video;
>> > +};
>> >
>> >  struct uvc_video {
>> >  	struct uvc_device *uvc;
>> > @@ -87,10 +91,11 @@ struct uvc_video {
>> >  	unsigned int imagesize;
>> >  	struct mutex mutex;	/* protects frame parameters */
>> >
>> > +	int uvc_num_requests;
>>
>> Never negative, you can make it an unsigned int.
>>
>> > +
>> >  	/* Requests */
>> >  	unsigned int req_size;
>> > -	struct usb_request *req[UVC_NUM_REQUESTS];
>> > -	__u8 *req_buffer[UVC_NUM_REQUESTS];
>> > +	struct uvc_request *ureq;
>> >  	struct list_head req_free;
>> >  	spinlock_t req_lock;
>> >
>> > diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
>> > index 61e2c94cc0b0c..dcd71304d521c 100644
>> > --- a/drivers/usb/gadget/function/uvc_queue.c
>> > +++ b/drivers/usb/gadget/function/uvc_queue.c
>> > @@ -43,6 +43,8 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>> >  {
>> >  	struct uvc_video_queue *queue = vb2_get_drv_priv(vq);
>> >  	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
>> > +	struct uvc_device *uvc = video->uvc;
>> > +	struct usb_composite_dev *cdev = uvc->func.config->cdev;
>>
>> 	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>>
>> >
>> >  	if (*nbuffers > UVC_MAX_VIDEO_BUFFERS)
>> >  		*nbuffers = UVC_MAX_VIDEO_BUFFERS;
>> > @@ -51,6 +53,11 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>> >
>> >  	sizes[0] = video->imagesize;
>> >
>> > +	if (cdev->gadget->speed < USB_SPEED_SUPER)
>> > +		video->uvc_num_requests = 4;
>> > +	else
>> > +		video->uvc_num_requests = 64;
>> > +
>> >  	return 0;
>> >  }
>> >
>> > diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> > index 633e23d58d868..57840083dfdda 100644
>> > --- a/drivers/usb/gadget/function/uvc_video.c
>> > +++ b/drivers/usb/gadget/function/uvc_video.c
>> > @@ -11,6 +11,7 @@
>> >  #include <linux/errno.h>
>> >  #include <linux/usb/ch9.h>
>> >  #include <linux/usb/gadget.h>
>> > +#include <linux/module.h>
>>
>> Alphabetical order please.
>
>Why is this header needed ? I can't see anything below related to it.

It must have slipped through. I removed the header and worked in all
your feedback.

Thanks for the Review!

Regards,

Michael Grzeschik

>> >  #include <linux/usb/video.h>
>> >
>> >  #include <media/v4l2-dev.h>
>> > @@ -145,7 +146,8 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>> >  static void
>> >  uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>> >  {
>> > -	struct uvc_video *video = req->context;
>> > +	struct uvc_request *ureq = req->context;
>> > +	struct uvc_video *video = ureq->video;
>> >  	struct uvc_video_queue *queue = &video->queue;
>> >  	unsigned long flags;
>> >
>> > @@ -177,16 +179,19 @@ uvc_video_free_requests(struct uvc_video *video)
>> >  {
>> >  	unsigned int i;
>> >
>> > -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
>> > -		if (video->req[i]) {
>> > -			usb_ep_free_request(video->ep, video->req[i]);
>> > -			video->req[i] = NULL;
>> > -		}
>> > -
>> > -		if (video->req_buffer[i]) {
>> > -			kfree(video->req_buffer[i]);
>> > -			video->req_buffer[i] = NULL;
>> > +	if (video->ureq) {
>> > +		for (i = 0; i < video->uvc_num_requests; ++i) {
>> > +			if (video->ureq[i].req) {
>> > +				usb_ep_free_request(video->ep, video->ureq[i].req);
>> > +				video->ureq[i].req = NULL;
>> > +			}
>>
>> Blank line here please.
>>
>> > +			if (video->ureq[i].req_buffer) {
>> > +				kfree(video->ureq[i].req_buffer);
>> > +				video->ureq[i].req_buffer = NULL;
>> > +			}
>> >  		}
>>
>> Here too.
>>
>> > +		kfree(video->ureq);
>> > +		video->ureq = NULL;
>> >  	}
>> >
>> >  	INIT_LIST_HEAD(&video->req_free);
>> > @@ -207,21 +212,26 @@ uvc_video_alloc_requests(struct uvc_video *video)
>> >  		 * max_t(unsigned int, video->ep->maxburst, 1)
>> >  		 * (video->ep->mult);
>> >
>> > -	for (i = 0; i < UVC_NUM_REQUESTS; ++i) {
>> > -		video->req_buffer[i] = kmalloc(req_size, GFP_KERNEL);
>> > -		if (video->req_buffer[i] == NULL)
>> > +	video->ureq = kcalloc(video->uvc_num_requests, sizeof(struct uvc_request), GFP_KERNEL);
>> > +	if (video->ureq == NULL)
>> > +		return ret;
>>
>> 		return -ENOMEM;
>>
>> would be more readable (I had to check the value of ret to review this
>> patch).
>>
>> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
>>
>> > +
>> > +	for (i = 0; i < video->uvc_num_requests; ++i) {
>> > +		video->ureq[i].req_buffer = kmalloc(req_size, GFP_KERNEL);
>> > +		if (video->ureq[i].req_buffer == NULL)
>> >  			goto error;
>> >
>> > -		video->req[i] = usb_ep_alloc_request(video->ep, GFP_KERNEL);
>> > -		if (video->req[i] == NULL)
>> > +		video->ureq[i].req = usb_ep_alloc_request(video->ep, GFP_KERNEL);
>> > +		if (video->ureq[i].req == NULL)
>> >  			goto error;
>> >
>> > -		video->req[i]->buf = video->req_buffer[i];
>> > -		video->req[i]->length = 0;
>> > -		video->req[i]->complete = uvc_video_complete;
>> > -		video->req[i]->context = video;
>> > +		video->ureq[i].req->buf = video->ureq[i].req_buffer;
>> > +		video->ureq[i].req->length = 0;
>> > +		video->ureq[i].req->complete = uvc_video_complete;
>> > +		video->ureq[i].req->context = &video->ureq[i];
>> > +		video->ureq[i].video = video;
>> >
>> > -		list_add_tail(&video->req[i]->list, &video->req_free);
>> > +		list_add_tail(&video->ureq[i].req->list, &video->req_free);
>> >  	}
>> >
>> >  	video->req_size = req_size;
>> > @@ -312,9 +322,9 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>> >  		cancel_work_sync(&video->pump);
>> >  		uvcg_queue_cancel(&video->queue, 0);
>> >
>> > -		for (i = 0; i < UVC_NUM_REQUESTS; ++i)
>> > -			if (video->req[i])
>> > -				usb_ep_dequeue(video->ep, video->req[i]);
>> > +		for (i = 0; i < video->uvc_num_requests; ++i)
>> > +			if (video->ureq && video->ureq[i].req)
>> > +				usb_ep_dequeue(video->ep, video->ureq[i].req);
>> >
>> >  		uvc_video_free_requests(video);
>> >  		uvcg_queue_enable(&video->queue, 0);
>
>-- 
>Regards,
>
>Laurent Pinchart
>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter
  2021-06-15  1:36     ` Laurent Pinchart
@ 2021-06-22 15:02       ` Michael Grzeschik
  0 siblings, 0 replies; 17+ messages in thread
From: Michael Grzeschik @ 2021-06-22 15:02 UTC (permalink / raw)
  To: Laurent Pinchart; +Cc: paul.elder, linux-usb, caleb.connolly, balbi, kernel

[-- Attachment #1: Type: text/plain, Size: 3848 bytes --]

Hi Laurent!

On Tue, Jun 15, 2021 at 04:36:53AM +0300, Laurent Pinchart wrote:
>Hi Michael,
>
>Thank you for the patch.
>
>On Mon, Jun 14, 2021 at 07:35:58PM +0900, paul.elder@ideasonboard.com wrote:
>> On Mon, May 31, 2021 at 12:30:41AM +0200, Michael Grzeschik wrote:
>> > With usb3 we handle much more requests. It only enables the interrupt on
>>
>> s/much/many/
>>
>> > every quarter of the allocated requests. This patch decreases the
>> > interrupt load.
>>
>> The last two sentences might be better combined, like:
>>
>> "Decrease the interrupt load by only enabling the interrupt every
>> quarter of the allocated requests."
>>
>> > Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>
>> Other than that, looks good to me.
>>
>> Reviewed-by: Paul Elder <paul.elder@ideasonboard.com>
>>
>> > ---
>> >  drivers/usb/gadget/function/uvc.h       |  2 ++
>> >  drivers/usb/gadget/function/uvc_video.c | 12 ++++++++++++
>> >  2 files changed, 14 insertions(+)
>> >
>> > diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> > index c1f06d9df5820..5a76e9351b530 100644
>> > --- a/drivers/usb/gadget/function/uvc.h
>> > +++ b/drivers/usb/gadget/function/uvc.h
>> > @@ -101,6 +101,8 @@ struct uvc_video {
>> >  	struct list_head req_free;
>> >  	spinlock_t req_lock;
>> >
>> > +	int req_int_count;
>
>unsigned int.
>
>> > +
>> >  	void (*encode) (struct usb_request *req, struct uvc_video *video,
>> >  			struct uvc_buffer *buf);
>> >
>> > diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> > index 240d361a45a44..66754687ce217 100644
>> > --- a/drivers/usb/gadget/function/uvc_video.c
>> > +++ b/drivers/usb/gadget/function/uvc_video.c
>> > @@ -357,6 +357,16 @@ static void uvcg_video_pump(struct work_struct *work)
>> >
>> >  		video->encode(req, video, buf);
>> >
>
>A comment to explain the logic would be useful.
>
>> > +		if (list_empty(&video->req_free) ||
>> > +		    (buf->state == UVC_BUF_STATE_DONE) ||
>
>No need for parentheses here.
>
>> > +		    (!(video->req_int_count %
>> > +		       DIV_ROUND_UP(video->uvc_num_requests, 4)))) {
>> > +			video->req_int_count = 0;
>> > +			req->no_interrupt = 0;
>> > +		} else {
>> > +			req->no_interrupt = 1;
>> > +		}
>> > +
>> >  		/* Queue the USB request */
>> >  		ret = uvcg_video_ep_queue(video, req);
>> >  		spin_unlock_irqrestore(&queue->irqlock, flags);
>> > @@ -365,6 +375,7 @@ static void uvcg_video_pump(struct work_struct *work)
>> >  			uvcg_queue_cancel(queue, 0);
>> >  			break;
>> >  		}
>> > +		video->req_int_count++;
>> >  	}
>> >
>> >  	spin_lock_irqsave(&video->req_lock, flags);
>> > @@ -437,6 +448,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>> >  	video->width = 320;
>> >  	video->height = 240;
>> >  	video->imagesize = 320 * 240 * 2;
>> > +	video->req_int_count = 0;
>
>Should this be initialized to 0 in uvcg_video_enable() instead of
>uvcg_video_init(), to ensure that stop/start cycles will operate in a
>predictable way ?

This makes total sense. I don't see why it should not start by 0 on
every enable. I worked in yours and Paul's feedback and moved the
req_int_count initialization to uvcg_video_enable.

Thanks!

Michael Grzeschik

>> >
>> >  	/* Initialize the video buffers queue. */
>> >  	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
>
>-- 
>Regards,
>
>Laurent Pinchart
>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support
  2021-06-15  2:10   ` Laurent Pinchart
@ 2021-06-25  9:12     ` Michael Grzeschik
  2021-06-28  7:37       ` Michael Grzeschik
  0 siblings, 1 reply; 17+ messages in thread
From: Michael Grzeschik @ 2021-06-25  9:12 UTC (permalink / raw)
  To: Laurent Pinchart; +Cc: linux-usb, caleb.connolly, paul.elder, balbi, kernel

[-- Attachment #1: Type: text/plain, Size: 14493 bytes --]

On Tue, Jun 15, 2021 at 05:10:27AM +0300, Laurent Pinchart wrote:
>Hi Michael,
>
>Thank you for the patch.
>
>On Mon, May 31, 2021 at 12:30:40AM +0200, Michael Grzeschik wrote:
>> This patch adds support for scatter gather transfers. If the underlying
>> gadgets sg_supported == true, then the videeobuf2-dma-sg is used and the
>> encode routine maps all scatter entries to separate scatterlists for the
>> usb gadget.
>
>This is interesting. Could you please share measurements that show how
>much CPU time is saved by this patch in typical use cases ?

When streaming 1080p with request size of 1024 times 3 bytes I see this
change in top when applying this patch:

  PID USER      PR  NI    VIRT    RES  %CPU  %MEM     TIME+ S COMMAND

   64 root       0 -20    0.0m   0.0m   7.7   0.0   0:01.25 I [kworker/u5:0-uvcvideo]
   83 root       0 -20    0.0m   0.0m   4.5   0.0   0:03.71 I [kworker/u5:3-uvcvideo]
  307 root     -51   0    0.0m   0.0m   3.8   0.0   0:01.05 S [irq/51-dwc3]

vs.

   64 root       0 -20    0.0m   0.0m   5.8   0.0   0:01.79 I [kworker/u5:0-uvcvideo]
  306 root     -51   0    0.0m   0.0m   3.2   0.0   0:01.97 S [irq/51-dwc3]
   82 root       0 -20    0.0m   0.0m   0.6   0.0   0:01.86 I [kworker/u5:1-uvcvideo]

So 6.4 % less CPU load tells the measurement.

>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> ---
>> v1 -> v2: -
>>
>>  drivers/usb/gadget/Kconfig              |  1 +
>>  drivers/usb/gadget/function/f_uvc.c     |  1 +
>>  drivers/usb/gadget/function/uvc.h       |  2 +
>>  drivers/usb/gadget/function/uvc_queue.c | 23 ++++++-
>>  drivers/usb/gadget/function/uvc_queue.h |  5 +-
>>  drivers/usb/gadget/function/uvc_video.c | 80 ++++++++++++++++++++++++-
>>  6 files changed, 105 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
>> index 2d152571a7de8..dd58094f0b85b 100644
>> --- a/drivers/usb/gadget/Kconfig
>> +++ b/drivers/usb/gadget/Kconfig
>> @@ -450,6 +450,7 @@ config USB_CONFIGFS_F_UVC
>>  	depends on USB_CONFIGFS
>>  	depends on VIDEO_V4L2
>>  	depends on VIDEO_DEV
>> +	select VIDEOBUF2_DMA_SG
>>  	select VIDEOBUF2_VMALLOC
>>  	select USB_F_UVC
>>  	help
>> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
>> index f48a00e497945..9d87c0fb8f92e 100644
>> --- a/drivers/usb/gadget/function/f_uvc.c
>> +++ b/drivers/usb/gadget/function/f_uvc.c
>> @@ -418,6 +418,7 @@ uvc_register_video(struct uvc_device *uvc)
>>
>>  	/* TODO reference counting. */
>>  	uvc->vdev.v4l2_dev = &uvc->v4l2_dev;
>> +	uvc->vdev.v4l2_dev->dev = &cdev->gadget->dev;
>
>A good change, which could possibly be split to its own patch.

Right. I will move it to a single patch.

>>  	uvc->vdev.fops = &uvc_v4l2_fops;
>>  	uvc->vdev.ioctl_ops = &uvc_v4l2_ioctl_ops;
>>  	uvc->vdev.release = video_device_release_empty;
>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> index 83b9e945944e8..c1f06d9df5820 100644
>> --- a/drivers/usb/gadget/function/uvc.h
>> +++ b/drivers/usb/gadget/function/uvc.h
>> @@ -75,6 +75,8 @@ struct uvc_request {
>>  	struct usb_request *req;
>>  	__u8 *req_buffer;
>>  	struct uvc_video *video;
>> +	struct sg_table sgt;
>> +	u8 header[2];
>>  };
>>
>>  struct uvc_video {
>> diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
>> index dcd71304d521c..e36a3506842b7 100644
>> --- a/drivers/usb/gadget/function/uvc_queue.c
>> +++ b/drivers/usb/gadget/function/uvc_queue.c
>> @@ -18,6 +18,7 @@
>>
>>  #include <media/v4l2-common.h>
>>  #include <media/videobuf2-vmalloc.h>
>> +#include <media/videobuf2-dma-sg.h>
>
>Alphabetical order please.

ok.

>>
>>  #include "uvc.h"
>>
>> @@ -52,6 +53,7 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>>  	*nplanes = 1;
>>
>>  	sizes[0] = video->imagesize;
>> +	alloc_devs[0] = video->uvc->v4l2_dev.dev->parent;
>
>Is there a guarantee that the gadget's parent is always the UDC ?

No, we can not be sure. Especially the dwc3 core has the dts parameter
"linux,sysdev_is_parent" where the parent can point to another dev.

I will add a function called gadget_to_udc that will always provide the udc
device. But while being at it I can also remove this alloc_devs assignment
since the queue.dev can be set to the udc dev instead, since we don't need any
per plane allocation.

>>
>>  	if (cdev->gadget->speed < USB_SPEED_SUPER)
>>  		video->uvc_num_requests = 4;
>> @@ -66,6 +68,9 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
>>  	struct uvc_video_queue *queue = vb2_get_drv_priv(vb->vb2_queue);
>>  	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>>  	struct uvc_buffer *buf = container_of(vbuf, struct uvc_buffer, buf);
>> +	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
>> +	struct uvc_device *uvc = video->uvc;
>> +	struct usb_composite_dev *cdev = uvc->func.config->cdev;
>>
>>  	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
>>  	    vb2_get_plane_payload(vb, 0) > vb2_plane_size(vb, 0)) {
>> @@ -77,7 +82,12 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
>>  		return -ENODEV;
>>
>>  	buf->state = UVC_BUF_STATE_QUEUED;
>> -	buf->mem = vb2_plane_vaddr(vb, 0);
>> +	if (cdev->gadget->sg_supported) {
>
>How about storing a use_sg flag in uvc_video_queue, to avoid poking
>through layers ? The flag can also be used in uvcg_queue_init().

Yes. This makes sense. I added a bool use_sg to uvc_video_queue and
use it instead in v3.

>> +		buf->sgt = vb2_dma_sg_plane_desc(vb, 0);
>> +		buf->sg = buf->sgt->sgl;
>> +	} else {
>> +		buf->mem = vb2_plane_vaddr(vb, 0);
>> +	}
>>  	buf->length = vb2_plane_size(vb, 0);
>>  	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
>>  		buf->bytesused = 0;
>> @@ -117,9 +127,11 @@ static const struct vb2_ops uvc_queue_qops = {
>>  	.wait_finish = vb2_ops_wait_finish,
>>  };
>>
>> -int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>> +int uvcg_queue_init(struct device *dev, struct uvc_video_queue *queue, enum v4l2_buf_type type,
>
>Please move the dev parameter after queue, to pass as the first
>parameter the object that the function operates on.

ok. will change in v3.

>>  		    struct mutex *lock)
>>  {
>> +	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
>> +	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>>  	int ret;
>>
>>  	queue->queue.type = type;
>> @@ -128,9 +140,14 @@ int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>  	queue->queue.buf_struct_size = sizeof(struct uvc_buffer);
>>  	queue->queue.ops = &uvc_queue_qops;
>>  	queue->queue.lock = lock;
>> -	queue->queue.mem_ops = &vb2_vmalloc_memops;
>> +	if (cdev->gadget->sg_supported)
>> +		queue->queue.mem_ops = &vb2_dma_sg_memops;
>> +	else
>> +		queue->queue.mem_ops = &vb2_vmalloc_memops;
>> +
>>  	queue->queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC
>>  				     | V4L2_BUF_FLAG_TSTAMP_SRC_EOF;
>> +	queue->queue.dev = dev;
>>  	ret = vb2_queue_init(&queue->queue);
>>  	if (ret)
>>  		return ret;
>> diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h
>> index 2f0fff7698430..bb8753b26074f 100644
>> --- a/drivers/usb/gadget/function/uvc_queue.h
>> +++ b/drivers/usb/gadget/function/uvc_queue.h
>> @@ -34,6 +34,9 @@ struct uvc_buffer {
>>
>>  	enum uvc_buffer_state state;
>>  	void *mem;
>> +	struct sg_table *sgt;
>> +	struct scatterlist *sg;
>> +	unsigned int offset;
>>  	unsigned int length;
>>  	unsigned int bytesused;
>>  };
>> @@ -59,7 +62,7 @@ static inline int uvc_queue_streaming(struct uvc_video_queue *queue)
>>  	return vb2_is_streaming(&queue->queue);
>>  }
>>
>> -int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>> +int uvcg_queue_init(struct device *d, struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>  		    struct mutex *lock);
>>
>>  void uvcg_free_buffers(struct uvc_video_queue *queue);
>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> index 57840083dfdda..240d361a45a44 100644
>> --- a/drivers/usb/gadget/function/uvc_video.c
>> +++ b/drivers/usb/gadget/function/uvc_video.c
>> @@ -95,6 +95,71 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
>>  		video->payload_size = 0;
>>  }
>>
>> +static void
>> +uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
>> +		struct uvc_buffer *buf)
>> +{
>> +	int pending = buf->bytesused - video->queue.buf_used;
>> +	struct uvc_request *ureq = req->context;
>> +	struct scatterlist *sg, *iter;
>> +	int len = video->req_size;
>> +	int sg_left, part = 0;
>> +	int ret;
>> +	int i;
>
>unsigned int for pending, len, sg_left, part and i.

Right.

>> +
>> +	sg = ureq->sgt.sgl;
>> +	sg_init_table(sg, ureq->sgt.nents);
>> +
>> +	/* Init the header. */
>> +	memset(ureq->header, 0, 2);
>
>Can you add
>
>#define UVCG_REQUEST_HEADER_LEN		2
>
>somewhere, and use it here, in the uvc_request structure definition, and
>in uvc_video_encode_header() ? Otherwise I fear we'll forget to update
>one of the locations when we'll add support for longer headers.

Makes sense. I will change the code to use the define instead.

>Is the memset() needed though ?
>

Yes, it is not needed. I will remove it.

>> +	ret = uvc_video_encode_header(video, buf, ureq->header,
>> +				      video->req_size);
>> +	sg_set_buf(sg, ureq->header, 2);
>> +	len -= ret;
>> +
>> +	if (pending <= len)
>> +		len = pending;
>> +
>> +	req->length = (len == pending) ? len + 2 : video->req_size;
>> +
>> +	/* Init the pending sgs with payload */
>> +	sg = sg_next(sg);
>> +
>> +	for_each_sg(sg, iter, ureq->sgt.nents - 1, i) {
>> +		if (!len || !buf->sg)
>> +			break;
>> +
>> +		sg_left = sg_dma_len(buf->sg) - buf->offset;
>> +		part = min_t(unsigned int, len, sg_left);
>> +
>> +		sg_set_page(iter, sg_page(buf->sg), part, buf->offset);
>> +
>> +		if (part == sg_left) {
>> +			buf->offset = 0;
>> +			buf->sg = sg_next(buf->sg);
>> +		} else {
>> +			buf->offset += part;
>> +		}
>> +		len -= part;
>> +	}
>> +
>> +	/* Assign the video data with header. */
>> +	req->buf = NULL;
>> +	req->sg	= ureq->sgt.sgl;
>> +	req->num_sgs = i + 1;
>
>Given that you construct the request scatterlist manually anyway,
>wouldn't it be much simpler to use the vb2 dma contig allocator for the
>V4L2 buffer ? Or do you expect that the device would run out of dma
>contig space (which I expect to be provided by CMA or an IOMMU) ?

Yes, it would be simpler. But with dma contig you are limited to get contig
memory. When we always use sg_lists it is even possible to get the data
directly from the gpu.

>It would also likely help to fill every sg entry as much as possible,
>while the above code potentially creates smaller entries in the request
>sgt when reaching the boundary between two entries in the V4L2 buffer
>sgt.

In case we get the sg_table from an contig allocator it would be a table with
one big entry. So this is also a possible use case with the current
implementation. It will never run into any boundary in that case and will run
in maximum filled chunks.

>I also wonder if this couldn't be further optimized by creating an sgt
>with two entries, one for the header and one for the data,
>unconditionally.

That is exactly what it already does.

>What's the maximum possible request size, could it be larger than what
>an sgt entry can support ?

The request size is usb maxpacket size for isoc (1024) times mult (<=3)
times burst (<=15). I think an sgt entry has no size limitation.

>> +
>> +	req->length -= len;
>> +	video->queue.buf_used += req->length - 2;
>> +
>> +	if (buf->bytesused == video->queue.buf_used || !buf->sg) {
>> +		video->queue.buf_used = 0;
>> +		buf->state = UVC_BUF_STATE_DONE;
>> +		buf->offset = 0;
>> +		uvcg_queue_next_buffer(&video->queue, buf);
>> +		video->fid ^= UVC_STREAM_FID;
>> +	}
>> +}
>> +
>>  static void
>>  uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
>>  		struct uvc_buffer *buf)
>> @@ -232,6 +297,10 @@ uvc_video_alloc_requests(struct uvc_video *video)
>>  		video->ureq[i].video = video;
>>
>>  		list_add_tail(&video->ureq[i].req->list, &video->req_free);
>> +		/* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
>> +		sg_alloc_table(&video->ureq[i].sgt,
>> +			       DIV_ROUND_UP(req_size - 2, PAGE_SIZE) + 2,
>> +			       GFP_KERNEL);
>
>It looks like this is leaked.

Oh, you are right. I will add an corresponding sg_free_table in uvc_video_free_requests.

>>  	}
>>
>>  	video->req_size = req_size;
>> @@ -309,6 +378,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>   */
>>  int uvcg_video_enable(struct uvc_video *video, int enable)
>>  {
>> +	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>>  	unsigned int i;
>>  	int ret;
>>
>> @@ -340,8 +410,12 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>>  	if (video->max_payload_size) {
>>  		video->encode = uvc_video_encode_bulk;
>>  		video->payload_size = 0;
>> -	} else
>> -		video->encode = uvc_video_encode_isoc;
>> +	} else {
>> +		if (cdev->gadget->sg_supported)
>> +			video->encode = uvc_video_encode_isoc_sg;
>> +		else
>> +			video->encode = uvc_video_encode_isoc;
>> +	}
>>
>>  	schedule_work(&video->pump);
>>
>> @@ -365,7 +439,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>  	video->imagesize = 320 * 240 * 2;
>>
>>  	/* Initialize the video buffers queue. */
>> -	uvcg_queue_init(&video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
>> +	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
>>  			&video->mutex);

This argument will be uvc->v4l2_dev.dev->parent or the result of gadget_to_udc(uvc->v4l2_dev.dev) instead.

>>  	return 0;
>>  }

Thanks,
Michael

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support
  2021-06-25  9:12     ` Michael Grzeschik
@ 2021-06-28  7:37       ` Michael Grzeschik
  0 siblings, 0 replies; 17+ messages in thread
From: Michael Grzeschik @ 2021-06-28  7:37 UTC (permalink / raw)
  To: Laurent Pinchart; +Cc: balbi, caleb.connolly, linux-usb, paul.elder, kernel

[-- Attachment #1: Type: text/plain, Size: 15436 bytes --]

Hi Laurent!

On Fri, Jun 25, 2021 at 11:12:27AM +0200, Michael Grzeschik wrote:
>On Tue, Jun 15, 2021 at 05:10:27AM +0300, Laurent Pinchart wrote:
>>Hi Michael,
>>
>>Thank you for the patch.
>>
>>On Mon, May 31, 2021 at 12:30:40AM +0200, Michael Grzeschik wrote:
>>>This patch adds support for scatter gather transfers. If the underlying
>>>gadgets sg_supported == true, then the videeobuf2-dma-sg is used and the
>>>encode routine maps all scatter entries to separate scatterlists for the
>>>usb gadget.
>>
>>This is interesting. Could you please share measurements that show how
>>much CPU time is saved by this patch in typical use cases ?
>
>When streaming 1080p with request size of 1024 times 3 bytes I see this
>change in top when applying this patch:
>
> PID USER      PR  NI    VIRT    RES  %CPU  %MEM     TIME+ S COMMAND
>
>  64 root       0 -20    0.0m   0.0m   7.7   0.0   0:01.25 I [kworker/u5:0-uvcvideo]
>  83 root       0 -20    0.0m   0.0m   4.5   0.0   0:03.71 I [kworker/u5:3-uvcvideo]
> 307 root     -51   0    0.0m   0.0m   3.8   0.0   0:01.05 S [irq/51-dwc3]
>
>vs.
>
>  64 root       0 -20    0.0m   0.0m   5.8   0.0   0:01.79 I [kworker/u5:0-uvcvideo]
> 306 root     -51   0    0.0m   0.0m   3.2   0.0   0:01.97 S [irq/51-dwc3]
>  82 root       0 -20    0.0m   0.0m   0.6   0.0   0:01.86 I [kworker/u5:1-uvcvideo]
>
>So 6.4 % less CPU load tells the measurement.
>
>>>Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>>---
>>>v1 -> v2: -
>>>
>>> drivers/usb/gadget/Kconfig              |  1 +
>>> drivers/usb/gadget/function/f_uvc.c     |  1 +
>>> drivers/usb/gadget/function/uvc.h       |  2 +
>>> drivers/usb/gadget/function/uvc_queue.c | 23 ++++++-
>>> drivers/usb/gadget/function/uvc_queue.h |  5 +-
>>> drivers/usb/gadget/function/uvc_video.c | 80 ++++++++++++++++++++++++-
>>> 6 files changed, 105 insertions(+), 7 deletions(-)
>>>
>>>diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
>>>index 2d152571a7de8..dd58094f0b85b 100644
>>>--- a/drivers/usb/gadget/Kconfig
>>>+++ b/drivers/usb/gadget/Kconfig
>>>@@ -450,6 +450,7 @@ config USB_CONFIGFS_F_UVC
>>> 	depends on USB_CONFIGFS
>>> 	depends on VIDEO_V4L2
>>> 	depends on VIDEO_DEV
>>>+	select VIDEOBUF2_DMA_SG
>>> 	select VIDEOBUF2_VMALLOC
>>> 	select USB_F_UVC
>>> 	help
>>>diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
>>>index f48a00e497945..9d87c0fb8f92e 100644
>>>--- a/drivers/usb/gadget/function/f_uvc.c
>>>+++ b/drivers/usb/gadget/function/f_uvc.c
>>>@@ -418,6 +418,7 @@ uvc_register_video(struct uvc_device *uvc)
>>>
>>> 	/* TODO reference counting. */
>>> 	uvc->vdev.v4l2_dev = &uvc->v4l2_dev;
>>>+	uvc->vdev.v4l2_dev->dev = &cdev->gadget->dev;
>>
>>A good change, which could possibly be split to its own patch.
>
>Right. I will move it to a single patch.
>
>>> 	uvc->vdev.fops = &uvc_v4l2_fops;
>>> 	uvc->vdev.ioctl_ops = &uvc_v4l2_ioctl_ops;
>>> 	uvc->vdev.release = video_device_release_empty;
>>>diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>>index 83b9e945944e8..c1f06d9df5820 100644
>>>--- a/drivers/usb/gadget/function/uvc.h
>>>+++ b/drivers/usb/gadget/function/uvc.h
>>>@@ -75,6 +75,8 @@ struct uvc_request {
>>> 	struct usb_request *req;
>>> 	__u8 *req_buffer;
>>> 	struct uvc_video *video;
>>>+	struct sg_table sgt;
>>>+	u8 header[2];
>>> };
>>>
>>> struct uvc_video {
>>>diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c
>>>index dcd71304d521c..e36a3506842b7 100644
>>>--- a/drivers/usb/gadget/function/uvc_queue.c
>>>+++ b/drivers/usb/gadget/function/uvc_queue.c
>>>@@ -18,6 +18,7 @@
>>>
>>> #include <media/v4l2-common.h>
>>> #include <media/videobuf2-vmalloc.h>
>>>+#include <media/videobuf2-dma-sg.h>
>>
>>Alphabetical order please.
>
>ok.
>
>>>
>>> #include "uvc.h"
>>>
>>>@@ -52,6 +53,7 @@ static int uvc_queue_setup(struct vb2_queue *vq,
>>> 	*nplanes = 1;
>>>
>>> 	sizes[0] = video->imagesize;
>>>+	alloc_devs[0] = video->uvc->v4l2_dev.dev->parent;
>>
>>Is there a guarantee that the gadget's parent is always the UDC ?
>
>No, we can not be sure. Especially the dwc3 core has the dts parameter
>"linux,sysdev_is_parent" where the parent can point to another dev.
>
>I will add a function called gadget_to_udc that will always provide the udc
>device. But while being at it I can also remove this alloc_devs assignment
>since the queue.dev can be set to the udc dev instead, since we don't need any
>per plane allocation.

I just figured that indeed dwc3 is the only case where we can not know
if the gadgets parent is the udc. But I think even this is currently
wrong in the dwc3 core. We can simply fix that, by calling
usb_initialize_gadget with dwc->sysdev instead of dwc->dev. This will
ensure that we will get the udc as parent.

I will prepend that patch in the series and send out v3.

Thanks,
Michael

>>>
>>> 	if (cdev->gadget->speed < USB_SPEED_SUPER)
>>> 		video->uvc_num_requests = 4;
>>>@@ -66,6 +68,9 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
>>> 	struct uvc_video_queue *queue = vb2_get_drv_priv(vb->vb2_queue);
>>> 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
>>> 	struct uvc_buffer *buf = container_of(vbuf, struct uvc_buffer, buf);
>>>+	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
>>>+	struct uvc_device *uvc = video->uvc;
>>>+	struct usb_composite_dev *cdev = uvc->func.config->cdev;
>>>
>>> 	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT &&
>>> 	    vb2_get_plane_payload(vb, 0) > vb2_plane_size(vb, 0)) {
>>>@@ -77,7 +82,12 @@ static int uvc_buffer_prepare(struct vb2_buffer *vb)
>>> 		return -ENODEV;
>>>
>>> 	buf->state = UVC_BUF_STATE_QUEUED;
>>>-	buf->mem = vb2_plane_vaddr(vb, 0);
>>>+	if (cdev->gadget->sg_supported) {
>>
>>How about storing a use_sg flag in uvc_video_queue, to avoid poking
>>through layers ? The flag can also be used in uvcg_queue_init().
>
>Yes. This makes sense. I added a bool use_sg to uvc_video_queue and
>use it instead in v3.
>
>>>+		buf->sgt = vb2_dma_sg_plane_desc(vb, 0);
>>>+		buf->sg = buf->sgt->sgl;
>>>+	} else {
>>>+		buf->mem = vb2_plane_vaddr(vb, 0);
>>>+	}
>>> 	buf->length = vb2_plane_size(vb, 0);
>>> 	if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
>>> 		buf->bytesused = 0;
>>>@@ -117,9 +127,11 @@ static const struct vb2_ops uvc_queue_qops = {
>>> 	.wait_finish = vb2_ops_wait_finish,
>>> };
>>>
>>>-int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>>+int uvcg_queue_init(struct device *dev, struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>
>>Please move the dev parameter after queue, to pass as the first
>>parameter the object that the function operates on.
>
>ok. will change in v3.
>
>>> 		    struct mutex *lock)
>>> {
>>>+	struct uvc_video *video = container_of(queue, struct uvc_video, queue);
>>>+	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>>> 	int ret;
>>>
>>> 	queue->queue.type = type;
>>>@@ -128,9 +140,14 @@ int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>> 	queue->queue.buf_struct_size = sizeof(struct uvc_buffer);
>>> 	queue->queue.ops = &uvc_queue_qops;
>>> 	queue->queue.lock = lock;
>>>-	queue->queue.mem_ops = &vb2_vmalloc_memops;
>>>+	if (cdev->gadget->sg_supported)
>>>+		queue->queue.mem_ops = &vb2_dma_sg_memops;
>>>+	else
>>>+		queue->queue.mem_ops = &vb2_vmalloc_memops;
>>>+
>>> 	queue->queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC
>>> 				     | V4L2_BUF_FLAG_TSTAMP_SRC_EOF;
>>>+	queue->queue.dev = dev;
>>> 	ret = vb2_queue_init(&queue->queue);
>>> 	if (ret)
>>> 		return ret;
>>>diff --git a/drivers/usb/gadget/function/uvc_queue.h b/drivers/usb/gadget/function/uvc_queue.h
>>>index 2f0fff7698430..bb8753b26074f 100644
>>>--- a/drivers/usb/gadget/function/uvc_queue.h
>>>+++ b/drivers/usb/gadget/function/uvc_queue.h
>>>@@ -34,6 +34,9 @@ struct uvc_buffer {
>>>
>>> 	enum uvc_buffer_state state;
>>> 	void *mem;
>>>+	struct sg_table *sgt;
>>>+	struct scatterlist *sg;
>>>+	unsigned int offset;
>>> 	unsigned int length;
>>> 	unsigned int bytesused;
>>> };
>>>@@ -59,7 +62,7 @@ static inline int uvc_queue_streaming(struct uvc_video_queue *queue)
>>> 	return vb2_is_streaming(&queue->queue);
>>> }
>>>
>>>-int uvcg_queue_init(struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>>+int uvcg_queue_init(struct device *d, struct uvc_video_queue *queue, enum v4l2_buf_type type,
>>> 		    struct mutex *lock);
>>>
>>> void uvcg_free_buffers(struct uvc_video_queue *queue);
>>>diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>>index 57840083dfdda..240d361a45a44 100644
>>>--- a/drivers/usb/gadget/function/uvc_video.c
>>>+++ b/drivers/usb/gadget/function/uvc_video.c
>>>@@ -95,6 +95,71 @@ uvc_video_encode_bulk(struct usb_request *req, struct uvc_video *video,
>>> 		video->payload_size = 0;
>>> }
>>>
>>>+static void
>>>+uvc_video_encode_isoc_sg(struct usb_request *req, struct uvc_video *video,
>>>+		struct uvc_buffer *buf)
>>>+{
>>>+	int pending = buf->bytesused - video->queue.buf_used;
>>>+	struct uvc_request *ureq = req->context;
>>>+	struct scatterlist *sg, *iter;
>>>+	int len = video->req_size;
>>>+	int sg_left, part = 0;
>>>+	int ret;
>>>+	int i;
>>
>>unsigned int for pending, len, sg_left, part and i.
>
>Right.
>
>>>+
>>>+	sg = ureq->sgt.sgl;
>>>+	sg_init_table(sg, ureq->sgt.nents);
>>>+
>>>+	/* Init the header. */
>>>+	memset(ureq->header, 0, 2);
>>
>>Can you add
>>
>>#define UVCG_REQUEST_HEADER_LEN		2
>>
>>somewhere, and use it here, in the uvc_request structure definition, and
>>in uvc_video_encode_header() ? Otherwise I fear we'll forget to update
>>one of the locations when we'll add support for longer headers.
>
>Makes sense. I will change the code to use the define instead.
>
>>Is the memset() needed though ?
>>
>
>Yes, it is not needed. I will remove it.
>
>>>+	ret = uvc_video_encode_header(video, buf, ureq->header,
>>>+				      video->req_size);
>>>+	sg_set_buf(sg, ureq->header, 2);
>>>+	len -= ret;
>>>+
>>>+	if (pending <= len)
>>>+		len = pending;
>>>+
>>>+	req->length = (len == pending) ? len + 2 : video->req_size;
>>>+
>>>+	/* Init the pending sgs with payload */
>>>+	sg = sg_next(sg);
>>>+
>>>+	for_each_sg(sg, iter, ureq->sgt.nents - 1, i) {
>>>+		if (!len || !buf->sg)
>>>+			break;
>>>+
>>>+		sg_left = sg_dma_len(buf->sg) - buf->offset;
>>>+		part = min_t(unsigned int, len, sg_left);
>>>+
>>>+		sg_set_page(iter, sg_page(buf->sg), part, buf->offset);
>>>+
>>>+		if (part == sg_left) {
>>>+			buf->offset = 0;
>>>+			buf->sg = sg_next(buf->sg);
>>>+		} else {
>>>+			buf->offset += part;
>>>+		}
>>>+		len -= part;
>>>+	}
>>>+
>>>+	/* Assign the video data with header. */
>>>+	req->buf = NULL;
>>>+	req->sg	= ureq->sgt.sgl;
>>>+	req->num_sgs = i + 1;
>>
>>Given that you construct the request scatterlist manually anyway,
>>wouldn't it be much simpler to use the vb2 dma contig allocator for the
>>V4L2 buffer ? Or do you expect that the device would run out of dma
>>contig space (which I expect to be provided by CMA or an IOMMU) ?
>
>Yes, it would be simpler. But with dma contig you are limited to get contig
>memory. When we always use sg_lists it is even possible to get the data
>directly from the gpu.
>
>>It would also likely help to fill every sg entry as much as possible,
>>while the above code potentially creates smaller entries in the request
>>sgt when reaching the boundary between two entries in the V4L2 buffer
>>sgt.
>
>In case we get the sg_table from an contig allocator it would be a table with
>one big entry. So this is also a possible use case with the current
>implementation. It will never run into any boundary in that case and will run
>in maximum filled chunks.
>
>>I also wonder if this couldn't be further optimized by creating an sgt
>>with two entries, one for the header and one for the data,
>>unconditionally.
>
>That is exactly what it already does.
>
>>What's the maximum possible request size, could it be larger than what
>>an sgt entry can support ?
>
>The request size is usb maxpacket size for isoc (1024) times mult (<=3)
>times burst (<=15). I think an sgt entry has no size limitation.
>
>>>+
>>>+	req->length -= len;
>>>+	video->queue.buf_used += req->length - 2;
>>>+
>>>+	if (buf->bytesused == video->queue.buf_used || !buf->sg) {
>>>+		video->queue.buf_used = 0;
>>>+		buf->state = UVC_BUF_STATE_DONE;
>>>+		buf->offset = 0;
>>>+		uvcg_queue_next_buffer(&video->queue, buf);
>>>+		video->fid ^= UVC_STREAM_FID;
>>>+	}
>>>+}
>>>+
>>> static void
>>> uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video,
>>> 		struct uvc_buffer *buf)
>>>@@ -232,6 +297,10 @@ uvc_video_alloc_requests(struct uvc_video *video)
>>> 		video->ureq[i].video = video;
>>>
>>> 		list_add_tail(&video->ureq[i].req->list, &video->req_free);
>>>+		/* req_size/PAGE_SIZE + 1 for overruns and + 1 for header */
>>>+		sg_alloc_table(&video->ureq[i].sgt,
>>>+			       DIV_ROUND_UP(req_size - 2, PAGE_SIZE) + 2,
>>>+			       GFP_KERNEL);
>>
>>It looks like this is leaked.
>
>Oh, you are right. I will add an corresponding sg_free_table in uvc_video_free_requests.
>
>>> 	}
>>>
>>> 	video->req_size = req_size;
>>>@@ -309,6 +378,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>  */
>>> int uvcg_video_enable(struct uvc_video *video, int enable)
>>> {
>>>+	struct usb_composite_dev *cdev = video->uvc->func.config->cdev;
>>> 	unsigned int i;
>>> 	int ret;
>>>
>>>@@ -340,8 +410,12 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>>> 	if (video->max_payload_size) {
>>> 		video->encode = uvc_video_encode_bulk;
>>> 		video->payload_size = 0;
>>>-	} else
>>>-		video->encode = uvc_video_encode_isoc;
>>>+	} else {
>>>+		if (cdev->gadget->sg_supported)
>>>+			video->encode = uvc_video_encode_isoc_sg;
>>>+		else
>>>+			video->encode = uvc_video_encode_isoc;
>>>+	}
>>>
>>> 	schedule_work(&video->pump);
>>>
>>>@@ -365,7 +439,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>> 	video->imagesize = 320 * 240 * 2;
>>>
>>> 	/* Initialize the video buffers queue. */
>>>-	uvcg_queue_init(&video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
>>>+	uvcg_queue_init(uvc->v4l2_dev.dev, &video->queue, V4L2_BUF_TYPE_VIDEO_OUTPUT,
>>> 			&video->mutex);
>
>This argument will be uvc->v4l2_dev.dev->parent or the result of gadget_to_udc(uvc->v4l2_dev.dev) instead.
>
>>> 	return 0;
>>> }
>
>Thanks,
>Michael
>
>-- 
>Pengutronix e.K.                           |                             |
>Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
>31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
>Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |



-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2021-06-28  7:37 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-30 22:30 [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Michael Grzeschik
2021-05-30 22:30 ` [PATCH v2 1/3] usb: gadget: uvc: make uvc_num_requests depend on gadget speed Michael Grzeschik
2021-06-14 10:34   ` paul.elder
2021-06-15  1:28   ` Laurent Pinchart
2021-06-15  1:38     ` Laurent Pinchart
2021-06-22 15:00       ` Michael Grzeschik
2021-05-30 22:30 ` [PATCH v2 2/3] usb: gadget: uvc: add scatter gather support Michael Grzeschik
2021-05-31  1:33   ` kernel test robot
2021-05-31  6:30   ` kernel test robot
2021-06-15  2:10   ` Laurent Pinchart
2021-06-25  9:12     ` Michael Grzeschik
2021-06-28  7:37       ` Michael Grzeschik
2021-05-30 22:30 ` [PATCH v2 3/3] usb: gadget: uvc: decrease the interrupt load to a quarter Michael Grzeschik
2021-06-14 10:35   ` paul.elder
2021-06-15  1:36     ` Laurent Pinchart
2021-06-22 15:02       ` Michael Grzeschik
2021-06-09  8:17 ` [PATCH v2 0/3] usb: gadget: uvc: improve uvc gadget performance Greg KH

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).