All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] usb:gadget:uvc Do not use worker thread to pump usb requests
@ 2023-10-25 22:59 Jayant Chowdhary
  2023-10-26  6:58 ` Michael Grzeschik
  0 siblings, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-10-25 22:59 UTC (permalink / raw)
  To: laurent.pinchart, Greg KH, dan.scally, Michael Grzeschik
  Cc: linux-usb, linux-kernel, Thinh Nguyen

This patch is based on top of
https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that thread scheduling
affects at what cadence we're able to pump requests. This could mean usb
requests miss their uframes - resulting in video stream flickers on the host
device.

In this patch, we move the pumping of usb requests to
1) uvcg_video_complete() complete handler for both isoc + bulk
   endpoints. We still send 0 length requests when there is no uvc buffer
   available to encode.
2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
   0 length requests.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Tested-by: Jayant Chowdhary <jchowdhary@google.com>
---
 drivers/usb/gadget/function/f_uvc.c     |  4 --
 drivers/usb/gadget/function/uvc.h       |  4 +-
 drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
 drivers/usb/gadget/function/uvc_video.c | 72 ++++++++++++++++---------
 drivers/usb/gadget/function/uvc_video.h |  2 +
 5 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index ae08341961eb..53cb2539486d 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
 {
 	struct usb_composite_dev *cdev = c->cdev;
 	struct uvc_device *uvc = to_uvc(f);
-	struct uvc_video *video = &uvc->video;
 	long wait_ret = 1;
 
 	uvcg_info(f, "%s()\n", __func__);
 
-	if (video->async_wq)
-		destroy_workqueue(video->async_wq);
-
 	/*
 	 * If we know we're connected via v4l2, then there should be a cleanup
 	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index be0d012aa244..498f344fda4b 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -88,9 +88,6 @@ struct uvc_video {
 	struct uvc_device *uvc;
 	struct usb_ep *ep;
 
-	struct work_struct pump;
-	struct workqueue_struct *async_wq;
-
 	/* Frame parameters */
 	u8 bpp;
 	u32 fcc;
@@ -116,6 +113,7 @@ struct uvc_video {
 	/* Context data used by the completion handler */
 	__u32 payload_size;
 	__u32 max_payload_size;
+	bool is_bulk;
 
 	struct uvc_video_queue queue;
 	unsigned int fid;
diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
index f4d2e24835d4..678ea6df7b5c 100644
--- a/drivers/usb/gadget/function/uvc_v4l2.c
+++ b/drivers/usb/gadget/function/uvc_v4l2.c
@@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
 	ret = uvcg_queue_buffer(&video->queue, b);
 	if (ret < 0)
 		return ret;
-
-	if (uvc->state == UVC_STATE_STREAMING)
-		queue_work(video->async_wq, &video->pump);
-
+	uvcg_video_pump_qbuf(video);
 	return ret;
 }
 
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index ab3f02054e85..143453e9f003 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -24,6 +24,8 @@
  * Video codecs
  */
 
+static void uvcg_video_pump(struct uvc_video *video);
+
 static int
 uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
 		u8 *data, int len)
@@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 */
 	if (video->is_enabled) {
 		list_add_tail(&req->list, &video->req_free);
-		queue_work(video->async_wq, &video->pump);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+		uvcg_video_pump(video);
+		return;
 	} else {
 		uvc_video_free_request(ureq, ep);
 	}
@@ -409,20 +413,32 @@ uvc_video_alloc_requests(struct uvc_video *video)
  * Video streaming
  */
 
+void uvcg_video_pump_qbuf(struct uvc_video *video)
+{
+	if (video->is_bulk) {
+		/*
+		 * Only call uvcg_video_pump() from qbuf, for bulk eps since
+		 * for isoc, the complete handler will call uvcg_video_pump()
+		 * consistently. Calling it for isoc eps, while correct
+		 * will increase contention for video->req_lock since the
+		 * complete handler will be called more often.
+		 */
+		uvcg_video_pump(video);
+	}
+}
+
 /*
  * uvcg_video_pump - Pump video data into the USB requests
  *
  * This function fills the available USB requests (listed in req_free) with
  * video data from the queued buffers.
  */
-static void uvcg_video_pump(struct work_struct *work)
+static void uvcg_video_pump(struct uvc_video *video)
 {
-	struct uvc_video *video = container_of(work, struct uvc_video, pump);
 	struct uvc_video_queue *queue = &video->queue;
-	/* video->max_payload_size is only set when using bulk transfer */
-	bool is_bulk = video->max_payload_size;
 	struct usb_request *req = NULL;
-	struct uvc_buffer *buf;
+	struct uvc_request *ureq = NULL;
+	struct uvc_buffer *buf = NULL, *last_buf = NULL;
 	unsigned long flags;
 	bool buf_done;
 	int ret;
@@ -455,7 +471,8 @@ static void uvcg_video_pump(struct work_struct *work)
 		if (buf != NULL) {
 			video->encode(req, video, buf);
 			buf_done = buf->state == UVC_BUF_STATE_DONE;
-		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
+		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
+				!video->is_bulk) {
 			/*
 			 * No video buffer available; the queue is still connected and
 			 * we're transferring over ISOC. Queue a 0 length request to
@@ -500,18 +517,30 @@ static void uvcg_video_pump(struct work_struct *work)
 			req->no_interrupt = 1;
 		}
 
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
-
+		spin_lock_irqsave(&video->req_lock, flags);
+		if (video->is_enabled) {
+			/* Queue the USB request */
+			ret = uvcg_video_ep_queue(video, req);
+			/* Endpoint now owns the request */
+			req = NULL;
+			video->req_int_count++;
+		} else {
+			ret =  -ENODEV;
+			ureq = req->context;
+			last_buf = ureq->last_buf;
+			ureq->last_buf = NULL;
+		}
+		spin_unlock_irqrestore(&video->req_lock, flags);
 		if (ret < 0) {
+			if (last_buf != NULL) {
+				// Return the buffer to the queue in the case the
+				// request was not queued to the ep.
+				uvcg_complete_buffer(&video->queue, last_buf);
+			}
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
-
-		/* Endpoint now owns the request */
-		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -556,7 +585,6 @@ uvcg_video_disable(struct uvc_video *video)
 	}
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
-	cancel_work_sync(&video->pump);
 	uvcg_queue_cancel(&video->queue, 0);
 
 	spin_lock_irqsave(&video->req_lock, flags);
@@ -626,14 +654,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 	if (video->max_payload_size) {
 		video->encode = uvc_video_encode_bulk;
 		video->payload_size = 0;
-	} else
+		video->is_bulk = true;
+	} else {
 		video->encode = video->queue.use_sg ?
 			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
+		video->is_bulk = false;
+	}
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
-
+	uvcg_video_pump(video);
 	return ret;
 }
 
@@ -646,12 +676,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
 	spin_lock_init(&video->req_lock);
-	INIT_WORK(&video->pump, uvcg_video_pump);
-
-	/* Allocate a work queue for asynchronous video pump handler. */
-	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
-	if (!video->async_wq)
-		return -EINVAL;
 
 	video->uvc = uvc;
 	video->fcc = V4L2_PIX_FMT_YUYV;
diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
index 03adeefa343b..29c6b9a2e9c3 100644
--- a/drivers/usb/gadget/function/uvc_video.h
+++ b/drivers/usb/gadget/function/uvc_video.h
@@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
 
 int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
 
+void uvcg_video_pump_qbuf(struct uvc_video *video);
+
 #endif /* __UVC_VIDEO_H__ */
-- 


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-25 22:59 [PATCH] usb:gadget:uvc Do not use worker thread to pump usb requests Jayant Chowdhary
@ 2023-10-26  6:58 ` Michael Grzeschik
  2023-10-26 21:56   ` [PATCH v2] " Jayant Chowdhary
  0 siblings, 1 reply; 31+ messages in thread
From: Michael Grzeschik @ 2023-10-26  6:58 UTC (permalink / raw)
  To: Jayant Chowdhary
  Cc: laurent.pinchart, Greg KH, dan.scally, linux-usb, linux-kernel,
	Thinh Nguyen

[-- Attachment #1: Type: text/plain, Size: 9228 bytes --]

On Wed, Oct 25, 2023 at 03:59:10PM -0700, Jayant Chowdhary wrote:
>This patch is based on top of
>https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>
>When we use an async work queue to perform the function of pumping
>usb requests to the usb controller, it is possible that thread scheduling
>affects at what cadence we're able to pump requests. This could mean usb
>requests miss their uframes - resulting in video stream flickers on the host
>device.
>
>In this patch, we move the pumping of usb requests to
>1) uvcg_video_complete() complete handler for both isoc + bulk
>   endpoints. We still send 0 length requests when there is no uvc buffer
>   available to encode.
>2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
>   0 length requests.
>
>Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
>Suggested-by: Avichal Rakesh <arakesh@google.com>
>Tested-by: Jayant Chowdhary <jchowdhary@google.com>
>---
> drivers/usb/gadget/function/f_uvc.c     |  4 --
> drivers/usb/gadget/function/uvc.h       |  4 +-
> drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
> drivers/usb/gadget/function/uvc_video.c | 72 ++++++++++++++++---------
> drivers/usb/gadget/function/uvc_video.h |  2 +
> 5 files changed, 52 insertions(+), 35 deletions(-)
>
>diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
>index ae08341961eb..53cb2539486d 100644
>--- a/drivers/usb/gadget/function/f_uvc.c
>+++ b/drivers/usb/gadget/function/f_uvc.c
>@@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
> {
> 	struct usb_composite_dev *cdev = c->cdev;
> 	struct uvc_device *uvc = to_uvc(f);
>-	struct uvc_video *video = &uvc->video;
> 	long wait_ret = 1;
>
> 	uvcg_info(f, "%s()\n", __func__);
>
>-	if (video->async_wq)
>-		destroy_workqueue(video->async_wq);
>-
> 	/*
> 	 * If we know we're connected via v4l2, then there should be a cleanup
> 	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
>diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>index be0d012aa244..498f344fda4b 100644
>--- a/drivers/usb/gadget/function/uvc.h
>+++ b/drivers/usb/gadget/function/uvc.h
>@@ -88,9 +88,6 @@ struct uvc_video {
> 	struct uvc_device *uvc;
> 	struct usb_ep *ep;
>
>-	struct work_struct pump;
>-	struct workqueue_struct *async_wq;
>-
> 	/* Frame parameters */
> 	u8 bpp;
> 	u32 fcc;
>@@ -116,6 +113,7 @@ struct uvc_video {
> 	/* Context data used by the completion handler */
> 	__u32 payload_size;
> 	__u32 max_payload_size;
>+	bool is_bulk;
>
> 	struct uvc_video_queue queue;
> 	unsigned int fid;
>diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
>index f4d2e24835d4..678ea6df7b5c 100644
>--- a/drivers/usb/gadget/function/uvc_v4l2.c
>+++ b/drivers/usb/gadget/function/uvc_v4l2.c
>@@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
> 	ret = uvcg_queue_buffer(&video->queue, b);
> 	if (ret < 0)
> 		return ret;
>-
>-	if (uvc->state == UVC_STATE_STREAMING)
>-		queue_work(video->async_wq, &video->pump);
>-
>+	uvcg_video_pump_qbuf(video);
> 	return ret;
> }
>
>diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>index ab3f02054e85..143453e9f003 100644
>--- a/drivers/usb/gadget/function/uvc_video.c
>+++ b/drivers/usb/gadget/function/uvc_video.c
>@@ -24,6 +24,8 @@
>  * Video codecs
>  */
>
>+static void uvcg_video_pump(struct uvc_video *video);
>+
> static int
> uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
> 		u8 *data, int len)
>@@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
> 	 */
> 	if (video->is_enabled) {
> 		list_add_tail(&req->list, &video->req_free);
>-		queue_work(video->async_wq, &video->pump);
>+		spin_unlock_irqrestore(&video->req_lock, flags);
>+		uvcg_video_pump(video);
>+		return;
> 	} else {
> 		uvc_video_free_request(ureq, ep);
> 	}
>@@ -409,20 +413,32 @@ uvc_video_alloc_requests(struct uvc_video *video)
>  * Video streaming
>  */
>
>+void uvcg_video_pump_qbuf(struct uvc_video *video)
>+{
>+	if (video->is_bulk) {
>+		/*
>+		 * Only call uvcg_video_pump() from qbuf, for bulk eps since
>+		 * for isoc, the complete handler will call uvcg_video_pump()
>+		 * consistently. Calling it for isoc eps, while correct
>+		 * will increase contention for video->req_lock since the
>+		 * complete handler will be called more often.
>+		 */

Could you move the comment above the condition and remove the extra
braces here.
>+		uvcg_video_pump(video);
>+	}
>+}
>+
> /*
>  * uvcg_video_pump - Pump video data into the USB requests
>  *
>  * This function fills the available USB requests (listed in req_free) with
>  * video data from the queued buffers.
>  */
>-static void uvcg_video_pump(struct work_struct *work)
>+static void uvcg_video_pump(struct uvc_video *video)
> {
>-	struct uvc_video *video = container_of(work, struct uvc_video, pump);
> 	struct uvc_video_queue *queue = &video->queue;
>-	/* video->max_payload_size is only set when using bulk transfer */
>-	bool is_bulk = video->max_payload_size;
> 	struct usb_request *req = NULL;
>-	struct uvc_buffer *buf;
>+	struct uvc_request *ureq = NULL;
>+	struct uvc_buffer *buf = NULL, *last_buf = NULL;
> 	unsigned long flags;
> 	bool buf_done;
> 	int ret;
>@@ -455,7 +471,8 @@ static void uvcg_video_pump(struct work_struct *work)
> 		if (buf != NULL) {
> 			video->encode(req, video, buf);
> 			buf_done = buf->state == UVC_BUF_STATE_DONE;
>-		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>+		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
>+				!video->is_bulk) {
> 			/*
> 			 * No video buffer available; the queue is still connected and
> 			 * we're transferring over ISOC. Queue a 0 length request to
>@@ -500,18 +517,30 @@ static void uvcg_video_pump(struct work_struct *work)
> 			req->no_interrupt = 1;
> 		}
>
>-		/* Queue the USB request */
>-		ret = uvcg_video_ep_queue(video, req);
> 		spin_unlock_irqrestore(&queue->irqlock, flags);
>-
>+		spin_lock_irqsave(&video->req_lock, flags);
>+		if (video->is_enabled) {
>+			/* Queue the USB request */
>+			ret = uvcg_video_ep_queue(video, req);
>+			/* Endpoint now owns the request */
>+			req = NULL;
>+			video->req_int_count++;
>+		} else {
>+			ret =  -ENODEV;
>+			ureq = req->context;
>+			last_buf = ureq->last_buf;
>+			ureq->last_buf = NULL;
>+		}
>+		spin_unlock_irqrestore(&video->req_lock, flags);
> 		if (ret < 0) {
>+			if (last_buf != NULL) {
>+				// Return the buffer to the queue in the case the
>+				// request was not queued to the ep.
>+				uvcg_complete_buffer(&video->queue, last_buf);
>+			}
> 			uvcg_queue_cancel(queue, 0);
> 			break;
> 		}
>-
>-		/* Endpoint now owns the request */
>-		req = NULL;
>-		video->req_int_count++;
> 	}
>
> 	if (!req)
>@@ -556,7 +585,6 @@ uvcg_video_disable(struct uvc_video *video)
> 	}
> 	spin_unlock_irqrestore(&video->req_lock, flags);
>
>-	cancel_work_sync(&video->pump);
> 	uvcg_queue_cancel(&video->queue, 0);
>
> 	spin_lock_irqsave(&video->req_lock, flags);
>@@ -626,14 +654,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
> 	if (video->max_payload_size) {
> 		video->encode = uvc_video_encode_bulk;
> 		video->payload_size = 0;
>-	} else
>+		video->is_bulk = true;
>+	} else {
> 		video->encode = video->queue.use_sg ?
> 			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
>+		video->is_bulk = false;
>+	}
>
> 	video->req_int_count = 0;
>
>-	queue_work(video->async_wq, &video->pump);
>-
>+	uvcg_video_pump(video);
> 	return ret;
> }
>
>@@ -646,12 +676,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
> 	INIT_LIST_HEAD(&video->ureqs);
> 	INIT_LIST_HEAD(&video->req_free);
> 	spin_lock_init(&video->req_lock);
>-	INIT_WORK(&video->pump, uvcg_video_pump);
>-
>-	/* Allocate a work queue for asynchronous video pump handler. */
>-	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
>-	if (!video->async_wq)
>-		return -EINVAL;
>
> 	video->uvc = uvc;
> 	video->fcc = V4L2_PIX_FMT_YUYV;
>diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
>index 03adeefa343b..29c6b9a2e9c3 100644
>--- a/drivers/usb/gadget/function/uvc_video.h
>+++ b/drivers/usb/gadget/function/uvc_video.h
>@@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
>
> int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
>
>+void uvcg_video_pump_qbuf(struct uvc_video *video);
>+
> #endif /* __UVC_VIDEO_H__ */
>-- 
>
>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-26  6:58 ` Michael Grzeschik
@ 2023-10-26 21:56   ` Jayant Chowdhary
  2023-10-27  7:19     ` Greg KH
                       ` (3 more replies)
  0 siblings, 4 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-10-26 21:56 UTC (permalink / raw)
  To: mgr, jchowdhary
  Cc: Thinh.Nguyen, arakesh, etalvala, dan.scally, gregkh,
	laurent.pinchart, linux-kernel, linux-usb, Michael Grzeschik

This patch is based on top of
https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that thread scheduling
affects at what cadence we're able to pump requests. This could mean usb
requests miss their uframes - resulting in video stream flickers on the host
device.

In this patch, we move the pumping of usb requests to
1) uvcg_video_complete() complete handler for both isoc + bulk
   endpoints. We still send 0 length requests when there is no uvc buffer
   available to encode.
2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
   0 length requests.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Tested-by: Jayant Chowdhary <jchowdhary@google.com>
---
 v1->v2: Fix code style and add self Signed-off-by

 drivers/usb/gadget/function/f_uvc.c     |  4 --
 drivers/usb/gadget/function/uvc.h       |  4 +-
 drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
 drivers/usb/gadget/function/uvc_video.c | 71 ++++++++++++++++---------
 drivers/usb/gadget/function/uvc_video.h |  2 +
 5 files changed, 51 insertions(+), 35 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index ae08341961eb..53cb2539486d 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
 {
 	struct usb_composite_dev *cdev = c->cdev;
 	struct uvc_device *uvc = to_uvc(f);
-	struct uvc_video *video = &uvc->video;
 	long wait_ret = 1;
 
 	uvcg_info(f, "%s()\n", __func__);
 
-	if (video->async_wq)
-		destroy_workqueue(video->async_wq);
-
 	/*
 	 * If we know we're connected via v4l2, then there should be a cleanup
 	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index be0d012aa244..498f344fda4b 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -88,9 +88,6 @@ struct uvc_video {
 	struct uvc_device *uvc;
 	struct usb_ep *ep;
 
-	struct work_struct pump;
-	struct workqueue_struct *async_wq;
-
 	/* Frame parameters */
 	u8 bpp;
 	u32 fcc;
@@ -116,6 +113,7 @@ struct uvc_video {
 	/* Context data used by the completion handler */
 	__u32 payload_size;
 	__u32 max_payload_size;
+	bool is_bulk;
 
 	struct uvc_video_queue queue;
 	unsigned int fid;
diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
index f4d2e24835d4..678ea6df7b5c 100644
--- a/drivers/usb/gadget/function/uvc_v4l2.c
+++ b/drivers/usb/gadget/function/uvc_v4l2.c
@@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
 	ret = uvcg_queue_buffer(&video->queue, b);
 	if (ret < 0)
 		return ret;
-
-	if (uvc->state == UVC_STATE_STREAMING)
-		queue_work(video->async_wq, &video->pump);
-
+	uvcg_video_pump_qbuf(video);
 	return ret;
 }
 
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index ab3f02054e85..0fcd8e5edbac 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -24,6 +24,8 @@
  * Video codecs
  */
 
+static void uvcg_video_pump(struct uvc_video *video);
+
 static int
 uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
 		u8 *data, int len)
@@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 */
 	if (video->is_enabled) {
 		list_add_tail(&req->list, &video->req_free);
-		queue_work(video->async_wq, &video->pump);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+		uvcg_video_pump(video);
+		return;
 	} else {
 		uvc_video_free_request(ureq, ep);
 	}
@@ -409,20 +413,31 @@ uvc_video_alloc_requests(struct uvc_video *video)
  * Video streaming
  */
 
+void uvcg_video_pump_qbuf(struct uvc_video *video)
+{
+	/*
+	 * Only call uvcg_video_pump() from qbuf, for bulk eps since
+	 * for isoc, the complete handler will call uvcg_video_pump()
+	 * consistently. Calling it for isoc eps, while correct
+	 * will increase contention for video->req_lock since the
+	 * complete handler will be called more often.
+	*/
+	if (video->is_bulk)
+		uvcg_video_pump(video);
+}
+
 /*
  * uvcg_video_pump - Pump video data into the USB requests
  *
  * This function fills the available USB requests (listed in req_free) with
  * video data from the queued buffers.
  */
-static void uvcg_video_pump(struct work_struct *work)
+static void uvcg_video_pump(struct uvc_video *video)
 {
-	struct uvc_video *video = container_of(work, struct uvc_video, pump);
 	struct uvc_video_queue *queue = &video->queue;
-	/* video->max_payload_size is only set when using bulk transfer */
-	bool is_bulk = video->max_payload_size;
 	struct usb_request *req = NULL;
-	struct uvc_buffer *buf;
+	struct uvc_request *ureq = NULL;
+	struct uvc_buffer *buf = NULL, *last_buf = NULL;
 	unsigned long flags;
 	bool buf_done;
 	int ret;
@@ -455,7 +470,8 @@ static void uvcg_video_pump(struct work_struct *work)
 		if (buf != NULL) {
 			video->encode(req, video, buf);
 			buf_done = buf->state == UVC_BUF_STATE_DONE;
-		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
+		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
+				!video->is_bulk) {
 			/*
 			 * No video buffer available; the queue is still connected and
 			 * we're transferring over ISOC. Queue a 0 length request to
@@ -500,18 +516,30 @@ static void uvcg_video_pump(struct work_struct *work)
 			req->no_interrupt = 1;
 		}
 
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
-
+		spin_lock_irqsave(&video->req_lock, flags);
+		if (video->is_enabled) {
+			/* Queue the USB request */
+			ret = uvcg_video_ep_queue(video, req);
+			/* Endpoint now owns the request */
+			req = NULL;
+			video->req_int_count++;
+		} else {
+			ret =  -ENODEV;
+			ureq = req->context;
+			last_buf = ureq->last_buf;
+			ureq->last_buf = NULL;
+		}
+		spin_unlock_irqrestore(&video->req_lock, flags);
 		if (ret < 0) {
+			if (last_buf != NULL) {
+				// Return the buffer to the queue in the case the
+				// request was not queued to the ep.
+				uvcg_complete_buffer(&video->queue, last_buf);
+			}
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
-
-		/* Endpoint now owns the request */
-		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -556,7 +584,6 @@ uvcg_video_disable(struct uvc_video *video)
 	}
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
-	cancel_work_sync(&video->pump);
 	uvcg_queue_cancel(&video->queue, 0);
 
 	spin_lock_irqsave(&video->req_lock, flags);
@@ -626,14 +653,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
 	if (video->max_payload_size) {
 		video->encode = uvc_video_encode_bulk;
 		video->payload_size = 0;
-	} else
+		video->is_bulk = true;
+	} else {
 		video->encode = video->queue.use_sg ?
 			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
+		video->is_bulk = false;
+	}
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
-
+	uvcg_video_pump(video);
 	return ret;
 }
 
@@ -646,12 +675,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
 	spin_lock_init(&video->req_lock);
-	INIT_WORK(&video->pump, uvcg_video_pump);
-
-	/* Allocate a work queue for asynchronous video pump handler. */
-	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
-	if (!video->async_wq)
-		return -EINVAL;
 
 	video->uvc = uvc;
 	video->fcc = V4L2_PIX_FMT_YUYV;
diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
index 03adeefa343b..29c6b9a2e9c3 100644
--- a/drivers/usb/gadget/function/uvc_video.h
+++ b/drivers/usb/gadget/function/uvc_video.h
@@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
 
 int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
 
+void uvcg_video_pump_qbuf(struct uvc_video *video);
+
 #endif /* __UVC_VIDEO_H__ */
-- 
2.42.0.820.g83a721a137-goog


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-26 21:56   ` [PATCH v2] " Jayant Chowdhary
@ 2023-10-27  7:19     ` Greg KH
  2023-10-27  7:51     ` Laurent Pinchart
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 31+ messages in thread
From: Greg KH @ 2023-10-27  7:19 UTC (permalink / raw)
  To: Jayant Chowdhary
  Cc: mgr, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	laurent.pinchart, linux-kernel, linux-usb, Michael Grzeschik

On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
> This patch is based on top of
> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:

That doesn't work in the changelog of a patch at all, it goes below the
--- line p lease.

> 
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that thread scheduling
> affects at what cadence we're able to pump requests. This could mean usb
> requests miss their uframes - resulting in video stream flickers on the host
> device.
> 
> In this patch, we move the pumping of usb requests to
> 1) uvcg_video_complete() complete handler for both isoc + bulk
>    endpoints. We still send 0 length requests when there is no uvc buffer
>    available to encode.
> 2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
>    0 length requests.
> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Avichal Rakesh <arakesh@google.com>
> Tested-by: Jayant Chowdhary <jchowdhary@google.com>
> ---
>  v1->v2: Fix code style and add self Signed-off-by

Great, but as signed-off-by kind of implies you tested it, no need for
the tested-by now, right?  Not a big deal, and normally I'd ignore it
but I know you at least have to do one more version of this based on the
above problem...

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-26 21:56   ` [PATCH v2] " Jayant Chowdhary
  2023-10-27  7:19     ` Greg KH
@ 2023-10-27  7:51     ` Laurent Pinchart
  2023-10-27 11:10       ` Michael Grzeschik
  2023-10-27 10:44     ` Greg KH
  2023-11-02  6:01     ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc " Jayant Chowdhary
  3 siblings, 1 reply; 31+ messages in thread
From: Laurent Pinchart @ 2023-10-27  7:51 UTC (permalink / raw)
  To: Jayant Chowdhary
  Cc: mgr, Thinh.Nguyen, arakesh, etalvala, dan.scally, gregkh,
	linux-kernel, linux-usb, Michael Grzeschik

Hi Jayant,

Thank you for the patch.

On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
> This patch is based on top of
> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
> 
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that thread scheduling
> affects at what cadence we're able to pump requests. This could mean usb
> requests miss their uframes - resulting in video stream flickers on the host
> device.
> 
> In this patch, we move the pumping of usb requests to
> 1) uvcg_video_complete() complete handler for both isoc + bulk
>    endpoints. We still send 0 length requests when there is no uvc buffer
>    available to encode.

This means you will end up copying large amounts of data in interrupt
context. The work queue was there to avoid exactly that, as it will
introduce delays that can affect other parts of the system. I think this
is a problem.

> 2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
>    0 length requests.
> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Avichal Rakesh <arakesh@google.com>
> Tested-by: Jayant Chowdhary <jchowdhary@google.com>
> ---
>  v1->v2: Fix code style and add self Signed-off-by
> 
>  drivers/usb/gadget/function/f_uvc.c     |  4 --
>  drivers/usb/gadget/function/uvc.h       |  4 +-
>  drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
>  drivers/usb/gadget/function/uvc_video.c | 71 ++++++++++++++++---------
>  drivers/usb/gadget/function/uvc_video.h |  2 +
>  5 files changed, 51 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
> index ae08341961eb..53cb2539486d 100644
> --- a/drivers/usb/gadget/function/f_uvc.c
> +++ b/drivers/usb/gadget/function/f_uvc.c
> @@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
>  {
>  	struct usb_composite_dev *cdev = c->cdev;
>  	struct uvc_device *uvc = to_uvc(f);
> -	struct uvc_video *video = &uvc->video;
>  	long wait_ret = 1;
>  
>  	uvcg_info(f, "%s()\n", __func__);
>  
> -	if (video->async_wq)
> -		destroy_workqueue(video->async_wq);
> -
>  	/*
>  	 * If we know we're connected via v4l2, then there should be a cleanup
>  	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index be0d012aa244..498f344fda4b 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -88,9 +88,6 @@ struct uvc_video {
>  	struct uvc_device *uvc;
>  	struct usb_ep *ep;
>  
> -	struct work_struct pump;
> -	struct workqueue_struct *async_wq;
> -
>  	/* Frame parameters */
>  	u8 bpp;
>  	u32 fcc;
> @@ -116,6 +113,7 @@ struct uvc_video {
>  	/* Context data used by the completion handler */
>  	__u32 payload_size;
>  	__u32 max_payload_size;
> +	bool is_bulk;

This should be introduced in a separate patch.

>  
>  	struct uvc_video_queue queue;
>  	unsigned int fid;
> diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
> index f4d2e24835d4..678ea6df7b5c 100644
> --- a/drivers/usb/gadget/function/uvc_v4l2.c
> +++ b/drivers/usb/gadget/function/uvc_v4l2.c
> @@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
>  	ret = uvcg_queue_buffer(&video->queue, b);
>  	if (ret < 0)
>  		return ret;
> -
> -	if (uvc->state == UVC_STATE_STREAMING)
> -		queue_work(video->async_wq, &video->pump);
> -
> +	uvcg_video_pump_qbuf(video);
>  	return ret;
>  }
>  
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index ab3f02054e85..0fcd8e5edbac 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -24,6 +24,8 @@
>   * Video codecs
>   */
>  
> +static void uvcg_video_pump(struct uvc_video *video);
> +
>  static int
>  uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
>  		u8 *data, int len)
> @@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>  	 */
>  	if (video->is_enabled) {
>  		list_add_tail(&req->list, &video->req_free);
> -		queue_work(video->async_wq, &video->pump);
> +		spin_unlock_irqrestore(&video->req_lock, flags);
> +		uvcg_video_pump(video);
> +		return;
>  	} else {
>  		uvc_video_free_request(ureq, ep);
>  	}
> @@ -409,20 +413,31 @@ uvc_video_alloc_requests(struct uvc_video *video)
>   * Video streaming
>   */
>  
> +void uvcg_video_pump_qbuf(struct uvc_video *video)
> +{
> +	/*
> +	 * Only call uvcg_video_pump() from qbuf, for bulk eps since
> +	 * for isoc, the complete handler will call uvcg_video_pump()
> +	 * consistently. Calling it for isoc eps, while correct
> +	 * will increase contention for video->req_lock since the
> +	 * complete handler will be called more often.
> +	*/
> +	if (video->is_bulk)
> +		uvcg_video_pump(video);

Am I the only one to see the *major* race condition that this patch
introduces ?

> +}
> +
>  /*
>   * uvcg_video_pump - Pump video data into the USB requests
>   *
>   * This function fills the available USB requests (listed in req_free) with
>   * video data from the queued buffers.
>   */
> -static void uvcg_video_pump(struct work_struct *work)
> +static void uvcg_video_pump(struct uvc_video *video)
>  {
> -	struct uvc_video *video = container_of(work, struct uvc_video, pump);
>  	struct uvc_video_queue *queue = &video->queue;
> -	/* video->max_payload_size is only set when using bulk transfer */
> -	bool is_bulk = video->max_payload_size;
>  	struct usb_request *req = NULL;
> -	struct uvc_buffer *buf;
> +	struct uvc_request *ureq = NULL;
> +	struct uvc_buffer *buf = NULL, *last_buf = NULL;
>  	unsigned long flags;
>  	bool buf_done;
>  	int ret;
> @@ -455,7 +470,8 @@ static void uvcg_video_pump(struct work_struct *work)
>  		if (buf != NULL) {
>  			video->encode(req, video, buf);
>  			buf_done = buf->state == UVC_BUF_STATE_DONE;
> -		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
> +		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
> +				!video->is_bulk) {
>  			/*
>  			 * No video buffer available; the queue is still connected and
>  			 * we're transferring over ISOC. Queue a 0 length request to
> @@ -500,18 +516,30 @@ static void uvcg_video_pump(struct work_struct *work)
>  			req->no_interrupt = 1;
>  		}
>  
> -		/* Queue the USB request */
> -		ret = uvcg_video_ep_queue(video, req);
>  		spin_unlock_irqrestore(&queue->irqlock, flags);
> -
> +		spin_lock_irqsave(&video->req_lock, flags);
> +		if (video->is_enabled) {
> +			/* Queue the USB request */
> +			ret = uvcg_video_ep_queue(video, req);
> +			/* Endpoint now owns the request */
> +			req = NULL;
> +			video->req_int_count++;
> +		} else {
> +			ret =  -ENODEV;
> +			ureq = req->context;
> +			last_buf = ureq->last_buf;
> +			ureq->last_buf = NULL;
> +		}
> +		spin_unlock_irqrestore(&video->req_lock, flags);
>  		if (ret < 0) {
> +			if (last_buf != NULL) {
> +				// Return the buffer to the queue in the case the
> +				// request was not queued to the ep.

Wrong comment style.

> +				uvcg_complete_buffer(&video->queue, last_buf);
> +			}
>  			uvcg_queue_cancel(queue, 0);
>  			break;
>  		}
> -
> -		/* Endpoint now owns the request */
> -		req = NULL;
> -		video->req_int_count++;
>  	}
>  
>  	if (!req)
> @@ -556,7 +584,6 @@ uvcg_video_disable(struct uvc_video *video)
>  	}
>  	spin_unlock_irqrestore(&video->req_lock, flags);
>  
> -	cancel_work_sync(&video->pump);
>  	uvcg_queue_cancel(&video->queue, 0);
>  
>  	spin_lock_irqsave(&video->req_lock, flags);
> @@ -626,14 +653,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>  	if (video->max_payload_size) {
>  		video->encode = uvc_video_encode_bulk;
>  		video->payload_size = 0;
> -	} else
> +		video->is_bulk = true;
> +	} else {
>  		video->encode = video->queue.use_sg ?
>  			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
> +		video->is_bulk = false;
> +	}
>  
>  	video->req_int_count = 0;
>  
> -	queue_work(video->async_wq, &video->pump);
> -
> +	uvcg_video_pump(video);
>  	return ret;
>  }
>  
> @@ -646,12 +675,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>  	INIT_LIST_HEAD(&video->ureqs);
>  	INIT_LIST_HEAD(&video->req_free);
>  	spin_lock_init(&video->req_lock);
> -	INIT_WORK(&video->pump, uvcg_video_pump);
> -
> -	/* Allocate a work queue for asynchronous video pump handler. */
> -	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
> -	if (!video->async_wq)
> -		return -EINVAL;
>  
>  	video->uvc = uvc;
>  	video->fcc = V4L2_PIX_FMT_YUYV;
> diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
> index 03adeefa343b..29c6b9a2e9c3 100644
> --- a/drivers/usb/gadget/function/uvc_video.h
> +++ b/drivers/usb/gadget/function/uvc_video.h
> @@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
>  
>  int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
>  
> +void uvcg_video_pump_qbuf(struct uvc_video *video);
> +
>  #endif /* __UVC_VIDEO_H__ */

-- 
Regards,

Laurent Pinchart

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-26 21:56   ` [PATCH v2] " Jayant Chowdhary
  2023-10-27  7:19     ` Greg KH
  2023-10-27  7:51     ` Laurent Pinchart
@ 2023-10-27 10:44     ` Greg KH
  2023-11-02  6:01     ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc " Jayant Chowdhary
  3 siblings, 0 replies; 31+ messages in thread
From: Greg KH @ 2023-10-27 10:44 UTC (permalink / raw)
  To: Jayant Chowdhary
  Cc: mgr, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	laurent.pinchart, linux-kernel, linux-usb, Michael Grzeschik

On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
> This patch is based on top of
> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
> 
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that thread scheduling
> affects at what cadence we're able to pump requests. This could mean usb
> requests miss their uframes - resulting in video stream flickers on the host
> device.
> 
> In this patch, we move the pumping of usb requests to
> 1) uvcg_video_complete() complete handler for both isoc + bulk
>    endpoints. We still send 0 length requests when there is no uvc buffer
>    available to encode.
> 2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
>    0 length requests.

Usually when you have to enumerate things in a patch, that implies it
needs to be broken up into multiple changes.  Why isn't that necessary
here?

> 
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Avichal Rakesh <arakesh@google.com>
> Tested-by: Jayant Chowdhary <jchowdhary@google.com>
> ---
>  v1->v2: Fix code style and add self Signed-off-by
> 
>  drivers/usb/gadget/function/f_uvc.c     |  4 --
>  drivers/usb/gadget/function/uvc.h       |  4 +-
>  drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
>  drivers/usb/gadget/function/uvc_video.c | 71 ++++++++++++++++---------
>  drivers/usb/gadget/function/uvc_video.h |  2 +
>  5 files changed, 51 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
> index ae08341961eb..53cb2539486d 100644
> --- a/drivers/usb/gadget/function/f_uvc.c
> +++ b/drivers/usb/gadget/function/f_uvc.c
> @@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
>  {
>  	struct usb_composite_dev *cdev = c->cdev;
>  	struct uvc_device *uvc = to_uvc(f);
> -	struct uvc_video *video = &uvc->video;
>  	long wait_ret = 1;
>  
>  	uvcg_info(f, "%s()\n", __func__);

meta-comment, lines like this need to be deleted in the future.  Not
relevent here, but for future work if you want to add such a cleanup to
a patch series, I'd be grateful.

>  
> -	if (video->async_wq)
> -		destroy_workqueue(video->async_wq);
> -
>  	/*
>  	 * If we know we're connected via v4l2, then there should be a cleanup
>  	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index be0d012aa244..498f344fda4b 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -88,9 +88,6 @@ struct uvc_video {
>  	struct uvc_device *uvc;
>  	struct usb_ep *ep;
>  
> -	struct work_struct pump;
> -	struct workqueue_struct *async_wq;
> -
>  	/* Frame parameters */
>  	u8 bpp;
>  	u32 fcc;
> @@ -116,6 +113,7 @@ struct uvc_video {
>  	/* Context data used by the completion handler */
>  	__u32 payload_size;
>  	__u32 max_payload_size;
> +	bool is_bulk;

As Laurent said, this should be a separate patch.

>  
>  	struct uvc_video_queue queue;
>  	unsigned int fid;
> diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
> index f4d2e24835d4..678ea6df7b5c 100644
> --- a/drivers/usb/gadget/function/uvc_v4l2.c
> +++ b/drivers/usb/gadget/function/uvc_v4l2.c
> @@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
>  	ret = uvcg_queue_buffer(&video->queue, b);
>  	if (ret < 0)
>  		return ret;
> -
> -	if (uvc->state == UVC_STATE_STREAMING)
> -		queue_work(video->async_wq, &video->pump);
> -
> +	uvcg_video_pump_qbuf(video);
>  	return ret;
>  }
>  
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index ab3f02054e85..0fcd8e5edbac 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -24,6 +24,8 @@
>   * Video codecs
>   */
>  
> +static void uvcg_video_pump(struct uvc_video *video);
> +
>  static int
>  uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
>  		u8 *data, int len)
> @@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>  	 */
>  	if (video->is_enabled) {
>  		list_add_tail(&req->list, &video->req_free);
> -		queue_work(video->async_wq, &video->pump);
> +		spin_unlock_irqrestore(&video->req_lock, flags);
> +		uvcg_video_pump(video);
> +		return;
>  	} else {
>  		uvc_video_free_request(ureq, ep);
>  	}
> @@ -409,20 +413,31 @@ uvc_video_alloc_requests(struct uvc_video *video)
>   * Video streaming
>   */
>  
> +void uvcg_video_pump_qbuf(struct uvc_video *video)
> +{
> +	/*
> +	 * Only call uvcg_video_pump() from qbuf, for bulk eps since
> +	 * for isoc, the complete handler will call uvcg_video_pump()
> +	 * consistently. Calling it for isoc eps, while correct
> +	 * will increase contention for video->req_lock since the
> +	 * complete handler will be called more often.
> +	*/
> +	if (video->is_bulk)
> +		uvcg_video_pump(video);
> +}
> +
>  /*
>   * uvcg_video_pump - Pump video data into the USB requests
>   *
>   * This function fills the available USB requests (listed in req_free) with
>   * video data from the queued buffers.
>   */
> -static void uvcg_video_pump(struct work_struct *work)
> +static void uvcg_video_pump(struct uvc_video *video)
>  {
> -	struct uvc_video *video = container_of(work, struct uvc_video, pump);
>  	struct uvc_video_queue *queue = &video->queue;
> -	/* video->max_payload_size is only set when using bulk transfer */
> -	bool is_bulk = video->max_payload_size;
>  	struct usb_request *req = NULL;
> -	struct uvc_buffer *buf;
> +	struct uvc_request *ureq = NULL;
> +	struct uvc_buffer *buf = NULL, *last_buf = NULL;
>  	unsigned long flags;
>  	bool buf_done;
>  	int ret;
> @@ -455,7 +470,8 @@ static void uvcg_video_pump(struct work_struct *work)
>  		if (buf != NULL) {
>  			video->encode(req, video, buf);
>  			buf_done = buf->state == UVC_BUF_STATE_DONE;
> -		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
> +		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
> +				!video->is_bulk) {

No need to wrap the line.

>  			/*
>  			 * No video buffer available; the queue is still connected and
>  			 * we're transferring over ISOC. Queue a 0 length request to
> @@ -500,18 +516,30 @@ static void uvcg_video_pump(struct work_struct *work)
>  			req->no_interrupt = 1;
>  		}
>  
> -		/* Queue the USB request */
> -		ret = uvcg_video_ep_queue(video, req);
>  		spin_unlock_irqrestore(&queue->irqlock, flags);
> -
> +		spin_lock_irqsave(&video->req_lock, flags);
> +		if (video->is_enabled) {
> +			/* Queue the USB request */
> +			ret = uvcg_video_ep_queue(video, req);
> +			/* Endpoint now owns the request */
> +			req = NULL;
> +			video->req_int_count++;
> +		} else {
> +			ret =  -ENODEV;
> +			ureq = req->context;
> +			last_buf = ureq->last_buf;
> +			ureq->last_buf = NULL;
> +		}
> +		spin_unlock_irqrestore(&video->req_lock, flags);
>  		if (ret < 0) {
> +			if (last_buf != NULL) {
> +				// Return the buffer to the queue in the case the
> +				// request was not queued to the ep.
> +				uvcg_complete_buffer(&video->queue, last_buf);
> +			}
>  			uvcg_queue_cancel(queue, 0);
>  			break;
>  		}
> -
> -		/* Endpoint now owns the request */
> -		req = NULL;
> -		video->req_int_count++;
>  	}
>  
>  	if (!req)
> @@ -556,7 +584,6 @@ uvcg_video_disable(struct uvc_video *video)
>  	}
>  	spin_unlock_irqrestore(&video->req_lock, flags);
>  
> -	cancel_work_sync(&video->pump);
>  	uvcg_queue_cancel(&video->queue, 0);
>  
>  	spin_lock_irqsave(&video->req_lock, flags);
> @@ -626,14 +653,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>  	if (video->max_payload_size) {
>  		video->encode = uvc_video_encode_bulk;
>  		video->payload_size = 0;
> -	} else
> +		video->is_bulk = true;
> +	} else {
>  		video->encode = video->queue.use_sg ?
>  			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
> +		video->is_bulk = false;

Isn't this already set to 0?

And wait, this is still the bulk endpoint, right?  Or am I missing
something?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-27  7:51     ` Laurent Pinchart
@ 2023-10-27 11:10       ` Michael Grzeschik
  2023-10-27 11:47         ` Laurent Pinchart
  0 siblings, 1 reply; 31+ messages in thread
From: Michael Grzeschik @ 2023-10-27 11:10 UTC (permalink / raw)
  To: Laurent Pinchart
  Cc: Jayant Chowdhary, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	gregkh, linux-kernel, linux-usb

[-- Attachment #1: Type: text/plain, Size: 10661 bytes --]

On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
>Thank you for the patch.
>
>On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
>> This patch is based on top of
>> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>
>> When we use an async work queue to perform the function of pumping
>> usb requests to the usb controller, it is possible that thread scheduling
>> affects at what cadence we're able to pump requests. This could mean usb
>> requests miss their uframes - resulting in video stream flickers on the host
>> device.
>>
>> In this patch, we move the pumping of usb requests to
>> 1) uvcg_video_complete() complete handler for both isoc + bulk
>>    endpoints. We still send 0 length requests when there is no uvc buffer
>>    available to encode.
>
>This means you will end up copying large amounts of data in interrupt
>context. The work queue was there to avoid exactly that, as it will
>introduce delays that can affect other parts of the system. I think this
>is a problem.

Regarding Thin's argument about possible scheduling latency that is already
introducing real errors, this seemed like a good solution.

But sure, this potential latency introduced in the interrupt context can
trigger other side effects.

However I think we need some compromise since both arguments are very valid.

Any ideas, how to solve this?

>> 2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
>>    0 length requests.
>>
>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>> Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>> Tested-by: Jayant Chowdhary <jchowdhary@google.com>
>> ---
>>  v1->v2: Fix code style and add self Signed-off-by
>>
>>  drivers/usb/gadget/function/f_uvc.c     |  4 --
>>  drivers/usb/gadget/function/uvc.h       |  4 +-
>>  drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
>>  drivers/usb/gadget/function/uvc_video.c | 71 ++++++++++++++++---------
>>  drivers/usb/gadget/function/uvc_video.h |  2 +
>>  5 files changed, 51 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
>> index ae08341961eb..53cb2539486d 100644
>> --- a/drivers/usb/gadget/function/f_uvc.c
>> +++ b/drivers/usb/gadget/function/f_uvc.c
>> @@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
>>  {
>>  	struct usb_composite_dev *cdev = c->cdev;
>>  	struct uvc_device *uvc = to_uvc(f);
>> -	struct uvc_video *video = &uvc->video;
>>  	long wait_ret = 1;
>>
>>  	uvcg_info(f, "%s()\n", __func__);
>>
>> -	if (video->async_wq)
>> -		destroy_workqueue(video->async_wq);
>> -
>>  	/*
>>  	 * If we know we're connected via v4l2, then there should be a cleanup
>>  	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> index be0d012aa244..498f344fda4b 100644
>> --- a/drivers/usb/gadget/function/uvc.h
>> +++ b/drivers/usb/gadget/function/uvc.h
>> @@ -88,9 +88,6 @@ struct uvc_video {
>>  	struct uvc_device *uvc;
>>  	struct usb_ep *ep;
>>
>> -	struct work_struct pump;
>> -	struct workqueue_struct *async_wq;
>> -
>>  	/* Frame parameters */
>>  	u8 bpp;
>>  	u32 fcc;
>> @@ -116,6 +113,7 @@ struct uvc_video {
>>  	/* Context data used by the completion handler */
>>  	__u32 payload_size;
>>  	__u32 max_payload_size;
>> +	bool is_bulk;
>
>This should be introduced in a separate patch.
>
>>
>>  	struct uvc_video_queue queue;
>>  	unsigned int fid;
>> diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
>> index f4d2e24835d4..678ea6df7b5c 100644
>> --- a/drivers/usb/gadget/function/uvc_v4l2.c
>> +++ b/drivers/usb/gadget/function/uvc_v4l2.c
>> @@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
>>  	ret = uvcg_queue_buffer(&video->queue, b);
>>  	if (ret < 0)
>>  		return ret;
>> -
>> -	if (uvc->state == UVC_STATE_STREAMING)
>> -		queue_work(video->async_wq, &video->pump);
>> -
>> +	uvcg_video_pump_qbuf(video);
>>  	return ret;
>>  }
>>
>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> index ab3f02054e85..0fcd8e5edbac 100644
>> --- a/drivers/usb/gadget/function/uvc_video.c
>> +++ b/drivers/usb/gadget/function/uvc_video.c
>> @@ -24,6 +24,8 @@
>>   * Video codecs
>>   */
>>
>> +static void uvcg_video_pump(struct uvc_video *video);
>> +
>>  static int
>>  uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
>>  		u8 *data, int len)
>> @@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>  	 */
>>  	if (video->is_enabled) {
>>  		list_add_tail(&req->list, &video->req_free);
>> -		queue_work(video->async_wq, &video->pump);
>> +		spin_unlock_irqrestore(&video->req_lock, flags);
>> +		uvcg_video_pump(video);
>> +		return;
>>  	} else {
>>  		uvc_video_free_request(ureq, ep);
>>  	}
>> @@ -409,20 +413,31 @@ uvc_video_alloc_requests(struct uvc_video *video)
>>   * Video streaming
>>   */
>>
>> +void uvcg_video_pump_qbuf(struct uvc_video *video)
>> +{
>> +	/*
>> +	 * Only call uvcg_video_pump() from qbuf, for bulk eps since
>> +	 * for isoc, the complete handler will call uvcg_video_pump()
>> +	 * consistently. Calling it for isoc eps, while correct
>> +	 * will increase contention for video->req_lock since the
>> +	 * complete handler will be called more often.
>> +	*/
>> +	if (video->is_bulk)
>> +		uvcg_video_pump(video);
>
>Am I the only one to see the *major* race condition that this patch
>introduces ?

Possible that you are. Please elaborate.

>> +}
>> +
>>  /*
>>   * uvcg_video_pump - Pump video data into the USB requests
>>   *
>>   * This function fills the available USB requests (listed in req_free) with
>>   * video data from the queued buffers.
>>   */
>> -static void uvcg_video_pump(struct work_struct *work)
>> +static void uvcg_video_pump(struct uvc_video *video)
>>  {
>> -	struct uvc_video *video = container_of(work, struct uvc_video, pump);
>>  	struct uvc_video_queue *queue = &video->queue;
>> -	/* video->max_payload_size is only set when using bulk transfer */
>> -	bool is_bulk = video->max_payload_size;
>>  	struct usb_request *req = NULL;
>> -	struct uvc_buffer *buf;
>> +	struct uvc_request *ureq = NULL;
>> +	struct uvc_buffer *buf = NULL, *last_buf = NULL;
>>  	unsigned long flags;
>>  	bool buf_done;
>>  	int ret;
>> @@ -455,7 +470,8 @@ static void uvcg_video_pump(struct work_struct *work)
>>  		if (buf != NULL) {
>>  			video->encode(req, video, buf);
>>  			buf_done = buf->state == UVC_BUF_STATE_DONE;
>> -		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>> +		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
>> +				!video->is_bulk) {
>>  			/*
>>  			 * No video buffer available; the queue is still connected and
>>  			 * we're transferring over ISOC. Queue a 0 length request to
>> @@ -500,18 +516,30 @@ static void uvcg_video_pump(struct work_struct *work)
>>  			req->no_interrupt = 1;
>>  		}
>>
>> -		/* Queue the USB request */
>> -		ret = uvcg_video_ep_queue(video, req);
>>  		spin_unlock_irqrestore(&queue->irqlock, flags);
>> -
>> +		spin_lock_irqsave(&video->req_lock, flags);
>> +		if (video->is_enabled) {
>> +			/* Queue the USB request */
>> +			ret = uvcg_video_ep_queue(video, req);
>> +			/* Endpoint now owns the request */
>> +			req = NULL;
>> +			video->req_int_count++;
>> +		} else {
>> +			ret =  -ENODEV;
>> +			ureq = req->context;
>> +			last_buf = ureq->last_buf;
>> +			ureq->last_buf = NULL;
>> +		}
>> +		spin_unlock_irqrestore(&video->req_lock, flags);
>>  		if (ret < 0) {
>> +			if (last_buf != NULL) {
>> +				// Return the buffer to the queue in the case the
>> +				// request was not queued to the ep.
>
>Wrong comment style.
>
>> +				uvcg_complete_buffer(&video->queue, last_buf);
>> +			}
>>  			uvcg_queue_cancel(queue, 0);
>>  			break;
>>  		}
>> -
>> -		/* Endpoint now owns the request */
>> -		req = NULL;
>> -		video->req_int_count++;
>>  	}
>>
>>  	if (!req)
>> @@ -556,7 +584,6 @@ uvcg_video_disable(struct uvc_video *video)
>>  	}
>>  	spin_unlock_irqrestore(&video->req_lock, flags);
>>
>> -	cancel_work_sync(&video->pump);
>>  	uvcg_queue_cancel(&video->queue, 0);
>>
>>  	spin_lock_irqsave(&video->req_lock, flags);
>> @@ -626,14 +653,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>>  	if (video->max_payload_size) {
>>  		video->encode = uvc_video_encode_bulk;
>>  		video->payload_size = 0;
>> -	} else
>> +		video->is_bulk = true;
>> +	} else {
>>  		video->encode = video->queue.use_sg ?
>>  			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
>> +		video->is_bulk = false;
>> +	}
>>
>>  	video->req_int_count = 0;
>>
>> -	queue_work(video->async_wq, &video->pump);
>> -
>> +	uvcg_video_pump(video);
>>  	return ret;
>>  }
>>
>> @@ -646,12 +675,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>  	INIT_LIST_HEAD(&video->ureqs);
>>  	INIT_LIST_HEAD(&video->req_free);
>>  	spin_lock_init(&video->req_lock);
>> -	INIT_WORK(&video->pump, uvcg_video_pump);
>> -
>> -	/* Allocate a work queue for asynchronous video pump handler. */
>> -	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
>> -	if (!video->async_wq)
>> -		return -EINVAL;
>>
>>  	video->uvc = uvc;
>>  	video->fcc = V4L2_PIX_FMT_YUYV;
>> diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
>> index 03adeefa343b..29c6b9a2e9c3 100644
>> --- a/drivers/usb/gadget/function/uvc_video.h
>> +++ b/drivers/usb/gadget/function/uvc_video.h
>> @@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
>>
>>  int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
>>
>> +void uvcg_video_pump_qbuf(struct uvc_video *video);
>> +
>>  #endif /* __UVC_VIDEO_H__ */
>
>-- 
>Regards,
>
>Laurent Pinchart
>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-27 11:10       ` Michael Grzeschik
@ 2023-10-27 11:47         ` Laurent Pinchart
  2023-10-27 13:39           ` Michael Grzeschik
  0 siblings, 1 reply; 31+ messages in thread
From: Laurent Pinchart @ 2023-10-27 11:47 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: Jayant Chowdhary, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	gregkh, linux-kernel, linux-usb

On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
> On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
> > On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
> >> This patch is based on top of
> >> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
> >>
> >> When we use an async work queue to perform the function of pumping
> >> usb requests to the usb controller, it is possible that thread scheduling
> >> affects at what cadence we're able to pump requests. This could mean usb
> >> requests miss their uframes - resulting in video stream flickers on the host
> >> device.
> >>
> >> In this patch, we move the pumping of usb requests to
> >> 1) uvcg_video_complete() complete handler for both isoc + bulk
> >>    endpoints. We still send 0 length requests when there is no uvc buffer
> >>    available to encode.
> >
> > This means you will end up copying large amounts of data in interrupt
> > context. The work queue was there to avoid exactly that, as it will
> > introduce delays that can affect other parts of the system. I think this
> > is a problem.
> 
> Regarding Thin's argument about possible scheduling latency that is already
> introducing real errors, this seemed like a good solution.
> 
> But sure, this potential latency introduced in the interrupt context can
> trigger other side effects.
> 
> However I think we need some compromise since both arguments are very valid.

Agreed.

> Any ideas, how to solve this?

I'm afraid not.

> >> 2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
> >>    0 length requests.
> >>
> >> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> >> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> >> Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
> >> Suggested-by: Avichal Rakesh <arakesh@google.com>
> >> Tested-by: Jayant Chowdhary <jchowdhary@google.com>
> >> ---
> >>  v1->v2: Fix code style and add self Signed-off-by
> >>
> >>  drivers/usb/gadget/function/f_uvc.c     |  4 --
> >>  drivers/usb/gadget/function/uvc.h       |  4 +-
> >>  drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
> >>  drivers/usb/gadget/function/uvc_video.c | 71 ++++++++++++++++---------
> >>  drivers/usb/gadget/function/uvc_video.h |  2 +
> >>  5 files changed, 51 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
> >> index ae08341961eb..53cb2539486d 100644
> >> --- a/drivers/usb/gadget/function/f_uvc.c
> >> +++ b/drivers/usb/gadget/function/f_uvc.c
> >> @@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
> >>  {
> >>  	struct usb_composite_dev *cdev = c->cdev;
> >>  	struct uvc_device *uvc = to_uvc(f);
> >> -	struct uvc_video *video = &uvc->video;
> >>  	long wait_ret = 1;
> >>
> >>  	uvcg_info(f, "%s()\n", __func__);
> >>
> >> -	if (video->async_wq)
> >> -		destroy_workqueue(video->async_wq);
> >> -
> >>  	/*
> >>  	 * If we know we're connected via v4l2, then there should be a cleanup
> >>  	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
> >> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> >> index be0d012aa244..498f344fda4b 100644
> >> --- a/drivers/usb/gadget/function/uvc.h
> >> +++ b/drivers/usb/gadget/function/uvc.h
> >> @@ -88,9 +88,6 @@ struct uvc_video {
> >>  	struct uvc_device *uvc;
> >>  	struct usb_ep *ep;
> >>
> >> -	struct work_struct pump;
> >> -	struct workqueue_struct *async_wq;
> >> -
> >>  	/* Frame parameters */
> >>  	u8 bpp;
> >>  	u32 fcc;
> >> @@ -116,6 +113,7 @@ struct uvc_video {
> >>  	/* Context data used by the completion handler */
> >>  	__u32 payload_size;
> >>  	__u32 max_payload_size;
> >> +	bool is_bulk;
> >
> >This should be introduced in a separate patch.
> >
> >>
> >>  	struct uvc_video_queue queue;
> >>  	unsigned int fid;
> >> diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
> >> index f4d2e24835d4..678ea6df7b5c 100644
> >> --- a/drivers/usb/gadget/function/uvc_v4l2.c
> >> +++ b/drivers/usb/gadget/function/uvc_v4l2.c
> >> @@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
> >>  	ret = uvcg_queue_buffer(&video->queue, b);
> >>  	if (ret < 0)
> >>  		return ret;
> >> -
> >> -	if (uvc->state == UVC_STATE_STREAMING)
> >> -		queue_work(video->async_wq, &video->pump);
> >> -
> >> +	uvcg_video_pump_qbuf(video);
> >>  	return ret;
> >>  }
> >>
> >> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> >> index ab3f02054e85..0fcd8e5edbac 100644
> >> --- a/drivers/usb/gadget/function/uvc_video.c
> >> +++ b/drivers/usb/gadget/function/uvc_video.c
> >> @@ -24,6 +24,8 @@
> >>   * Video codecs
> >>   */
> >>
> >> +static void uvcg_video_pump(struct uvc_video *video);
> >> +
> >>  static int
> >>  uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
> >>  		u8 *data, int len)
> >> @@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
> >>  	 */
> >>  	if (video->is_enabled) {
> >>  		list_add_tail(&req->list, &video->req_free);
> >> -		queue_work(video->async_wq, &video->pump);
> >> +		spin_unlock_irqrestore(&video->req_lock, flags);
> >> +		uvcg_video_pump(video);
> >> +		return;
> >>  	} else {
> >>  		uvc_video_free_request(ureq, ep);
> >>  	}
> >> @@ -409,20 +413,31 @@ uvc_video_alloc_requests(struct uvc_video *video)
> >>   * Video streaming
> >>   */
> >>
> >> +void uvcg_video_pump_qbuf(struct uvc_video *video)
> >> +{
> >> +	/*
> >> +	 * Only call uvcg_video_pump() from qbuf, for bulk eps since
> >> +	 * for isoc, the complete handler will call uvcg_video_pump()
> >> +	 * consistently. Calling it for isoc eps, while correct
> >> +	 * will increase contention for video->req_lock since the
> >> +	 * complete handler will be called more often.
> >> +	*/
> >> +	if (video->is_bulk)
> >> +		uvcg_video_pump(video);
> >
> > Am I the only one to see the *major* race condition that this patch
> > introduces ?
> 
> Possible that you are. Please elaborate.

uvcg_video_pump() can now run multiple times in parallel on multiple
CPUs. Look at the while() loop in the function, and consider what will
happen when run on two CPUs concurrently. See below for an additional
comment on this.

> >> +}
> >> +
> >>  /*
> >>   * uvcg_video_pump - Pump video data into the USB requests
> >>   *
> >>   * This function fills the available USB requests (listed in req_free) with
> >>   * video data from the queued buffers.
> >>   */
> >> -static void uvcg_video_pump(struct work_struct *work)
> >> +static void uvcg_video_pump(struct uvc_video *video)
> >>  {
> >> -	struct uvc_video *video = container_of(work, struct uvc_video, pump);
> >>  	struct uvc_video_queue *queue = &video->queue;
> >> -	/* video->max_payload_size is only set when using bulk transfer */
> >> -	bool is_bulk = video->max_payload_size;
> >>  	struct usb_request *req = NULL;
> >> -	struct uvc_buffer *buf;
> >> +	struct uvc_request *ureq = NULL;
> >> +	struct uvc_buffer *buf = NULL, *last_buf = NULL;
> >>  	unsigned long flags;
> >>  	bool buf_done;
> >>  	int ret;
> >> @@ -455,7 +470,8 @@ static void uvcg_video_pump(struct work_struct *work)
> >>  		if (buf != NULL) {
> >>  			video->encode(req, video, buf);
> >>  			buf_done = buf->state == UVC_BUF_STATE_DONE;
> >> -		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
> >> +		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
> >> +				!video->is_bulk) {
> >>  			/*
> >>  			 * No video buffer available; the queue is still connected and
> >>  			 * we're transferring over ISOC. Queue a 0 length request to
> >> @@ -500,18 +516,30 @@ static void uvcg_video_pump(struct work_struct *work)
> >>  			req->no_interrupt = 1;
> >>  		}
> >>
> >> -		/* Queue the USB request */
> >> -		ret = uvcg_video_ep_queue(video, req);
> >>  		spin_unlock_irqrestore(&queue->irqlock, flags);
> >> -

Here's one problematic point. The code above may have run on CPU A,
which releases IRQ lock. CPU B may then run the same code to encode the
next chunk of data in a request, and proceed to the code below before
CPU A. The requests will then be queued in the wrong order.

In the next iteration of this patch, I would like to see a clear
explanation in the commit message of why there is no race condition
(after fixing the existing ones, of course). Writing it down forces
going through the mental exercise of thinking about the race conditions,
which should help catching them.

> >> +		spin_lock_irqsave(&video->req_lock, flags);
> >> +		if (video->is_enabled) {
> >> +			/* Queue the USB request */
> >> +			ret = uvcg_video_ep_queue(video, req);
> >> +			/* Endpoint now owns the request */
> >> +			req = NULL;
> >> +			video->req_int_count++;
> >> +		} else {
> >> +			ret =  -ENODEV;
> >> +			ureq = req->context;
> >> +			last_buf = ureq->last_buf;
> >> +			ureq->last_buf = NULL;
> >> +		}
> >> +		spin_unlock_irqrestore(&video->req_lock, flags);
> >>  		if (ret < 0) {
> >> +			if (last_buf != NULL) {
> >> +				// Return the buffer to the queue in the case the
> >> +				// request was not queued to the ep.
> >
> > Wrong comment style.
> >
> >> +				uvcg_complete_buffer(&video->queue, last_buf);
> >> +			}
> >>  			uvcg_queue_cancel(queue, 0);
> >>  			break;
> >>  		}
> >> -
> >> -		/* Endpoint now owns the request */
> >> -		req = NULL;
> >> -		video->req_int_count++;
> >>  	}
> >>
> >>  	if (!req)
> >> @@ -556,7 +584,6 @@ uvcg_video_disable(struct uvc_video *video)
> >>  	}
> >>  	spin_unlock_irqrestore(&video->req_lock, flags);
> >>
> >> -	cancel_work_sync(&video->pump);
> >>  	uvcg_queue_cancel(&video->queue, 0);
> >>
> >>  	spin_lock_irqsave(&video->req_lock, flags);
> >> @@ -626,14 +653,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
> >>  	if (video->max_payload_size) {
> >>  		video->encode = uvc_video_encode_bulk;
> >>  		video->payload_size = 0;
> >> -	} else
> >> +		video->is_bulk = true;
> >> +	} else {
> >>  		video->encode = video->queue.use_sg ?
> >>  			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
> >> +		video->is_bulk = false;
> >> +	}
> >>
> >>  	video->req_int_count = 0;
> >>
> >> -	queue_work(video->async_wq, &video->pump);
> >> -
> >> +	uvcg_video_pump(video);
> >>  	return ret;
> >>  }
> >>
> >> @@ -646,12 +675,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
> >>  	INIT_LIST_HEAD(&video->ureqs);
> >>  	INIT_LIST_HEAD(&video->req_free);
> >>  	spin_lock_init(&video->req_lock);
> >> -	INIT_WORK(&video->pump, uvcg_video_pump);
> >> -
> >> -	/* Allocate a work queue for asynchronous video pump handler. */
> >> -	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
> >> -	if (!video->async_wq)
> >> -		return -EINVAL;
> >>
> >>  	video->uvc = uvc;
> >>  	video->fcc = V4L2_PIX_FMT_YUYV;
> >> diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
> >> index 03adeefa343b..29c6b9a2e9c3 100644
> >> --- a/drivers/usb/gadget/function/uvc_video.h
> >> +++ b/drivers/usb/gadget/function/uvc_video.h
> >> @@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
> >>
> >>  int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
> >>
> >> +void uvcg_video_pump_qbuf(struct uvc_video *video);
> >> +
> >>  #endif /* __UVC_VIDEO_H__ */

-- 
Regards,

Laurent Pinchart

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-27 11:47         ` Laurent Pinchart
@ 2023-10-27 13:39           ` Michael Grzeschik
  2023-10-27 14:58             ` Alan Stern
  0 siblings, 1 reply; 31+ messages in thread
From: Michael Grzeschik @ 2023-10-27 13:39 UTC (permalink / raw)
  To: Laurent Pinchart
  Cc: Jayant Chowdhary, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	gregkh, linux-kernel, linux-usb

[-- Attachment #1: Type: text/plain, Size: 12895 bytes --]

On Fri, Oct 27, 2023 at 02:47:52PM +0300, Laurent Pinchart wrote:
>On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
>> On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
>> > On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
>> >> This patch is based on top of
>> >> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>> >>
>> >> When we use an async work queue to perform the function of pumping
>> >> usb requests to the usb controller, it is possible that thread scheduling
>> >> affects at what cadence we're able to pump requests. This could mean usb
>> >> requests miss their uframes - resulting in video stream flickers on the host
>> >> device.
>> >>
>> >> In this patch, we move the pumping of usb requests to
>> >> 1) uvcg_video_complete() complete handler for both isoc + bulk
>> >>    endpoints. We still send 0 length requests when there is no uvc buffer
>> >>    available to encode.
>> >
>> > This means you will end up copying large amounts of data in interrupt
>> > context. The work queue was there to avoid exactly that, as it will
>> > introduce delays that can affect other parts of the system. I think this
>> > is a problem.
>>
>> Regarding Thin's argument about possible scheduling latency that is already
>> introducing real errors, this seemed like a good solution.
>>
>> But sure, this potential latency introduced in the interrupt context can
>> trigger other side effects.
>>
>> However I think we need some compromise since both arguments are very valid.
>
>Agreed.
>
>> Any ideas, how to solve this?
>
>I'm afraid not.

We discussed this and came to the conclusion that we could make use of
kthread_create and sched_setattr with an attr->sched_policy = SCHED_DEADLINE
here instead of the workqueue. This way we would ensure that the worker
would be triggered with hard definitions.

Since the SG case is not that heavy on the completion handler, we could
also make this kthread conditionaly to the memcpy case.

>> >> 2) uvc_v4l2_qbuf - only for bulk endpoints since it is not legal to send
>> >>    0 length requests.
>> >>
>> >> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> >> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>> >> Suggested-by: Jayant Chowdhary <jchowdhary@google.com>
>> >> Suggested-by: Avichal Rakesh <arakesh@google.com>
>> >> Tested-by: Jayant Chowdhary <jchowdhary@google.com>
>> >> ---
>> >>  v1->v2: Fix code style and add self Signed-off-by
>> >>
>> >>  drivers/usb/gadget/function/f_uvc.c     |  4 --
>> >>  drivers/usb/gadget/function/uvc.h       |  4 +-
>> >>  drivers/usb/gadget/function/uvc_v4l2.c  |  5 +-
>> >>  drivers/usb/gadget/function/uvc_video.c | 71 ++++++++++++++++---------
>> >>  drivers/usb/gadget/function/uvc_video.h |  2 +
>> >>  5 files changed, 51 insertions(+), 35 deletions(-)
>> >>
>> >> diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
>> >> index ae08341961eb..53cb2539486d 100644
>> >> --- a/drivers/usb/gadget/function/f_uvc.c
>> >> +++ b/drivers/usb/gadget/function/f_uvc.c
>> >> @@ -959,14 +959,10 @@ static void uvc_function_unbind(struct usb_configuration *c,
>> >>  {
>> >>  	struct usb_composite_dev *cdev = c->cdev;
>> >>  	struct uvc_device *uvc = to_uvc(f);
>> >> -	struct uvc_video *video = &uvc->video;
>> >>  	long wait_ret = 1;
>> >>
>> >>  	uvcg_info(f, "%s()\n", __func__);
>> >>
>> >> -	if (video->async_wq)
>> >> -		destroy_workqueue(video->async_wq);
>> >> -
>> >>  	/*
>> >>  	 * If we know we're connected via v4l2, then there should be a cleanup
>> >>  	 * of the device from userspace either via UVC_EVENT_DISCONNECT or
>> >> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> >> index be0d012aa244..498f344fda4b 100644
>> >> --- a/drivers/usb/gadget/function/uvc.h
>> >> +++ b/drivers/usb/gadget/function/uvc.h
>> >> @@ -88,9 +88,6 @@ struct uvc_video {
>> >>  	struct uvc_device *uvc;
>> >>  	struct usb_ep *ep;
>> >>
>> >> -	struct work_struct pump;
>> >> -	struct workqueue_struct *async_wq;
>> >> -
>> >>  	/* Frame parameters */
>> >>  	u8 bpp;
>> >>  	u32 fcc;
>> >> @@ -116,6 +113,7 @@ struct uvc_video {
>> >>  	/* Context data used by the completion handler */
>> >>  	__u32 payload_size;
>> >>  	__u32 max_payload_size;
>> >> +	bool is_bulk;
>> >
>> >This should be introduced in a separate patch.
>> >
>> >>
>> >>  	struct uvc_video_queue queue;
>> >>  	unsigned int fid;
>> >> diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c
>> >> index f4d2e24835d4..678ea6df7b5c 100644
>> >> --- a/drivers/usb/gadget/function/uvc_v4l2.c
>> >> +++ b/drivers/usb/gadget/function/uvc_v4l2.c
>> >> @@ -414,10 +414,7 @@ uvc_v4l2_qbuf(struct file *file, void *fh, struct v4l2_buffer *b)
>> >>  	ret = uvcg_queue_buffer(&video->queue, b);
>> >>  	if (ret < 0)
>> >>  		return ret;
>> >> -
>> >> -	if (uvc->state == UVC_STATE_STREAMING)
>> >> -		queue_work(video->async_wq, &video->pump);
>> >> -
>> >> +	uvcg_video_pump_qbuf(video);
>> >>  	return ret;
>> >>  }
>> >>
>> >> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> >> index ab3f02054e85..0fcd8e5edbac 100644
>> >> --- a/drivers/usb/gadget/function/uvc_video.c
>> >> +++ b/drivers/usb/gadget/function/uvc_video.c
>> >> @@ -24,6 +24,8 @@
>> >>   * Video codecs
>> >>   */
>> >>
>> >> +static void uvcg_video_pump(struct uvc_video *video);
>> >> +
>> >>  static int
>> >>  uvc_video_encode_header(struct uvc_video *video, struct uvc_buffer *buf,
>> >>  		u8 *data, int len)
>> >> @@ -329,7 +331,9 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>> >>  	 */
>> >>  	if (video->is_enabled) {
>> >>  		list_add_tail(&req->list, &video->req_free);
>> >> -		queue_work(video->async_wq, &video->pump);
>> >> +		spin_unlock_irqrestore(&video->req_lock, flags);
>> >> +		uvcg_video_pump(video);
>> >> +		return;
>> >>  	} else {
>> >>  		uvc_video_free_request(ureq, ep);
>> >>  	}
>> >> @@ -409,20 +413,31 @@ uvc_video_alloc_requests(struct uvc_video *video)
>> >>   * Video streaming
>> >>   */
>> >>
>> >> +void uvcg_video_pump_qbuf(struct uvc_video *video)
>> >> +{
>> >> +	/*
>> >> +	 * Only call uvcg_video_pump() from qbuf, for bulk eps since
>> >> +	 * for isoc, the complete handler will call uvcg_video_pump()
>> >> +	 * consistently. Calling it for isoc eps, while correct
>> >> +	 * will increase contention for video->req_lock since the
>> >> +	 * complete handler will be called more often.
>> >> +	*/
>> >> +	if (video->is_bulk)
>> >> +		uvcg_video_pump(video);
>> >
>> > Am I the only one to see the *major* race condition that this patch
>> > introduces ?
>>
>> Possible that you are. Please elaborate.
>
>uvcg_video_pump() can now run multiple times in parallel on multiple
>CPUs. Look at the while() loop in the function, and consider what will
>happen when run on two CPUs concurrently. See below for an additional
>comment on this.
>
>> >> +}
>> >> +
>> >>  /*
>> >>   * uvcg_video_pump - Pump video data into the USB requests
>> >>   *
>> >>   * This function fills the available USB requests (listed in req_free) with
>> >>   * video data from the queued buffers.
>> >>   */
>> >> -static void uvcg_video_pump(struct work_struct *work)
>> >> +static void uvcg_video_pump(struct uvc_video *video)
>> >>  {
>> >> -	struct uvc_video *video = container_of(work, struct uvc_video, pump);
>> >>  	struct uvc_video_queue *queue = &video->queue;
>> >> -	/* video->max_payload_size is only set when using bulk transfer */
>> >> -	bool is_bulk = video->max_payload_size;
>> >>  	struct usb_request *req = NULL;
>> >> -	struct uvc_buffer *buf;
>> >> +	struct uvc_request *ureq = NULL;
>> >> +	struct uvc_buffer *buf = NULL, *last_buf = NULL;
>> >>  	unsigned long flags;
>> >>  	bool buf_done;
>> >>  	int ret;
>> >> @@ -455,7 +470,8 @@ static void uvcg_video_pump(struct work_struct *work)
>> >>  		if (buf != NULL) {
>> >>  			video->encode(req, video, buf);
>> >>  			buf_done = buf->state == UVC_BUF_STATE_DONE;
>> >> -		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>> >> +		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) &&
>> >> +				!video->is_bulk) {
>> >>  			/*
>> >>  			 * No video buffer available; the queue is still connected and
>> >>  			 * we're transferring over ISOC. Queue a 0 length request to
>> >> @@ -500,18 +516,30 @@ static void uvcg_video_pump(struct work_struct *work)
>> >>  			req->no_interrupt = 1;
>> >>  		}
>> >>
>> >> -		/* Queue the USB request */
>> >> -		ret = uvcg_video_ep_queue(video, req);
>> >>  		spin_unlock_irqrestore(&queue->irqlock, flags);
>> >> -
>
>Here's one problematic point. The code above may have run on CPU A,
>which releases IRQ lock. CPU B may then run the same code to encode the
>next chunk of data in a request, and proceed to the code below before
>CPU A. The requests will then be queued in the wrong order.

Right

>In the next iteration of this patch, I would like to see a clear
>explanation in the commit message of why there is no race condition
>(after fixing the existing ones, of course). Writing it down forces
>going through the mental exercise of thinking about the race conditions,
>which should help catching them.

Agreed.

>> >> +		spin_lock_irqsave(&video->req_lock, flags);
>> >> +		if (video->is_enabled) {
>> >> +			/* Queue the USB request */
>> >> +			ret = uvcg_video_ep_queue(video, req);
>> >> +			/* Endpoint now owns the request */
>> >> +			req = NULL;
>> >> +			video->req_int_count++;
>> >> +		} else {
>> >> +			ret =  -ENODEV;
>> >> +			ureq = req->context;
>> >> +			last_buf = ureq->last_buf;
>> >> +			ureq->last_buf = NULL;
>> >> +		}
>> >> +		spin_unlock_irqrestore(&video->req_lock, flags);
>> >>  		if (ret < 0) {
>> >> +			if (last_buf != NULL) {
>> >> +				// Return the buffer to the queue in the case the
>> >> +				// request was not queued to the ep.
>> >
>> > Wrong comment style.
>> >
>> >> +				uvcg_complete_buffer(&video->queue, last_buf);
>> >> +			}
>> >>  			uvcg_queue_cancel(queue, 0);
>> >>  			break;
>> >>  		}
>> >> -
>> >> -		/* Endpoint now owns the request */
>> >> -		req = NULL;
>> >> -		video->req_int_count++;
>> >>  	}
>> >>
>> >>  	if (!req)
>> >> @@ -556,7 +584,6 @@ uvcg_video_disable(struct uvc_video *video)
>> >>  	}
>> >>  	spin_unlock_irqrestore(&video->req_lock, flags);
>> >>
>> >> -	cancel_work_sync(&video->pump);
>> >>  	uvcg_queue_cancel(&video->queue, 0);
>> >>
>> >>  	spin_lock_irqsave(&video->req_lock, flags);
>> >> @@ -626,14 +653,16 @@ int uvcg_video_enable(struct uvc_video *video, int enable)
>> >>  	if (video->max_payload_size) {
>> >>  		video->encode = uvc_video_encode_bulk;
>> >>  		video->payload_size = 0;
>> >> -	} else
>> >> +		video->is_bulk = true;
>> >> +	} else {
>> >>  		video->encode = video->queue.use_sg ?
>> >>  			uvc_video_encode_isoc_sg : uvc_video_encode_isoc;
>> >> +		video->is_bulk = false;
>> >> +	}
>> >>
>> >>  	video->req_int_count = 0;
>> >>
>> >> -	queue_work(video->async_wq, &video->pump);
>> >> -
>> >> +	uvcg_video_pump(video);
>> >>  	return ret;
>> >>  }
>> >>
>> >> @@ -646,12 +675,6 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>> >>  	INIT_LIST_HEAD(&video->ureqs);
>> >>  	INIT_LIST_HEAD(&video->req_free);
>> >>  	spin_lock_init(&video->req_lock);
>> >> -	INIT_WORK(&video->pump, uvcg_video_pump);
>> >> -
>> >> -	/* Allocate a work queue for asynchronous video pump handler. */
>> >> -	video->async_wq = alloc_workqueue("uvcgadget", WQ_UNBOUND | WQ_HIGHPRI, 0);
>> >> -	if (!video->async_wq)
>> >> -		return -EINVAL;
>> >>
>> >>  	video->uvc = uvc;
>> >>  	video->fcc = V4L2_PIX_FMT_YUYV;
>> >> diff --git a/drivers/usb/gadget/function/uvc_video.h b/drivers/usb/gadget/function/uvc_video.h
>> >> index 03adeefa343b..29c6b9a2e9c3 100644
>> >> --- a/drivers/usb/gadget/function/uvc_video.h
>> >> +++ b/drivers/usb/gadget/function/uvc_video.h
>> >> @@ -18,4 +18,6 @@ int uvcg_video_enable(struct uvc_video *video, int enable);
>> >>
>> >>  int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc);
>> >>
>> >> +void uvcg_video_pump_qbuf(struct uvc_video *video);
>> >> +
>> >>  #endif /* __UVC_VIDEO_H__ */
>
>-- 
>Regards,
>
>Laurent Pinchart
>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-27 13:39           ` Michael Grzeschik
@ 2023-10-27 14:58             ` Alan Stern
  2023-10-28 11:10               ` Michael Grzeschik
  0 siblings, 1 reply; 31+ messages in thread
From: Alan Stern @ 2023-10-27 14:58 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: Laurent Pinchart, Jayant Chowdhary, Thinh.Nguyen, arakesh,
	etalvala, dan.scally, gregkh, linux-kernel, linux-usb

On Fri, Oct 27, 2023 at 03:39:44PM +0200, Michael Grzeschik wrote:
> On Fri, Oct 27, 2023 at 02:47:52PM +0300, Laurent Pinchart wrote:
> > On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
> > > On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
> > > > On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
> > > >> This patch is based on top of
> > > >> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
> > > >>
> > > >> When we use an async work queue to perform the function of pumping
> > > >> usb requests to the usb controller, it is possible that thread scheduling
> > > >> affects at what cadence we're able to pump requests. This could mean usb
> > > >> requests miss their uframes - resulting in video stream flickers on the host
> > > >> device.
> > > >>
> > > >> In this patch, we move the pumping of usb requests to
> > > >> 1) uvcg_video_complete() complete handler for both isoc + bulk
> > > >>    endpoints. We still send 0 length requests when there is no uvc buffer
> > > >>    available to encode.
> > > >
> > > > This means you will end up copying large amounts of data in interrupt
> > > > context. The work queue was there to avoid exactly that, as it will
> > > > introduce delays that can affect other parts of the system. I think this
> > > > is a problem.
> > > 
> > > Regarding Thin's argument about possible scheduling latency that is already
> > > introducing real errors, this seemed like a good solution.
> > > 
> > > But sure, this potential latency introduced in the interrupt context can
> > > trigger other side effects.
> > > 
> > > However I think we need some compromise since both arguments are very valid.
> > 
> > Agreed.
> > 
> > > Any ideas, how to solve this?
> > 
> > I'm afraid not.
> 
> We discussed this and came to the conclusion that we could make use of
> kthread_create and sched_setattr with an attr->sched_policy = SCHED_DEADLINE
> here instead of the workqueue. This way we would ensure that the worker
> would be triggered with hard definitions.
> 
> Since the SG case is not that heavy on the completion handler, we could
> also make this kthread conditionaly to the memcpy case.

If you don't mind a naive suggestion from someone who knows nothing 
about the driver...

An attractive possibility is to have the work queue (or kthread) do the 
time-consuming copying, but leave the submission up to the completion 
handler.  If the data isn't ready (or there's no data to send) when the 
handler runs, then queue a 0-length request.

That will give you the best of both worlds: low latency while in 
interrupt context and a steady, constant flow of USB transfers at all 
times.  The question of how to schedule the work queue or kthread is a 
separate matter, not directly relevant to this design decision.

Alan Stern

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-27 14:58             ` Alan Stern
@ 2023-10-28 11:10               ` Michael Grzeschik
  2023-10-28 14:09                 ` Jayant Chowdhary
  0 siblings, 1 reply; 31+ messages in thread
From: Michael Grzeschik @ 2023-10-28 11:10 UTC (permalink / raw)
  To: Alan Stern
  Cc: Laurent Pinchart, Jayant Chowdhary, Thinh.Nguyen, arakesh,
	etalvala, dan.scally, gregkh, linux-kernel, linux-usb

[-- Attachment #1: Type: text/plain, Size: 3817 bytes --]

On Fri, Oct 27, 2023 at 10:58:11AM -0400, Alan Stern wrote:
>On Fri, Oct 27, 2023 at 03:39:44PM +0200, Michael Grzeschik wrote:
>> On Fri, Oct 27, 2023 at 02:47:52PM +0300, Laurent Pinchart wrote:
>> > On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
>> > > On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
>> > > > On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
>> > > >> This patch is based on top of
>> > > >> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>> > > >>
>> > > >> When we use an async work queue to perform the function of pumping
>> > > >> usb requests to the usb controller, it is possible that thread scheduling
>> > > >> affects at what cadence we're able to pump requests. This could mean usb
>> > > >> requests miss their uframes - resulting in video stream flickers on the host
>> > > >> device.
>> > > >>
>> > > >> In this patch, we move the pumping of usb requests to
>> > > >> 1) uvcg_video_complete() complete handler for both isoc + bulk
>> > > >>    endpoints. We still send 0 length requests when there is no uvc buffer
>> > > >>    available to encode.
>> > > >
>> > > > This means you will end up copying large amounts of data in interrupt
>> > > > context. The work queue was there to avoid exactly that, as it will
>> > > > introduce delays that can affect other parts of the system. I think this
>> > > > is a problem.
>> > >
>> > > Regarding Thin's argument about possible scheduling latency that is already
>> > > introducing real errors, this seemed like a good solution.
>> > >
>> > > But sure, this potential latency introduced in the interrupt context can
>> > > trigger other side effects.
>> > >
>> > > However I think we need some compromise since both arguments are very valid.
>> >
>> > Agreed.
>> >
>> > > Any ideas, how to solve this?
>> >
>> > I'm afraid not.
>>
>> We discussed this and came to the conclusion that we could make use of
>> kthread_create and sched_setattr with an attr->sched_policy = SCHED_DEADLINE
>> here instead of the workqueue. This way we would ensure that the worker
>> would be triggered with hard definitions.
>>
>> Since the SG case is not that heavy on the completion handler, we could
>> also make this kthread conditionaly to the memcpy case.
>
>If you don't mind a naive suggestion from someone who knows nothing
>about the driver...
>
>An attractive possibility is to have the work queue (or kthread) do the
>time-consuming copying, but leave the submission up to the completion
>handler.  If the data isn't ready (or there's no data to send) when the
>handler runs, then queue a 0-length request.
>
>That will give you the best of both worlds: low latency while in
>interrupt context and a steady, constant flow of USB transfers at all
>times.  The question of how to schedule the work queue or kthread is a
>separate matter, not directly relevant to this design decision.

That's it. This is probably the best way to tackle the overall problem.

So we leave the call of the encode callback to the worker, that will
probably still can be a workqueue. The complete callback is calling
the explicit uvcg_video_ep_queue when prepared requests are available
and if there is nothing pending it will just enqueue zero requests.

Thank you Alan, this makes so much sense!

Jayant, Laurent: Do you agree?
If yes, Jayant will you change the patch accordingly?

Michael

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-28 11:10               ` Michael Grzeschik
@ 2023-10-28 14:09                 ` Jayant Chowdhary
  2023-10-31  6:11                   ` Jayant Chowdhary
  0 siblings, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-10-28 14:09 UTC (permalink / raw)
  To: Michael Grzeschik, Alan Stern
  Cc: Laurent Pinchart, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	gregkh, linux-kernel, linux-usb

Hi,

On 10/28/23 04:10, Michael Grzeschik wrote:
> On Fri, Oct 27, 2023 at 10:58:11AM -0400, Alan Stern wrote:
>> On Fri, Oct 27, 2023 at 03:39:44PM +0200, Michael Grzeschik wrote:
>>> On Fri, Oct 27, 2023 at 02:47:52PM +0300, Laurent Pinchart wrote:
>>> > On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
>>> > > On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
>>> > > > On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
>>> > > >> This patch is based on top of
>>> > > >> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>> > > >>
>>> > > >> When we use an async work queue to perform the function of pumping
>>> > > >> usb requests to the usb controller, it is possible that thread scheduling
>>> > > >> affects at what cadence we're able to pump requests. This could mean usb
>>> > > >> requests miss their uframes - resulting in video stream flickers on the host
>>> > > >> device.
>>> > > >>
>>> > > >> In this patch, we move the pumping of usb requests to
>>> > > >> 1) uvcg_video_complete() complete handler for both isoc + bulk
>>> > > >>    endpoints. We still send 0 length requests when there is no uvc buffer
>>> > > >>    available to encode.
>>> > > >
>>> > > > This means you will end up copying large amounts of data in interrupt
>>> > > > context. The work queue was there to avoid exactly that, as it will
>>> > > > introduce delays that can affect other parts of the system. I think this
>>> > > > is a problem.
>>> > >
>>> > > Regarding Thin's argument about possible scheduling latency that is already
>>> > > introducing real errors, this seemed like a good solution.
>>> > >
>>> > > But sure, this potential latency introduced in the interrupt context can
>>> > > trigger other side effects.
>>> > >
>>> > > However I think we need some compromise since both arguments are very valid.
>>> >
>>> > Agreed.
>>> >
>>> > > Any ideas, how to solve this?
>>> >
>>> > I'm afraid not.
>>>
>>> We discussed this and came to the conclusion that we could make use of
>>> kthread_create and sched_setattr with an attr->sched_policy = SCHED_DEADLINE
>>> here instead of the workqueue. This way we would ensure that the worker
>>> would be triggered with hard definitions.
>>>
>>> Since the SG case is not that heavy on the completion handler, we could
>>> also make this kthread conditionaly to the memcpy case.
>>
>> If you don't mind a naive suggestion from someone who knows nothing
>> about the driver...
>>
>> An attractive possibility is to have the work queue (or kthread) do the
>> time-consuming copying, but leave the submission up to the completion
>> handler.  If the data isn't ready (or there's no data to send) when the
>> handler runs, then queue a 0-length request.
>>
>> That will give you the best of both worlds: low latency while in
>> interrupt context and a steady, constant flow of USB transfers at all
>> times.  The question of how to schedule the work queue or kthread is a
>> separate matter, not directly relevant to this design decision.
>
> That's it. This is probably the best way to tackle the overall problem.
>
> So we leave the call of the encode callback to the worker, that will
> probably still can be a workqueue. The complete callback is calling
> the explicit uvcg_video_ep_queue when prepared requests are available
> and if there is nothing pending it will just enqueue zero requests.
>
> Thank you Alan, this makes so much sense!
>
> Jayant, Laurent: Do you agree?
> If yes, Jayant will you change the patch accordingly?
>
>
Thanks for all the discussion Greg, Michael, Laurent and Alan.
Apologies for not responding earlier since I am OOO.

While I  haven't tried this out this does seem like a very good idea.
Thank you Alan! I will aim to make changes and post a patch on Monday night PST.

Jayant


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-28 14:09                 ` Jayant Chowdhary
@ 2023-10-31  6:11                   ` Jayant Chowdhary
  2023-11-02  6:06                     ` Jayant Chowdhary
  0 siblings, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-10-31  6:11 UTC (permalink / raw)
  To: Michael Grzeschik, Alan Stern
  Cc: Laurent Pinchart, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	gregkh, linux-kernel, linux-usb

Hi,

On 10/28/23 07:09, Jayant Chowdhary wrote:
> Hi,
>
> On 10/28/23 04:10, Michael Grzeschik wrote:
>> On Fri, Oct 27, 2023 at 10:58:11AM -0400, Alan Stern wrote:
>>> On Fri, Oct 27, 2023 at 03:39:44PM +0200, Michael Grzeschik wrote:
>>>> On Fri, Oct 27, 2023 at 02:47:52PM +0300, Laurent Pinchart wrote:
>>>>> On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
>>>>>> On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
>>>>>>> On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
>>>>>>>> This patch is based on top of
>>>>>>>> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>>>>>>
>>>>>>>> When we use an async work queue to perform the function of pumping
>>>>>>>> usb requests to the usb controller, it is possible that thread scheduling
>>>>>>>> affects at what cadence we're able to pump requests. This could mean usb
>>>>>>>> requests miss their uframes - resulting in video stream flickers on the host
>>>>>>>> device.
>>>>>>>>
>>>>>>>> In this patch, we move the pumping of usb requests to
>>>>>>>> 1) uvcg_video_complete() complete handler for both isoc + bulk
>>>>>>>>     endpoints. We still send 0 length requests when there is no uvc buffer
>>>>>>>>     available to encode.
>>>>>>> This means you will end up copying large amounts of data in interrupt
>>>>>>> context. The work queue was there to avoid exactly that, as it will
>>>>>>> introduce delays that can affect other parts of the system. I think this
>>>>>>> is a problem.
>>>>>> Regarding Thin's argument about possible scheduling latency that is already
>>>>>> introducing real errors, this seemed like a good solution.
>>>>>>
>>>>>> But sure, this potential latency introduced in the interrupt context can
>>>>>> trigger other side effects.
>>>>>>
>>>>>> However I think we need some compromise since both arguments are very valid.
>>>>> Agreed.
>>>>>
>>>>>> Any ideas, how to solve this?
>>>>> I'm afraid not.
>>>> We discussed this and came to the conclusion that we could make use of
>>>> kthread_create and sched_setattr with an attr->sched_policy = SCHED_DEADLINE
>>>> here instead of the workqueue. This way we would ensure that the worker
>>>> would be triggered with hard definitions.
>>>>
>>>> Since the SG case is not that heavy on the completion handler, we could
>>>> also make this kthread conditionaly to the memcpy case.
>>> If you don't mind a naive suggestion from someone who knows nothing
>>> about the driver...
>>>
>>> An attractive possibility is to have the work queue (or kthread) do the
>>> time-consuming copying, but leave the submission up to the completion
>>> handler.  If the data isn't ready (or there's no data to send) when the
>>> handler runs, then queue a 0-length request.
>>>
>>> That will give you the best of both worlds: low latency while in
>>> interrupt context and a steady, constant flow of USB transfers at all
>>> times.  The question of how to schedule the work queue or kthread is a
>>> separate matter, not directly relevant to this design decision.
>> That's it. This is probably the best way to tackle the overall problem.
>>
>> So we leave the call of the encode callback to the worker, that will
>> probably still can be a workqueue. The complete callback is calling
>> the explicit uvcg_video_ep_queue when prepared requests are available
>> and if there is nothing pending it will just enqueue zero requests.
>>
>> Thank you Alan, this makes so much sense!
>>
>> Jayant, Laurent: Do you agree?
>> If yes, Jayant will you change the patch accordingly?
>>
>>
> Thanks for all the discussion Greg, Michael, Laurent and Alan.
> Apologies for not responding earlier since I am OOO.
>
> While I  haven't tried this out this does seem like a very good idea.
> Thank you Alan! I will aim to make changes and post a patch on Monday night PST.

I got caught up with some work which is taking longer than expected. Apologies for the
delay :) I'm testing some things out right now. I hope to be able to post a patch in the
next couple of days. Thanks for your patience.

Jayant


^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-10-26 21:56   ` [PATCH v2] " Jayant Chowdhary
                       ` (2 preceding siblings ...)
  2023-10-27 10:44     ` Greg KH
@ 2023-11-02  6:01     ` Jayant Chowdhary
  2023-11-02 16:07       ` Dan Scally
  3 siblings, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-02  6:01 UTC (permalink / raw)
  To: jchowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, dan.scally, etalvala, linux-kernel, linux-usb

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that amongst other
factors, thread scheduling affects at what cadence we're able to pump
requests. This could mean isoc usb requests miss their uframes - resulting
in video stream flickers on the host device.

To avoid this, we make the async_wq thread only produce isoc usb_requests
with uvc buffers encoded into them. The process of queueing to the
endpoint is done by the uvc_video_complete() handler. In case no
usb_requests are ready with encoded information, we just queue a zero
length request to the endpoint from the complete handler.

For bulk endpoints the async_wq thread still queues usb requests to the
endpoint.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
---
 Based on top of
 https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
 v1->v2: Added self Signed-Off-by and addressed review comments
 v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
	 for isoc transfers.

 drivers/usb/gadget/function/uvc.h       |   8 +
 drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
 2 files changed, 156 insertions(+), 39 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index e8d4c87f1e09..82c783410554 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -105,7 +105,15 @@ struct uvc_video {
 	bool is_enabled; /* tracks whether video stream is enabled */
 	unsigned int req_size;
 	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
+
+	/* USB requests video pump thread can encode into*/
 	struct list_head req_free;
+
+	/*
+	 * USB requests video pump thread has already encoded into. These are
+	 * ready to be queued to the endpoint.
+	 */
+	struct list_head req_ready;
 	spinlock_t req_lock;
 
 	unsigned int req_int_count;
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 53feb790a4c3..c84183e9afcc 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 	return ret;
 }
 
+/* This function must be called with video->req_lock held*/
+static int uvcg_video_usb_req_queue(struct uvc_video *video,
+	struct usb_request *req, bool queue_to_ep) {
+	bool is_bulk = video->max_payload_size;
+	if (!video->is_enabled) {
+		uvc_video_free_request(req->context, video->ep);
+		return -ENODEV;
+	}
+	if (queue_to_ep) {
+		struct uvc_request *ureq = req->context;
+		/*
+		 * With USB3 handling more requests at a higher speed, we can't
+		 * afford to generate an interrupt for every request. Decide to
+		 * interrupt:
+		 *
+		 * - When no more requests are available in the free queue, as
+		 *   this may be our last chance to refill the endpoint's
+		 *   request queue.
+		 *
+		 * - When this is request is the last request for the video
+		 *   buffer, as we want to start sending the next video buffer
+		 *   ASAP in case it doesn't get started already in the next
+		 *   iteration of this loop.
+		 *
+		 * - Four times over the length of the requests queue (as
+		 *   indicated by video->uvc_num_requests), as a trade-off
+		 *   between latency and interrupt load.
+		*/
+		if (list_empty(&video->req_free) || ureq->last_buf ||
+			!(video->req_int_count %
+			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
+			video->req_int_count = 0;
+			req->no_interrupt = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+		video->req_int_count++;
+		return uvcg_video_ep_queue(video, req);
+	} else {
+		/*
+		* If we're not queing to the ep, for isoc we're queing
+		* to the req_ready list, otherwise req_free.
+		*/
+		struct list_head *list =
+			is_bulk ? &video->req_free : &video->req_ready;
+		list_add_tail(&req->list, list);
+	}
+	return 0;
+}
+
+static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
+	struct uvc_video *video) {
+	req->length = 0;
+	return uvcg_video_ep_queue(video, req);
+}
+
+/* Must only be called from uvcg_video_enable - since after that we only want to
+ * queue requests to the endpoint from the uvc_video_complete complete handler.
+ * This function is needed in order to 'kick start' the flow of requests from
+ * gadget driver to the usb controller.
+ */
+static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
+	struct usb_request *req = NULL;
+	unsigned long flags = 0;
+	unsigned int count = 0;
+	int ret = 0;
+	/* We only queue half of the free list since we still want to have
+	 * some free usb_requests in the free list for the video_pump async_wq
+	 * thread to encode uvc buffers into. Otherwise we could get into a
+	 * situation where the free list does not have any usb requests to
+	 * encode into - we always end up queueing 0 length requests to the
+	 * end point.
+	 */
+	unsigned half_list_size = video->uvc_num_requests / 2;
+	spin_lock_irqsave(&video->req_lock, flags);
+	/* Take these requests off the free list and queue them all to the
+	 * endpoint. Since we queue the requests with the req_lock held,
+	 */
+	while (count < half_list_size) {
+		req = list_first_entry(&video->req_free, struct usb_request,
+					list);
+		list_del(&req->list);
+		ret = uvcg_video_ep_queue_zero_length(req, video);
+		if (ret < 0) {
+			uvcg_queue_cancel(&video->queue, /*disconnect*/0);
+			break;
+		}
+		count++;
+	}
+	spin_unlock_irqrestore(&video->req_lock, flags);
+}
+
 static void
 uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 {
@@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_buffer *last_buf = NULL;
 	unsigned long flags;
+	bool is_bulk = video->max_payload_size;
+	int ret = 0;
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	if (!video->is_enabled) {
@@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 * back to req_free
 	 */
 	if (video->is_enabled) {
-		list_add_tail(&req->list, &video->req_free);
+		/*
+		 * Here we check whether any request is available in the ready
+		 * list. If it is, queue it to the ep and add the current
+		 * usb_request to the req_free list - for video_pump to fill in.
+		 * Otherwise, just use the current usb_request to queue a 0
+		 * length request to the ep. Since we always add to the req_free
+		 * list if we dequeue from the ready list, there will never
+		 * be a situation where the req_free list is completely out of
+		 * requests and cannot recover.
+		 */
+		struct usb_request *to_queue = req;
+		to_queue->length = 0;
+		if (!list_empty(&video->req_ready)) {
+			to_queue = list_first_entry(&video->req_ready,
+				struct usb_request, list);
+			list_del(&to_queue->list);
+			/* Add it to the free list. */
+			list_add_tail(&req->list, &video->req_free);
+		}
+		/*
+		 * Queue to the endpoint. The actual queueing to ep will
+		 * only happen on one thread - the async_wq for bulk endpoints
+		 * and this thread for isoc endpoints.
+		 */
+		ret = uvcg_video_usb_req_queue(video, to_queue,
+					       /*queue_to_ep*/!is_bulk);
+		if(ret < 0) {
+			uvcg_queue_cancel(queue, 0);
+		}
+		/* Queue work to the wq as well since its possible that a buffer
+		 * may not have been completed.
+		 */
 		queue_work(video->async_wq, &video->pump);
 	} else {
 		uvc_video_free_request(ureq, ep);
@@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
 
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	return 0;
 }
@@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct usb_request *req = NULL;
 	struct uvc_buffer *buf;
 	unsigned long flags;
-	bool buf_done;
-	int ret;
+	int ret = 0;
 
 	while (true) {
 		if (!video->ep->enabled)
@@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
 
 		if (buf != NULL) {
 			video->encode(req, video, buf);
-			buf_done = buf->state == UVC_BUF_STATE_DONE;
 		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
 			/*
 			 * No video buffer available; the queue is still connected and
@@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
 			 * prevent missed ISOC transfers.
 			 */
 			req->length = 0;
-			buf_done = false;
 		} else {
 			/*
 			 * Either the queue has been disconnected or no video buffer
@@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
 			break;
 		}
 
-		/*
-		 * With USB3 handling more requests at a higher speed, we can't
-		 * afford to generate an interrupt for every request. Decide to
-		 * interrupt:
-		 *
-		 * - When no more requests are available in the free queue, as
-		 *   this may be our last chance to refill the endpoint's
-		 *   request queue.
-		 *
-		 * - When this is request is the last request for the video
-		 *   buffer, as we want to start sending the next video buffer
-		 *   ASAP in case it doesn't get started already in the next
-		 *   iteration of this loop.
-		 *
-		 * - Four times over the length of the requests queue (as
-		 *   indicated by video->uvc_num_requests), as a trade-off
-		 *   between latency and interrupt load.
-		 */
-		if (list_empty(&video->req_free) || buf_done ||
-		    !(video->req_int_count %
-		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
-			video->req_int_count = 0;
-			req->no_interrupt = 0;
-		} else {
-			req->no_interrupt = 1;
-		}
-
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
 
+		/* Queue the USB request.*/
+		spin_lock_irqsave(&video->req_lock, flags);
+		/* For bulk end points we queue from the worker thread
+		 * since we would preferably not want to wait on requests
+		 * to be ready, in the uvcg_video_complete() handler.
+		 * For isoc endpoints we add the request to the ready list
+		 * and only queue it to the endpoint from the complete handler.
+		 */
+		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+
 		if (ret < 0) {
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
 
-		/* Endpoint now owns the request */
+		/* The request is owned by  the endpoint / ready list*/
 		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	/*
-	 * Remove all uvc_reqeusts from ureqs with list_del_init
+	 * Remove all uvc_requests from ureqs with list_del_init
 	 * This lets uvc_video_free_request correctly identify
 	 * if the uvc_request is attached to a list or not when freeing
 	 * memory.
@@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
 		list_del(&req->list);
 		uvc_video_free_request(req->context, video->ep);
 	}
-
+	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
+		list_del(&req->list);
+		uvc_video_free_request(req->context, video->ep);
+	}
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
@@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
+	uvc_video_ep_queue_initial_requests(video);
 
 	return ret;
 }
@@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->is_enabled = false;
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	spin_lock_init(&video->req_lock);
 	INIT_WORK(&video->pump, uvcg_video_pump);
 
-- 
2.42.0.869.gea05f2083d-goog


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v2] usb:gadget:uvc Do not use worker thread to pump usb requests
  2023-10-31  6:11                   ` Jayant Chowdhary
@ 2023-11-02  6:06                     ` Jayant Chowdhary
  0 siblings, 0 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-02  6:06 UTC (permalink / raw)
  To: Michael Grzeschik, Alan Stern
  Cc: Laurent Pinchart, Thinh.Nguyen, arakesh, etalvala, dan.scally,
	gregkh, linux-kernel, linux-usb

Hi,

On 10/30/23 23:11, Jayant Chowdhary wrote:
> Hi,
>
> On 10/28/23 07:09, Jayant Chowdhary wrote:
>> Hi,
>>
>> On 10/28/23 04:10, Michael Grzeschik wrote:
>>> On Fri, Oct 27, 2023 at 10:58:11AM -0400, Alan Stern wrote:
>>>> On Fri, Oct 27, 2023 at 03:39:44PM +0200, Michael Grzeschik wrote:
>>>>> On Fri, Oct 27, 2023 at 02:47:52PM +0300, Laurent Pinchart wrote:
>>>>>> On Fri, Oct 27, 2023 at 01:10:21PM +0200, Michael Grzeschik wrote:
>>>>>>> On Fri, Oct 27, 2023 at 10:51:17AM +0300, Laurent Pinchart wrote:
>>>>>>>> On Thu, Oct 26, 2023 at 09:56:35PM +0000, Jayant Chowdhary wrote:
>>>>>>>>> This patch is based on top of
>>>>>>>>> https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>>>>>>>
>>>>>>>>> When we use an async work queue to perform the function of pumping
>>>>>>>>> usb requests to the usb controller, it is possible that thread scheduling
>>>>>>>>> affects at what cadence we're able to pump requests. This could mean usb
>>>>>>>>> requests miss their uframes - resulting in video stream flickers on the host
>>>>>>>>> device.
>>>>>>>>>
>>>>>>>>> In this patch, we move the pumping of usb requests to
>>>>>>>>> 1) uvcg_video_complete() complete handler for both isoc + bulk
>>>>>>>>>     endpoints. We still send 0 length requests when there is no uvc buffer
>>>>>>>>>     available to encode.
>>>>>>>> This means you will end up copying large amounts of data in interrupt
>>>>>>>> context. The work queue was there to avoid exactly that, as it will
>>>>>>>> introduce delays that can affect other parts of the system. I think this
>>>>>>>> is a problem.
>>>>>>> Regarding Thin's argument about possible scheduling latency that is already
>>>>>>> introducing real errors, this seemed like a good solution.
>>>>>>>
>>>>>>> But sure, this potential latency introduced in the interrupt context can
>>>>>>> trigger other side effects.
>>>>>>>
>>>>>>> However I think we need some compromise since both arguments are very valid.
>>>>>> Agreed.
>>>>>>
>>>>>>> Any ideas, how to solve this?
>>>>>> I'm afraid not.
>>>>> We discussed this and came to the conclusion that we could make use of
>>>>> kthread_create and sched_setattr with an attr->sched_policy = SCHED_DEADLINE
>>>>> here instead of the workqueue. This way we would ensure that the worker
>>>>> would be triggered with hard definitions.
>>>>>
>>>>> Since the SG case is not that heavy on the completion handler, we could
>>>>> also make this kthread conditionaly to the memcpy case.
>>>> If you don't mind a naive suggestion from someone who knows nothing
>>>> about the driver...
>>>>
>>>> An attractive possibility is to have the work queue (or kthread) do the
>>>> time-consuming copying, but leave the submission up to the completion
>>>> handler.  If the data isn't ready (or there's no data to send) when the
>>>> handler runs, then queue a 0-length request.
>>>>
>>>> That will give you the best of both worlds: low latency while in
>>>> interrupt context and a steady, constant flow of USB transfers at all
>>>> times.  The question of how to schedule the work queue or kthread is a
>>>> separate matter, not directly relevant to this design decision.
>>> That's it. This is probably the best way to tackle the overall problem.
>>>
>>> So we leave the call of the encode callback to the worker, that will
>>> probably still can be a workqueue. The complete callback is calling
>>> the explicit uvcg_video_ep_queue when prepared requests are available
>>> and if there is nothing pending it will just enqueue zero requests.
>>>
>>> Thank you Alan, this makes so much sense!
>>>
>>> Jayant, Laurent: Do you agree?
>>> If yes, Jayant will you change the patch accordingly?
>>>
>>>
>> Thanks for all the discussion Greg, Michael, Laurent and Alan.
>> Apologies for not responding earlier since I am OOO.
>>
>> While I  haven't tried this out this does seem like a very good idea.
>> Thank you Alan! I will aim to make changes and post a patch on Monday night PST.
> I got caught up with some work which is taking longer than expected. Apologies for the
> delay :) I'm testing some things out right now. I hope to be able to post a patch in the
> next couple of days. Thanks for your patience.

I posted another patch at https://lore.kernel.org/linux-usb/20231102060120.1159112-1-jchowdhary@google.com/T/#u.
I've not split this into 2 patches since here, we have a common function that handles both the bulk and isoc
cases and I feel they're logically related. 

Thank you

Jayant


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-02  6:01     ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc " Jayant Chowdhary
@ 2023-11-02 16:07       ` Dan Scally
  2023-11-03  7:13         ` [PATCH v4] usb:gadget:uvc Do not use worker thread to pump " Jayant Chowdhary
  2023-11-03  7:28         ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue " Jayant Chowdhary
  0 siblings, 2 replies; 31+ messages in thread
From: Dan Scally @ 2023-11-02 16:07 UTC (permalink / raw)
  To: Jayant Chowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Jayant - thanks for the patch

On 02/11/2023 06:01, Jayant Chowdhary wrote:
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that amongst other
> factors, thread scheduling affects at what cadence we're able to pump
> requests. This could mean isoc usb requests miss their uframes - resulting
> in video stream flickers on the host device.
>
> To avoid this, we make the async_wq thread only produce isoc usb_requests
> with uvc buffers encoded into them. The process of queueing to the
> endpoint is done by the uvc_video_complete() handler. In case no
> usb_requests are ready with encoded information, we just queue a zero
> length request to the endpoint from the complete handler.
>
> For bulk endpoints the async_wq thread still queues usb requests to the
> endpoint.
>
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Avichal Rakesh <arakesh@google.com>
> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
> ---
>   Based on top of
>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>   v1->v2: Added self Signed-Off-by and addressed review comments
>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
> 	 for isoc transfers.
>
>   drivers/usb/gadget/function/uvc.h       |   8 +
>   drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>   2 files changed, 156 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index e8d4c87f1e09..82c783410554 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -105,7 +105,15 @@ struct uvc_video {
>   	bool is_enabled; /* tracks whether video stream is enabled */
>   	unsigned int req_size;
>   	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
> +
> +	/* USB requests video pump thread can encode into*/

"USB requests that the video pump thread can encode into", and a space before the closing */ please 
(and the same a few more times below).

>   	struct list_head req_free;
> +
> +	/*
> +	 * USB requests video pump thread has already encoded into. These are
> +	 * ready to be queued to the endpoint.
> +	 */
> +	struct list_head req_ready;
>   	spinlock_t req_lock;
>   
>   	unsigned int req_int_count;
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 53feb790a4c3..c84183e9afcc 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>   	return ret;
>   }
>   
> +/* This function must be called with video->req_lock held*/
> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
> +	struct usb_request *req, bool queue_to_ep) {
Brace on a new line please - same a few more times below
> +	bool is_bulk = video->max_payload_size;
empty line here
> +	if (!video->is_enabled) {
> +		uvc_video_free_request(req->context, video->ep);
> +		return -ENODEV;
> +	}
> +	if (queue_to_ep) {
> +		struct uvc_request *ureq = req->context;
> +		/*
> +		 * With USB3 handling more requests at a higher speed, we can't
> +		 * afford to generate an interrupt for every request. Decide to
> +		 * interrupt:
> +		 *
> +		 * - When no more requests are available in the free queue, as
> +		 *   this may be our last chance to refill the endpoint's
> +		 *   request queue.
> +		 *
> +		 * - When this is request is the last request for the video
> +		 *   buffer, as we want to start sending the next video buffer
> +		 *   ASAP in case it doesn't get started already in the next
> +		 *   iteration of this loop.
> +		 *
> +		 * - Four times over the length of the requests queue (as
> +		 *   indicated by video->uvc_num_requests), as a trade-off
> +		 *   between latency and interrupt load.
> +		*/
> +		if (list_empty(&video->req_free) || ureq->last_buf ||
> +			!(video->req_int_count %
> +			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
> +			video->req_int_count = 0;
> +			req->no_interrupt = 0;
> +		} else {
> +			req->no_interrupt = 1;
> +		}
> +		video->req_int_count++;
> +		return uvcg_video_ep_queue(video, req);
> +	} else {
> +		/*
> +		* If we're not queing to the ep, for isoc we're queing
> +		* to the req_ready list, otherwise req_free.
> +		*/
> +		struct list_head *list =
> +			is_bulk ? &video->req_free : &video->req_ready;
> +		list_add_tail(&req->list, list);
> +	}
> +	return 0;
> +}
> +
> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
> +	struct uvc_video *video) {
> +	req->length = 0;
> +	return uvcg_video_ep_queue(video, req);
> +}
Not sure this is worth its own function
> +
> +/* Must only be called from uvcg_video_enable - since after that we only want to
> + * queue requests to the endpoint from the uvc_video_complete complete handler.
> + * This function is needed in order to 'kick start' the flow of requests from
> + * gadget driver to the usb controller.
> + */
> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
> +	struct usb_request *req = NULL;
> +	unsigned long flags = 0;
> +	unsigned int count = 0;
> +	int ret = 0;
> +	/* We only queue half of the free list since we still want to have
> +	 * some free usb_requests in the free list for the video_pump async_wq
> +	 * thread to encode uvc buffers into. Otherwise we could get into a
> +	 * situation where the free list does not have any usb requests to
> +	 * encode into - we always end up queueing 0 length requests to the
> +	 * end point.
> +	 */
> +	unsigned half_list_size = video->uvc_num_requests / 2;
> +	spin_lock_irqsave(&video->req_lock, flags);
> +	/* Take these requests off the free list and queue them all to the
> +	 * endpoint. Since we queue the requests with the req_lock held,
> +	 */

This comment seems to be incomplete? You also want an opening /* on its own line:


/*
  * Multi line comments
  * look like this
  */

> +	while (count < half_list_size) {
> +		req = list_first_entry(&video->req_free, struct usb_request,
> +					list);
> +		list_del(&req->list);
> +		ret = uvcg_video_ep_queue_zero_length(req, video);
> +		if (ret < 0) {
> +			uvcg_queue_cancel(&video->queue, /*disconnect*/0);
> +			break;
> +		}
> +		count++;
> +	}
> +	spin_unlock_irqrestore(&video->req_lock, flags);
> +}
> +

So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial 
flight of half the requests (between 2 and 32) that are queued as zero length - the very first one 
to .complete() being re-queued as a zero length request before the workqueue is started and encodes 
data into the _other_ half of the requests which were left in req_free and putting them into 
req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and 
they get sent out with data...does that sound right?


What are the implications of those initial 3-33 zero length requests? What kind of latency can that 
introduce to the start of the video stream?

>   static void
>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   {
> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   	struct uvc_video_queue *queue = &video->queue;
>   	struct uvc_buffer *last_buf = NULL;
>   	unsigned long flags;
> +	bool is_bulk = video->max_payload_size;
> +	int ret = 0;
>   
>   	spin_lock_irqsave(&video->req_lock, flags);
>   	if (!video->is_enabled) {
> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   	 * back to req_free
>   	 */
>   	if (video->is_enabled) {
> -		list_add_tail(&req->list, &video->req_free);
> +		/*
> +		 * Here we check whether any request is available in the ready
> +		 * list. If it is, queue it to the ep and add the current
> +		 * usb_request to the req_free list - for video_pump to fill in.
> +		 * Otherwise, just use the current usb_request to queue a 0
> +		 * length request to the ep. Since we always add to the req_free
> +		 * list if we dequeue from the ready list, there will never
> +		 * be a situation where the req_free list is completely out of
> +		 * requests and cannot recover.
> +		 */
> +		struct usb_request *to_queue = req;
> +		to_queue->length = 0;
> +		if (!list_empty(&video->req_ready)) {
> +			to_queue = list_first_entry(&video->req_ready,
> +				struct usb_request, list);
> +			list_del(&to_queue->list);
> +			/* Add it to the free list. */
> +			list_add_tail(&req->list, &video->req_free);
> +		}
> +		/*
> +		 * Queue to the endpoint. The actual queueing to ep will
> +		 * only happen on one thread - the async_wq for bulk endpoints
> +		 * and this thread for isoc endpoints.
> +		 */
> +		ret = uvcg_video_usb_req_queue(video, to_queue,
> +					       /*queue_to_ep*/!is_bulk);


In principle in-line comments are fine, but I don't think the parameter name is worth a comment

> +		if(ret < 0) {
> +			uvcg_queue_cancel(queue, 0);
> +		}
> +		/* Queue work to the wq as well since its possible that a buffer
> +		 * may not have been completed.
> +		 */


The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq 
here then doesn't that mean it'll never run?

>   		queue_work(video->async_wq, &video->pump);
>   	} else {
>   		uvc_video_free_request(ureq, ep);
> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>   
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	video->req_size = 0;
>   	return 0;
>   }
> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>   	struct usb_request *req = NULL;
>   	struct uvc_buffer *buf;
>   	unsigned long flags;
> -	bool buf_done;
> -	int ret;
> +	int ret = 0;
>   
>   	while (true) {
>   		if (!video->ep->enabled)
> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>   
>   		if (buf != NULL) {
>   			video->encode(req, video, buf);
> -			buf_done = buf->state == UVC_BUF_STATE_DONE;
>   		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>   			/*
>   			 * No video buffer available; the queue is still connected and
> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>   			 * prevent missed ISOC transfers.
>   			 */
>   			req->length = 0;
> -			buf_done = false;
>   		} else {
>   			/*
>   			 * Either the queue has been disconnected or no video buffer
> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>   			break;
>   		}
>   
> -		/*
> -		 * With USB3 handling more requests at a higher speed, we can't
> -		 * afford to generate an interrupt for every request. Decide to
> -		 * interrupt:
> -		 *
> -		 * - When no more requests are available in the free queue, as
> -		 *   this may be our last chance to refill the endpoint's
> -		 *   request queue.
> -		 *
> -		 * - When this is request is the last request for the video
> -		 *   buffer, as we want to start sending the next video buffer
> -		 *   ASAP in case it doesn't get started already in the next
> -		 *   iteration of this loop.
> -		 *
> -		 * - Four times over the length of the requests queue (as
> -		 *   indicated by video->uvc_num_requests), as a trade-off
> -		 *   between latency and interrupt load.
> -		 */
> -		if (list_empty(&video->req_free) || buf_done ||
> -		    !(video->req_int_count %
> -		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
> -			video->req_int_count = 0;
> -			req->no_interrupt = 0;
> -		} else {
> -			req->no_interrupt = 1;
> -		}
> -
> -		/* Queue the USB request */
> -		ret = uvcg_video_ep_queue(video, req);
>   		spin_unlock_irqrestore(&queue->irqlock, flags);
>   
> +		/* Queue the USB request.*/
I think just drop this - it was always superfluous.
> +		spin_lock_irqsave(&video->req_lock, flags);
> +		/* For bulk end points we queue from the worker thread
> +		 * since we would preferably not want to wait on requests
> +		 * to be ready, in the uvcg_video_complete() handler.
> +		 * For isoc endpoints we add the request to the ready list
> +		 * and only queue it to the endpoint from the complete handler.
> +		 */
> +		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
> +		spin_unlock_irqrestore(&video->req_lock, flags);
> +
>   		if (ret < 0) {
>   			uvcg_queue_cancel(queue, 0);
>   			break;
>   		}
>   
> -		/* Endpoint now owns the request */
> +		/* The request is owned by  the endpoint / ready list*/
>   		req = NULL;
> -		video->req_int_count++;
>   	}
>   
>   	if (!req)
> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>   
>   	spin_lock_irqsave(&video->req_lock, flags);
>   	/*
> -	 * Remove all uvc_reqeusts from ureqs with list_del_init
> +	 * Remove all uvc_requests from ureqs with list_del_init
This should get fixed in the earlier series.
>   	 * This lets uvc_video_free_request correctly identify
>   	 * if the uvc_request is attached to a list or not when freeing
>   	 * memory.
> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>   		list_del(&req->list);
>   		uvc_video_free_request(req->context, video->ep);
>   	}
> -
keep the empty line please
> +	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
> +		list_del(&req->list);
> +		uvc_video_free_request(req->context, video->ep);
> +	}
and one here too.
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	video->req_size = 0;
>   	spin_unlock_irqrestore(&video->req_lock, flags);
>   
> @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>   
>   	video->req_int_count = 0;
>   
> -	queue_work(video->async_wq, &video->pump);
> +	uvc_video_ep_queue_initial_requests(video);
>   
>   	return ret;
>   }
> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>   	video->is_enabled = false;
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	spin_lock_init(&video->req_lock);
>   	INIT_WORK(&video->pump, uvcg_video_pump);
>   

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v4] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-02 16:07       ` Dan Scally
@ 2023-11-03  7:13         ` Jayant Chowdhary
  2023-11-09  2:12           ` [PATCH v5] " Jayant Chowdhary
  2023-11-03  7:28         ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue " Jayant Chowdhary
  1 sibling, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-03  7:13 UTC (permalink / raw)
  To: dan.scally, jchowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that amongst other
factors, thread scheduling affects at what cadence we're able to pump
requests. This could mean isoc usb requests miss their uframes - resulting
in video stream flickers on the host device.

To avoid this, we make the async_wq thread only produce isoc usb_requests
with uvc buffers encoded into them. The process of queueing to the
endpoint is done by the uvc_video_complete() handler. In case no
usb_requests are ready with encoded information, we just queue a zero
length request to the endpoint from the complete handler.

For bulk endpoints the async_wq thread still queues usb requests to the
endpoint.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
---
 Based on top of
 https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
 v1->v2: Added self Signed-Off-by and addressed review comments
 v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
	 for isoc transfers.
 v3->v4: Address review comments around code style.

 drivers/usb/gadget/function/uvc.h       |   8 +
 drivers/usb/gadget/function/uvc_video.c | 195 +++++++++++++++++++-----
 2 files changed, 165 insertions(+), 38 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index e8d4c87f1e09..5ff454528bd8 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -105,7 +105,15 @@ struct uvc_video {
 	bool is_enabled; /* tracks whether video stream is enabled */
 	unsigned int req_size;
 	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
+
+	/* USB requests that the video pump thread can encode into */
 	struct list_head req_free;
+
+	/*
+	 * USB requests video pump thread has already encoded into. These are
+	 * ready to be queued to the endpoint.
+	 */
+	struct list_head req_ready;
 	spinlock_t req_lock;
 
 	unsigned int req_int_count;
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 53feb790a4c3..e99c5b567f66 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -268,6 +268,99 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 	return ret;
 }
 
+/* This function must be called with video->req_lock held. */
+static int uvcg_video_usb_req_queue(struct uvc_video *video,
+	struct usb_request *req, bool queue_to_ep)
+{
+	bool is_bulk = video->max_payload_size;
+
+	if (!video->is_enabled) {
+		uvc_video_free_request(req->context, video->ep);
+		return -ENODEV;
+	}
+	if (queue_to_ep) {
+		struct uvc_request *ureq = req->context;
+		/*
+		 * With USB3 handling more requests at a higher speed, we can't
+		 * afford to generate an interrupt for every request. Decide to
+		 * interrupt:
+		 *
+		 * - When no more requests are available in the free queue, as
+		 *   this may be our last chance to refill the endpoint's
+		 *   request queue.
+		 *
+		 * - When this is request is the last request for the video
+		 *   buffer, as we want to start sending the next video buffer
+		 *   ASAP in case it doesn't get started already in the next
+		 *   iteration of this loop.
+		 *
+		 * - Four times over the length of the requests queue (as
+		 *   indicated by video->uvc_num_requests), as a trade-off
+		 *   between latency and interrupt load.
+		 */
+		if (list_empty(&video->req_free) || ureq->last_buf ||
+			!(video->req_int_count %
+			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
+			video->req_int_count = 0;
+			req->no_interrupt = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+		video->req_int_count++;
+		return uvcg_video_ep_queue(video, req);
+	} else {
+		/*
+		* If we're not queing to the ep, for isoc we're queing
+		* to the req_ready list, otherwise req_free.
+		*/
+		struct list_head *list =
+			is_bulk ? &video->req_free : &video->req_ready;
+		list_add_tail(&req->list, list);
+	}
+	return 0;
+}
+
+/*
+ * Must only be called from uvcg_video_enable - since after that we only want to
+ * queue requests to the endpoint from the uvc_video_complete complete handler.
+ * This function is needed in order to 'kick start' the flow of requests from
+ * gadget driver to the usb controller.
+ */
+static void uvc_video_ep_queue_initial_requests(struct uvc_video *video)
+{
+	struct usb_request *req = NULL;
+	unsigned long flags = 0;
+	unsigned int count = 0;
+	int ret = 0;
+	/*
+	 * We only queue half of the free list since we still want to have
+	 * some free usb_requests in the free list for the video_pump async_wq
+	 * thread to encode uvc buffers into. Otherwise we could get into a
+	 * situation where the free list does not have any usb requests to
+	 * encode into - we always end up queueing 0 length requests to the
+	 * end point.
+	 */
+	unsigned half_list_size = video->uvc_num_requests / 2;
+	spin_lock_irqsave(&video->req_lock, flags);
+	/*
+	 * Take these requests off the free list and queue them all to the
+	 * endpoint. Since we queue the requests with the req_lock held,
+	 */
+	while (count < half_list_size) {
+		req = list_first_entry(&video->req_free, struct usb_request,
+					list);
+		list_del(&req->list);
+		req->length = 0;
+		ret = uvcg_video_ep_queue(video, req);
+		if (ret < 0) {
+			uvcg_queue_cancel(&video->queue, /*disconnect*/0);
+			break;
+		}
+		count++;
+	}
+	spin_unlock_irqrestore(&video->req_lock, flags);
+}
+
 static void
 uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 {
@@ -276,6 +369,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_buffer *last_buf = NULL;
 	unsigned long flags;
+	bool is_bulk = video->max_payload_size;
+	int ret = 0;
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	if (!video->is_enabled) {
@@ -329,7 +424,45 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 * back to req_free
 	 */
 	if (video->is_enabled) {
-		list_add_tail(&req->list, &video->req_free);
+		/*
+		 * Here we check whether any request is available in the ready
+		 * list. If it is, queue it to the ep and add the current
+		 * usb_request to the req_free list - for video_pump to fill in.
+		 * Otherwise, just use the current usb_request to queue a 0
+		 * length request to the ep. Since we always add to the req_free
+		 * list if we dequeue from the ready list, there will never
+		 * be a situation where the req_free list is completely out of
+		 * requests and cannot recover.
+		 */
+		struct usb_request *to_queue = req;
+		to_queue->length = 0;
+		if (!list_empty(&video->req_ready)) {
+			to_queue = list_first_entry(&video->req_ready,
+				struct usb_request, list);
+			list_del(&to_queue->list);
+			/* Add it to the free list. */
+			list_add_tail(&req->list, &video->req_free);
+		}
+		/*
+		 * Queue to the endpoint. The actual queueing to ep will
+		 * only happen on one thread - the async_wq for bulk endpoints
+		 * and this thread for isoc endpoints.
+		 */
+		ret = uvcg_video_usb_req_queue(video, to_queue, !is_bulk);
+		if(ret < 0) {
+			uvcg_queue_cancel(queue, 0);
+		}
+		/*
+		 * Queue work to the wq as well since it is possible that a
+		 * buffer may not have been completely encoded with the set of
+		 * in-flight usb requests for whih the complete callbacks are
+		 * firing.
+		 * In that case, if we do not queue work to the worker thread,
+		 * the buffer will never be marked as complete - and therefore
+		 * not be returned to userpsace. As a result,
+		 * dequeue -> queue -> dequeue flow of uvc buffers will not
+		 * happen.
+		 */
 		queue_work(video->async_wq, &video->pump);
 	} else {
 		uvc_video_free_request(ureq, ep);
@@ -347,6 +480,7 @@ uvc_video_free_requests(struct uvc_video *video)
 
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	return 0;
 }
@@ -424,8 +558,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct usb_request *req = NULL;
 	struct uvc_buffer *buf;
 	unsigned long flags;
-	bool buf_done;
-	int ret;
+	int ret = 0;
 
 	while (true) {
 		if (!video->ep->enabled)
@@ -454,7 +587,6 @@ static void uvcg_video_pump(struct work_struct *work)
 
 		if (buf != NULL) {
 			video->encode(req, video, buf);
-			buf_done = buf->state == UVC_BUF_STATE_DONE;
 		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
 			/*
 			 * No video buffer available; the queue is still connected and
@@ -462,7 +594,6 @@ static void uvcg_video_pump(struct work_struct *work)
 			 * prevent missed ISOC transfers.
 			 */
 			req->length = 0;
-			buf_done = false;
 		} else {
 			/*
 			 * Either the queue has been disconnected or no video buffer
@@ -473,45 +604,26 @@ static void uvcg_video_pump(struct work_struct *work)
 			break;
 		}
 
-		/*
-		 * With USB3 handling more requests at a higher speed, we can't
-		 * afford to generate an interrupt for every request. Decide to
-		 * interrupt:
-		 *
-		 * - When no more requests are available in the free queue, as
-		 *   this may be our last chance to refill the endpoint's
-		 *   request queue.
-		 *
-		 * - When this is request is the last request for the video
-		 *   buffer, as we want to start sending the next video buffer
-		 *   ASAP in case it doesn't get started already in the next
-		 *   iteration of this loop.
-		 *
-		 * - Four times over the length of the requests queue (as
-		 *   indicated by video->uvc_num_requests), as a trade-off
-		 *   between latency and interrupt load.
-		 */
-		if (list_empty(&video->req_free) || buf_done ||
-		    !(video->req_int_count %
-		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
-			video->req_int_count = 0;
-			req->no_interrupt = 0;
-		} else {
-			req->no_interrupt = 1;
-		}
-
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
 
+		/* Queue the USB request. */
+		spin_lock_irqsave(&video->req_lock, flags);
+		/* For bulk end points we queue from the worker thread
+		 * since we would preferably not want to wait on requests
+		 * to be ready, in the uvcg_video_complete() handler.
+		 * For isoc endpoints we add the request to the ready list
+		 * and only queue it to the endpoint from the complete handler.
+		 */
+		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+
 		if (ret < 0) {
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
 
-		/* Endpoint now owns the request */
+		/* The request is owned by  the endpoint / ready list. */
 		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -567,7 +679,7 @@ uvcg_video_disable(struct uvc_video *video)
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	/*
-	 * Remove all uvc_reqeusts from ureqs with list_del_init
+	* Remove all uvc_requests from ureqs with list_del_init
 	 * This lets uvc_video_free_request correctly identify
 	 * if the uvc_request is attached to a list or not when freeing
 	 * memory.
@@ -580,8 +692,14 @@ uvcg_video_disable(struct uvc_video *video)
 		uvc_video_free_request(req->context, video->ep);
 	}
 
+	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
+		list_del(&req->list);
+		uvc_video_free_request(req->context, video->ep);
+	}
+
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
@@ -635,7 +753,7 @@ int uvcg_video_enable(struct uvc_video *video)
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
+	uvc_video_ep_queue_initial_requests(video);
 
 	return ret;
 }
@@ -648,6 +766,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->is_enabled = false;
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	spin_lock_init(&video->req_lock);
 	INIT_WORK(&video->pump, uvcg_video_pump);
 
-- 
2.42.0.869.gea05f2083d-goog


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-02 16:07       ` Dan Scally
  2023-11-03  7:13         ` [PATCH v4] usb:gadget:uvc Do not use worker thread to pump " Jayant Chowdhary
@ 2023-11-03  7:28         ` Jayant Chowdhary
  2023-11-03 10:29           ` Michael Grzeschik
  2023-11-07 17:01           ` Dan Scally
  1 sibling, 2 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-03  7:28 UTC (permalink / raw)
  To: Dan Scally, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Dan,
Thank you for the comments. 
I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.

On 11/2/23 09:07, Dan Scally wrote:
> Hi Jayant - thanks for the patch
>
> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>> When we use an async work queue to perform the function of pumping
>> usb requests to the usb controller, it is possible that amongst other
>> factors, thread scheduling affects at what cadence we're able to pump
>> requests. This could mean isoc usb requests miss their uframes - resulting
>> in video stream flickers on the host device.
>>
>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>> with uvc buffers encoded into them. The process of queueing to the
>> endpoint is done by the uvc_video_complete() handler. In case no
>> usb_requests are ready with encoded information, we just queue a zero
>> length request to the endpoint from the complete handler.
>>
>> For bulk endpoints the async_wq thread still queues usb requests to the
>> endpoint.
>>
>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>> ---
>>   Based on top of
>>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>   v1->v2: Added self Signed-Off-by and addressed review comments
>>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>      for isoc transfers.
>>
>>   drivers/usb/gadget/function/uvc.h       |   8 +
>>   drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>   2 files changed, 156 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> index e8d4c87f1e09..82c783410554 100644
>> --- a/drivers/usb/gadget/function/uvc.h
>> +++ b/drivers/usb/gadget/function/uvc.h
>> @@ -105,7 +105,15 @@ struct uvc_video {
>>       bool is_enabled; /* tracks whether video stream is enabled */
>>       unsigned int req_size;
>>       struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>> +
>> +    /* USB requests video pump thread can encode into*/
>
> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).

Done.

>
>>       struct list_head req_free;
>> +
>> +    /*
>> +     * USB requests video pump thread has already encoded into. These are
>> +     * ready to be queued to the endpoint.
>> +     */
>> +    struct list_head req_ready;
>>       spinlock_t req_lock;
>>         unsigned int req_int_count;
>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> index 53feb790a4c3..c84183e9afcc 100644
>> --- a/drivers/usb/gadget/function/uvc_video.c
>> +++ b/drivers/usb/gadget/function/uvc_video.c
>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>       return ret;
>>   }
>>   +/* This function must be called with video->req_lock held*/
>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>> +    struct usb_request *req, bool queue_to_ep) {
> Brace on a new line please - same a few more times below

Done.

>> +    bool is_bulk = video->max_payload_size;
> empty line here
>> +    if (!video->is_enabled) {
>> +        uvc_video_free_request(req->context, video->ep);
>> +        return -ENODEV;
>> +    }
>> +    if (queue_to_ep) {
>> +        struct uvc_request *ureq = req->context;
>> +        /*
>> +         * With USB3 handling more requests at a higher speed, we can't
>> +         * afford to generate an interrupt for every request. Decide to
>> +         * interrupt:
>> +         *
>> +         * - When no more requests are available in the free queue, as
>> +         *   this may be our last chance to refill the endpoint's
>> +         *   request queue.
>> +         *
>> +         * - When this is request is the last request for the video
>> +         *   buffer, as we want to start sending the next video buffer
>> +         *   ASAP in case it doesn't get started already in the next
>> +         *   iteration of this loop.
>> +         *
>> +         * - Four times over the length of the requests queue (as
>> +         *   indicated by video->uvc_num_requests), as a trade-off
>> +         *   between latency and interrupt load.
>> +        */
>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>> +            !(video->req_int_count %
>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>> +            video->req_int_count = 0;
>> +            req->no_interrupt = 0;
>> +        } else {
>> +            req->no_interrupt = 1;
>> +        }
>> +        video->req_int_count++;
>> +        return uvcg_video_ep_queue(video, req);
>> +    } else {
>> +        /*
>> +        * If we're not queing to the ep, for isoc we're queing
>> +        * to the req_ready list, otherwise req_free.
>> +        */
>> +        struct list_head *list =
>> +            is_bulk ? &video->req_free : &video->req_ready;
>> +        list_add_tail(&req->list, list);
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>> +    struct uvc_video *video) {
>> +    req->length = 0;
>> +    return uvcg_video_ep_queue(video, req);
>> +}
> Not sure this is worth its own function

Removed the function.

>> +
>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>> + * This function is needed in order to 'kick start' the flow of requests from
>> + * gadget driver to the usb controller.
>> + */
>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>> +    struct usb_request *req = NULL;
>> +    unsigned long flags = 0;
>> +    unsigned int count = 0;
>> +    int ret = 0;
>> +    /* We only queue half of the free list since we still want to have
>> +     * some free usb_requests in the free list for the video_pump async_wq
>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>> +     * situation where the free list does not have any usb requests to
>> +     * encode into - we always end up queueing 0 length requests to the
>> +     * end point.
>> +     */
>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>> +    spin_lock_irqsave(&video->req_lock, flags);
>> +    /* Take these requests off the free list and queue them all to the
>> +     * endpoint. Since we queue the requests with the req_lock held,
>> +     */
>
> This comment seems to be incomplete? You also want an opening /* on its own line:

Apologies I think I missed out completing this comment I will send out another patch later.

>
>
> /*
>  * Multi line comments
>  * look like this
>  */
>

Done.

>> +    while (count < half_list_size) {
>> +        req = list_first_entry(&video->req_free, struct usb_request,
>> +                    list);
>> +        list_del(&req->list);
>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>> +        if (ret < 0) {
>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>> +            break;
>> +        }
>> +        count++;
>> +    }
>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>> +}
>> +
>
> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>
>
That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.

> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?

At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?
In my local testing, I don't see any delay observable to the naked eye.

>
>>   static void
>>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>   {
>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>       struct uvc_video_queue *queue = &video->queue;
>>       struct uvc_buffer *last_buf = NULL;
>>       unsigned long flags;
>> +    bool is_bulk = video->max_payload_size;
>> +    int ret = 0;
>>         spin_lock_irqsave(&video->req_lock, flags);
>>       if (!video->is_enabled) {
>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>        * back to req_free
>>        */
>>       if (video->is_enabled) {
>> -        list_add_tail(&req->list, &video->req_free);
>> +        /*
>> +         * Here we check whether any request is available in the ready
>> +         * list. If it is, queue it to the ep and add the current
>> +         * usb_request to the req_free list - for video_pump to fill in.
>> +         * Otherwise, just use the current usb_request to queue a 0
>> +         * length request to the ep. Since we always add to the req_free
>> +         * list if we dequeue from the ready list, there will never
>> +         * be a situation where the req_free list is completely out of
>> +         * requests and cannot recover.
>> +         */
>> +        struct usb_request *to_queue = req;
>> +        to_queue->length = 0;
>> +        if (!list_empty(&video->req_ready)) {
>> +            to_queue = list_first_entry(&video->req_ready,
>> +                struct usb_request, list);
>> +            list_del(&to_queue->list);
>> +            /* Add it to the free list. */
>> +            list_add_tail(&req->list, &video->req_free);
>> +        }
>> +        /*
>> +         * Queue to the endpoint. The actual queueing to ep will
>> +         * only happen on one thread - the async_wq for bulk endpoints
>> +         * and this thread for isoc endpoints.
>> +         */
>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>> +                           /*queue_to_ep*/!is_bulk);
>
>
> In principle in-line comments are fine, but I don't think the parameter name is worth a comment

Done.

>
>> +        if(ret < 0) {
>> +            uvcg_queue_cancel(queue, 0);
>> +        }
>> +        /* Queue work to the wq as well since its possible that a buffer
>> +         * may not have been completed.
>> +         */
>
>
> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?


I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers. 

>
>>           queue_work(video->async_wq, &video->pump);
>>       } else {
>>           uvc_video_free_request(ureq, ep);
>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>         INIT_LIST_HEAD(&video->ureqs);
>>       INIT_LIST_HEAD(&video->req_free);
>> +    INIT_LIST_HEAD(&video->req_ready);
>>       video->req_size = 0;
>>       return 0;
>>   }
>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>       struct usb_request *req = NULL;
>>       struct uvc_buffer *buf;
>>       unsigned long flags;
>> -    bool buf_done;
>> -    int ret;
>> +    int ret = 0;
>>         while (true) {
>>           if (!video->ep->enabled)
>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>             if (buf != NULL) {
>>               video->encode(req, video, buf);
>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>           } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>               /*
>>                * No video buffer available; the queue is still connected and
>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>                * prevent missed ISOC transfers.
>>                */
>>               req->length = 0;
>> -            buf_done = false;
>>           } else {
>>               /*
>>                * Either the queue has been disconnected or no video buffer
>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>               break;
>>           }
>>   -        /*
>> -         * With USB3 handling more requests at a higher speed, we can't
>> -         * afford to generate an interrupt for every request. Decide to
>> -         * interrupt:
>> -         *
>> -         * - When no more requests are available in the free queue, as
>> -         *   this may be our last chance to refill the endpoint's
>> -         *   request queue.
>> -         *
>> -         * - When this is request is the last request for the video
>> -         *   buffer, as we want to start sending the next video buffer
>> -         *   ASAP in case it doesn't get started already in the next
>> -         *   iteration of this loop.
>> -         *
>> -         * - Four times over the length of the requests queue (as
>> -         *   indicated by video->uvc_num_requests), as a trade-off
>> -         *   between latency and interrupt load.
>> -         */
>> -        if (list_empty(&video->req_free) || buf_done ||
>> -            !(video->req_int_count %
>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>> -            video->req_int_count = 0;
>> -            req->no_interrupt = 0;
>> -        } else {
>> -            req->no_interrupt = 1;
>> -        }
>> -
>> -        /* Queue the USB request */
>> -        ret = uvcg_video_ep_queue(video, req);
>>           spin_unlock_irqrestore(&queue->irqlock, flags);
>>   +        /* Queue the USB request.*/
> I think just drop this - it was always superfluous.

The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling 
it - since its possible we might add to the req_ready list. We could say the function
should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
seem as clean ?  

>> +        spin_lock_irqsave(&video->req_lock, flags);
>> +        /* For bulk end points we queue from the worker thread
>> +         * since we would preferably not want to wait on requests
>> +         * to be ready, in the uvcg_video_complete() handler.
>> +         * For isoc endpoints we add the request to the ready list
>> +         * and only queue it to the endpoint from the complete handler.
>> +         */
>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>> +
>>           if (ret < 0) {
>>               uvcg_queue_cancel(queue, 0);
>>               break;
>>           }
>>   -        /* Endpoint now owns the request */
>> +        /* The request is owned by  the endpoint / ready list*/
>>           req = NULL;
>> -        video->req_int_count++;
>>       }
>>         if (!req)
>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>         spin_lock_irqsave(&video->req_lock, flags);
>>       /*
>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>> +     * Remove all uvc_requests from ureqs with list_del_init
> This should get fixed in the earlier series.
>>        * This lets uvc_video_free_request correctly identify
>>        * if the uvc_request is attached to a list or not when freeing
>>        * memory.
>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>           list_del(&req->list);
>>           uvc_video_free_request(req->context, video->ep);
>>       }
>> -
> keep the empty line please

Done.

>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>> +        list_del(&req->list);
>> +        uvc_video_free_request(req->context, video->ep);
>> +    }
> and one here too.

Done.

Thanks!

>>       INIT_LIST_HEAD(&video->ureqs);
>>       INIT_LIST_HEAD(&video->req_free);
>> +    INIT_LIST_HEAD(&video->req_ready);
>>       video->req_size = 0;
>>       spin_unlock_irqrestore(&video->req_lock, flags);
>>   @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>         video->req_int_count = 0;
>>   -    queue_work(video->async_wq, &video->pump);
>> +    uvc_video_ep_queue_initial_requests(video);
>>         return ret;
>>   }
>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>       video->is_enabled = false;
>>       INIT_LIST_HEAD(&video->ureqs);
>>       INIT_LIST_HEAD(&video->req_free);
>> +    INIT_LIST_HEAD(&video->req_ready);
>>       spin_lock_init(&video->req_lock);
>>       INIT_WORK(&video->pump, uvcg_video_pump);
>>   

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-03  7:28         ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue " Jayant Chowdhary
@ 2023-11-03 10:29           ` Michael Grzeschik
  2023-11-06 17:51             ` Jayant Chowdhary
  2023-11-07 17:01           ` Dan Scally
  1 sibling, 1 reply; 31+ messages in thread
From: Michael Grzeschik @ 2023-11-03 10:29 UTC (permalink / raw)
  To: Jayant Chowdhary
  Cc: Dan Scally, stern, laurent.pinchart, gregkh, Thinh.Nguyen,
	arakesh, etalvala, linux-kernel, linux-usb

[-- Attachment #1: Type: text/plain, Size: 21192 bytes --]

On Fri, Nov 03, 2023 at 12:28:31AM -0700, Jayant Chowdhary wrote:
>Hi Dan,
>Thank you for the comments.
>I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.
>
>On 11/2/23 09:07, Dan Scally wrote:
>> Hi Jayant - thanks for the patch
>>
>> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>>> When we use an async work queue to perform the function of pumping
>>> usb requests to the usb controller, it is possible that amongst other
>>> factors, thread scheduling affects at what cadence we're able to pump
>>> requests. This could mean isoc usb requests miss their uframes - resulting
>>> in video stream flickers on the host device.
>>>
>>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>>> with uvc buffers encoded into them. The process of queueing to the
>>> endpoint is done by the uvc_video_complete() handler. In case no
>>> usb_requests are ready with encoded information, we just queue a zero
>>> length request to the endpoint from the complete handler.
>>>
>>> For bulk endpoints the async_wq thread still queues usb requests to the
>>> endpoint.
>>>
>>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>>> ---
>>>   Based on top of
>>>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>   v1->v2: Added self Signed-Off-by and addressed review comments
>>>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>>      for isoc transfers.
>>>
>>>   drivers/usb/gadget/function/uvc.h       |   8 +
>>>   drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>>   2 files changed, 156 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>> index e8d4c87f1e09..82c783410554 100644
>>> --- a/drivers/usb/gadget/function/uvc.h
>>> +++ b/drivers/usb/gadget/function/uvc.h
>>> @@ -105,7 +105,15 @@ struct uvc_video {
>>>       bool is_enabled; /* tracks whether video stream is enabled */
>>>       unsigned int req_size;
>>>       struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>>> +
>>> +    /* USB requests video pump thread can encode into*/
>>
>> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).
>
>Done.
>
>>
>>>       struct list_head req_free;
>>> +
>>> +    /*
>>> +     * USB requests video pump thread has already encoded into. These are
>>> +     * ready to be queued to the endpoint.
>>> +     */
>>> +    struct list_head req_ready;
>>>       spinlock_t req_lock;
>>>         unsigned int req_int_count;
>>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>> index 53feb790a4c3..c84183e9afcc 100644
>>> --- a/drivers/usb/gadget/function/uvc_video.c
>>> +++ b/drivers/usb/gadget/function/uvc_video.c
>>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>>       return ret;
>>>   }
>>>   +/* This function must be called with video->req_lock held*/
>>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>>> +    struct usb_request *req, bool queue_to_ep) {
>> Brace on a new line please - same a few more times below
>
>Done.
>
>>> +    bool is_bulk = video->max_payload_size;
>> empty line here
>>> +    if (!video->is_enabled) {
>>> +        uvc_video_free_request(req->context, video->ep);
>>> +        return -ENODEV;
>>> +    }
>>> +    if (queue_to_ep) {
>>> +        struct uvc_request *ureq = req->context;
>>> +        /*
>>> +         * With USB3 handling more requests at a higher speed, we can't
>>> +         * afford to generate an interrupt for every request. Decide to
>>> +         * interrupt:
>>> +         *
>>> +         * - When no more requests are available in the free queue, as
>>> +         *   this may be our last chance to refill the endpoint's
>>> +         *   request queue.
>>> +         *
>>> +         * - When this is request is the last request for the video
>>> +         *   buffer, as we want to start sending the next video buffer
>>> +         *   ASAP in case it doesn't get started already in the next
>>> +         *   iteration of this loop.
>>> +         *
>>> +         * - Four times over the length of the requests queue (as
>>> +         *   indicated by video->uvc_num_requests), as a trade-off
>>> +         *   between latency and interrupt load.
>>> +        */
>>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>>> +            !(video->req_int_count %
>>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>> +            video->req_int_count = 0;
>>> +            req->no_interrupt = 0;
>>> +        } else {
>>> +            req->no_interrupt = 1;
>>> +        }
>>> +        video->req_int_count++;
>>> +        return uvcg_video_ep_queue(video, req);
>>> +    } else {
>>> +        /*
>>> +        * If we're not queing to the ep, for isoc we're queing
>>> +        * to the req_ready list, otherwise req_free.
>>> +        */
>>> +        struct list_head *list =
>>> +            is_bulk ? &video->req_free : &video->req_ready;
>>> +        list_add_tail(&req->list, list);
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>>> +    struct uvc_video *video) {
>>> +    req->length = 0;
>>> +    return uvcg_video_ep_queue(video, req);
>>> +}
>> Not sure this is worth its own function
>
>Removed the function.
>
>>> +
>>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>>> + * This function is needed in order to 'kick start' the flow of requests from
>>> + * gadget driver to the usb controller.
>>> + */
>>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>>> +    struct usb_request *req = NULL;
>>> +    unsigned long flags = 0;
>>> +    unsigned int count = 0;
>>> +    int ret = 0;
>>> +    /* We only queue half of the free list since we still want to have
>>> +     * some free usb_requests in the free list for the video_pump async_wq
>>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>>> +     * situation where the free list does not have any usb requests to
>>> +     * encode into - we always end up queueing 0 length requests to the
>>> +     * end point.
>>> +     */
>>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>>> +    spin_lock_irqsave(&video->req_lock, flags);
>>> +    /* Take these requests off the free list and queue them all to the
>>> +     * endpoint. Since we queue the requests with the req_lock held,
>>> +     */
>>
>> This comment seems to be incomplete? You also want an opening /* on its own line:
>
>Apologies I think I missed out completing this comment I will send out another patch later.
>
>>
>>
>> /*
>>  * Multi line comments
>>  * look like this
>>  */
>>
>
>Done.
>
>>> +    while (count < half_list_size) {
>>> +        req = list_first_entry(&video->req_free, struct usb_request,
>>> +                    list);
>>> +        list_del(&req->list);
>>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>>> +        if (ret < 0) {
>>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>>> +            break;
>>> +        }
>>> +        count++;
>>> +    }
>>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>>> +}
>>> +
>>
>> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>>
>>
>That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
>queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.
>
>> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?
>
>At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
>After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?
>In my local testing, I don't see any delay observable to the naked eye.

You won't see the any effect in most cases, because what you actually
lose is potential bandwidth. Let's think of a possible scenario; you
have the hard limits of displaying a frame every 33ms. If the frame is
small enough to be transmitted in the remaining 29ms with the active
bandwidth tweaks in the usb endpoint (mult, burst), the streaming is
totally fine.

The only downside is that you loose possible 4ms of processing time
after the total frame did hit the host. But this only matters if the
presentation time stamp (PTS) is actually set and would take an effect.

Regards,
Michael

>>
>>>   static void
>>>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>   {
>>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>       struct uvc_video_queue *queue = &video->queue;
>>>       struct uvc_buffer *last_buf = NULL;
>>>       unsigned long flags;
>>> +    bool is_bulk = video->max_payload_size;
>>> +    int ret = 0;
>>>         spin_lock_irqsave(&video->req_lock, flags);
>>>       if (!video->is_enabled) {
>>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>        * back to req_free
>>>        */
>>>       if (video->is_enabled) {
>>> -        list_add_tail(&req->list, &video->req_free);
>>> +        /*
>>> +         * Here we check whether any request is available in the ready
>>> +         * list. If it is, queue it to the ep and add the current
>>> +         * usb_request to the req_free list - for video_pump to fill in.
>>> +         * Otherwise, just use the current usb_request to queue a 0
>>> +         * length request to the ep. Since we always add to the req_free
>>> +         * list if we dequeue from the ready list, there will never
>>> +         * be a situation where the req_free list is completely out of
>>> +         * requests and cannot recover.
>>> +         */
>>> +        struct usb_request *to_queue = req;
>>> +        to_queue->length = 0;
>>> +        if (!list_empty(&video->req_ready)) {
>>> +            to_queue = list_first_entry(&video->req_ready,
>>> +                struct usb_request, list);
>>> +            list_del(&to_queue->list);
>>> +            /* Add it to the free list. */
>>> +            list_add_tail(&req->list, &video->req_free);
>>> +        }
>>> +        /*
>>> +         * Queue to the endpoint. The actual queueing to ep will
>>> +         * only happen on one thread - the async_wq for bulk endpoints
>>> +         * and this thread for isoc endpoints.
>>> +         */
>>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>>> +                           /*queue_to_ep*/!is_bulk);
>>
>>
>> In principle in-line comments are fine, but I don't think the parameter name is worth a comment
>
>Done.
>
>>
>>> +        if(ret < 0) {
>>> +            uvcg_queue_cancel(queue, 0);
>>> +        }
>>> +        /* Queue work to the wq as well since its possible that a buffer
>>> +         * may not have been completed.
>>> +         */
>>
>>
>> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?
>
>
>I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
>encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
>returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers.
>
>>
>>>           queue_work(video->async_wq, &video->pump);
>>>       } else {
>>>           uvc_video_free_request(ureq, ep);
>>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>>         INIT_LIST_HEAD(&video->ureqs);
>>>       INIT_LIST_HEAD(&video->req_free);
>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>       video->req_size = 0;
>>>       return 0;
>>>   }
>>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>       struct usb_request *req = NULL;
>>>       struct uvc_buffer *buf;
>>>       unsigned long flags;
>>> -    bool buf_done;
>>> -    int ret;
>>> +    int ret = 0;
>>>         while (true) {
>>>           if (!video->ep->enabled)
>>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>             if (buf != NULL) {
>>>               video->encode(req, video, buf);
>>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>>           } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>>               /*
>>>                * No video buffer available; the queue is still connected and
>>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>                * prevent missed ISOC transfers.
>>>                */
>>>               req->length = 0;
>>> -            buf_done = false;
>>>           } else {
>>>               /*
>>>                * Either the queue has been disconnected or no video buffer
>>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>>               break;
>>>           }
>>>   -        /*
>>> -         * With USB3 handling more requests at a higher speed, we can't
>>> -         * afford to generate an interrupt for every request. Decide to
>>> -         * interrupt:
>>> -         *
>>> -         * - When no more requests are available in the free queue, as
>>> -         *   this may be our last chance to refill the endpoint's
>>> -         *   request queue.
>>> -         *
>>> -         * - When this is request is the last request for the video
>>> -         *   buffer, as we want to start sending the next video buffer
>>> -         *   ASAP in case it doesn't get started already in the next
>>> -         *   iteration of this loop.
>>> -         *
>>> -         * - Four times over the length of the requests queue (as
>>> -         *   indicated by video->uvc_num_requests), as a trade-off
>>> -         *   between latency and interrupt load.
>>> -         */
>>> -        if (list_empty(&video->req_free) || buf_done ||
>>> -            !(video->req_int_count %
>>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>> -            video->req_int_count = 0;
>>> -            req->no_interrupt = 0;
>>> -        } else {
>>> -            req->no_interrupt = 1;
>>> -        }
>>> -
>>> -        /* Queue the USB request */
>>> -        ret = uvcg_video_ep_queue(video, req);
>>>           spin_unlock_irqrestore(&queue->irqlock, flags);
>>>   +        /* Queue the USB request.*/
>> I think just drop this - it was always superfluous.
>
>The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling
>it - since its possible we might add to the req_ready list. We could say the function
>should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
>seem as clean ?
>
>>> +        spin_lock_irqsave(&video->req_lock, flags);
>>> +        /* For bulk end points we queue from the worker thread
>>> +         * since we would preferably not want to wait on requests
>>> +         * to be ready, in the uvcg_video_complete() handler.
>>> +         * For isoc endpoints we add the request to the ready list
>>> +         * and only queue it to the endpoint from the complete handler.
>>> +         */
>>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>>> +
>>>           if (ret < 0) {
>>>               uvcg_queue_cancel(queue, 0);
>>>               break;
>>>           }
>>>   -        /* Endpoint now owns the request */
>>> +        /* The request is owned by  the endpoint / ready list*/
>>>           req = NULL;
>>> -        video->req_int_count++;
>>>       }
>>>         if (!req)
>>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>>         spin_lock_irqsave(&video->req_lock, flags);
>>>       /*
>>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>>> +     * Remove all uvc_requests from ureqs with list_del_init
>> This should get fixed in the earlier series.
>>>        * This lets uvc_video_free_request correctly identify
>>>        * if the uvc_request is attached to a list or not when freeing
>>>        * memory.
>>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>>           list_del(&req->list);
>>>           uvc_video_free_request(req->context, video->ep);
>>>       }
>>> -
>> keep the empty line please
>
>Done.
>
>>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>>> +        list_del(&req->list);
>>> +        uvc_video_free_request(req->context, video->ep);
>>> +    }
>> and one here too.
>
>Done.
>
>Thanks!
>
>>>       INIT_LIST_HEAD(&video->ureqs);
>>>       INIT_LIST_HEAD(&video->req_free);
>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>       video->req_size = 0;
>>>       spin_unlock_irqrestore(&video->req_lock, flags);
>>>   @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>>         video->req_int_count = 0;
>>>   -    queue_work(video->async_wq, &video->pump);
>>> +    uvc_video_ep_queue_initial_requests(video);
>>>         return ret;
>>>   }
>>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>>       video->is_enabled = false;
>>>       INIT_LIST_HEAD(&video->ureqs);
>>>       INIT_LIST_HEAD(&video->req_free);
>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>       spin_lock_init(&video->req_lock);
>>>       INIT_WORK(&video->pump, uvcg_video_pump);
>>>  
>

-- 
Pengutronix e.K.                           |                             |
Steuerwalder Str. 21                       | http://www.pengutronix.de/  |
31137 Hildesheim, Germany                  | Phone: +49-5121-206917-0    |
Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-03 10:29           ` Michael Grzeschik
@ 2023-11-06 17:51             ` Jayant Chowdhary
  0 siblings, 0 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-06 17:51 UTC (permalink / raw)
  To: Michael Grzeschik
  Cc: Dan Scally, stern, laurent.pinchart, gregkh, Thinh.Nguyen,
	arakesh, etalvala, linux-kernel, linux-usb

Hi Michael,

On 11/3/23 03:29, Michael Grzeschik wrote:
> On Fri, Nov 03, 2023 at 12:28:31AM -0700, Jayant Chowdhary wrote:
>> Hi Dan,
>> Thank you for the comments.
>> I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.
>>
>> On 11/2/23 09:07, Dan Scally wrote:
>>> Hi Jayant - thanks for the patch
>>>
>>> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>>>> When we use an async work queue to perform the function of pumping
>>>> usb requests to the usb controller, it is possible that amongst other
>>>> factors, thread scheduling affects at what cadence we're able to pump
>>>> requests. This could mean isoc usb requests miss their uframes - resulting
>>>> in video stream flickers on the host device.
>>>>
>>>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>>>> with uvc buffers encoded into them. The process of queueing to the
>>>> endpoint is done by the uvc_video_complete() handler. In case no
>>>> usb_requests are ready with encoded information, we just queue a zero
>>>> length request to the endpoint from the complete handler.
>>>>
>>>> For bulk endpoints the async_wq thread still queues usb requests to the
>>>> endpoint.
>>>>
>>>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>>>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>>>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>>>> ---
>>>>   Based on top of
>>>>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>>   v1->v2: Added self Signed-Off-by and addressed review comments
>>>>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>>>      for isoc transfers.
>>>>
>>>>   drivers/usb/gadget/function/uvc.h       |   8 +
>>>>   drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>>>   2 files changed, 156 insertions(+), 39 deletions(-)
>>>>
>>>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>>> index e8d4c87f1e09..82c783410554 100644
>>>> --- a/drivers/usb/gadget/function/uvc.h
>>>> +++ b/drivers/usb/gadget/function/uvc.h
>>>> @@ -105,7 +105,15 @@ struct uvc_video {
>>>>       bool is_enabled; /* tracks whether video stream is enabled */
>>>>       unsigned int req_size;
>>>>       struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>>>> +
>>>> +    /* USB requests video pump thread can encode into*/
>>>
>>> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).
>>
>> Done.
>>
>>>
>>>>       struct list_head req_free;
>>>> +
>>>> +    /*
>>>> +     * USB requests video pump thread has already encoded into. These are
>>>> +     * ready to be queued to the endpoint.
>>>> +     */
>>>> +    struct list_head req_ready;
>>>>       spinlock_t req_lock;
>>>>         unsigned int req_int_count;
>>>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>>> index 53feb790a4c3..c84183e9afcc 100644
>>>> --- a/drivers/usb/gadget/function/uvc_video.c
>>>> +++ b/drivers/usb/gadget/function/uvc_video.c
>>>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>>>       return ret;
>>>>   }
>>>>   +/* This function must be called with video->req_lock held*/
>>>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>>>> +    struct usb_request *req, bool queue_to_ep) {
>>> Brace on a new line please - same a few more times below
>>
>> Done.
>>
>>>> +    bool is_bulk = video->max_payload_size;
>>> empty line here
>>>> +    if (!video->is_enabled) {
>>>> +        uvc_video_free_request(req->context, video->ep);
>>>> +        return -ENODEV;
>>>> +    }
>>>> +    if (queue_to_ep) {
>>>> +        struct uvc_request *ureq = req->context;
>>>> +        /*
>>>> +         * With USB3 handling more requests at a higher speed, we can't
>>>> +         * afford to generate an interrupt for every request. Decide to
>>>> +         * interrupt:
>>>> +         *
>>>> +         * - When no more requests are available in the free queue, as
>>>> +         *   this may be our last chance to refill the endpoint's
>>>> +         *   request queue.
>>>> +         *
>>>> +         * - When this is request is the last request for the video
>>>> +         *   buffer, as we want to start sending the next video buffer
>>>> +         *   ASAP in case it doesn't get started already in the next
>>>> +         *   iteration of this loop.
>>>> +         *
>>>> +         * - Four times over the length of the requests queue (as
>>>> +         *   indicated by video->uvc_num_requests), as a trade-off
>>>> +         *   between latency and interrupt load.
>>>> +        */
>>>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>>>> +            !(video->req_int_count %
>>>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>> +            video->req_int_count = 0;
>>>> +            req->no_interrupt = 0;
>>>> +        } else {
>>>> +            req->no_interrupt = 1;
>>>> +        }
>>>> +        video->req_int_count++;
>>>> +        return uvcg_video_ep_queue(video, req);
>>>> +    } else {
>>>> +        /*
>>>> +        * If we're not queing to the ep, for isoc we're queing
>>>> +        * to the req_ready list, otherwise req_free.
>>>> +        */
>>>> +        struct list_head *list =
>>>> +            is_bulk ? &video->req_free : &video->req_ready;
>>>> +        list_add_tail(&req->list, list);
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>>>> +    struct uvc_video *video) {
>>>> +    req->length = 0;
>>>> +    return uvcg_video_ep_queue(video, req);
>>>> +}
>>> Not sure this is worth its own function
>>
>> Removed the function.
>>
>>>> +
>>>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>>>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>>>> + * This function is needed in order to 'kick start' the flow of requests from
>>>> + * gadget driver to the usb controller.
>>>> + */
>>>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>>>> +    struct usb_request *req = NULL;
>>>> +    unsigned long flags = 0;
>>>> +    unsigned int count = 0;
>>>> +    int ret = 0;
>>>> +    /* We only queue half of the free list since we still want to have
>>>> +     * some free usb_requests in the free list for the video_pump async_wq
>>>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>>>> +     * situation where the free list does not have any usb requests to
>>>> +     * encode into - we always end up queueing 0 length requests to the
>>>> +     * end point.
>>>> +     */
>>>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>>>> +    spin_lock_irqsave(&video->req_lock, flags);
>>>> +    /* Take these requests off the free list and queue them all to the
>>>> +     * endpoint. Since we queue the requests with the req_lock held,
>>>> +     */
>>>
>>> This comment seems to be incomplete? You also want an opening /* on its own line:
>>
>> Apologies I think I missed out completing this comment I will send out another patch later.
>>
>>>
>>>
>>> /*
>>>  * Multi line comments
>>>  * look like this
>>>  */
>>>
>>
>> Done.
>>
>>>> +    while (count < half_list_size) {
>>>> +        req = list_first_entry(&video->req_free, struct usb_request,
>>>> +                    list);
>>>> +        list_del(&req->list);
>>>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>>>> +        if (ret < 0) {
>>>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>>>> +            break;
>>>> +        }
>>>> +        count++;
>>>> +    }
>>>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>>>> +}
>>>> +
>>>
>>> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>>>
>>>
>> That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
>> queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.
>>
>>> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?
>>
>> At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
>> After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?
>> In my local testing, I don't see any delay observable to the naked eye.
>
> You won't see the any effect in most cases, because what you actually
> lose is potential bandwidth. Let's think of a possible scenario; you
> have the hard limits of displaying a frame every 33ms. If the frame is
> small enough to be transmitted in the remaining 29ms with the active
> bandwidth tweaks in the usb endpoint (mult, burst), the streaming is
> totally fine.
>
Would we really lose bandwidth as long as the time taken to encode a frame and
send it to the USB endpoint is really <= 33ms (in that case we can't meet the 30 fps
requirement anyway)

If we consider a 30fps stream and picture the timeline. The host expects the first
frame at 33ms, second at 66ms and so on. We start encoding the first frame at A- 4ms.
Let's say we take 32ms to encode it and send it. Its received by the host at B- 36ms.
The next frame starts encoding and again takes 32ms. The host gets it at point C which is
at 68ms in the timeline. If we keep following the train, we see that the difference in the skew
keeps decreasing and we eventually start hitting the expected timeline.


0ms  4ms                  33ms  36ms          66ms  68ms               99ms 100ms
|----|---------------------|-----|-------------|----|--------------------|--|

     A<------------------------->B<----------------->C<--------------------->D


Thanks,
Jayant


> The only downside is that you loose possible 4ms of processing time
> after the total frame did hit the host. But this only matters if the
> presentation time stamp (PTS) is actually set and would take an effect.
>
> Regards,
> Michael
>
>>>
>>>>   static void
>>>>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>   {
>>>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>       struct uvc_video_queue *queue = &video->queue;
>>>>       struct uvc_buffer *last_buf = NULL;
>>>>       unsigned long flags;
>>>> +    bool is_bulk = video->max_payload_size;
>>>> +    int ret = 0;
>>>>         spin_lock_irqsave(&video->req_lock, flags);
>>>>       if (!video->is_enabled) {
>>>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>        * back to req_free
>>>>        */
>>>>       if (video->is_enabled) {
>>>> -        list_add_tail(&req->list, &video->req_free);
>>>> +        /*
>>>> +         * Here we check whether any request is available in the ready
>>>> +         * list. If it is, queue it to the ep and add the current
>>>> +         * usb_request to the req_free list - for video_pump to fill in.
>>>> +         * Otherwise, just use the current usb_request to queue a 0
>>>> +         * length request to the ep. Since we always add to the req_free
>>>> +         * list if we dequeue from the ready list, there will never
>>>> +         * be a situation where the req_free list is completely out of
>>>> +         * requests and cannot recover.
>>>> +         */
>>>> +        struct usb_request *to_queue = req;
>>>> +        to_queue->length = 0;
>>>> +        if (!list_empty(&video->req_ready)) {
>>>> +            to_queue = list_first_entry(&video->req_ready,
>>>> +                struct usb_request, list);
>>>> +            list_del(&to_queue->list);
>>>> +            /* Add it to the free list. */
>>>> +            list_add_tail(&req->list, &video->req_free);
>>>> +        }
>>>> +        /*
>>>> +         * Queue to the endpoint. The actual queueing to ep will
>>>> +         * only happen on one thread - the async_wq for bulk endpoints
>>>> +         * and this thread for isoc endpoints.
>>>> +         */
>>>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>>>> +                           /*queue_to_ep*/!is_bulk);
>>>
>>>
>>> In principle in-line comments are fine, but I don't think the parameter name is worth a comment
>>
>> Done.
>>
>>>
>>>> +        if(ret < 0) {
>>>> +            uvcg_queue_cancel(queue, 0);
>>>> +        }
>>>> +        /* Queue work to the wq as well since its possible that a buffer
>>>> +         * may not have been completed.
>>>> +         */
>>>
>>>
>>> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?
>>
>>
>> I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
>> encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
>> returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers.
>>
>>>
>>>>           queue_work(video->async_wq, &video->pump);
>>>>       } else {
>>>>           uvc_video_free_request(ureq, ep);
>>>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>>>         INIT_LIST_HEAD(&video->ureqs);
>>>>       INIT_LIST_HEAD(&video->req_free);
>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>       video->req_size = 0;
>>>>       return 0;
>>>>   }
>>>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>       struct usb_request *req = NULL;
>>>>       struct uvc_buffer *buf;
>>>>       unsigned long flags;
>>>> -    bool buf_done;
>>>> -    int ret;
>>>> +    int ret = 0;
>>>>         while (true) {
>>>>           if (!video->ep->enabled)
>>>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>             if (buf != NULL) {
>>>>               video->encode(req, video, buf);
>>>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>>>           } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>>>               /*
>>>>                * No video buffer available; the queue is still connected and
>>>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>                * prevent missed ISOC transfers.
>>>>                */
>>>>               req->length = 0;
>>>> -            buf_done = false;
>>>>           } else {
>>>>               /*
>>>>                * Either the queue has been disconnected or no video buffer
>>>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>               break;
>>>>           }
>>>>   -        /*
>>>> -         * With USB3 handling more requests at a higher speed, we can't
>>>> -         * afford to generate an interrupt for every request. Decide to
>>>> -         * interrupt:
>>>> -         *
>>>> -         * - When no more requests are available in the free queue, as
>>>> -         *   this may be our last chance to refill the endpoint's
>>>> -         *   request queue.
>>>> -         *
>>>> -         * - When this is request is the last request for the video
>>>> -         *   buffer, as we want to start sending the next video buffer
>>>> -         *   ASAP in case it doesn't get started already in the next
>>>> -         *   iteration of this loop.
>>>> -         *
>>>> -         * - Four times over the length of the requests queue (as
>>>> -         *   indicated by video->uvc_num_requests), as a trade-off
>>>> -         *   between latency and interrupt load.
>>>> -         */
>>>> -        if (list_empty(&video->req_free) || buf_done ||
>>>> -            !(video->req_int_count %
>>>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>> -            video->req_int_count = 0;
>>>> -            req->no_interrupt = 0;
>>>> -        } else {
>>>> -            req->no_interrupt = 1;
>>>> -        }
>>>> -
>>>> -        /* Queue the USB request */
>>>> -        ret = uvcg_video_ep_queue(video, req);
>>>>           spin_unlock_irqrestore(&queue->irqlock, flags);
>>>>   +        /* Queue the USB request.*/
>>> I think just drop this - it was always superfluous.
>>
>> The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling
>> it - since its possible we might add to the req_ready list. We could say the function
>> should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
>> seem as clean ?
>>
>>>> +        spin_lock_irqsave(&video->req_lock, flags);
>>>> +        /* For bulk end points we queue from the worker thread
>>>> +         * since we would preferably not want to wait on requests
>>>> +         * to be ready, in the uvcg_video_complete() handler.
>>>> +         * For isoc endpoints we add the request to the ready list
>>>> +         * and only queue it to the endpoint from the complete handler.
>>>> +         */
>>>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>>>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>>>> +
>>>>           if (ret < 0) {
>>>>               uvcg_queue_cancel(queue, 0);
>>>>               break;
>>>>           }
>>>>   -        /* Endpoint now owns the request */
>>>> +        /* The request is owned by  the endpoint / ready list*/
>>>>           req = NULL;
>>>> -        video->req_int_count++;
>>>>       }
>>>>         if (!req)
>>>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>>>         spin_lock_irqsave(&video->req_lock, flags);
>>>>       /*
>>>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>>>> +     * Remove all uvc_requests from ureqs with list_del_init
>>> This should get fixed in the earlier series.
>>>>        * This lets uvc_video_free_request correctly identify
>>>>        * if the uvc_request is attached to a list or not when freeing
>>>>        * memory.
>>>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>>>           list_del(&req->list);
>>>>           uvc_video_free_request(req->context, video->ep);
>>>>       }
>>>> -
>>> keep the empty line please
>>
>> Done.
>>
>>>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>>>> +        list_del(&req->list);
>>>> +        uvc_video_free_request(req->context, video->ep);
>>>> +    }
>>> and one here too.
>>
>> Done.
>>
>> Thanks!
>>
>>>>       INIT_LIST_HEAD(&video->ureqs);
>>>>       INIT_LIST_HEAD(&video->req_free);
>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>       video->req_size = 0;
>>>>       spin_unlock_irqrestore(&video->req_lock, flags);
>>>>   @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>>>         video->req_int_count = 0;
>>>>   -    queue_work(video->async_wq, &video->pump);
>>>> +    uvc_video_ep_queue_initial_requests(video);
>>>>         return ret;
>>>>   }
>>>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>>>       video->is_enabled = false;
>>>>       INIT_LIST_HEAD(&video->ureqs);
>>>>       INIT_LIST_HEAD(&video->req_free);
>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>       spin_lock_init(&video->req_lock);
>>>>       INIT_WORK(&video->pump, uvcg_video_pump);
>>>>  
>>
>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-03  7:28         ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue " Jayant Chowdhary
  2023-11-03 10:29           ` Michael Grzeschik
@ 2023-11-07 17:01           ` Dan Scally
  2023-11-09 16:46             ` Jayant Chowdhary
  1 sibling, 1 reply; 31+ messages in thread
From: Dan Scally @ 2023-11-07 17:01 UTC (permalink / raw)
  To: Jayant Chowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Jayant

On 03/11/2023 07:28, Jayant Chowdhary wrote:
> Hi Dan,
> Thank you for the comments.
> I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.
>
> On 11/2/23 09:07, Dan Scally wrote:
>> Hi Jayant - thanks for the patch
>>
>> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>>> When we use an async work queue to perform the function of pumping
>>> usb requests to the usb controller, it is possible that amongst other
>>> factors, thread scheduling affects at what cadence we're able to pump
>>> requests. This could mean isoc usb requests miss their uframes - resulting
>>> in video stream flickers on the host device.
>>>
>>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>>> with uvc buffers encoded into them. The process of queueing to the
>>> endpoint is done by the uvc_video_complete() handler. In case no
>>> usb_requests are ready with encoded information, we just queue a zero
>>> length request to the endpoint from the complete handler.
>>>
>>> For bulk endpoints the async_wq thread still queues usb requests to the
>>> endpoint.
>>>
>>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>>> ---
>>>    Based on top of
>>>    https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>    v1->v2: Added self Signed-Off-by and addressed review comments
>>>    v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>>       for isoc transfers.
>>>
>>>    drivers/usb/gadget/function/uvc.h       |   8 +
>>>    drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>>    2 files changed, 156 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>> index e8d4c87f1e09..82c783410554 100644
>>> --- a/drivers/usb/gadget/function/uvc.h
>>> +++ b/drivers/usb/gadget/function/uvc.h
>>> @@ -105,7 +105,15 @@ struct uvc_video {
>>>        bool is_enabled; /* tracks whether video stream is enabled */
>>>        unsigned int req_size;
>>>        struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>>> +
>>> +    /* USB requests video pump thread can encode into*/
>> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).
> Done.
>
>>>        struct list_head req_free;
>>> +
>>> +    /*
>>> +     * USB requests video pump thread has already encoded into. These are
>>> +     * ready to be queued to the endpoint.
>>> +     */
>>> +    struct list_head req_ready;
>>>        spinlock_t req_lock;
>>>          unsigned int req_int_count;
>>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>> index 53feb790a4c3..c84183e9afcc 100644
>>> --- a/drivers/usb/gadget/function/uvc_video.c
>>> +++ b/drivers/usb/gadget/function/uvc_video.c
>>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>>        return ret;
>>>    }
>>>    +/* This function must be called with video->req_lock held*/
>>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>>> +    struct usb_request *req, bool queue_to_ep) {
>> Brace on a new line please - same a few more times below
> Done.
>
>>> +    bool is_bulk = video->max_payload_size;
>> empty line here
>>> +    if (!video->is_enabled) {
>>> +        uvc_video_free_request(req->context, video->ep);
>>> +        return -ENODEV;
>>> +    }
>>> +    if (queue_to_ep) {
>>> +        struct uvc_request *ureq = req->context;
>>> +        /*
>>> +         * With USB3 handling more requests at a higher speed, we can't
>>> +         * afford to generate an interrupt for every request. Decide to
>>> +         * interrupt:
>>> +         *
>>> +         * - When no more requests are available in the free queue, as
>>> +         *   this may be our last chance to refill the endpoint's
>>> +         *   request queue.
>>> +         *
>>> +         * - When this is request is the last request for the video
>>> +         *   buffer, as we want to start sending the next video buffer
>>> +         *   ASAP in case it doesn't get started already in the next
>>> +         *   iteration of this loop.
>>> +         *
>>> +         * - Four times over the length of the requests queue (as
>>> +         *   indicated by video->uvc_num_requests), as a trade-off
>>> +         *   between latency and interrupt load.
>>> +        */
>>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>>> +            !(video->req_int_count %
>>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>> +            video->req_int_count = 0;
>>> +            req->no_interrupt = 0;
>>> +        } else {
>>> +            req->no_interrupt = 1;
>>> +        }
>>> +        video->req_int_count++;
>>> +        return uvcg_video_ep_queue(video, req);
>>> +    } else {
>>> +        /*
>>> +        * If we're not queing to the ep, for isoc we're queing
>>> +        * to the req_ready list, otherwise req_free.
>>> +        */
>>> +        struct list_head *list =
>>> +            is_bulk ? &video->req_free : &video->req_ready;
>>> +        list_add_tail(&req->list, list);
>>> +    }
>>> +    return 0;
>>> +}
>>> +
>>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>>> +    struct uvc_video *video) {
>>> +    req->length = 0;
>>> +    return uvcg_video_ep_queue(video, req);
>>> +}
>> Not sure this is worth its own function
> Removed the function.
>
>>> +
>>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>>> + * This function is needed in order to 'kick start' the flow of requests from
>>> + * gadget driver to the usb controller.
>>> + */
>>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>>> +    struct usb_request *req = NULL;
>>> +    unsigned long flags = 0;
>>> +    unsigned int count = 0;
>>> +    int ret = 0;
>>> +    /* We only queue half of the free list since we still want to have
>>> +     * some free usb_requests in the free list for the video_pump async_wq
>>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>>> +     * situation where the free list does not have any usb requests to
>>> +     * encode into - we always end up queueing 0 length requests to the
>>> +     * end point.
>>> +     */
>>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>>> +    spin_lock_irqsave(&video->req_lock, flags);
>>> +    /* Take these requests off the free list and queue them all to the
>>> +     * endpoint. Since we queue the requests with the req_lock held,
>>> +     */
>> This comment seems to be incomplete? You also want an opening /* on its own line:
> Apologies I think I missed out completing this comment I will send out another patch later.
>
>>
>> /*
>>   * Multi line comments
>>   * look like this
>>   */
>>
> Done.
>
>>> +    while (count < half_list_size) {
>>> +        req = list_first_entry(&video->req_free, struct usb_request,
>>> +                    list);
>>> +        list_del(&req->list);
>>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>>> +        if (ret < 0) {
>>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>>> +            break;
>>> +        }
>>> +        count++;
>>> +    }
>>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>>> +}
>>> +
>> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>>
>>
> That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
> queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.
>
>> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?
> At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
> After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?


I think that the 'ongoing' stream should be fine using this method yes, though if possible I'd like 
to avoid introducing the delay to the first frame. Do you know if there's a simple way to remove it? 
I recognise the delay is small so I don't think it's necessarily a dealbreaker but it would be nice 
if we could avoid it.

> In my local testing, I don't see any delay observable to the naked eye.
>
>>>    static void
>>>    uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>    {
>>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>        struct uvc_video_queue *queue = &video->queue;
>>>        struct uvc_buffer *last_buf = NULL;
>>>        unsigned long flags;
>>> +    bool is_bulk = video->max_payload_size;
>>> +    int ret = 0;
>>>          spin_lock_irqsave(&video->req_lock, flags);
>>>        if (!video->is_enabled) {
>>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>         * back to req_free
>>>         */
>>>        if (video->is_enabled) {
>>> -        list_add_tail(&req->list, &video->req_free);
>>> +        /*
>>> +         * Here we check whether any request is available in the ready
>>> +         * list. If it is, queue it to the ep and add the current
>>> +         * usb_request to the req_free list - for video_pump to fill in.
>>> +         * Otherwise, just use the current usb_request to queue a 0
>>> +         * length request to the ep. Since we always add to the req_free
>>> +         * list if we dequeue from the ready list, there will never
>>> +         * be a situation where the req_free list is completely out of
>>> +         * requests and cannot recover.
>>> +         */
>>> +        struct usb_request *to_queue = req;
>>> +        to_queue->length = 0;
>>> +        if (!list_empty(&video->req_ready)) {
>>> +            to_queue = list_first_entry(&video->req_ready,
>>> +                struct usb_request, list);
>>> +            list_del(&to_queue->list);
>>> +            /* Add it to the free list. */
>>> +            list_add_tail(&req->list, &video->req_free);
>>> +        }
>>> +        /*
>>> +         * Queue to the endpoint. The actual queueing to ep will
>>> +         * only happen on one thread - the async_wq for bulk endpoints
>>> +         * and this thread for isoc endpoints.
>>> +         */
>>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>>> +                           /*queue_to_ep*/!is_bulk);
>>
>> In principle in-line comments are fine, but I don't think the parameter name is worth a comment
> Done.
>
>>> +        if(ret < 0) {
>>> +            uvcg_queue_cancel(queue, 0);
>>> +        }
>>> +        /* Queue work to the wq as well since its possible that a buffer
>>> +         * may not have been completed.
>>> +         */
>>
>> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?
>
> I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
> encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
> returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers.
>
>>>            queue_work(video->async_wq, &video->pump);
>>>        } else {
>>>            uvc_video_free_request(ureq, ep);
>>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>>          INIT_LIST_HEAD(&video->ureqs);
>>>        INIT_LIST_HEAD(&video->req_free);
>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>        video->req_size = 0;
>>>        return 0;
>>>    }
>>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>        struct usb_request *req = NULL;
>>>        struct uvc_buffer *buf;
>>>        unsigned long flags;
>>> -    bool buf_done;
>>> -    int ret;
>>> +    int ret = 0;
>>>          while (true) {
>>>            if (!video->ep->enabled)
>>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>              if (buf != NULL) {
>>>                video->encode(req, video, buf);
>>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>>            } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>>                /*
>>>                 * No video buffer available; the queue is still connected and
>>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>                 * prevent missed ISOC transfers.
>>>                 */
>>>                req->length = 0;
>>> -            buf_done = false;
>>>            } else {
>>>                /*
>>>                 * Either the queue has been disconnected or no video buffer
>>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>>                break;
>>>            }
>>>    -        /*
>>> -         * With USB3 handling more requests at a higher speed, we can't
>>> -         * afford to generate an interrupt for every request. Decide to
>>> -         * interrupt:
>>> -         *
>>> -         * - When no more requests are available in the free queue, as
>>> -         *   this may be our last chance to refill the endpoint's
>>> -         *   request queue.
>>> -         *
>>> -         * - When this is request is the last request for the video
>>> -         *   buffer, as we want to start sending the next video buffer
>>> -         *   ASAP in case it doesn't get started already in the next
>>> -         *   iteration of this loop.
>>> -         *
>>> -         * - Four times over the length of the requests queue (as
>>> -         *   indicated by video->uvc_num_requests), as a trade-off
>>> -         *   between latency and interrupt load.
>>> -         */
>>> -        if (list_empty(&video->req_free) || buf_done ||
>>> -            !(video->req_int_count %
>>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>> -            video->req_int_count = 0;
>>> -            req->no_interrupt = 0;
>>> -        } else {
>>> -            req->no_interrupt = 1;
>>> -        }
>>> -
>>> -        /* Queue the USB request */
>>> -        ret = uvcg_video_ep_queue(video, req);
>>>            spin_unlock_irqrestore(&queue->irqlock, flags);
>>>    +        /* Queue the USB request.*/
>> I think just drop this - it was always superfluous.
> The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling
> it - since its possible we might add to the req_ready list. We could say the function
> should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
> seem as clean ?


Sorry - I wasn't clear here. I meant that the comment "Queue the USB request" was superfluous rather 
than the spin_lock_irqsave()

>
>>> +        spin_lock_irqsave(&video->req_lock, flags);
>>> +        /* For bulk end points we queue from the worker thread
>>> +         * since we would preferably not want to wait on requests
>>> +         * to be ready, in the uvcg_video_complete() handler.
>>> +         * For isoc endpoints we add the request to the ready list
>>> +         * and only queue it to the endpoint from the complete handler.
>>> +         */
>>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>>> +
>>>            if (ret < 0) {
>>>                uvcg_queue_cancel(queue, 0);
>>>                break;
>>>            }
>>>    -        /* Endpoint now owns the request */
>>> +        /* The request is owned by  the endpoint / ready list*/
>>>            req = NULL;
>>> -        video->req_int_count++;
>>>        }
>>>          if (!req)
>>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>>          spin_lock_irqsave(&video->req_lock, flags);
>>>        /*
>>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>>> +     * Remove all uvc_requests from ureqs with list_del_init
>> This should get fixed in the earlier series.
>>>         * This lets uvc_video_free_request correctly identify
>>>         * if the uvc_request is attached to a list or not when freeing
>>>         * memory.
>>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>>            list_del(&req->list);
>>>            uvc_video_free_request(req->context, video->ep);
>>>        }
>>> -
>> keep the empty line please
> Done.
>
>>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>>> +        list_del(&req->list);
>>> +        uvc_video_free_request(req->context, video->ep);
>>> +    }
>> and one here too.
> Done.
>
> Thanks!
>
>>>        INIT_LIST_HEAD(&video->ureqs);
>>>        INIT_LIST_HEAD(&video->req_free);
>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>        video->req_size = 0;
>>>        spin_unlock_irqrestore(&video->req_lock, flags);
>>>    @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>>          video->req_int_count = 0;
>>>    -    queue_work(video->async_wq, &video->pump);
>>> +    uvc_video_ep_queue_initial_requests(video);
>>>          return ret;
>>>    }
>>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>>        video->is_enabled = false;
>>>        INIT_LIST_HEAD(&video->ureqs);
>>>        INIT_LIST_HEAD(&video->req_free);
>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>        spin_lock_init(&video->req_lock);
>>>        INIT_WORK(&video->pump, uvcg_video_pump);
>>>    

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v5] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-03  7:13         ` [PATCH v4] usb:gadget:uvc Do not use worker thread to pump " Jayant Chowdhary
@ 2023-11-09  2:12           ` Jayant Chowdhary
  2023-11-09  5:29             ` Greg KH
  2023-11-09  7:34             ` [PATCH v6] " Jayant Chowdhary
  0 siblings, 2 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-09  2:12 UTC (permalink / raw)
  To: dan.scally, jchowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that amongst other
factors, thread scheduling affects at what cadence we're able to pump
requests. This could mean isoc usb requests miss their uframes - resulting
in video stream flickers on the host device.

To avoid this, we make the async_wq thread only produce isoc usb_requests
with uvc buffers encoded into them. The process of queueing to the
endpoint is done by the uvc_video_complete() handler. In case no
usb_requests are ready with encoded information, we just queue a zero
length request to the endpoint from the complete handler.

For bulk endpoints the async_wq thread still queues usb requests to the
endpoint.

Change-Id: I8a33cbf83fb2f04376826185079f8b25404fe761
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
---
 Based on top of
 https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
 v1->v2: Added self Signed-Off-by and addressed review comments
 v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
	 for isoc transfers.
 v3->v4: Address review comments around code style.
 v4->v5: Update comments. Remove 0 length request queueing from async_wq
	 thread since it is already done by the complete handler.

 drivers/usb/gadget/function/uvc.h       |   8 +
 drivers/usb/gadget/function/uvc_video.c | 204 ++++++++++++++++++------
 2 files changed, 166 insertions(+), 46 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index e8d4c87f1e09..5ff454528bd8 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -105,7 +105,15 @@ struct uvc_video {
 	bool is_enabled; /* tracks whether video stream is enabled */
 	unsigned int req_size;
 	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
+
+	/* USB requests that the video pump thread can encode into */
 	struct list_head req_free;
+
+	/*
+	 * USB requests video pump thread has already encoded into. These are
+	 * ready to be queued to the endpoint.
+	 */
+	struct list_head req_ready;
 	spinlock_t req_lock;
 
 	unsigned int req_int_count;
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 53feb790a4c3..338e4b43e735 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -268,6 +268,100 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 	return ret;
 }
 
+/* This function must be called with video->req_lock held. */
+static int uvcg_video_usb_req_queue(struct uvc_video *video,
+	struct usb_request *req, bool queue_to_ep)
+{
+	bool is_bulk = video->max_payload_size;
+
+	if (!video->is_enabled) {
+		uvc_video_free_request(req->context, video->ep);
+		return -ENODEV;
+	}
+	if (queue_to_ep) {
+		struct uvc_request *ureq = req->context;
+		/*
+		 * With USB3 handling more requests at a higher speed, we can't
+		 * afford to generate an interrupt for every request. Decide to
+		 * interrupt:
+		 *
+		 * - When no more requests are available in the free queue, as
+		 *   this may be our last chance to refill the endpoint's
+		 *   request queue.
+		 *
+		 * - When this is request is the last request for the video
+		 *   buffer, as we want to start sending the next video buffer
+		 *   ASAP in case it doesn't get started already in the next
+		 *   iteration of this loop.
+		 *
+		 * - Four times over the length of the requests queue (as
+		 *   indicated by video->uvc_num_requests), as a trade-off
+		 *   between latency and interrupt load.
+		 */
+		if (list_empty(&video->req_free) || ureq->last_buf ||
+			!(video->req_int_count %
+			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
+			video->req_int_count = 0;
+			req->no_interrupt = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+		video->req_int_count++;
+		return uvcg_video_ep_queue(video, req);
+	} else {
+		/*
+		* If we're not queing to the ep, for isoc we're queing
+		* to the req_ready list, otherwise req_free.
+		*/
+		struct list_head *list =
+			is_bulk ? &video->req_free : &video->req_ready;
+		list_add_tail(&req->list, list);
+	}
+	return 0;
+}
+
+/*
+ * Must only be called from uvcg_video_enable - since after that we only want to
+ * queue requests to the endpoint from the uvc_video_complete complete handler.
+ * This function is needed in order to 'kick start' the flow of requests from
+ * gadget driver to the usb controller.
+ */
+static void uvc_video_ep_queue_initial_requests(struct uvc_video *video)
+{
+	struct usb_request *req = NULL;
+	unsigned long flags = 0;
+	unsigned int count = 0;
+	int ret = 0;
+	/*
+	 * We only queue half of the free list since we still want to have
+	 * some free usb_requests in the free list for the video_pump async_wq
+	 * thread to encode uvc buffers into. Otherwise we could get into a
+	 * situation where the free list does not have any usb requests to
+	 * encode into - we always end up queueing 0 length requests to the
+	 * end point.
+	 */
+	unsigned half_list_size = video->uvc_num_requests / 2;
+	spin_lock_irqsave(&video->req_lock, flags);
+	/*
+	 * Take these requests off the free list and queue them all to the
+	 * endpoint. Since we queue 0 length requests with the req_lock held,
+	 * there isn't any 'data' race involved here with the complete handler.
+	 */
+	while (count < half_list_size) {
+		req = list_first_entry(&video->req_free, struct usb_request,
+					list);
+		list_del(&req->list);
+		req->length = 0;
+		ret = uvcg_video_ep_queue(video, req);
+		if (ret < 0) {
+			uvcg_queue_cancel(&video->queue, /*disconnect*/0);
+			break;
+		}
+		count++;
+	}
+	spin_unlock_irqrestore(&video->req_lock, flags);
+}
+
 static void
 uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 {
@@ -276,6 +370,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_buffer *last_buf = NULL;
 	unsigned long flags;
+	bool is_bulk = video->max_payload_size;
+	int ret = 0;
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	if (!video->is_enabled) {
@@ -329,8 +425,46 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 * back to req_free
 	 */
 	if (video->is_enabled) {
-		list_add_tail(&req->list, &video->req_free);
-		queue_work(video->async_wq, &video->pump);
+		/*
+		 * Here we check whether any request is available in the ready
+		 * list. If it is, queue it to the ep and add the current
+		 * usb_request to the req_free list - for video_pump to fill in.
+		 * Otherwise, just use the current usb_request to queue a 0
+		 * length request to the ep. Since we always add to the req_free
+		 * list if we dequeue from the ready list, there will never
+		 * be a situation where the req_free list is completely out of
+		 * requests and cannot recover.
+		 */
+		struct usb_request *to_queue = req;
+		to_queue->length = 0;
+		if (!list_empty(&video->req_ready)) {
+			to_queue = list_first_entry(&video->req_ready,
+				struct usb_request, list);
+			list_del(&to_queue->list);
+			/* Add it to the free list. */
+			list_add_tail(&req->list, &video->req_free);
+			/*
+			* Queue work to the wq as well since it is possible that a
+			* buffer may not have been completely encoded with the set of
+			* in-flight usb requests for whih the complete callbacks are
+			* firing.
+			* In that case, if we do not queue work to the worker thread,
+			* the buffer will never be marked as complete - and therefore
+			* not be returned to userpsace. As a result,
+			* dequeue -> queue -> dequeue flow of uvc buffers will not
+			* happen.
+			*/
+			queue_work(video->async_wq, &video->pump);
+		}
+		/*
+		 * Queue to the endpoint. The actual queueing to ep will
+		 * only happen on one thread - the async_wq for bulk endpoints
+		 * and this thread for isoc endpoints.
+		 */
+		ret = uvcg_video_usb_req_queue(video, to_queue, !is_bulk);
+		if(ret < 0) {
+			uvcg_queue_cancel(queue, 0);
+		}
 	} else {
 		uvc_video_free_request(ureq, ep);
 	}
@@ -347,6 +481,7 @@ uvc_video_free_requests(struct uvc_video *video)
 
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	return 0;
 }
@@ -424,8 +559,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct usb_request *req = NULL;
 	struct uvc_buffer *buf;
 	unsigned long flags;
-	bool buf_done;
-	int ret;
+	int ret = 0;
 
 	while (true) {
 		if (!video->ep->enabled)
@@ -454,15 +588,6 @@ static void uvcg_video_pump(struct work_struct *work)
 
 		if (buf != NULL) {
 			video->encode(req, video, buf);
-			buf_done = buf->state == UVC_BUF_STATE_DONE;
-		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
-			/*
-			 * No video buffer available; the queue is still connected and
-			 * we're transferring over ISOC. Queue a 0 length request to
-			 * prevent missed ISOC transfers.
-			 */
-			req->length = 0;
-			buf_done = false;
 		} else {
 			/*
 			 * Either the queue has been disconnected or no video buffer
@@ -473,45 +598,25 @@ static void uvcg_video_pump(struct work_struct *work)
 			break;
 		}
 
-		/*
-		 * With USB3 handling more requests at a higher speed, we can't
-		 * afford to generate an interrupt for every request. Decide to
-		 * interrupt:
-		 *
-		 * - When no more requests are available in the free queue, as
-		 *   this may be our last chance to refill the endpoint's
-		 *   request queue.
-		 *
-		 * - When this is request is the last request for the video
-		 *   buffer, as we want to start sending the next video buffer
-		 *   ASAP in case it doesn't get started already in the next
-		 *   iteration of this loop.
-		 *
-		 * - Four times over the length of the requests queue (as
-		 *   indicated by video->uvc_num_requests), as a trade-off
-		 *   between latency and interrupt load.
-		 */
-		if (list_empty(&video->req_free) || buf_done ||
-		    !(video->req_int_count %
-		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
-			video->req_int_count = 0;
-			req->no_interrupt = 0;
-		} else {
-			req->no_interrupt = 1;
-		}
-
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
 
+		spin_lock_irqsave(&video->req_lock, flags);
+		/* For bulk end points we queue from the worker thread
+		 * since we would preferably not want to wait on requests
+		 * to be ready, in the uvcg_video_complete() handler.
+		 * For isoc endpoints we add the request to the ready list
+		 * and only queue it to the endpoint from the complete handler.
+		 */
+		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+
 		if (ret < 0) {
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
 
-		/* Endpoint now owns the request */
+		/* The request is owned by  the endpoint / ready list. */
 		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -567,7 +672,7 @@ uvcg_video_disable(struct uvc_video *video)
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	/*
-	 * Remove all uvc_reqeusts from ureqs with list_del_init
+	* Remove all uvc_requests from ureqs with list_del_init
 	 * This lets uvc_video_free_request correctly identify
 	 * if the uvc_request is attached to a list or not when freeing
 	 * memory.
@@ -580,8 +685,14 @@ uvcg_video_disable(struct uvc_video *video)
 		uvc_video_free_request(req->context, video->ep);
 	}
 
+	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
+		list_del(&req->list);
+		uvc_video_free_request(req->context, video->ep);
+	}
+
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
@@ -635,7 +746,7 @@ int uvcg_video_enable(struct uvc_video *video)
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
+	uvc_video_ep_queue_initial_requests(video);
 
 	return ret;
 }
@@ -648,6 +759,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->is_enabled = false;
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	spin_lock_init(&video->req_lock);
 	INIT_WORK(&video->pump, uvcg_video_pump);
 
-- 
2.42.0.869.gea05f2083d-goog


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v5] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-09  2:12           ` [PATCH v5] " Jayant Chowdhary
@ 2023-11-09  5:29             ` Greg KH
  2023-11-09  7:38               ` Jayant Chowdhary
  2023-11-09  7:34             ` [PATCH v6] " Jayant Chowdhary
  1 sibling, 1 reply; 31+ messages in thread
From: Greg KH @ 2023-11-09  5:29 UTC (permalink / raw)
  To: Jayant Chowdhary
  Cc: dan.scally, stern, laurent.pinchart, m.grzeschik, Thinh.Nguyen,
	arakesh, etalvala, linux-kernel, linux-usb

On Thu, Nov 09, 2023 at 02:12:50AM +0000, Jayant Chowdhary wrote:
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that amongst other
> factors, thread scheduling affects at what cadence we're able to pump
> requests. This could mean isoc usb requests miss their uframes - resulting
> in video stream flickers on the host device.
> 
> To avoid this, we make the async_wq thread only produce isoc usb_requests
> with uvc buffers encoded into them. The process of queueing to the
> endpoint is done by the uvc_video_complete() handler. In case no
> usb_requests are ready with encoded information, we just queue a zero
> length request to the endpoint from the complete handler.
> 
> For bulk endpoints the async_wq thread still queues usb requests to the
> endpoint.
> 
> Change-Id: I8a33cbf83fb2f04376826185079f8b25404fe761
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Avichal Rakesh <arakesh@google.com>
> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
> ---
>  Based on top of
>  https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>  v1->v2: Added self Signed-Off-by and addressed review comments
>  v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
> 	 for isoc transfers.
>  v3->v4: Address review comments around code style.
>  v4->v5: Update comments. Remove 0 length request queueing from async_wq
> 	 thread since it is already done by the complete handler.

You forgot to run checkpatch.pl :(

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v6] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-09  2:12           ` [PATCH v5] " Jayant Chowdhary
  2023-11-09  5:29             ` Greg KH
@ 2023-11-09  7:34             ` Jayant Chowdhary
  2023-11-16 10:09               ` Dan Scally
  2023-11-20  6:20               ` [PATCH v7] " Jayant Chowdhary
  1 sibling, 2 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-09  7:34 UTC (permalink / raw)
  To: dan.scally, jchowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that amongst other
factors, thread scheduling affects at what cadence we're able to pump
requests. This could mean isoc usb requests miss their uframes - resulting
in video stream flickers on the host device.

To avoid this, we make the async_wq thread only produce isoc usb_requests
with uvc buffers encoded into them. The process of queueing to the
endpoint is done by the uvc_video_complete() handler. In case no
usb_requests are ready with encoded information, we just queue a zero
length request to the endpoint from the complete handler.

For bulk endpoints the async_wq thread still queues usb requests to the
endpoint.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
---
 Based on top of
 https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
 v1->v2: Added self Signed-Off-by and addressed review comments
 v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
	 for isoc transfers.
 v3->v4: Address review comments around code style.
 v4->v5: Update comments. Remove 0 length request queueing from async_wq
	 thread since it is already done by the complete handler.
 v5->v6: Fix checkpatch.pl suggestions.

 drivers/usb/gadget/function/uvc.h       |   8 +
 drivers/usb/gadget/function/uvc_video.c | 204 ++++++++++++++++++------
 2 files changed, 166 insertions(+), 46 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index e8d4c87f1e09..5ff454528bd8 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -105,7 +105,15 @@ struct uvc_video {
 	bool is_enabled; /* tracks whether video stream is enabled */
 	unsigned int req_size;
 	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
+
+	/* USB requests that the video pump thread can encode into */
 	struct list_head req_free;
+
+	/*
+	 * USB requests video pump thread has already encoded into. These are
+	 * ready to be queued to the endpoint.
+	 */
+	struct list_head req_ready;
 	spinlock_t req_lock;
 
 	unsigned int req_int_count;
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 53feb790a4c3..d5311456fa8a 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -268,6 +268,100 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 	return ret;
 }
 
+/* This function must be called with video->req_lock held. */
+static int uvcg_video_usb_req_queue(struct uvc_video *video,
+	struct usb_request *req, bool queue_to_ep)
+{
+	bool is_bulk = video->max_payload_size;
+	struct list_head *list = NULL;
+
+	if (!video->is_enabled) {
+		uvc_video_free_request(req->context, video->ep);
+		return -ENODEV;
+	}
+	if (queue_to_ep) {
+		struct uvc_request *ureq = req->context;
+		/*
+		 * With USB3 handling more requests at a higher speed, we can't
+		 * afford to generate an interrupt for every request. Decide to
+		 * interrupt:
+		 *
+		 * - When no more requests are available in the free queue, as
+		 *   this may be our last chance to refill the endpoint's
+		 *   request queue.
+		 *
+		 * - When this is request is the last request for the video
+		 *   buffer, as we want to start sending the next video buffer
+		 *   ASAP in case it doesn't get started already in the next
+		 *   iteration of this loop.
+		 *
+		 * - Four times over the length of the requests queue (as
+		 *   indicated by video->uvc_num_requests), as a trade-off
+		 *   between latency and interrupt load.
+		 */
+		if (list_empty(&video->req_free) || ureq->last_buf ||
+			!(video->req_int_count %
+			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
+			video->req_int_count = 0;
+			req->no_interrupt = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+		video->req_int_count++;
+		return uvcg_video_ep_queue(video, req);
+	}
+	/*
+	 * If we're not queuing to the ep, for isoc we're queuing
+	 * to the req_ready list, otherwise req_free.
+	 */
+	list = is_bulk ? &video->req_free : &video->req_ready;
+	list_add_tail(&req->list, list);
+	return 0;
+}
+
+/*
+ * Must only be called from uvcg_video_enable - since after that we only want to
+ * queue requests to the endpoint from the uvc_video_complete complete handler.
+ * This function is needed in order to 'kick start' the flow of requests from
+ * gadget driver to the usb controller.
+ */
+static void uvc_video_ep_queue_initial_requests(struct uvc_video *video)
+{
+	struct usb_request *req = NULL;
+	unsigned long flags = 0;
+	unsigned int count = 0;
+	int ret = 0;
+	/*
+	 * We only queue half of the free list since we still want to have
+	 * some free usb_requests in the free list for the video_pump async_wq
+	 * thread to encode uvc buffers into. Otherwise we could get into a
+	 * situation where the free list does not have any usb requests to
+	 * encode into - we always end up queueing 0 length requests to the
+	 * end point.
+	 */
+	unsigned int half_list_size = video->uvc_num_requests / 2;
+
+	spin_lock_irqsave(&video->req_lock, flags);
+	/*
+	 * Take these requests off the free list and queue them all to the
+	 * endpoint. Since we queue 0 length requests with the req_lock held,
+	 * there isn't any 'data' race involved here with the complete handler.
+	 */
+	while (count < half_list_size) {
+		req = list_first_entry(&video->req_free, struct usb_request,
+					list);
+		list_del(&req->list);
+		req->length = 0;
+		ret = uvcg_video_ep_queue(video, req);
+		if (ret < 0) {
+			uvcg_queue_cancel(&video->queue, /*disconnect*/0);
+			break;
+		}
+		count++;
+	}
+	spin_unlock_irqrestore(&video->req_lock, flags);
+}
+
 static void
 uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 {
@@ -276,6 +370,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_buffer *last_buf = NULL;
 	unsigned long flags;
+	bool is_bulk = video->max_payload_size;
+	int ret = 0;
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	if (!video->is_enabled) {
@@ -329,8 +425,46 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 * back to req_free
 	 */
 	if (video->is_enabled) {
-		list_add_tail(&req->list, &video->req_free);
-		queue_work(video->async_wq, &video->pump);
+		/*
+		 * Here we check whether any request is available in the ready
+		 * list. If it is, queue it to the ep and add the current
+		 * usb_request to the req_free list - for video_pump to fill in.
+		 * Otherwise, just use the current usb_request to queue a 0
+		 * length request to the ep. Since we always add to the req_free
+		 * list if we dequeue from the ready list, there will never
+		 * be a situation where the req_free list is completely out of
+		 * requests and cannot recover.
+		 */
+		struct usb_request *to_queue = req;
+
+		to_queue->length = 0;
+		if (!list_empty(&video->req_ready)) {
+			to_queue = list_first_entry(&video->req_ready,
+				struct usb_request, list);
+			list_del(&to_queue->list);
+			/* Add it to the free list. */
+			list_add_tail(&req->list, &video->req_free);
+			/*
+			 * Queue work to the wq as well since it is possible that a
+			 * buffer may not have been completely encoded with the set of
+			 * in-flight usb requests for whih the complete callbacks are
+			 * firing.
+			 * In that case, if we do not queue work to the worker thread,
+			 * the buffer will never be marked as complete - and therefore
+			 * not be returned to userpsace. As a result,
+			 * dequeue -> queue -> dequeue flow of uvc buffers will not
+			 * happen.
+			 */
+			queue_work(video->async_wq, &video->pump);
+		}
+		/*
+		 * Queue to the endpoint. The actual queueing to ep will
+		 * only happen on one thread - the async_wq for bulk endpoints
+		 * and this thread for isoc endpoints.
+		 */
+		ret = uvcg_video_usb_req_queue(video, to_queue, !is_bulk);
+		if (ret < 0)
+			uvcg_queue_cancel(queue, 0);
 	} else {
 		uvc_video_free_request(ureq, ep);
 	}
@@ -347,6 +481,7 @@ uvc_video_free_requests(struct uvc_video *video)
 
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	return 0;
 }
@@ -424,8 +559,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct usb_request *req = NULL;
 	struct uvc_buffer *buf;
 	unsigned long flags;
-	bool buf_done;
-	int ret;
+	int ret = 0;
 
 	while (true) {
 		if (!video->ep->enabled)
@@ -454,15 +588,6 @@ static void uvcg_video_pump(struct work_struct *work)
 
 		if (buf != NULL) {
 			video->encode(req, video, buf);
-			buf_done = buf->state == UVC_BUF_STATE_DONE;
-		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
-			/*
-			 * No video buffer available; the queue is still connected and
-			 * we're transferring over ISOC. Queue a 0 length request to
-			 * prevent missed ISOC transfers.
-			 */
-			req->length = 0;
-			buf_done = false;
 		} else {
 			/*
 			 * Either the queue has been disconnected or no video buffer
@@ -473,45 +598,25 @@ static void uvcg_video_pump(struct work_struct *work)
 			break;
 		}
 
-		/*
-		 * With USB3 handling more requests at a higher speed, we can't
-		 * afford to generate an interrupt for every request. Decide to
-		 * interrupt:
-		 *
-		 * - When no more requests are available in the free queue, as
-		 *   this may be our last chance to refill the endpoint's
-		 *   request queue.
-		 *
-		 * - When this is request is the last request for the video
-		 *   buffer, as we want to start sending the next video buffer
-		 *   ASAP in case it doesn't get started already in the next
-		 *   iteration of this loop.
-		 *
-		 * - Four times over the length of the requests queue (as
-		 *   indicated by video->uvc_num_requests), as a trade-off
-		 *   between latency and interrupt load.
-		 */
-		if (list_empty(&video->req_free) || buf_done ||
-		    !(video->req_int_count %
-		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
-			video->req_int_count = 0;
-			req->no_interrupt = 0;
-		} else {
-			req->no_interrupt = 1;
-		}
-
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
 
+		spin_lock_irqsave(&video->req_lock, flags);
+		/* For bulk end points we queue from the worker thread
+		 * since we would preferably not want to wait on requests
+		 * to be ready, in the uvcg_video_complete() handler.
+		 * For isoc endpoints we add the request to the ready list
+		 * and only queue it to the endpoint from the complete handler.
+		 */
+		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+
 		if (ret < 0) {
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
 
-		/* Endpoint now owns the request */
+		/* The request is owned by  the endpoint / ready list. */
 		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -567,7 +672,7 @@ uvcg_video_disable(struct uvc_video *video)
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	/*
-	 * Remove all uvc_reqeusts from ureqs with list_del_init
+	* Remove all uvc_requests from ureqs with list_del_init
 	 * This lets uvc_video_free_request correctly identify
 	 * if the uvc_request is attached to a list or not when freeing
 	 * memory.
@@ -580,8 +685,14 @@ uvcg_video_disable(struct uvc_video *video)
 		uvc_video_free_request(req->context, video->ep);
 	}
 
+	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
+		list_del(&req->list);
+		uvc_video_free_request(req->context, video->ep);
+	}
+
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
@@ -635,7 +746,7 @@ int uvcg_video_enable(struct uvc_video *video)
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
+	uvc_video_ep_queue_initial_requests(video);
 
 	return ret;
 }
@@ -648,6 +759,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->is_enabled = false;
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	spin_lock_init(&video->req_lock);
 	INIT_WORK(&video->pump, uvcg_video_pump);
 
-- 
2.42.0.869.gea05f2083d-goog


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v5] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-09  5:29             ` Greg KH
@ 2023-11-09  7:38               ` Jayant Chowdhary
  0 siblings, 0 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-09  7:38 UTC (permalink / raw)
  To: Greg KH
  Cc: dan.scally, stern, laurent.pinchart, m.grzeschik, Thinh.Nguyen,
	arakesh, etalvala, linux-kernel, linux-usb

On 11/8/23 21:29, Greg KH wrote:
> On Thu, Nov 09, 2023 at 02:12:50AM +0000, Jayant Chowdhary wrote:
>> When we use an async work queue to perform the function of pumping
>> usb requests to the usb controller, it is possible that amongst other
>> factors, thread scheduling affects at what cadence we're able to pump
>> requests. This could mean isoc usb requests miss their uframes - resulting
>> in video stream flickers on the host device.
>>
>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>> with uvc buffers encoded into them. The process of queueing to the
>> endpoint is done by the uvc_video_complete() handler. In case no
>> usb_requests are ready with encoded information, we just queue a zero
>> length request to the endpoint from the complete handler.
>>
>> For bulk endpoints the async_wq thread still queues usb requests to the
>> endpoint.
>>
>> Change-Id: I8a33cbf83fb2f04376826185079f8b25404fe761
>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>> ---
>>  Based on top of
>>  https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>  v1->v2: Added self Signed-Off-by and addressed review comments
>>  v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>> 	 for isoc transfers.
>>  v3->v4: Address review comments around code style.
>>  v4->v5: Update comments. Remove 0 length request queueing from async_wq
>> 	 thread since it is already done by the complete handler.
> You forgot to run checkpatch.pl :(

My apologies, I sent out v6 with checkpatch.pl fixed.

Thanks


^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-07 17:01           ` Dan Scally
@ 2023-11-09 16:46             ` Jayant Chowdhary
  2023-11-14 18:52               ` Jayant Chowdhary
  0 siblings, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-09 16:46 UTC (permalink / raw)
  To: Dan Scally, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Dan,
Thanks for the comments.
I sent out v6 here https://lore.kernel.org/linux-usb/20231109073453.751860-1-jchowdhary@google.com/T/#u

On 11/7/23 09:01, Dan Scally wrote:
> Hi Jayant
>
> On 03/11/2023 07:28, Jayant Chowdhary wrote:
>> Hi Dan,
>> Thank you for the comments.
>> I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.
>>
>> On 11/2/23 09:07, Dan Scally wrote:
>>> Hi Jayant - thanks for the patch
>>>
>>> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>>>> When we use an async work queue to perform the function of pumping
>>>> usb requests to the usb controller, it is possible that amongst other
>>>> factors, thread scheduling affects at what cadence we're able to pump
>>>> requests. This could mean isoc usb requests miss their uframes - resulting
>>>> in video stream flickers on the host device.
>>>>
>>>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>>>> with uvc buffers encoded into them. The process of queueing to the
>>>> endpoint is done by the uvc_video_complete() handler. In case no
>>>> usb_requests are ready with encoded information, we just queue a zero
>>>> length request to the endpoint from the complete handler.
>>>>
>>>> For bulk endpoints the async_wq thread still queues usb requests to the
>>>> endpoint.
>>>>
>>>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>>>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>>>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>>>> ---
>>>>    Based on top of
>>>>    https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>>    v1->v2: Added self Signed-Off-by and addressed review comments
>>>>    v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>>>       for isoc transfers.
>>>>
>>>>    drivers/usb/gadget/function/uvc.h       |   8 +
>>>>    drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>>>    2 files changed, 156 insertions(+), 39 deletions(-)
>>>>
>>>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>>> index e8d4c87f1e09..82c783410554 100644
>>>> --- a/drivers/usb/gadget/function/uvc.h
>>>> +++ b/drivers/usb/gadget/function/uvc.h
>>>> @@ -105,7 +105,15 @@ struct uvc_video {
>>>>        bool is_enabled; /* tracks whether video stream is enabled */
>>>>        unsigned int req_size;
>>>>        struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>>>> +
>>>> +    /* USB requests video pump thread can encode into*/
>>> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).
>> Done.
>>
>>>>        struct list_head req_free;
>>>> +
>>>> +    /*
>>>> +     * USB requests video pump thread has already encoded into. These are
>>>> +     * ready to be queued to the endpoint.
>>>> +     */
>>>> +    struct list_head req_ready;
>>>>        spinlock_t req_lock;
>>>>          unsigned int req_int_count;
>>>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>>> index 53feb790a4c3..c84183e9afcc 100644
>>>> --- a/drivers/usb/gadget/function/uvc_video.c
>>>> +++ b/drivers/usb/gadget/function/uvc_video.c
>>>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>>>        return ret;
>>>>    }
>>>>    +/* This function must be called with video->req_lock held*/
>>>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>>>> +    struct usb_request *req, bool queue_to_ep) {
>>> Brace on a new line please - same a few more times below
>> Done.
>>
>>>> +    bool is_bulk = video->max_payload_size;
>>> empty line here
>>>> +    if (!video->is_enabled) {
>>>> +        uvc_video_free_request(req->context, video->ep);
>>>> +        return -ENODEV;
>>>> +    }
>>>> +    if (queue_to_ep) {
>>>> +        struct uvc_request *ureq = req->context;
>>>> +        /*
>>>> +         * With USB3 handling more requests at a higher speed, we can't
>>>> +         * afford to generate an interrupt for every request. Decide to
>>>> +         * interrupt:
>>>> +         *
>>>> +         * - When no more requests are available in the free queue, as
>>>> +         *   this may be our last chance to refill the endpoint's
>>>> +         *   request queue.
>>>> +         *
>>>> +         * - When this is request is the last request for the video
>>>> +         *   buffer, as we want to start sending the next video buffer
>>>> +         *   ASAP in case it doesn't get started already in the next
>>>> +         *   iteration of this loop.
>>>> +         *
>>>> +         * - Four times over the length of the requests queue (as
>>>> +         *   indicated by video->uvc_num_requests), as a trade-off
>>>> +         *   between latency and interrupt load.
>>>> +        */
>>>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>>>> +            !(video->req_int_count %
>>>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>> +            video->req_int_count = 0;
>>>> +            req->no_interrupt = 0;
>>>> +        } else {
>>>> +            req->no_interrupt = 1;
>>>> +        }
>>>> +        video->req_int_count++;
>>>> +        return uvcg_video_ep_queue(video, req);
>>>> +    } else {
>>>> +        /*
>>>> +        * If we're not queing to the ep, for isoc we're queing
>>>> +        * to the req_ready list, otherwise req_free.
>>>> +        */
>>>> +        struct list_head *list =
>>>> +            is_bulk ? &video->req_free : &video->req_ready;
>>>> +        list_add_tail(&req->list, list);
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>>>> +    struct uvc_video *video) {
>>>> +    req->length = 0;
>>>> +    return uvcg_video_ep_queue(video, req);
>>>> +}
>>> Not sure this is worth its own function
>> Removed the function.
>>
>>>> +
>>>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>>>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>>>> + * This function is needed in order to 'kick start' the flow of requests from
>>>> + * gadget driver to the usb controller.
>>>> + */
>>>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>>>> +    struct usb_request *req = NULL;
>>>> +    unsigned long flags = 0;
>>>> +    unsigned int count = 0;
>>>> +    int ret = 0;
>>>> +    /* We only queue half of the free list since we still want to have
>>>> +     * some free usb_requests in the free list for the video_pump async_wq
>>>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>>>> +     * situation where the free list does not have any usb requests to
>>>> +     * encode into - we always end up queueing 0 length requests to the
>>>> +     * end point.
>>>> +     */
>>>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>>>> +    spin_lock_irqsave(&video->req_lock, flags);
>>>> +    /* Take these requests off the free list and queue them all to the
>>>> +     * endpoint. Since we queue the requests with the req_lock held,
>>>> +     */
>>> This comment seems to be incomplete? You also want an opening /* on its own line:
>> Apologies I think I missed out completing this comment I will send out another patch later.
>>
>>>
>>> /*
>>>   * Multi line comments
>>>   * look like this
>>>   */
>>>
>> Done.
>>
>>>> +    while (count < half_list_size) {
>>>> +        req = list_first_entry(&video->req_free, struct usb_request,
>>>> +                    list);
>>>> +        list_del(&req->list);
>>>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>>>> +        if (ret < 0) {
>>>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>>>> +            break;
>>>> +        }
>>>> +        count++;
>>>> +    }
>>>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>>>> +}
>>>> +
>>> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>>>
>>>
>> That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
>> queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.
>>
>>> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?
>> At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
>> After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?
>
>
> I think that the 'ongoing' stream should be fine using this method yes, though if possible I'd like to avoid introducing the delay to the first frame. Do you know if there's a simple way to remove it? I recognise the delay is small so I don't think it's necessarily a dealbreaker but it would be nice if we could avoid it.

We could introduce a flag and have the async_wq thread queue requests to the ep for the first uvc buffer. However, what that would do is it would possibly add a skew between the first and second frames. 
Let's say we send out frame 1 at t = 0ms. It is possible that by the time frame 2 comes around, we have 32 0 length usb requests queued up in the usb controller. As a result the time distance
between frame 1's start and frame 2's start would be 33ms + 4ms = 37ms (instead of 4ms and 37ms). So its a tradeoff between skew vs 4ms delay in starting the stream. The current logic avoids the
skew at the expense of the delay in the first frame - and its simpler to follow in code. Happy to hear your and others' thoughts on this as well.


>
>> In my local testing, I don't see any delay observable to the naked eye.
>>
>>>>    static void
>>>>    uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>    {
>>>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>        struct uvc_video_queue *queue = &video->queue;
>>>>        struct uvc_buffer *last_buf = NULL;
>>>>        unsigned long flags;
>>>> +    bool is_bulk = video->max_payload_size;
>>>> +    int ret = 0;
>>>>          spin_lock_irqsave(&video->req_lock, flags);
>>>>        if (!video->is_enabled) {
>>>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>         * back to req_free
>>>>         */
>>>>        if (video->is_enabled) {
>>>> -        list_add_tail(&req->list, &video->req_free);
>>>> +        /*
>>>> +         * Here we check whether any request is available in the ready
>>>> +         * list. If it is, queue it to the ep and add the current
>>>> +         * usb_request to the req_free list - for video_pump to fill in.
>>>> +         * Otherwise, just use the current usb_request to queue a 0
>>>> +         * length request to the ep. Since we always add to the req_free
>>>> +         * list if we dequeue from the ready list, there will never
>>>> +         * be a situation where the req_free list is completely out of
>>>> +         * requests and cannot recover.
>>>> +         */
>>>> +        struct usb_request *to_queue = req;
>>>> +        to_queue->length = 0;
>>>> +        if (!list_empty(&video->req_ready)) {
>>>> +            to_queue = list_first_entry(&video->req_ready,
>>>> +                struct usb_request, list);
>>>> +            list_del(&to_queue->list);
>>>> +            /* Add it to the free list. */
>>>> +            list_add_tail(&req->list, &video->req_free);
>>>> +        }
>>>> +        /*
>>>> +         * Queue to the endpoint. The actual queueing to ep will
>>>> +         * only happen on one thread - the async_wq for bulk endpoints
>>>> +         * and this thread for isoc endpoints.
>>>> +         */
>>>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>>>> +                           /*queue_to_ep*/!is_bulk);
>>>
>>> In principle in-line comments are fine, but I don't think the parameter name is worth a comment
>> Done.
>>
>>>> +        if(ret < 0) {
>>>> +            uvcg_queue_cancel(queue, 0);
>>>> +        }
>>>> +        /* Queue work to the wq as well since its possible that a buffer
>>>> +         * may not have been completed.
>>>> +         */
>>>
>>> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?
>>
>> I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
>> encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
>> returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers.

I added this queue_work call to the if statement which checks if the ready list has any requests. If it doesn't, there's no point
in queuing any work - since we wouldn't be giving back to the req_free list.

Thank you

>>
>>>>            queue_work(video->async_wq, &video->pump);
>>>>        } else {
>>>>            uvc_video_free_request(ureq, ep);
>>>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>>>          INIT_LIST_HEAD(&video->ureqs);
>>>>        INIT_LIST_HEAD(&video->req_free);
>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>        video->req_size = 0;
>>>>        return 0;
>>>>    }
>>>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>        struct usb_request *req = NULL;
>>>>        struct uvc_buffer *buf;
>>>>        unsigned long flags;
>>>> -    bool buf_done;
>>>> -    int ret;
>>>> +    int ret = 0;
>>>>          while (true) {
>>>>            if (!video->ep->enabled)
>>>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>              if (buf != NULL) {
>>>>                video->encode(req, video, buf);
>>>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>>>            } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>>>                /*
>>>>                 * No video buffer available; the queue is still connected and
>>>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>                 * prevent missed ISOC transfers.
>>>>                 */
>>>>                req->length = 0;
>>>> -            buf_done = false;
>>>>            } else {
>>>>                /*
>>>>                 * Either the queue has been disconnected or no video buffer
>>>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>                break;
>>>>            }
>>>>    -        /*
>>>> -         * With USB3 handling more requests at a higher speed, we can't
>>>> -         * afford to generate an interrupt for every request. Decide to
>>>> -         * interrupt:
>>>> -         *
>>>> -         * - When no more requests are available in the free queue, as
>>>> -         *   this may be our last chance to refill the endpoint's
>>>> -         *   request queue.
>>>> -         *
>>>> -         * - When this is request is the last request for the video
>>>> -         *   buffer, as we want to start sending the next video buffer
>>>> -         *   ASAP in case it doesn't get started already in the next
>>>> -         *   iteration of this loop.
>>>> -         *
>>>> -         * - Four times over the length of the requests queue (as
>>>> -         *   indicated by video->uvc_num_requests), as a trade-off
>>>> -         *   between latency and interrupt load.
>>>> -         */
>>>> -        if (list_empty(&video->req_free) || buf_done ||
>>>> -            !(video->req_int_count %
>>>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>> -            video->req_int_count = 0;
>>>> -            req->no_interrupt = 0;
>>>> -        } else {
>>>> -            req->no_interrupt = 1;
>>>> -        }
>>>> -
>>>> -        /* Queue the USB request */
>>>> -        ret = uvcg_video_ep_queue(video, req);
>>>>            spin_unlock_irqrestore(&queue->irqlock, flags);
>>>>    +        /* Queue the USB request.*/
>>> I think just drop this - it was always superfluous.
>> The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling
>> it - since its possible we might add to the req_ready list. We could say the function
>> should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
>> seem as clean ?
>
>
> Sorry - I wasn't clear here. I meant that the comment "Queue the USB request" was superfluous rather than the spin_lock_irqsave()

Removed.

>
>>
>>>> +        spin_lock_irqsave(&video->req_lock, flags);
>>>> +        /* For bulk end points we queue from the worker thread
>>>> +         * since we would preferably not want to wait on requests
>>>> +         * to be ready, in the uvcg_video_complete() handler.
>>>> +         * For isoc endpoints we add the request to the ready list
>>>> +         * and only queue it to the endpoint from the complete handler.
>>>> +         */
>>>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>>>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>>>> +
>>>>            if (ret < 0) {
>>>>                uvcg_queue_cancel(queue, 0);
>>>>                break;
>>>>            }
>>>>    -        /* Endpoint now owns the request */
>>>> +        /* The request is owned by  the endpoint / ready list*/
>>>>            req = NULL;
>>>> -        video->req_int_count++;
>>>>        }
>>>>          if (!req)
>>>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>>>          spin_lock_irqsave(&video->req_lock, flags);
>>>>        /*
>>>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>>>> +     * Remove all uvc_requests from ureqs with list_del_init
>>> This should get fixed in the earlier series.
>>>>         * This lets uvc_video_free_request correctly identify
>>>>         * if the uvc_request is attached to a list or not when freeing
>>>>         * memory.
>>>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>>>            list_del(&req->list);
>>>>            uvc_video_free_request(req->context, video->ep);
>>>>        }
>>>> -
>>> keep the empty line please
>> Done.
>>
>>>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>>>> +        list_del(&req->list);
>>>> +        uvc_video_free_request(req->context, video->ep);
>>>> +    }
>>> and one here too.
>> Done.
>>
>> Thanks!
>>
>>>>        INIT_LIST_HEAD(&video->ureqs);
>>>>        INIT_LIST_HEAD(&video->req_free);
>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>        video->req_size = 0;
>>>>        spin_unlock_irqrestore(&video->req_lock, flags);
>>>>    @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>>>          video->req_int_count = 0;
>>>>    -    queue_work(video->async_wq, &video->pump);
>>>> +    uvc_video_ep_queue_initial_requests(video);
>>>>          return ret;
>>>>    }
>>>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>>>        video->is_enabled = false;
>>>>        INIT_LIST_HEAD(&video->ureqs);
>>>>        INIT_LIST_HEAD(&video->req_free);
>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>        spin_lock_init(&video->req_lock);
>>>>        INIT_WORK(&video->pump, uvcg_video_pump);
>>>>    

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-09 16:46             ` Jayant Chowdhary
@ 2023-11-14 18:52               ` Jayant Chowdhary
  2023-11-16 10:10                 ` Dan Scally
  0 siblings, 1 reply; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-14 18:52 UTC (permalink / raw)
  To: Dan Scally, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Dan,
I was wondering if you had a chance to look at v6 of the patch :
https://lore.kernel.org/linux-usb/20231109073453.751860-1-jchowdhary@google.com/T/#u ?

Thank you!

Jayant

On 11/9/23 08:46, Jayant Chowdhary wrote:
> Hi Dan,
> Thanks for the comments.
> I sent out v6 here https://lore.kernel.org/linux-usb/20231109073453.751860-1-jchowdhary@google.com/T/#u
>
> On 11/7/23 09:01, Dan Scally wrote:
>> Hi Jayant
>>
>> On 03/11/2023 07:28, Jayant Chowdhary wrote:
>>> Hi Dan,
>>> Thank you for the comments.
>>> I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.
>>>
>>> On 11/2/23 09:07, Dan Scally wrote:
>>>> Hi Jayant - thanks for the patch
>>>>
>>>> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>>>>> When we use an async work queue to perform the function of pumping
>>>>> usb requests to the usb controller, it is possible that amongst other
>>>>> factors, thread scheduling affects at what cadence we're able to pump
>>>>> requests. This could mean isoc usb requests miss their uframes - resulting
>>>>> in video stream flickers on the host device.
>>>>>
>>>>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>>>>> with uvc buffers encoded into them. The process of queueing to the
>>>>> endpoint is done by the uvc_video_complete() handler. In case no
>>>>> usb_requests are ready with encoded information, we just queue a zero
>>>>> length request to the endpoint from the complete handler.
>>>>>
>>>>> For bulk endpoints the async_wq thread still queues usb requests to the
>>>>> endpoint.
>>>>>
>>>>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>>>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>>>>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>>>>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>>>>> ---
>>>>>    Based on top of
>>>>>    https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>>>    v1->v2: Added self Signed-Off-by and addressed review comments
>>>>>    v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>>>>       for isoc transfers.
>>>>>
>>>>>    drivers/usb/gadget/function/uvc.h       |   8 +
>>>>>    drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>>>>    2 files changed, 156 insertions(+), 39 deletions(-)
>>>>>
>>>>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>>>> index e8d4c87f1e09..82c783410554 100644
>>>>> --- a/drivers/usb/gadget/function/uvc.h
>>>>> +++ b/drivers/usb/gadget/function/uvc.h
>>>>> @@ -105,7 +105,15 @@ struct uvc_video {
>>>>>        bool is_enabled; /* tracks whether video stream is enabled */
>>>>>        unsigned int req_size;
>>>>>        struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>>>>> +
>>>>> +    /* USB requests video pump thread can encode into*/
>>>> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).
>>> Done.
>>>
>>>>>        struct list_head req_free;
>>>>> +
>>>>> +    /*
>>>>> +     * USB requests video pump thread has already encoded into. These are
>>>>> +     * ready to be queued to the endpoint.
>>>>> +     */
>>>>> +    struct list_head req_ready;
>>>>>        spinlock_t req_lock;
>>>>>          unsigned int req_int_count;
>>>>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>>>> index 53feb790a4c3..c84183e9afcc 100644
>>>>> --- a/drivers/usb/gadget/function/uvc_video.c
>>>>> +++ b/drivers/usb/gadget/function/uvc_video.c
>>>>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>>>>        return ret;
>>>>>    }
>>>>>    +/* This function must be called with video->req_lock held*/
>>>>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>>>>> +    struct usb_request *req, bool queue_to_ep) {
>>>> Brace on a new line please - same a few more times below
>>> Done.
>>>
>>>>> +    bool is_bulk = video->max_payload_size;
>>>> empty line here
>>>>> +    if (!video->is_enabled) {
>>>>> +        uvc_video_free_request(req->context, video->ep);
>>>>> +        return -ENODEV;
>>>>> +    }
>>>>> +    if (queue_to_ep) {
>>>>> +        struct uvc_request *ureq = req->context;
>>>>> +        /*
>>>>> +         * With USB3 handling more requests at a higher speed, we can't
>>>>> +         * afford to generate an interrupt for every request. Decide to
>>>>> +         * interrupt:
>>>>> +         *
>>>>> +         * - When no more requests are available in the free queue, as
>>>>> +         *   this may be our last chance to refill the endpoint's
>>>>> +         *   request queue.
>>>>> +         *
>>>>> +         * - When this is request is the last request for the video
>>>>> +         *   buffer, as we want to start sending the next video buffer
>>>>> +         *   ASAP in case it doesn't get started already in the next
>>>>> +         *   iteration of this loop.
>>>>> +         *
>>>>> +         * - Four times over the length of the requests queue (as
>>>>> +         *   indicated by video->uvc_num_requests), as a trade-off
>>>>> +         *   between latency and interrupt load.
>>>>> +        */
>>>>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>>>>> +            !(video->req_int_count %
>>>>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>>> +            video->req_int_count = 0;
>>>>> +            req->no_interrupt = 0;
>>>>> +        } else {
>>>>> +            req->no_interrupt = 1;
>>>>> +        }
>>>>> +        video->req_int_count++;
>>>>> +        return uvcg_video_ep_queue(video, req);
>>>>> +    } else {
>>>>> +        /*
>>>>> +        * If we're not queing to the ep, for isoc we're queing
>>>>> +        * to the req_ready list, otherwise req_free.
>>>>> +        */
>>>>> +        struct list_head *list =
>>>>> +            is_bulk ? &video->req_free : &video->req_ready;
>>>>> +        list_add_tail(&req->list, list);
>>>>> +    }
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>>>>> +    struct uvc_video *video) {
>>>>> +    req->length = 0;
>>>>> +    return uvcg_video_ep_queue(video, req);
>>>>> +}
>>>> Not sure this is worth its own function
>>> Removed the function.
>>>
>>>>> +
>>>>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>>>>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>>>>> + * This function is needed in order to 'kick start' the flow of requests from
>>>>> + * gadget driver to the usb controller.
>>>>> + */
>>>>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>>>>> +    struct usb_request *req = NULL;
>>>>> +    unsigned long flags = 0;
>>>>> +    unsigned int count = 0;
>>>>> +    int ret = 0;
>>>>> +    /* We only queue half of the free list since we still want to have
>>>>> +     * some free usb_requests in the free list for the video_pump async_wq
>>>>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>>>>> +     * situation where the free list does not have any usb requests to
>>>>> +     * encode into - we always end up queueing 0 length requests to the
>>>>> +     * end point.
>>>>> +     */
>>>>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>>>>> +    spin_lock_irqsave(&video->req_lock, flags);
>>>>> +    /* Take these requests off the free list and queue them all to the
>>>>> +     * endpoint. Since we queue the requests with the req_lock held,
>>>>> +     */
>>>> This comment seems to be incomplete? You also want an opening /* on its own line:
>>> Apologies I think I missed out completing this comment I will send out another patch later.
>>>
>>>> /*
>>>>   * Multi line comments
>>>>   * look like this
>>>>   */
>>>>
>>> Done.
>>>
>>>>> +    while (count < half_list_size) {
>>>>> +        req = list_first_entry(&video->req_free, struct usb_request,
>>>>> +                    list);
>>>>> +        list_del(&req->list);
>>>>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>>>>> +        if (ret < 0) {
>>>>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>>>>> +            break;
>>>>> +        }
>>>>> +        count++;
>>>>> +    }
>>>>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>>>>> +}
>>>>> +
>>>> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>>>>
>>>>
>>> That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
>>> queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.
>>>
>>>> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?
>>> At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
>>> After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?
>>
>> I think that the 'ongoing' stream should be fine using this method yes, though if possible I'd like to avoid introducing the delay to the first frame. Do you know if there's a simple way to remove it? I recognise the delay is small so I don't think it's necessarily a dealbreaker but it would be nice if we could avoid it.
> We could introduce a flag and have the async_wq thread queue requests to the ep for the first uvc buffer. However, what that would do is it would possibly add a skew between the first and second frames. 
> Let's say we send out frame 1 at t = 0ms. It is possible that by the time frame 2 comes around, we have 32 0 length usb requests queued up in the usb controller. As a result the time distance
> between frame 1's start and frame 2's start would be 33ms + 4ms = 37ms (instead of 4ms and 37ms). So its a tradeoff between skew vs 4ms delay in starting the stream. The current logic avoids the
> skew at the expense of the delay in the first frame - and its simpler to follow in code. Happy to hear your and others' thoughts on this as well.
>
>
>>> In my local testing, I don't see any delay observable to the naked eye.
>>>
>>>>>    static void
>>>>>    uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>>    {
>>>>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>>        struct uvc_video_queue *queue = &video->queue;
>>>>>        struct uvc_buffer *last_buf = NULL;
>>>>>        unsigned long flags;
>>>>> +    bool is_bulk = video->max_payload_size;
>>>>> +    int ret = 0;
>>>>>          spin_lock_irqsave(&video->req_lock, flags);
>>>>>        if (!video->is_enabled) {
>>>>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>>         * back to req_free
>>>>>         */
>>>>>        if (video->is_enabled) {
>>>>> -        list_add_tail(&req->list, &video->req_free);
>>>>> +        /*
>>>>> +         * Here we check whether any request is available in the ready
>>>>> +         * list. If it is, queue it to the ep and add the current
>>>>> +         * usb_request to the req_free list - for video_pump to fill in.
>>>>> +         * Otherwise, just use the current usb_request to queue a 0
>>>>> +         * length request to the ep. Since we always add to the req_free
>>>>> +         * list if we dequeue from the ready list, there will never
>>>>> +         * be a situation where the req_free list is completely out of
>>>>> +         * requests and cannot recover.
>>>>> +         */
>>>>> +        struct usb_request *to_queue = req;
>>>>> +        to_queue->length = 0;
>>>>> +        if (!list_empty(&video->req_ready)) {
>>>>> +            to_queue = list_first_entry(&video->req_ready,
>>>>> +                struct usb_request, list);
>>>>> +            list_del(&to_queue->list);
>>>>> +            /* Add it to the free list. */
>>>>> +            list_add_tail(&req->list, &video->req_free);
>>>>> +        }
>>>>> +        /*
>>>>> +         * Queue to the endpoint. The actual queueing to ep will
>>>>> +         * only happen on one thread - the async_wq for bulk endpoints
>>>>> +         * and this thread for isoc endpoints.
>>>>> +         */
>>>>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>>>>> +                           /*queue_to_ep*/!is_bulk);
>>>> In principle in-line comments are fine, but I don't think the parameter name is worth a comment
>>> Done.
>>>
>>>>> +        if(ret < 0) {
>>>>> +            uvcg_queue_cancel(queue, 0);
>>>>> +        }
>>>>> +        /* Queue work to the wq as well since its possible that a buffer
>>>>> +         * may not have been completed.
>>>>> +         */
>>>> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?
>>> I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
>>> encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
>>> returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers.
> I added this queue_work call to the if statement which checks if the ready list has any requests. If it doesn't, there's no point
> in queuing any work - since we wouldn't be giving back to the req_free list.
>
> Thank you
>
>>>>>            queue_work(video->async_wq, &video->pump);
>>>>>        } else {
>>>>>            uvc_video_free_request(ureq, ep);
>>>>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>>>>          INIT_LIST_HEAD(&video->ureqs);
>>>>>        INIT_LIST_HEAD(&video->req_free);
>>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>>        video->req_size = 0;
>>>>>        return 0;
>>>>>    }
>>>>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>        struct usb_request *req = NULL;
>>>>>        struct uvc_buffer *buf;
>>>>>        unsigned long flags;
>>>>> -    bool buf_done;
>>>>> -    int ret;
>>>>> +    int ret = 0;
>>>>>          while (true) {
>>>>>            if (!video->ep->enabled)
>>>>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>              if (buf != NULL) {
>>>>>                video->encode(req, video, buf);
>>>>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>>>>            } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>>>>                /*
>>>>>                 * No video buffer available; the queue is still connected and
>>>>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>                 * prevent missed ISOC transfers.
>>>>>                 */
>>>>>                req->length = 0;
>>>>> -            buf_done = false;
>>>>>            } else {
>>>>>                /*
>>>>>                 * Either the queue has been disconnected or no video buffer
>>>>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>                break;
>>>>>            }
>>>>>    -        /*
>>>>> -         * With USB3 handling more requests at a higher speed, we can't
>>>>> -         * afford to generate an interrupt for every request. Decide to
>>>>> -         * interrupt:
>>>>> -         *
>>>>> -         * - When no more requests are available in the free queue, as
>>>>> -         *   this may be our last chance to refill the endpoint's
>>>>> -         *   request queue.
>>>>> -         *
>>>>> -         * - When this is request is the last request for the video
>>>>> -         *   buffer, as we want to start sending the next video buffer
>>>>> -         *   ASAP in case it doesn't get started already in the next
>>>>> -         *   iteration of this loop.
>>>>> -         *
>>>>> -         * - Four times over the length of the requests queue (as
>>>>> -         *   indicated by video->uvc_num_requests), as a trade-off
>>>>> -         *   between latency and interrupt load.
>>>>> -         */
>>>>> -        if (list_empty(&video->req_free) || buf_done ||
>>>>> -            !(video->req_int_count %
>>>>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>>> -            video->req_int_count = 0;
>>>>> -            req->no_interrupt = 0;
>>>>> -        } else {
>>>>> -            req->no_interrupt = 1;
>>>>> -        }
>>>>> -
>>>>> -        /* Queue the USB request */
>>>>> -        ret = uvcg_video_ep_queue(video, req);
>>>>>            spin_unlock_irqrestore(&queue->irqlock, flags);
>>>>>    +        /* Queue the USB request.*/
>>>> I think just drop this - it was always superfluous.
>>> The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling
>>> it - since its possible we might add to the req_ready list. We could say the function
>>> should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
>>> seem as clean ?
>>
>> Sorry - I wasn't clear here. I meant that the comment "Queue the USB request" was superfluous rather than the spin_lock_irqsave()
> Removed.
>
>>>>> +        spin_lock_irqsave(&video->req_lock, flags);
>>>>> +        /* For bulk end points we queue from the worker thread
>>>>> +         * since we would preferably not want to wait on requests
>>>>> +         * to be ready, in the uvcg_video_complete() handler.
>>>>> +         * For isoc endpoints we add the request to the ready list
>>>>> +         * and only queue it to the endpoint from the complete handler.
>>>>> +         */
>>>>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>>>>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>>>>> +
>>>>>            if (ret < 0) {
>>>>>                uvcg_queue_cancel(queue, 0);
>>>>>                break;
>>>>>            }
>>>>>    -        /* Endpoint now owns the request */
>>>>> +        /* The request is owned by  the endpoint / ready list*/
>>>>>            req = NULL;
>>>>> -        video->req_int_count++;
>>>>>        }
>>>>>          if (!req)
>>>>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>>>>          spin_lock_irqsave(&video->req_lock, flags);
>>>>>        /*
>>>>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>>>>> +     * Remove all uvc_requests from ureqs with list_del_init
>>>> This should get fixed in the earlier series.
>>>>>         * This lets uvc_video_free_request correctly identify
>>>>>         * if the uvc_request is attached to a list or not when freeing
>>>>>         * memory.
>>>>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>>>>            list_del(&req->list);
>>>>>            uvc_video_free_request(req->context, video->ep);
>>>>>        }
>>>>> -
>>>> keep the empty line please
>>> Done.
>>>
>>>>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>>>>> +        list_del(&req->list);
>>>>> +        uvc_video_free_request(req->context, video->ep);
>>>>> +    }
>>>> and one here too.
>>> Done.
>>>
>>> Thanks!
>>>
>>>>>        INIT_LIST_HEAD(&video->ureqs);
>>>>>        INIT_LIST_HEAD(&video->req_free);
>>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>>        video->req_size = 0;
>>>>>        spin_unlock_irqrestore(&video->req_lock, flags);
>>>>>    @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>>>>          video->req_int_count = 0;
>>>>>    -    queue_work(video->async_wq, &video->pump);
>>>>> +    uvc_video_ep_queue_initial_requests(video);
>>>>>          return ret;
>>>>>    }
>>>>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>>>>        video->is_enabled = false;
>>>>>        INIT_LIST_HEAD(&video->ureqs);
>>>>>        INIT_LIST_HEAD(&video->req_free);
>>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>>        spin_lock_init(&video->req_lock);
>>>>>        INIT_WORK(&video->pump, uvcg_video_pump);
>>>>>    

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v6] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-09  7:34             ` [PATCH v6] " Jayant Chowdhary
@ 2023-11-16 10:09               ` Dan Scally
  2023-11-20  6:30                 ` Jayant Chowdhary
  2023-11-20  6:20               ` [PATCH v7] " Jayant Chowdhary
  1 sibling, 1 reply; 31+ messages in thread
From: Dan Scally @ 2023-11-16 10:09 UTC (permalink / raw)
  To: Jayant Chowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Jayant, thanks for the update. I just have a couple of styling comments.

On 09/11/2023 07:34, Jayant Chowdhary wrote:
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that amongst other
> factors, thread scheduling affects at what cadence we're able to pump
> requests. This could mean isoc usb requests miss their uframes - resulting
> in video stream flickers on the host device.
>
> To avoid this, we make the async_wq thread only produce isoc usb_requests
> with uvc buffers encoded into them. The process of queueing to the
> endpoint is done by the uvc_video_complete() handler. In case no
> usb_requests are ready with encoded information, we just queue a zero
> length request to the endpoint from the complete handler.
>
> For bulk endpoints the async_wq thread still queues usb requests to the
> endpoint.
>
> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
> Suggested-by: Avichal Rakesh <arakesh@google.com>
> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
> ---
>   Based on top of
>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>   v1->v2: Added self Signed-Off-by and addressed review comments
>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
> 	 for isoc transfers.
>   v3->v4: Address review comments around code style.
>   v4->v5: Update comments. Remove 0 length request queueing from async_wq
> 	 thread since it is already done by the complete handler.
>   v5->v6: Fix checkpatch.pl suggestions.
>
>   drivers/usb/gadget/function/uvc.h       |   8 +
>   drivers/usb/gadget/function/uvc_video.c | 204 ++++++++++++++++++------
>   2 files changed, 166 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index e8d4c87f1e09..5ff454528bd8 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -105,7 +105,15 @@ struct uvc_video {
>   	bool is_enabled; /* tracks whether video stream is enabled */
>   	unsigned int req_size;
>   	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
> +
> +	/* USB requests that the video pump thread can encode into */
>   	struct list_head req_free;
> +
> +	/*
> +	 * USB requests video pump thread has already encoded into. These are
> +	 * ready to be queued to the endpoint.
> +	 */
> +	struct list_head req_ready;
>   	spinlock_t req_lock;
>   
>   	unsigned int req_int_count;
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 53feb790a4c3..d5311456fa8a 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -268,6 +268,100 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>   	return ret;
>   }
>   
> +/* This function must be called with video->req_lock held. */
> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
> +	struct usb_request *req, bool queue_to_ep)
> +{
> +	bool is_bulk = video->max_payload_size;
> +	struct list_head *list = NULL;
> +
> +	if (!video->is_enabled) {
> +		uvc_video_free_request(req->context, video->ep);
> +		return -ENODEV;
> +	}
> +	if (queue_to_ep) {
> +		struct uvc_request *ureq = req->context;
> +		/*
> +		 * With USB3 handling more requests at a higher speed, we can't
> +		 * afford to generate an interrupt for every request. Decide to
> +		 * interrupt:
> +		 *
> +		 * - When no more requests are available in the free queue, as
> +		 *   this may be our last chance to refill the endpoint's
> +		 *   request queue.
> +		 *
> +		 * - When this is request is the last request for the video
> +		 *   buffer, as we want to start sending the next video buffer
> +		 *   ASAP in case it doesn't get started already in the next
> +		 *   iteration of this loop.
> +		 *
> +		 * - Four times over the length of the requests queue (as
> +		 *   indicated by video->uvc_num_requests), as a trade-off
> +		 *   between latency and interrupt load.
> +		 */
> +		if (list_empty(&video->req_free) || ureq->last_buf ||
> +			!(video->req_int_count %
> +			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
> +			video->req_int_count = 0;
> +			req->no_interrupt = 0;
> +		} else {
> +			req->no_interrupt = 1;
> +		}
> +		video->req_int_count++;
> +		return uvcg_video_ep_queue(video, req);
> +	}
> +	/*
> +	 * If we're not queuing to the ep, for isoc we're queuing
> +	 * to the req_ready list, otherwise req_free.
> +	 */
> +	list = is_bulk ? &video->req_free : &video->req_ready;
> +	list_add_tail(&req->list, list);
> +	return 0;
> +}
> +
> +/*
> + * Must only be called from uvcg_video_enable - since after that we only want to
> + * queue requests to the endpoint from the uvc_video_complete complete handler.
> + * This function is needed in order to 'kick start' the flow of requests from
> + * gadget driver to the usb controller.
> + */
> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video)
> +{
> +	struct usb_request *req = NULL;
> +	unsigned long flags = 0;
> +	unsigned int count = 0;
> +	int ret = 0;
Add an empty line here please
> +	/*
> +	 * We only queue half of the free list since we still want to have
> +	 * some free usb_requests in the free list for the video_pump async_wq
> +	 * thread to encode uvc buffers into. Otherwise we could get into a
> +	 * situation where the free list does not have any usb requests to
> +	 * encode into - we always end up queueing 0 length requests to the
> +	 * end point.
> +	 */
> +	unsigned int half_list_size = video->uvc_num_requests / 2;
> +
> +	spin_lock_irqsave(&video->req_lock, flags);
> +	/*
> +	 * Take these requests off the free list and queue them all to the
> +	 * endpoint. Since we queue 0 length requests with the req_lock held,
> +	 * there isn't any 'data' race involved here with the complete handler.
> +	 */
> +	while (count < half_list_size) {
> +		req = list_first_entry(&video->req_free, struct usb_request,
> +					list);
> +		list_del(&req->list);
> +		req->length = 0;
> +		ret = uvcg_video_ep_queue(video, req);
> +		if (ret < 0) {
> +			uvcg_queue_cancel(&video->queue, /*disconnect*/0);


Drop the /*disconnect*/ comment please

> +			break;
> +		}
> +		count++;
> +	}
> +	spin_unlock_irqrestore(&video->req_lock, flags);
> +}
> +
>   static void
>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   {
> @@ -276,6 +370,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   	struct uvc_video_queue *queue = &video->queue;
>   	struct uvc_buffer *last_buf = NULL;
>   	unsigned long flags;
> +	bool is_bulk = video->max_payload_size;
> +	int ret = 0;
>   
>   	spin_lock_irqsave(&video->req_lock, flags);
>   	if (!video->is_enabled) {
> @@ -329,8 +425,46 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   	 * back to req_free
>   	 */
>   	if (video->is_enabled) {
> -		list_add_tail(&req->list, &video->req_free);
> -		queue_work(video->async_wq, &video->pump);
> +		/*
> +		 * Here we check whether any request is available in the ready
> +		 * list. If it is, queue it to the ep and add the current
> +		 * usb_request to the req_free list - for video_pump to fill in.
> +		 * Otherwise, just use the current usb_request to queue a 0
> +		 * length request to the ep. Since we always add to the req_free
> +		 * list if we dequeue from the ready list, there will never
> +		 * be a situation where the req_free list is completely out of
> +		 * requests and cannot recover.
> +		 */
> +		struct usb_request *to_queue = req;
> +
> +		to_queue->length = 0;
> +		if (!list_empty(&video->req_ready)) {
> +			to_queue = list_first_entry(&video->req_ready,
> +				struct usb_request, list);
> +			list_del(&to_queue->list);
> +			/* Add it to the free list. */
I would drop the "Add it to the free list" comment; the code is clear already.
> +			list_add_tail(&req->list, &video->req_free);
> +			/*
> +			 * Queue work to the wq as well since it is possible that a
> +			 * buffer may not have been completely encoded with the set of
> +			 * in-flight usb requests for whih the complete callbacks are
> +			 * firing.
> +			 * In that case, if we do not queue work to the worker thread,
> +			 * the buffer will never be marked as complete - and therefore
> +			 * not be returned to userpsace. As a result,
> +			 * dequeue -> queue -> dequeue flow of uvc buffers will not
> +			 * happen.
> +			 */
> +			queue_work(video->async_wq, &video->pump);
> +		}
> +		/*
> +		 * Queue to the endpoint. The actual queueing to ep will
> +		 * only happen on one thread - the async_wq for bulk endpoints
> +		 * and this thread for isoc endpoints.
> +		 */
> +		ret = uvcg_video_usb_req_queue(video, to_queue, !is_bulk);
> +		if (ret < 0)
> +			uvcg_queue_cancel(queue, 0);
>   	} else {
>   		uvc_video_free_request(ureq, ep);
>   	}
> @@ -347,6 +481,7 @@ uvc_video_free_requests(struct uvc_video *video)
>   
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	video->req_size = 0;
>   	return 0;
>   }
> @@ -424,8 +559,7 @@ static void uvcg_video_pump(struct work_struct *work)
>   	struct usb_request *req = NULL;
>   	struct uvc_buffer *buf;
>   	unsigned long flags;
> -	bool buf_done;
> -	int ret;
> +	int ret = 0;
>   
>   	while (true) {
>   		if (!video->ep->enabled)
> @@ -454,15 +588,6 @@ static void uvcg_video_pump(struct work_struct *work)
>   
>   		if (buf != NULL) {
>   			video->encode(req, video, buf);
> -			buf_done = buf->state == UVC_BUF_STATE_DONE;
> -		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
> -			/*
> -			 * No video buffer available; the queue is still connected and
> -			 * we're transferring over ISOC. Queue a 0 length request to
> -			 * prevent missed ISOC transfers.
> -			 */
> -			req->length = 0;
> -			buf_done = false;
>   		} else {
>   			/*
>   			 * Either the queue has been disconnected or no video buffer
> @@ -473,45 +598,25 @@ static void uvcg_video_pump(struct work_struct *work)
>   			break;
>   		}
>   
> -		/*
> -		 * With USB3 handling more requests at a higher speed, we can't
> -		 * afford to generate an interrupt for every request. Decide to
> -		 * interrupt:
> -		 *
> -		 * - When no more requests are available in the free queue, as
> -		 *   this may be our last chance to refill the endpoint's
> -		 *   request queue.
> -		 *
> -		 * - When this is request is the last request for the video
> -		 *   buffer, as we want to start sending the next video buffer
> -		 *   ASAP in case it doesn't get started already in the next
> -		 *   iteration of this loop.
> -		 *
> -		 * - Four times over the length of the requests queue (as
> -		 *   indicated by video->uvc_num_requests), as a trade-off
> -		 *   between latency and interrupt load.
> -		 */
> -		if (list_empty(&video->req_free) || buf_done ||
> -		    !(video->req_int_count %
> -		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
> -			video->req_int_count = 0;
> -			req->no_interrupt = 0;
> -		} else {
> -			req->no_interrupt = 1;
> -		}
> -
> -		/* Queue the USB request */
> -		ret = uvcg_video_ep_queue(video, req);
>   		spin_unlock_irqrestore(&queue->irqlock, flags);
>   
> +		spin_lock_irqsave(&video->req_lock, flags);
> +		/* For bulk end points we queue from the worker thread
> +		 * since we would preferably not want to wait on requests
> +		 * to be ready, in the uvcg_video_complete() handler.
> +		 * For isoc endpoints we add the request to the ready list
> +		 * and only queue it to the endpoint from the complete handler.
> +		 */
> +		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
> +		spin_unlock_irqrestore(&video->req_lock, flags);
> +
>   		if (ret < 0) {
>   			uvcg_queue_cancel(queue, 0);
>   			break;
>   		}
>   
> -		/* Endpoint now owns the request */
> +		/* The request is owned by  the endpoint / ready list. */
>   		req = NULL;
> -		video->req_int_count++;
>   	}
>   
>   	if (!req)
> @@ -567,7 +672,7 @@ uvcg_video_disable(struct uvc_video *video)
>   
>   	spin_lock_irqsave(&video->req_lock, flags);
>   	/*
> -	 * Remove all uvc_reqeusts from ureqs with list_del_init
> +	* Remove all uvc_requests from ureqs with list_del_init

Did the alignment of the * get messed up here as well as the typo fix or is it just my mail client 
being weird?


>   	 * This lets uvc_video_free_request correctly identify
>   	 * if the uvc_request is attached to a list or not when freeing
>   	 * memory.
> @@ -580,8 +685,14 @@ uvcg_video_disable(struct uvc_video *video)
>   		uvc_video_free_request(req->context, video->ep);
>   	}
>   
> +	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
> +		list_del(&req->list);
> +		uvc_video_free_request(req->context, video->ep);
> +	}
> +
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	video->req_size = 0;
>   	spin_unlock_irqrestore(&video->req_lock, flags);
>   
> @@ -635,7 +746,7 @@ int uvcg_video_enable(struct uvc_video *video)
>   
>   	video->req_int_count = 0;
>   
> -	queue_work(video->async_wq, &video->pump);
> +	uvc_video_ep_queue_initial_requests(video);
>   
>   	return ret;
>   }
> @@ -648,6 +759,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>   	video->is_enabled = false;
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	spin_lock_init(&video->req_lock);
>   	INIT_WORK(&video->pump, uvcg_video_pump);
>   

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests
  2023-11-14 18:52               ` Jayant Chowdhary
@ 2023-11-16 10:10                 ` Dan Scally
  0 siblings, 0 replies; 31+ messages in thread
From: Dan Scally @ 2023-11-16 10:10 UTC (permalink / raw)
  To: Jayant Chowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Morning

On 14/11/2023 18:52, Jayant Chowdhary wrote:
> Hi Dan,
> I was wondering if you had a chance to look at v6 of the patch :
> https://lore.kernel.org/linux-usb/20231109073453.751860-1-jchowdhary@google.com/T/#u ?


Now I did, sorry for the delay

> Thank you!
>
> Jayant
>
> On 11/9/23 08:46, Jayant Chowdhary wrote:
>> Hi Dan,
>> Thanks for the comments.
>> I sent out v6 here https://lore.kernel.org/linux-usb/20231109073453.751860-1-jchowdhary@google.com/T/#u
>>
>> On 11/7/23 09:01, Dan Scally wrote:
>>> Hi Jayant
>>>
>>> On 03/11/2023 07:28, Jayant Chowdhary wrote:
>>>> Hi Dan,
>>>> Thank you for the comments.
>>>> I uploaded a new patch at https://lore.kernel.org/linux-usb/20231103071353.1577383-1-jchowdhary@google.com/T/#u.
>>>>
>>>> On 11/2/23 09:07, Dan Scally wrote:
>>>>> Hi Jayant - thanks for the patch
>>>>>
>>>>> On 02/11/2023 06:01, Jayant Chowdhary wrote:
>>>>>> When we use an async work queue to perform the function of pumping
>>>>>> usb requests to the usb controller, it is possible that amongst other
>>>>>> factors, thread scheduling affects at what cadence we're able to pump
>>>>>> requests. This could mean isoc usb requests miss their uframes - resulting
>>>>>> in video stream flickers on the host device.
>>>>>>
>>>>>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>>>>>> with uvc buffers encoded into them. The process of queueing to the
>>>>>> endpoint is done by the uvc_video_complete() handler. In case no
>>>>>> usb_requests are ready with encoded information, we just queue a zero
>>>>>> length request to the endpoint from the complete handler.
>>>>>>
>>>>>> For bulk endpoints the async_wq thread still queues usb requests to the
>>>>>> endpoint.
>>>>>>
>>>>>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>>>>>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>>>>>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>>>>>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>>>>>> ---
>>>>>>     Based on top of
>>>>>>     https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>>>>>     v1->v2: Added self Signed-Off-by and addressed review comments
>>>>>>     v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>>>>>        for isoc transfers.
>>>>>>
>>>>>>     drivers/usb/gadget/function/uvc.h       |   8 +
>>>>>>     drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>>>>>>     2 files changed, 156 insertions(+), 39 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>>>>>> index e8d4c87f1e09..82c783410554 100644
>>>>>> --- a/drivers/usb/gadget/function/uvc.h
>>>>>> +++ b/drivers/usb/gadget/function/uvc.h
>>>>>> @@ -105,7 +105,15 @@ struct uvc_video {
>>>>>>         bool is_enabled; /* tracks whether video stream is enabled */
>>>>>>         unsigned int req_size;
>>>>>>         struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>>>>>> +
>>>>>> +    /* USB requests video pump thread can encode into*/
>>>>> "USB requests that the video pump thread can encode into", and a space before the closing */ please (and the same a few more times below).
>>>> Done.
>>>>
>>>>>>         struct list_head req_free;
>>>>>> +
>>>>>> +    /*
>>>>>> +     * USB requests video pump thread has already encoded into. These are
>>>>>> +     * ready to be queued to the endpoint.
>>>>>> +     */
>>>>>> +    struct list_head req_ready;
>>>>>>         spinlock_t req_lock;
>>>>>>           unsigned int req_int_count;
>>>>>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>>>>>> index 53feb790a4c3..c84183e9afcc 100644
>>>>>> --- a/drivers/usb/gadget/function/uvc_video.c
>>>>>> +++ b/drivers/usb/gadget/function/uvc_video.c
>>>>>> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>>>>>         return ret;
>>>>>>     }
>>>>>>     +/* This function must be called with video->req_lock held*/
>>>>>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>>>>>> +    struct usb_request *req, bool queue_to_ep) {
>>>>> Brace on a new line please - same a few more times below
>>>> Done.
>>>>
>>>>>> +    bool is_bulk = video->max_payload_size;
>>>>> empty line here
>>>>>> +    if (!video->is_enabled) {
>>>>>> +        uvc_video_free_request(req->context, video->ep);
>>>>>> +        return -ENODEV;
>>>>>> +    }
>>>>>> +    if (queue_to_ep) {
>>>>>> +        struct uvc_request *ureq = req->context;
>>>>>> +        /*
>>>>>> +         * With USB3 handling more requests at a higher speed, we can't
>>>>>> +         * afford to generate an interrupt for every request. Decide to
>>>>>> +         * interrupt:
>>>>>> +         *
>>>>>> +         * - When no more requests are available in the free queue, as
>>>>>> +         *   this may be our last chance to refill the endpoint's
>>>>>> +         *   request queue.
>>>>>> +         *
>>>>>> +         * - When this is request is the last request for the video
>>>>>> +         *   buffer, as we want to start sending the next video buffer
>>>>>> +         *   ASAP in case it doesn't get started already in the next
>>>>>> +         *   iteration of this loop.
>>>>>> +         *
>>>>>> +         * - Four times over the length of the requests queue (as
>>>>>> +         *   indicated by video->uvc_num_requests), as a trade-off
>>>>>> +         *   between latency and interrupt load.
>>>>>> +        */
>>>>>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>>>>>> +            !(video->req_int_count %
>>>>>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>>>> +            video->req_int_count = 0;
>>>>>> +            req->no_interrupt = 0;
>>>>>> +        } else {
>>>>>> +            req->no_interrupt = 1;
>>>>>> +        }
>>>>>> +        video->req_int_count++;
>>>>>> +        return uvcg_video_ep_queue(video, req);
>>>>>> +    } else {
>>>>>> +        /*
>>>>>> +        * If we're not queing to the ep, for isoc we're queing
>>>>>> +        * to the req_ready list, otherwise req_free.
>>>>>> +        */
>>>>>> +        struct list_head *list =
>>>>>> +            is_bulk ? &video->req_free : &video->req_ready;
>>>>>> +        list_add_tail(&req->list, list);
>>>>>> +    }
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
>>>>>> +    struct uvc_video *video) {
>>>>>> +    req->length = 0;
>>>>>> +    return uvcg_video_ep_queue(video, req);
>>>>>> +}
>>>>> Not sure this is worth its own function
>>>> Removed the function.
>>>>
>>>>>> +
>>>>>> +/* Must only be called from uvcg_video_enable - since after that we only want to
>>>>>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>>>>>> + * This function is needed in order to 'kick start' the flow of requests from
>>>>>> + * gadget driver to the usb controller.
>>>>>> + */
>>>>>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
>>>>>> +    struct usb_request *req = NULL;
>>>>>> +    unsigned long flags = 0;
>>>>>> +    unsigned int count = 0;
>>>>>> +    int ret = 0;
>>>>>> +    /* We only queue half of the free list since we still want to have
>>>>>> +     * some free usb_requests in the free list for the video_pump async_wq
>>>>>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>>>>>> +     * situation where the free list does not have any usb requests to
>>>>>> +     * encode into - we always end up queueing 0 length requests to the
>>>>>> +     * end point.
>>>>>> +     */
>>>>>> +    unsigned half_list_size = video->uvc_num_requests / 2;
>>>>>> +    spin_lock_irqsave(&video->req_lock, flags);
>>>>>> +    /* Take these requests off the free list and queue them all to the
>>>>>> +     * endpoint. Since we queue the requests with the req_lock held,
>>>>>> +     */
>>>>> This comment seems to be incomplete? You also want an opening /* on its own line:
>>>> Apologies I think I missed out completing this comment I will send out another patch later.
>>>>
>>>>> /*
>>>>>    * Multi line comments
>>>>>    * look like this
>>>>>    */
>>>>>
>>>> Done.
>>>>
>>>>>> +    while (count < half_list_size) {
>>>>>> +        req = list_first_entry(&video->req_free, struct usb_request,
>>>>>> +                    list);
>>>>>> +        list_del(&req->list);
>>>>>> +        ret = uvcg_video_ep_queue_zero_length(req, video);
>>>>>> +        if (ret < 0) {
>>>>>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>>>>>> +            break;
>>>>>> +        }
>>>>>> +        count++;
>>>>>> +    }
>>>>>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>>>>>> +}
>>>>>> +
>>>>> So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial flight of half the requests (between 2 and 32) that are queued as zero length - the very first one to .complete() being re-queued as a zero length request before the workqueue is started and encodes data into the _other_ half of the requests which were left in req_free and putting them into req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and they get sent out with data...does that sound right?
>>>>>
>>>>>
>>>> That is correct - the first half of number of usb requests allocated (2, 32) are queued at zero length initially. We’ll have half of the requests being sent to the ep in flight and half in the free list yes.
>>>> queue_work will actually start with either uvc_v4l2_qbuf (uvc_v4l2.c) or at a zero length request being completed - whichever comes first.
>>>>
>>>>> What are the implications of those initial 3-33 zero length requests? What kind of latency can that introduce to the start of the video stream?
>>>> At the worst, we’ll have  a 32 x 125us(uframe period) = 4ms  delay for the first frame of the uvc buffer stream being sent out to the usb controller.
>>>> After that, since uvc buffers are typically queued at a much lower rate than usb requests being sent to the endpoint, we should be fine ?
>>> I think that the 'ongoing' stream should be fine using this method yes, though if possible I'd like to avoid introducing the delay to the first frame. Do you know if there's a simple way to remove it? I recognise the delay is small so I don't think it's necessarily a dealbreaker but it would be nice if we could avoid it.
>> We could introduce a flag and have the async_wq thread queue requests to the ep for the first uvc buffer. However, what that would do is it would possibly add a skew between the first and second frames.
>> Let's say we send out frame 1 at t = 0ms. It is possible that by the time frame 2 comes around, we have 32 0 length usb requests queued up in the usb controller. As a result the time distance
>> between frame 1's start and frame 2's start would be 33ms + 4ms = 37ms (instead of 4ms and 37ms). So its a tradeoff between skew vs 4ms delay in starting the stream. The current logic avoids the
>> skew at the expense of the delay in the first frame - and its simpler to follow in code. Happy to hear your and others' thoughts on this as well.


Alright; I like the skew less than the delay. Like I say I don't think it's a dealbreaker, so let's 
leave that part as is.

>>
>>
>>>> In my local testing, I don't see any delay observable to the naked eye.
>>>>
>>>>>>     static void
>>>>>>     uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>>>     {
>>>>>> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>>>         struct uvc_video_queue *queue = &video->queue;
>>>>>>         struct uvc_buffer *last_buf = NULL;
>>>>>>         unsigned long flags;
>>>>>> +    bool is_bulk = video->max_payload_size;
>>>>>> +    int ret = 0;
>>>>>>           spin_lock_irqsave(&video->req_lock, flags);
>>>>>>         if (!video->is_enabled) {
>>>>>> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>>>>>          * back to req_free
>>>>>>          */
>>>>>>         if (video->is_enabled) {
>>>>>> -        list_add_tail(&req->list, &video->req_free);
>>>>>> +        /*
>>>>>> +         * Here we check whether any request is available in the ready
>>>>>> +         * list. If it is, queue it to the ep and add the current
>>>>>> +         * usb_request to the req_free list - for video_pump to fill in.
>>>>>> +         * Otherwise, just use the current usb_request to queue a 0
>>>>>> +         * length request to the ep. Since we always add to the req_free
>>>>>> +         * list if we dequeue from the ready list, there will never
>>>>>> +         * be a situation where the req_free list is completely out of
>>>>>> +         * requests and cannot recover.
>>>>>> +         */
>>>>>> +        struct usb_request *to_queue = req;
>>>>>> +        to_queue->length = 0;
>>>>>> +        if (!list_empty(&video->req_ready)) {
>>>>>> +            to_queue = list_first_entry(&video->req_ready,
>>>>>> +                struct usb_request, list);
>>>>>> +            list_del(&to_queue->list);
>>>>>> +            /* Add it to the free list. */
>>>>>> +            list_add_tail(&req->list, &video->req_free);
>>>>>> +        }
>>>>>> +        /*
>>>>>> +         * Queue to the endpoint. The actual queueing to ep will
>>>>>> +         * only happen on one thread - the async_wq for bulk endpoints
>>>>>> +         * and this thread for isoc endpoints.
>>>>>> +         */
>>>>>> +        ret = uvcg_video_usb_req_queue(video, to_queue,
>>>>>> +                           /*queue_to_ep*/!is_bulk);
>>>>> In principle in-line comments are fine, but I don't think the parameter name is worth a comment
>>>> Done.
>>>>
>>>>>> +        if(ret < 0) {
>>>>>> +            uvcg_queue_cancel(queue, 0);
>>>>>> +        }
>>>>>> +        /* Queue work to the wq as well since its possible that a buffer
>>>>>> +         * may not have been completed.
>>>>>> +         */
>>>>> The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq here then doesn't that mean it'll never run?
>>>> I've updated the comment here - it is possible that we hit a situation where the in-flight usb requests may not be enough to completely
>>>> encode a uvc buffer. In that case if we don't call queue_work, we'll never get the buffer marked as 'completed' and the buffer won't be
>>>> returned to user-space. That'll prevent the dequeue->queue->dequeue loop and flow of buffers.
>> I added this queue_work call to the if statement which checks if the ready list has any requests. If it doesn't, there's no point
>> in queuing any work - since we wouldn't be giving back to the req_free list.
>>
>> Thank you
>>
>>>>>>             queue_work(video->async_wq, &video->pump);
>>>>>>         } else {
>>>>>>             uvc_video_free_request(ureq, ep);
>>>>>> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>>>>>           INIT_LIST_HEAD(&video->ureqs);
>>>>>>         INIT_LIST_HEAD(&video->req_free);
>>>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>>>         video->req_size = 0;
>>>>>>         return 0;
>>>>>>     }
>>>>>> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>>         struct usb_request *req = NULL;
>>>>>>         struct uvc_buffer *buf;
>>>>>>         unsigned long flags;
>>>>>> -    bool buf_done;
>>>>>> -    int ret;
>>>>>> +    int ret = 0;
>>>>>>           while (true) {
>>>>>>             if (!video->ep->enabled)
>>>>>> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>>               if (buf != NULL) {
>>>>>>                 video->encode(req, video, buf);
>>>>>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>>>>>>             } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>>>>>>                 /*
>>>>>>                  * No video buffer available; the queue is still connected and
>>>>>> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>>                  * prevent missed ISOC transfers.
>>>>>>                  */
>>>>>>                 req->length = 0;
>>>>>> -            buf_done = false;
>>>>>>             } else {
>>>>>>                 /*
>>>>>>                  * Either the queue has been disconnected or no video buffer
>>>>>> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>>>>>>                 break;
>>>>>>             }
>>>>>>     -        /*
>>>>>> -         * With USB3 handling more requests at a higher speed, we can't
>>>>>> -         * afford to generate an interrupt for every request. Decide to
>>>>>> -         * interrupt:
>>>>>> -         *
>>>>>> -         * - When no more requests are available in the free queue, as
>>>>>> -         *   this may be our last chance to refill the endpoint's
>>>>>> -         *   request queue.
>>>>>> -         *
>>>>>> -         * - When this is request is the last request for the video
>>>>>> -         *   buffer, as we want to start sending the next video buffer
>>>>>> -         *   ASAP in case it doesn't get started already in the next
>>>>>> -         *   iteration of this loop.
>>>>>> -         *
>>>>>> -         * - Four times over the length of the requests queue (as
>>>>>> -         *   indicated by video->uvc_num_requests), as a trade-off
>>>>>> -         *   between latency and interrupt load.
>>>>>> -         */
>>>>>> -        if (list_empty(&video->req_free) || buf_done ||
>>>>>> -            !(video->req_int_count %
>>>>>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>>>>>> -            video->req_int_count = 0;
>>>>>> -            req->no_interrupt = 0;
>>>>>> -        } else {
>>>>>> -            req->no_interrupt = 1;
>>>>>> -        }
>>>>>> -
>>>>>> -        /* Queue the USB request */
>>>>>> -        ret = uvcg_video_ep_queue(video, req);
>>>>>>             spin_unlock_irqrestore(&queue->irqlock, flags);
>>>>>>     +        /* Queue the USB request.*/
>>>>> I think just drop this - it was always superfluous.
>>>> The uvcg_video_usb_req_queue function mentions that req_lock must be held while calling
>>>> it - since its possible we might add to the req_ready list. We could say the function
>>>> should hold req_lock only when the queue_to_ep parameter is false - but that doesn't
>>>> seem as clean ?
>>> Sorry - I wasn't clear here. I meant that the comment "Queue the USB request" was superfluous rather than the spin_lock_irqsave()
>> Removed.
>>
>>>>>> +        spin_lock_irqsave(&video->req_lock, flags);
>>>>>> +        /* For bulk end points we queue from the worker thread
>>>>>> +         * since we would preferably not want to wait on requests
>>>>>> +         * to be ready, in the uvcg_video_complete() handler.
>>>>>> +         * For isoc endpoints we add the request to the ready list
>>>>>> +         * and only queue it to the endpoint from the complete handler.
>>>>>> +         */
>>>>>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>>>>>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>>>>>> +
>>>>>>             if (ret < 0) {
>>>>>>                 uvcg_queue_cancel(queue, 0);
>>>>>>                 break;
>>>>>>             }
>>>>>>     -        /* Endpoint now owns the request */
>>>>>> +        /* The request is owned by  the endpoint / ready list*/
>>>>>>             req = NULL;
>>>>>> -        video->req_int_count++;
>>>>>>         }
>>>>>>           if (!req)
>>>>>> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>>>>>>           spin_lock_irqsave(&video->req_lock, flags);
>>>>>>         /*
>>>>>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>>>>>> +     * Remove all uvc_requests from ureqs with list_del_init
>>>>> This should get fixed in the earlier series.
>>>>>>          * This lets uvc_video_free_request correctly identify
>>>>>>          * if the uvc_request is attached to a list or not when freeing
>>>>>>          * memory.
>>>>>> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>>>>>>             list_del(&req->list);
>>>>>>             uvc_video_free_request(req->context, video->ep);
>>>>>>         }
>>>>>> -
>>>>> keep the empty line please
>>>> Done.
>>>>
>>>>>> +    list_for_each_entry_safe(req, temp, &video->req_ready, list) {
>>>>>> +        list_del(&req->list);
>>>>>> +        uvc_video_free_request(req->context, video->ep);
>>>>>> +    }
>>>>> and one here too.
>>>> Done.
>>>>
>>>> Thanks!
>>>>
>>>>>>         INIT_LIST_HEAD(&video->ureqs);
>>>>>>         INIT_LIST_HEAD(&video->req_free);
>>>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>>>         video->req_size = 0;
>>>>>>         spin_unlock_irqrestore(&video->req_lock, flags);
>>>>>>     @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>>>>>>           video->req_int_count = 0;
>>>>>>     -    queue_work(video->async_wq, &video->pump);
>>>>>> +    uvc_video_ep_queue_initial_requests(video);
>>>>>>           return ret;
>>>>>>     }
>>>>>> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>>>>>>         video->is_enabled = false;
>>>>>>         INIT_LIST_HEAD(&video->ureqs);
>>>>>>         INIT_LIST_HEAD(&video->req_free);
>>>>>> +    INIT_LIST_HEAD(&video->req_ready);
>>>>>>         spin_lock_init(&video->req_lock);
>>>>>>         INIT_WORK(&video->pump, uvcg_video_pump);
>>>>>>     

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH v7] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-09  7:34             ` [PATCH v6] " Jayant Chowdhary
  2023-11-16 10:09               ` Dan Scally
@ 2023-11-20  6:20               ` Jayant Chowdhary
  1 sibling, 0 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-20  6:20 UTC (permalink / raw)
  To: dan.scally, jchowdhary, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

When we use an async work queue to perform the function of pumping
usb requests to the usb controller, it is possible that amongst other
factors, thread scheduling affects at what cadence we're able to pump
requests. This could mean isoc usb requests miss their uframes - resulting
in video stream flickers on the host device.

To avoid this, we make the async_wq thread only produce isoc usb_requests
with uvc buffers encoded into them. The process of queueing to the
endpoint is done by the uvc_video_complete() handler. In case no
usb_requests are ready with encoded information, we just queue a zero
length request to the endpoint from the complete handler.

For bulk endpoints the async_wq thread still queues usb requests to the
endpoint.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
Suggested-by: Avichal Rakesh <arakesh@google.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
---
 Based on top of
 https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
 v1->v2: Added self Signed-Off-by and addressed review comments
 v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
	 for isoc transfers.
 v3->v4: Address review comments around code style.
 v4->v5: Update comments. Remove 0 length request queueing from async_wq
	 thread since it is already done by the complete handler.
 v5->v6: Fix checkpatch.pl suggestions.
 v6->v7: Fix code style review comments.

 drivers/usb/gadget/function/uvc.h       |   8 +
 drivers/usb/gadget/function/uvc_video.c | 204 ++++++++++++++++++------
 2 files changed, 166 insertions(+), 46 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index e8d4c87f1e09..5ff454528bd8 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -105,7 +105,15 @@ struct uvc_video {
 	bool is_enabled; /* tracks whether video stream is enabled */
 	unsigned int req_size;
 	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
+
+	/* USB requests that the video pump thread can encode into */
 	struct list_head req_free;
+
+	/*
+	 * USB requests video pump thread has already encoded into. These are
+	 * ready to be queued to the endpoint.
+	 */
+	struct list_head req_ready;
 	spinlock_t req_lock;
 
 	unsigned int req_int_count;
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 53feb790a4c3..dbf055d8094f 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -268,6 +268,101 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
 	return ret;
 }
 
+/* This function must be called with video->req_lock held. */
+static int uvcg_video_usb_req_queue(struct uvc_video *video,
+	struct usb_request *req, bool queue_to_ep)
+{
+	bool is_bulk = video->max_payload_size;
+	struct list_head *list = NULL;
+
+	if (!video->is_enabled) {
+		uvc_video_free_request(req->context, video->ep);
+		return -ENODEV;
+	}
+	if (queue_to_ep) {
+		struct uvc_request *ureq = req->context;
+		/*
+		 * With USB3 handling more requests at a higher speed, we can't
+		 * afford to generate an interrupt for every request. Decide to
+		 * interrupt:
+		 *
+		 * - When no more requests are available in the free queue, as
+		 *   this may be our last chance to refill the endpoint's
+		 *   request queue.
+		 *
+		 * - When this is request is the last request for the video
+		 *   buffer, as we want to start sending the next video buffer
+		 *   ASAP in case it doesn't get started already in the next
+		 *   iteration of this loop.
+		 *
+		 * - Four times over the length of the requests queue (as
+		 *   indicated by video->uvc_num_requests), as a trade-off
+		 *   between latency and interrupt load.
+		 */
+		if (list_empty(&video->req_free) || ureq->last_buf ||
+			!(video->req_int_count %
+			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
+			video->req_int_count = 0;
+			req->no_interrupt = 0;
+		} else {
+			req->no_interrupt = 1;
+		}
+		video->req_int_count++;
+		return uvcg_video_ep_queue(video, req);
+	}
+	/*
+	 * If we're not queuing to the ep, for isoc we're queuing
+	 * to the req_ready list, otherwise req_free.
+	 */
+	list = is_bulk ? &video->req_free : &video->req_ready;
+	list_add_tail(&req->list, list);
+	return 0;
+}
+
+/*
+ * Must only be called from uvcg_video_enable - since after that we only want to
+ * queue requests to the endpoint from the uvc_video_complete complete handler.
+ * This function is needed in order to 'kick start' the flow of requests from
+ * gadget driver to the usb controller.
+ */
+static void uvc_video_ep_queue_initial_requests(struct uvc_video *video)
+{
+	struct usb_request *req = NULL;
+	unsigned long flags = 0;
+	unsigned int count = 0;
+	int ret = 0;
+
+	/*
+	 * We only queue half of the free list since we still want to have
+	 * some free usb_requests in the free list for the video_pump async_wq
+	 * thread to encode uvc buffers into. Otherwise we could get into a
+	 * situation where the free list does not have any usb requests to
+	 * encode into - we always end up queueing 0 length requests to the
+	 * end point.
+	 */
+	unsigned int half_list_size = video->uvc_num_requests / 2;
+
+	spin_lock_irqsave(&video->req_lock, flags);
+	/*
+	 * Take these requests off the free list and queue them all to the
+	 * endpoint. Since we queue 0 length requests with the req_lock held,
+	 * there isn't any 'data' race involved here with the complete handler.
+	 */
+	while (count < half_list_size) {
+		req = list_first_entry(&video->req_free, struct usb_request,
+					list);
+		list_del(&req->list);
+		req->length = 0;
+		ret = uvcg_video_ep_queue(video, req);
+		if (ret < 0) {
+			uvcg_queue_cancel(&video->queue, 0);
+			break;
+		}
+		count++;
+	}
+	spin_unlock_irqrestore(&video->req_lock, flags);
+}
+
 static void
 uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 {
@@ -276,6 +371,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	struct uvc_video_queue *queue = &video->queue;
 	struct uvc_buffer *last_buf = NULL;
 	unsigned long flags;
+	bool is_bulk = video->max_payload_size;
+	int ret = 0;
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	if (!video->is_enabled) {
@@ -329,8 +426,45 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
 	 * back to req_free
 	 */
 	if (video->is_enabled) {
-		list_add_tail(&req->list, &video->req_free);
-		queue_work(video->async_wq, &video->pump);
+		/*
+		 * Here we check whether any request is available in the ready
+		 * list. If it is, queue it to the ep and add the current
+		 * usb_request to the req_free list - for video_pump to fill in.
+		 * Otherwise, just use the current usb_request to queue a 0
+		 * length request to the ep. Since we always add to the req_free
+		 * list if we dequeue from the ready list, there will never
+		 * be a situation where the req_free list is completely out of
+		 * requests and cannot recover.
+		 */
+		struct usb_request *to_queue = req;
+
+		to_queue->length = 0;
+		if (!list_empty(&video->req_ready)) {
+			to_queue = list_first_entry(&video->req_ready,
+				struct usb_request, list);
+			list_del(&to_queue->list);
+			list_add_tail(&req->list, &video->req_free);
+			/*
+			 * Queue work to the wq as well since it is possible that a
+			 * buffer may not have been completely encoded with the set of
+			 * in-flight usb requests for whih the complete callbacks are
+			 * firing.
+			 * In that case, if we do not queue work to the worker thread,
+			 * the buffer will never be marked as complete - and therefore
+			 * not be returned to userpsace. As a result,
+			 * dequeue -> queue -> dequeue flow of uvc buffers will not
+			 * happen.
+			 */
+			queue_work(video->async_wq, &video->pump);
+		}
+		/*
+		 * Queue to the endpoint. The actual queueing to ep will
+		 * only happen on one thread - the async_wq for bulk endpoints
+		 * and this thread for isoc endpoints.
+		 */
+		ret = uvcg_video_usb_req_queue(video, to_queue, !is_bulk);
+		if (ret < 0)
+			uvcg_queue_cancel(queue, 0);
 	} else {
 		uvc_video_free_request(ureq, ep);
 	}
@@ -347,6 +481,7 @@ uvc_video_free_requests(struct uvc_video *video)
 
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	return 0;
 }
@@ -424,8 +559,7 @@ static void uvcg_video_pump(struct work_struct *work)
 	struct usb_request *req = NULL;
 	struct uvc_buffer *buf;
 	unsigned long flags;
-	bool buf_done;
-	int ret;
+	int ret = 0;
 
 	while (true) {
 		if (!video->ep->enabled)
@@ -454,15 +588,6 @@ static void uvcg_video_pump(struct work_struct *work)
 
 		if (buf != NULL) {
 			video->encode(req, video, buf);
-			buf_done = buf->state == UVC_BUF_STATE_DONE;
-		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
-			/*
-			 * No video buffer available; the queue is still connected and
-			 * we're transferring over ISOC. Queue a 0 length request to
-			 * prevent missed ISOC transfers.
-			 */
-			req->length = 0;
-			buf_done = false;
 		} else {
 			/*
 			 * Either the queue has been disconnected or no video buffer
@@ -473,45 +598,25 @@ static void uvcg_video_pump(struct work_struct *work)
 			break;
 		}
 
-		/*
-		 * With USB3 handling more requests at a higher speed, we can't
-		 * afford to generate an interrupt for every request. Decide to
-		 * interrupt:
-		 *
-		 * - When no more requests are available in the free queue, as
-		 *   this may be our last chance to refill the endpoint's
-		 *   request queue.
-		 *
-		 * - When this is request is the last request for the video
-		 *   buffer, as we want to start sending the next video buffer
-		 *   ASAP in case it doesn't get started already in the next
-		 *   iteration of this loop.
-		 *
-		 * - Four times over the length of the requests queue (as
-		 *   indicated by video->uvc_num_requests), as a trade-off
-		 *   between latency and interrupt load.
-		 */
-		if (list_empty(&video->req_free) || buf_done ||
-		    !(video->req_int_count %
-		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
-			video->req_int_count = 0;
-			req->no_interrupt = 0;
-		} else {
-			req->no_interrupt = 1;
-		}
-
-		/* Queue the USB request */
-		ret = uvcg_video_ep_queue(video, req);
 		spin_unlock_irqrestore(&queue->irqlock, flags);
 
+		spin_lock_irqsave(&video->req_lock, flags);
+		/* For bulk end points we queue from the worker thread
+		 * since we would preferably not want to wait on requests
+		 * to be ready, in the uvcg_video_complete() handler.
+		 * For isoc endpoints we add the request to the ready list
+		 * and only queue it to the endpoint from the complete handler.
+		 */
+		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
+		spin_unlock_irqrestore(&video->req_lock, flags);
+
 		if (ret < 0) {
 			uvcg_queue_cancel(queue, 0);
 			break;
 		}
 
-		/* Endpoint now owns the request */
+		/* The request is owned by  the endpoint / ready list. */
 		req = NULL;
-		video->req_int_count++;
 	}
 
 	if (!req)
@@ -567,7 +672,7 @@ uvcg_video_disable(struct uvc_video *video)
 
 	spin_lock_irqsave(&video->req_lock, flags);
 	/*
-	 * Remove all uvc_reqeusts from ureqs with list_del_init
+	 * Remove all uvc_requests from ureqs with list_del_init
 	 * This lets uvc_video_free_request correctly identify
 	 * if the uvc_request is attached to a list or not when freeing
 	 * memory.
@@ -580,8 +685,14 @@ uvcg_video_disable(struct uvc_video *video)
 		uvc_video_free_request(req->context, video->ep);
 	}
 
+	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
+		list_del(&req->list);
+		uvc_video_free_request(req->context, video->ep);
+	}
+
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	video->req_size = 0;
 	spin_unlock_irqrestore(&video->req_lock, flags);
 
@@ -635,7 +746,7 @@ int uvcg_video_enable(struct uvc_video *video)
 
 	video->req_int_count = 0;
 
-	queue_work(video->async_wq, &video->pump);
+	uvc_video_ep_queue_initial_requests(video);
 
 	return ret;
 }
@@ -648,6 +759,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
 	video->is_enabled = false;
 	INIT_LIST_HEAD(&video->ureqs);
 	INIT_LIST_HEAD(&video->req_free);
+	INIT_LIST_HEAD(&video->req_ready);
 	spin_lock_init(&video->req_lock);
 	INIT_WORK(&video->pump, uvcg_video_pump);
 
-- 
2.43.0.rc0.421.g78406f8d94-goog


^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH v6] usb:gadget:uvc Do not use worker thread to pump isoc usb requests
  2023-11-16 10:09               ` Dan Scally
@ 2023-11-20  6:30                 ` Jayant Chowdhary
  0 siblings, 0 replies; 31+ messages in thread
From: Jayant Chowdhary @ 2023-11-20  6:30 UTC (permalink / raw)
  To: Dan Scally, stern, laurent.pinchart, m.grzeschik, gregkh
  Cc: Thinh.Nguyen, arakesh, etalvala, linux-kernel, linux-usb

Hi Dan,
Thanks for the comments. I've uploaded a new patch at https://lore.kernel.org/linux-usb/20231120062026.3759463-1-jchowdhary@google.com/T/#u.

On 11/16/23 02:09, Dan Scally wrote:
> Hi Jayant, thanks for the update. I just have a couple of styling comments.
>
> On 09/11/2023 07:34, Jayant Chowdhary wrote:
>> When we use an async work queue to perform the function of pumping
>> usb requests to the usb controller, it is possible that amongst other
>> factors, thread scheduling affects at what cadence we're able to pump
>> requests. This could mean isoc usb requests miss their uframes - resulting
>> in video stream flickers on the host device.
>>
>> To avoid this, we make the async_wq thread only produce isoc usb_requests
>> with uvc buffers encoded into them. The process of queueing to the
>> endpoint is done by the uvc_video_complete() handler. In case no
>> usb_requests are ready with encoded information, we just queue a zero
>> length request to the endpoint from the complete handler.
>>
>> For bulk endpoints the async_wq thread still queues usb requests to the
>> endpoint.
>>
>> Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
>> Signed-off-by: Jayant Chowdhary <jchowdhary@google.com>
>> Suggested-by: Avichal Rakesh <arakesh@google.com>
>> Suggested-by: Alan Stern <stern@rowland.harvard.edu>
>> ---
>>   Based on top of
>>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>>   v1->v2: Added self Signed-Off-by and addressed review comments
>>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
>>      for isoc transfers.
>>   v3->v4: Address review comments around code style.
>>   v4->v5: Update comments. Remove 0 length request queueing from async_wq
>>      thread since it is already done by the complete handler.
>>   v5->v6: Fix checkpatch.pl suggestions.
>>
>>   drivers/usb/gadget/function/uvc.h       |   8 +
>>   drivers/usb/gadget/function/uvc_video.c | 204 ++++++++++++++++++------
>>   2 files changed, 166 insertions(+), 46 deletions(-)
>>
>> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
>> index e8d4c87f1e09..5ff454528bd8 100644
>> --- a/drivers/usb/gadget/function/uvc.h
>> +++ b/drivers/usb/gadget/function/uvc.h
>> @@ -105,7 +105,15 @@ struct uvc_video {
>>       bool is_enabled; /* tracks whether video stream is enabled */
>>       unsigned int req_size;
>>       struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
>> +
>> +    /* USB requests that the video pump thread can encode into */
>>       struct list_head req_free;
>> +
>> +    /*
>> +     * USB requests video pump thread has already encoded into. These are
>> +     * ready to be queued to the endpoint.
>> +     */
>> +    struct list_head req_ready;
>>       spinlock_t req_lock;
>>         unsigned int req_int_count;
>> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
>> index 53feb790a4c3..d5311456fa8a 100644
>> --- a/drivers/usb/gadget/function/uvc_video.c
>> +++ b/drivers/usb/gadget/function/uvc_video.c
>> @@ -268,6 +268,100 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>>       return ret;
>>   }
>>   +/* This function must be called with video->req_lock held. */
>> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
>> +    struct usb_request *req, bool queue_to_ep)
>> +{
>> +    bool is_bulk = video->max_payload_size;
>> +    struct list_head *list = NULL;
>> +
>> +    if (!video->is_enabled) {
>> +        uvc_video_free_request(req->context, video->ep);
>> +        return -ENODEV;
>> +    }
>> +    if (queue_to_ep) {
>> +        struct uvc_request *ureq = req->context;
>> +        /*
>> +         * With USB3 handling more requests at a higher speed, we can't
>> +         * afford to generate an interrupt for every request. Decide to
>> +         * interrupt:
>> +         *
>> +         * - When no more requests are available in the free queue, as
>> +         *   this may be our last chance to refill the endpoint's
>> +         *   request queue.
>> +         *
>> +         * - When this is request is the last request for the video
>> +         *   buffer, as we want to start sending the next video buffer
>> +         *   ASAP in case it doesn't get started already in the next
>> +         *   iteration of this loop.
>> +         *
>> +         * - Four times over the length of the requests queue (as
>> +         *   indicated by video->uvc_num_requests), as a trade-off
>> +         *   between latency and interrupt load.
>> +         */
>> +        if (list_empty(&video->req_free) || ureq->last_buf ||
>> +            !(video->req_int_count %
>> +            DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>> +            video->req_int_count = 0;
>> +            req->no_interrupt = 0;
>> +        } else {
>> +            req->no_interrupt = 1;
>> +        }
>> +        video->req_int_count++;
>> +        return uvcg_video_ep_queue(video, req);
>> +    }
>> +    /*
>> +     * If we're not queuing to the ep, for isoc we're queuing
>> +     * to the req_ready list, otherwise req_free.
>> +     */
>> +    list = is_bulk ? &video->req_free : &video->req_ready;
>> +    list_add_tail(&req->list, list);
>> +    return 0;
>> +}
>> +
>> +/*
>> + * Must only be called from uvcg_video_enable - since after that we only want to
>> + * queue requests to the endpoint from the uvc_video_complete complete handler.
>> + * This function is needed in order to 'kick start' the flow of requests from
>> + * gadget driver to the usb controller.
>> + */
>> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video)
>> +{
>> +    struct usb_request *req = NULL;
>> +    unsigned long flags = 0;
>> +    unsigned int count = 0;
>> +    int ret = 0;
> Add an empty line here please

Done.

>> +    /*
>> +     * We only queue half of the free list since we still want to have
>> +     * some free usb_requests in the free list for the video_pump async_wq
>> +     * thread to encode uvc buffers into. Otherwise we could get into a
>> +     * situation where the free list does not have any usb requests to
>> +     * encode into - we always end up queueing 0 length requests to the
>> +     * end point.
>> +     */
>> +    unsigned int half_list_size = video->uvc_num_requests / 2;
>> +
>> +    spin_lock_irqsave(&video->req_lock, flags);
>> +    /*
>> +     * Take these requests off the free list and queue them all to the
>> +     * endpoint. Since we queue 0 length requests with the req_lock held,
>> +     * there isn't any 'data' race involved here with the complete handler.
>> +     */
>> +    while (count < half_list_size) {
>> +        req = list_first_entry(&video->req_free, struct usb_request,
>> +                    list);
>> +        list_del(&req->list);
>> +        req->length = 0;
>> +        ret = uvcg_video_ep_queue(video, req);
>> +        if (ret < 0) {
>> +            uvcg_queue_cancel(&video->queue, /*disconnect*/0);
>
>
> Drop the /*disconnect*/ comment please

Done.

>
>> +            break;
>> +        }
>> +        count++;
>> +    }
>> +    spin_unlock_irqrestore(&video->req_lock, flags);
>> +}
>> +
>>   static void
>>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>   {
>> @@ -276,6 +370,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>       struct uvc_video_queue *queue = &video->queue;
>>       struct uvc_buffer *last_buf = NULL;
>>       unsigned long flags;
>> +    bool is_bulk = video->max_payload_size;
>> +    int ret = 0;
>>         spin_lock_irqsave(&video->req_lock, flags);
>>       if (!video->is_enabled) {
>> @@ -329,8 +425,46 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>>        * back to req_free
>>        */
>>       if (video->is_enabled) {
>> -        list_add_tail(&req->list, &video->req_free);
>> -        queue_work(video->async_wq, &video->pump);
>> +        /*
>> +         * Here we check whether any request is available in the ready
>> +         * list. If it is, queue it to the ep and add the current
>> +         * usb_request to the req_free list - for video_pump to fill in.
>> +         * Otherwise, just use the current usb_request to queue a 0
>> +         * length request to the ep. Since we always add to the req_free
>> +         * list if we dequeue from the ready list, there will never
>> +         * be a situation where the req_free list is completely out of
>> +         * requests and cannot recover.
>> +         */
>> +        struct usb_request *to_queue = req;
>> +
>> +        to_queue->length = 0;
>> +        if (!list_empty(&video->req_ready)) {
>> +            to_queue = list_first_entry(&video->req_ready,
>> +                struct usb_request, list);
>> +            list_del(&to_queue->list);
>> +            /* Add it to the free list. */
> I would drop the "Add it to the free list" comment; the code is clear already.

Done.

>> +            list_add_tail(&req->list, &video->req_free);
>> +            /*
>> +             * Queue work to the wq as well since it is possible that a
>> +             * buffer may not have been completely encoded with the set of
>> +             * in-flight usb requests for whih the complete callbacks are
>> +             * firing.
>> +             * In that case, if we do not queue work to the worker thread,
>> +             * the buffer will never be marked as complete - and therefore
>> +             * not be returned to userpsace. As a result,
>> +             * dequeue -> queue -> dequeue flow of uvc buffers will not
>> +             * happen.
>> +             */
>> +            queue_work(video->async_wq, &video->pump);
>> +        }
>> +        /*
>> +         * Queue to the endpoint. The actual queueing to ep will
>> +         * only happen on one thread - the async_wq for bulk endpoints
>> +         * and this thread for isoc endpoints.
>> +         */
>> +        ret = uvcg_video_usb_req_queue(video, to_queue, !is_bulk);
>> +        if (ret < 0)
>> +            uvcg_queue_cancel(queue, 0);
>>       } else {
>>           uvc_video_free_request(ureq, ep);
>>       }
>> @@ -347,6 +481,7 @@ uvc_video_free_requests(struct uvc_video *video)
>>         INIT_LIST_HEAD(&video->ureqs);
>>       INIT_LIST_HEAD(&video->req_free);
>> +    INIT_LIST_HEAD(&video->req_ready);
>>       video->req_size = 0;
>>       return 0;
>>   }
>> @@ -424,8 +559,7 @@ static void uvcg_video_pump(struct work_struct *work)
>>       struct usb_request *req = NULL;
>>       struct uvc_buffer *buf;
>>       unsigned long flags;
>> -    bool buf_done;
>> -    int ret;
>> +    int ret = 0;
>>         while (true) {
>>           if (!video->ep->enabled)
>> @@ -454,15 +588,6 @@ static void uvcg_video_pump(struct work_struct *work)
>>             if (buf != NULL) {
>>               video->encode(req, video, buf);
>> -            buf_done = buf->state == UVC_BUF_STATE_DONE;
>> -        } else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>> -            /*
>> -             * No video buffer available; the queue is still connected and
>> -             * we're transferring over ISOC. Queue a 0 length request to
>> -             * prevent missed ISOC transfers.
>> -             */
>> -            req->length = 0;
>> -            buf_done = false;
>>           } else {
>>               /*
>>                * Either the queue has been disconnected or no video buffer
>> @@ -473,45 +598,25 @@ static void uvcg_video_pump(struct work_struct *work)
>>               break;
>>           }
>>   -        /*
>> -         * With USB3 handling more requests at a higher speed, we can't
>> -         * afford to generate an interrupt for every request. Decide to
>> -         * interrupt:
>> -         *
>> -         * - When no more requests are available in the free queue, as
>> -         *   this may be our last chance to refill the endpoint's
>> -         *   request queue.
>> -         *
>> -         * - When this is request is the last request for the video
>> -         *   buffer, as we want to start sending the next video buffer
>> -         *   ASAP in case it doesn't get started already in the next
>> -         *   iteration of this loop.
>> -         *
>> -         * - Four times over the length of the requests queue (as
>> -         *   indicated by video->uvc_num_requests), as a trade-off
>> -         *   between latency and interrupt load.
>> -         */
>> -        if (list_empty(&video->req_free) || buf_done ||
>> -            !(video->req_int_count %
>> -               DIV_ROUND_UP(video->uvc_num_requests, 4))) {
>> -            video->req_int_count = 0;
>> -            req->no_interrupt = 0;
>> -        } else {
>> -            req->no_interrupt = 1;
>> -        }
>> -
>> -        /* Queue the USB request */
>> -        ret = uvcg_video_ep_queue(video, req);
>>           spin_unlock_irqrestore(&queue->irqlock, flags);
>>   +        spin_lock_irqsave(&video->req_lock, flags);
>> +        /* For bulk end points we queue from the worker thread
>> +         * since we would preferably not want to wait on requests
>> +         * to be ready, in the uvcg_video_complete() handler.
>> +         * For isoc endpoints we add the request to the ready list
>> +         * and only queue it to the endpoint from the complete handler.
>> +         */
>> +        ret = uvcg_video_usb_req_queue(video, req, is_bulk);
>> +        spin_unlock_irqrestore(&video->req_lock, flags);
>> +
>>           if (ret < 0) {
>>               uvcg_queue_cancel(queue, 0);
>>               break;
>>           }
>>   -        /* Endpoint now owns the request */
>> +        /* The request is owned by  the endpoint / ready list. */
>>           req = NULL;
>> -        video->req_int_count++;
>>       }
>>         if (!req)
>> @@ -567,7 +672,7 @@ uvcg_video_disable(struct uvc_video *video)
>>         spin_lock_irqsave(&video->req_lock, flags);
>>       /*
>> -     * Remove all uvc_reqeusts from ureqs with list_del_init
>> +    * Remove all uvc_requests from ureqs with list_del_init
>
> Did the alignment of the * get messed up here as well as the typo fix or is it just my mail client being weird?
>

I think the alignment did indeed get messed up. Fixed.

Thank you,

Jayant


^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2023-11-20  6:32 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-25 22:59 [PATCH] usb:gadget:uvc Do not use worker thread to pump usb requests Jayant Chowdhary
2023-10-26  6:58 ` Michael Grzeschik
2023-10-26 21:56   ` [PATCH v2] " Jayant Chowdhary
2023-10-27  7:19     ` Greg KH
2023-10-27  7:51     ` Laurent Pinchart
2023-10-27 11:10       ` Michael Grzeschik
2023-10-27 11:47         ` Laurent Pinchart
2023-10-27 13:39           ` Michael Grzeschik
2023-10-27 14:58             ` Alan Stern
2023-10-28 11:10               ` Michael Grzeschik
2023-10-28 14:09                 ` Jayant Chowdhary
2023-10-31  6:11                   ` Jayant Chowdhary
2023-11-02  6:06                     ` Jayant Chowdhary
2023-10-27 10:44     ` Greg KH
2023-11-02  6:01     ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc " Jayant Chowdhary
2023-11-02 16:07       ` Dan Scally
2023-11-03  7:13         ` [PATCH v4] usb:gadget:uvc Do not use worker thread to pump " Jayant Chowdhary
2023-11-09  2:12           ` [PATCH v5] " Jayant Chowdhary
2023-11-09  5:29             ` Greg KH
2023-11-09  7:38               ` Jayant Chowdhary
2023-11-09  7:34             ` [PATCH v6] " Jayant Chowdhary
2023-11-16 10:09               ` Dan Scally
2023-11-20  6:30                 ` Jayant Chowdhary
2023-11-20  6:20               ` [PATCH v7] " Jayant Chowdhary
2023-11-03  7:28         ` [PATCH v3] usb:gadget:uvc Do not use worker thread to queue " Jayant Chowdhary
2023-11-03 10:29           ` Michael Grzeschik
2023-11-06 17:51             ` Jayant Chowdhary
2023-11-07 17:01           ` Dan Scally
2023-11-09 16:46             ` Jayant Chowdhary
2023-11-14 18:52               ` Jayant Chowdhary
2023-11-16 10:10                 ` Dan Scally

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.