All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-01 18:33 ` Jernej Skrabec
  0 siblings, 0 replies; 14+ messages in thread
From: Jernej Skrabec @ 2022-02-01 18:33 UTC (permalink / raw)
  To: mripard, paul.kocialkowski
  Cc: mchehab, wens, linux-media, linux-staging, linux-arm-kernel,
	linux-sunxi, linux-kernel, Jernej Skrabec

Currently, if job is not completed for whatever reason, userspace
application can hang on ioctl and thus become unkillable.

In order to prevent that, implement watchdog, which will complete job
after 2 seconds with error state.

Concept is borrowed from hantro driver.

Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
---
 drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
 .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
 .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
 5 files changed, 36 insertions(+)

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 4a4b714b0f26..68b3dcdb5df3 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
 
 	mutex_init(&dev->dev_mutex);
 
+	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
+
 	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to register V4L2 device\n");
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index c345f2984041..3bc094eb497f 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -24,6 +24,7 @@
 
 #include <linux/iopoll.h>
 #include <linux/platform_device.h>
+#include <linux/workqueue.h>
 
 #define CEDRUS_NAME			"cedrus"
 
@@ -194,6 +195,8 @@ struct cedrus_dev {
 	struct reset_control	*rstc;
 
 	unsigned int		capabilities;
+
+	struct delayed_work	watchdog_work;
 };
 
 extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index a16c1422558f..9c7200299465 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
 		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
 
 	dev->dec_ops[ctx->current_codec]->trigger(ctx);
+
+	/* Start the watchdog timer. */
+	schedule_delayed_work(&dev->watchdog_work,
+			      msecs_to_jiffies(2000));
 }
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
index 2d7663726467..a6470a89851e 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
@@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
 	enum vb2_buffer_state state;
 	enum cedrus_irq_status status;
 
+	/*
+	 * If cancel_delayed_work returns false it means watchdog already
+	 * executed and finished the job.
+	 */
+	if (!cancel_delayed_work(&dev->watchdog_work))
+		return IRQ_HANDLED;
+
 	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
 	if (!ctx) {
 		v4l2_err(&dev->v4l2_dev,
@@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+void cedrus_watchdog(struct work_struct *work)
+{
+	struct cedrus_dev *dev;
+	struct cedrus_ctx *ctx;
+
+	dev = container_of(to_delayed_work(work),
+			   struct cedrus_dev, watchdog_work);
+
+	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
+	if (!ctx)
+		return;
+
+	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
+	reset_control_reset(dev->rstc);
+	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
+					 VB2_BUF_STATE_ERROR);
+}
+
 int cedrus_hw_suspend(struct device *device)
 {
 	struct cedrus_dev *dev = dev_get_drvdata(device);
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
index 45f641f0bfa2..7c92f00e36da 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
@@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
 int cedrus_hw_probe(struct cedrus_dev *dev);
 void cedrus_hw_remove(struct cedrus_dev *dev);
 
+void cedrus_watchdog(struct work_struct *work);
+
 #endif
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-01 18:33 ` Jernej Skrabec
  0 siblings, 0 replies; 14+ messages in thread
From: Jernej Skrabec @ 2022-02-01 18:33 UTC (permalink / raw)
  To: mripard, paul.kocialkowski
  Cc: mchehab, wens, linux-media, linux-staging, linux-arm-kernel,
	linux-sunxi, linux-kernel, Jernej Skrabec

Currently, if job is not completed for whatever reason, userspace
application can hang on ioctl and thus become unkillable.

In order to prevent that, implement watchdog, which will complete job
after 2 seconds with error state.

Concept is borrowed from hantro driver.

Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
---
 drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
 .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
 .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
 5 files changed, 36 insertions(+)

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 4a4b714b0f26..68b3dcdb5df3 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
 
 	mutex_init(&dev->dev_mutex);
 
+	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
+
 	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to register V4L2 device\n");
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index c345f2984041..3bc094eb497f 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -24,6 +24,7 @@
 
 #include <linux/iopoll.h>
 #include <linux/platform_device.h>
+#include <linux/workqueue.h>
 
 #define CEDRUS_NAME			"cedrus"
 
@@ -194,6 +195,8 @@ struct cedrus_dev {
 	struct reset_control	*rstc;
 
 	unsigned int		capabilities;
+
+	struct delayed_work	watchdog_work;
 };
 
 extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index a16c1422558f..9c7200299465 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
 		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
 
 	dev->dec_ops[ctx->current_codec]->trigger(ctx);
+
+	/* Start the watchdog timer. */
+	schedule_delayed_work(&dev->watchdog_work,
+			      msecs_to_jiffies(2000));
 }
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
index 2d7663726467..a6470a89851e 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
@@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
 	enum vb2_buffer_state state;
 	enum cedrus_irq_status status;
 
+	/*
+	 * If cancel_delayed_work returns false it means watchdog already
+	 * executed and finished the job.
+	 */
+	if (!cancel_delayed_work(&dev->watchdog_work))
+		return IRQ_HANDLED;
+
 	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
 	if (!ctx) {
 		v4l2_err(&dev->v4l2_dev,
@@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+void cedrus_watchdog(struct work_struct *work)
+{
+	struct cedrus_dev *dev;
+	struct cedrus_ctx *ctx;
+
+	dev = container_of(to_delayed_work(work),
+			   struct cedrus_dev, watchdog_work);
+
+	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
+	if (!ctx)
+		return;
+
+	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
+	reset_control_reset(dev->rstc);
+	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
+					 VB2_BUF_STATE_ERROR);
+}
+
 int cedrus_hw_suspend(struct device *device)
 {
 	struct cedrus_dev *dev = dev_get_drvdata(device);
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
index 45f641f0bfa2..7c92f00e36da 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
@@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
 int cedrus_hw_probe(struct cedrus_dev *dev);
 void cedrus_hw_remove(struct cedrus_dev *dev);
 
+void cedrus_watchdog(struct work_struct *work);
+
 #endif
-- 
2.35.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
  2022-02-01 18:33 ` Jernej Skrabec
@ 2022-02-04  9:09   ` Paul Kocialkowski
  -1 siblings, 0 replies; 14+ messages in thread
From: Paul Kocialkowski @ 2022-02-04  9:09 UTC (permalink / raw)
  To: Jernej Skrabec
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 5262 bytes --]

Hi Jernej,

On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> Currently, if job is not completed for whatever reason, userspace
> application can hang on ioctl and thus become unkillable.
> 
> In order to prevent that, implement watchdog, which will complete job
> after 2 seconds with error state.
> 
> Concept is borrowed from hantro driver.

Good idea to implement a watchdog here, thanks!
See comments below.

> Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> ---
>  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
>  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
>  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
>  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
>  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
>  5 files changed, 36 insertions(+)
> 
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
> index 4a4b714b0f26..68b3dcdb5df3 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
>  
>  	mutex_init(&dev->dev_mutex);
>  
> +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> +
>  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
>  	if (ret) {
>  		dev_err(&pdev->dev, "Failed to register V4L2 device\n");
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
> index c345f2984041..3bc094eb497f 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> @@ -24,6 +24,7 @@
>  
>  #include <linux/iopoll.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>  
>  #define CEDRUS_NAME			"cedrus"
>  
> @@ -194,6 +195,8 @@ struct cedrus_dev {
>  	struct reset_control	*rstc;
>  
>  	unsigned int		capabilities;
> +
> +	struct delayed_work	watchdog_work;
>  };
>  
>  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> index a16c1422558f..9c7200299465 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
>  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
>  
>  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> +
> +	/* Start the watchdog timer. */
> +	schedule_delayed_work(&dev->watchdog_work,
> +			      msecs_to_jiffies(2000));
>  }
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> index 2d7663726467..a6470a89851e 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>  	enum vb2_buffer_state state;
>  	enum cedrus_irq_status status;
>  
> +	/*
> +	 * If cancel_delayed_work returns false it means watchdog already
> +	 * executed and finished the job.
> +	 */
> +	if (!cancel_delayed_work(&dev->watchdog_work))
> +		return IRQ_HANDLED;
> +
>  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
>  	if (!ctx) {
>  		v4l2_err(&dev->v4l2_dev,
> @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>  	return IRQ_HANDLED;
>  }
>  
> +void cedrus_watchdog(struct work_struct *work)
> +{
> +	struct cedrus_dev *dev;
> +	struct cedrus_ctx *ctx;
> +
> +	dev = container_of(to_delayed_work(work),
> +			   struct cedrus_dev, watchdog_work);
> +
> +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> +	if (!ctx)
> +		return;
> +
> +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> +	reset_control_reset(dev->rstc);

I don't think playing with the reset is the right approach here.
First we don't really know if the reset is shared or not, so this might have
no effect. Then even if it does, wouldn't this just reset the state of the
registers to an unconfigured state, making it impossible to decode any
future frame in the same context?

Honestly I'm not sure what a good process would be to get back on track
here so I would be tempted to just do nothing an return errors.

That's already better than being stuck.

Paul

> +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
> +					 VB2_BUF_STATE_ERROR);
> +}
> +
>  int cedrus_hw_suspend(struct device *device)
>  {
>  	struct cedrus_dev *dev = dev_get_drvdata(device);
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> index 45f641f0bfa2..7c92f00e36da 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
>  int cedrus_hw_probe(struct cedrus_dev *dev);
>  void cedrus_hw_remove(struct cedrus_dev *dev);
>  
> +void cedrus_watchdog(struct work_struct *work);
> +
>  #endif
> -- 
> 2.35.1
> 

-- 
Paul Kocialkowski, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-04  9:09   ` Paul Kocialkowski
  0 siblings, 0 replies; 14+ messages in thread
From: Paul Kocialkowski @ 2022-02-04  9:09 UTC (permalink / raw)
  To: Jernej Skrabec
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 5262 bytes --]

Hi Jernej,

On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> Currently, if job is not completed for whatever reason, userspace
> application can hang on ioctl and thus become unkillable.
> 
> In order to prevent that, implement watchdog, which will complete job
> after 2 seconds with error state.
> 
> Concept is borrowed from hantro driver.

Good idea to implement a watchdog here, thanks!
See comments below.

> Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> ---
>  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
>  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
>  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
>  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
>  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
>  5 files changed, 36 insertions(+)
> 
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
> index 4a4b714b0f26..68b3dcdb5df3 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
>  
>  	mutex_init(&dev->dev_mutex);
>  
> +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> +
>  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
>  	if (ret) {
>  		dev_err(&pdev->dev, "Failed to register V4L2 device\n");
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
> index c345f2984041..3bc094eb497f 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> @@ -24,6 +24,7 @@
>  
>  #include <linux/iopoll.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>  
>  #define CEDRUS_NAME			"cedrus"
>  
> @@ -194,6 +195,8 @@ struct cedrus_dev {
>  	struct reset_control	*rstc;
>  
>  	unsigned int		capabilities;
> +
> +	struct delayed_work	watchdog_work;
>  };
>  
>  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> index a16c1422558f..9c7200299465 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
>  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
>  
>  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> +
> +	/* Start the watchdog timer. */
> +	schedule_delayed_work(&dev->watchdog_work,
> +			      msecs_to_jiffies(2000));
>  }
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> index 2d7663726467..a6470a89851e 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>  	enum vb2_buffer_state state;
>  	enum cedrus_irq_status status;
>  
> +	/*
> +	 * If cancel_delayed_work returns false it means watchdog already
> +	 * executed and finished the job.
> +	 */
> +	if (!cancel_delayed_work(&dev->watchdog_work))
> +		return IRQ_HANDLED;
> +
>  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
>  	if (!ctx) {
>  		v4l2_err(&dev->v4l2_dev,
> @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>  	return IRQ_HANDLED;
>  }
>  
> +void cedrus_watchdog(struct work_struct *work)
> +{
> +	struct cedrus_dev *dev;
> +	struct cedrus_ctx *ctx;
> +
> +	dev = container_of(to_delayed_work(work),
> +			   struct cedrus_dev, watchdog_work);
> +
> +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> +	if (!ctx)
> +		return;
> +
> +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> +	reset_control_reset(dev->rstc);

I don't think playing with the reset is the right approach here.
First we don't really know if the reset is shared or not, so this might have
no effect. Then even if it does, wouldn't this just reset the state of the
registers to an unconfigured state, making it impossible to decode any
future frame in the same context?

Honestly I'm not sure what a good process would be to get back on track
here so I would be tempted to just do nothing an return errors.

That's already better than being stuck.

Paul

> +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
> +					 VB2_BUF_STATE_ERROR);
> +}
> +
>  int cedrus_hw_suspend(struct device *device)
>  {
>  	struct cedrus_dev *dev = dev_get_drvdata(device);
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> index 45f641f0bfa2..7c92f00e36da 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
>  int cedrus_hw_probe(struct cedrus_dev *dev);
>  void cedrus_hw_remove(struct cedrus_dev *dev);
>  
> +void cedrus_watchdog(struct work_struct *work);
> +
>  #endif
> -- 
> 2.35.1
> 

-- 
Paul Kocialkowski, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

[-- Attachment #2: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
  2022-02-04  9:09   ` Paul Kocialkowski
@ 2022-02-05  7:40     ` Jernej Škrabec
  -1 siblings, 0 replies; 14+ messages in thread
From: Jernej Škrabec @ 2022-02-05  7:40 UTC (permalink / raw)
  To: Paul Kocialkowski
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

Dne petek, 04. februar 2022 ob 10:09:56 CET je Paul Kocialkowski napisal(a):
> Hi Jernej,
> 
> On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> > Currently, if job is not completed for whatever reason, userspace
> > application can hang on ioctl and thus become unkillable.
> > 
> > In order to prevent that, implement watchdog, which will complete job
> > after 2 seconds with error state.
> > 
> > Concept is borrowed from hantro driver.
> 
> Good idea to implement a watchdog here, thanks!
> See comments below.
> 
> > Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > ---
> > 
> >  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
> >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
> >  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
> >  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
> >  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
> >  5 files changed, 36 insertions(+)
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > 4a4b714b0f26..68b3dcdb5df3 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
> > 
> >  	mutex_init(&dev->dev_mutex);
> > 
> > +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> > +
> > 
> >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> >  	if (ret) {
> >  	
> >  		dev_err(&pdev->dev, "Failed to register V4L2 
device\n");
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > c345f2984041..3bc094eb497f 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > @@ -24,6 +24,7 @@
> > 
> >  #include <linux/iopoll.h>
> >  #include <linux/platform_device.h>
> > 
> > +#include <linux/workqueue.h>
> > 
> >  #define CEDRUS_NAME			"cedrus"
> > 
> > @@ -194,6 +195,8 @@ struct cedrus_dev {
> > 
> >  	struct reset_control	*rstc;
> >  	
> >  	unsigned int		capabilities;
> > 
> > +
> > +	struct delayed_work	watchdog_work;
> > 
> >  };
> >  
> >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > a16c1422558f..9c7200299465 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
> > 
> >  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
> >  	
> >  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> > 
> > +
> > +	/* Start the watchdog timer. */
> > +	schedule_delayed_work(&dev->watchdog_work,
> > +			      msecs_to_jiffies(2000));
> > 
> >  }
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > 2d7663726467..a6470a89851e 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > 
> >  	enum vb2_buffer_state state;
> >  	enum cedrus_irq_status status;
> > 
> > +	/*
> > +	 * If cancel_delayed_work returns false it means watchdog already
> > +	 * executed and finished the job.
> > +	 */
> > +	if (!cancel_delayed_work(&dev->watchdog_work))
> > +		return IRQ_HANDLED;
> > +
> > 
> >  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> >  	if (!ctx) {
> >  	
> >  		v4l2_err(&dev->v4l2_dev,
> > 
> > @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > 
> >  	return IRQ_HANDLED;
> >  
> >  }
> > 
> > +void cedrus_watchdog(struct work_struct *work)
> > +{
> > +	struct cedrus_dev *dev;
> > +	struct cedrus_ctx *ctx;
> > +
> > +	dev = container_of(to_delayed_work(work),
> > +			   struct cedrus_dev, watchdog_work);
> > +
> > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > +	if (!ctx)
> > +		return;
> > +
> > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > +	reset_control_reset(dev->rstc);
> 
> I don't think playing with the reset is the right approach here.
> First we don't really know if the reset is shared or not, so this might have
> no effect.

AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus 
and even then, this is considered as HW issue. So, I'm good with using reset 
line. This principle is also taken from Hantro driver.

> Then even if it does, wouldn't this just reset the state of the
> registers to an unconfigured state, making it impossible to decode any
> future frame in the same context?

Being stateless core, all context is held in auxiliary buffers, reference 
frames and controls, which are not reset with pulsing reset line, so no, state 
is not lost. Anyway, if decoding fails, you're generally screwed until next 
key frame. You'll have to deal with decoding issues/artefacts nevertheless.

> 
> Honestly I'm not sure what a good process would be to get back on track
> here so I would be tempted to just do nothing an return errors.
> 
> That's already better than being stuck.

Doing nothing will solve only current job, but HW will still be stuck in 
decoding state. I doubt reprogramming registers and triggering new decoding 
will actually do anything.

I'll check BSP lib sources again. Maybe selecting non-existing decoding mode 
would reset the core. That is already suggested as good thing to do in order 
to put core in low power mode.

IMO we have to do something. Doing nothing will probably just lock up the core 
until next reboot or maybe until trying different decoding mode.

Anyway, I have to find another way to cause decoding job to time out. Currently 
I'm doing this with IOMMU on H6, but that brings down several other things, 
which requires reboot anyway.

Best regards,
Jernej

> 
> Paul
> 
> > +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx-
>fh.m2m_ctx,
> > +					 VB2_BUF_STATE_ERROR);
> > +}
> > +
> > 
> >  int cedrus_hw_suspend(struct device *device)
> >  {
> >  
> >  	struct cedrus_dev *dev = dev_get_drvdata(device);
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h index
> > 45f641f0bfa2..7c92f00e36da 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
> > 
> >  int cedrus_hw_probe(struct cedrus_dev *dev);
> >  void cedrus_hw_remove(struct cedrus_dev *dev);
> > 
> > +void cedrus_watchdog(struct work_struct *work);
> > +
> > 
> >  #endif





^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-05  7:40     ` Jernej Škrabec
  0 siblings, 0 replies; 14+ messages in thread
From: Jernej Škrabec @ 2022-02-05  7:40 UTC (permalink / raw)
  To: Paul Kocialkowski
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

Dne petek, 04. februar 2022 ob 10:09:56 CET je Paul Kocialkowski napisal(a):
> Hi Jernej,
> 
> On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> > Currently, if job is not completed for whatever reason, userspace
> > application can hang on ioctl and thus become unkillable.
> > 
> > In order to prevent that, implement watchdog, which will complete job
> > after 2 seconds with error state.
> > 
> > Concept is borrowed from hantro driver.
> 
> Good idea to implement a watchdog here, thanks!
> See comments below.
> 
> > Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > ---
> > 
> >  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
> >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
> >  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
> >  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
> >  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
> >  5 files changed, 36 insertions(+)
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > 4a4b714b0f26..68b3dcdb5df3 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
> > 
> >  	mutex_init(&dev->dev_mutex);
> > 
> > +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> > +
> > 
> >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> >  	if (ret) {
> >  	
> >  		dev_err(&pdev->dev, "Failed to register V4L2 
device\n");
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > c345f2984041..3bc094eb497f 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > @@ -24,6 +24,7 @@
> > 
> >  #include <linux/iopoll.h>
> >  #include <linux/platform_device.h>
> > 
> > +#include <linux/workqueue.h>
> > 
> >  #define CEDRUS_NAME			"cedrus"
> > 
> > @@ -194,6 +195,8 @@ struct cedrus_dev {
> > 
> >  	struct reset_control	*rstc;
> >  	
> >  	unsigned int		capabilities;
> > 
> > +
> > +	struct delayed_work	watchdog_work;
> > 
> >  };
> >  
> >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > a16c1422558f..9c7200299465 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
> > 
> >  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
> >  	
> >  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> > 
> > +
> > +	/* Start the watchdog timer. */
> > +	schedule_delayed_work(&dev->watchdog_work,
> > +			      msecs_to_jiffies(2000));
> > 
> >  }
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > 2d7663726467..a6470a89851e 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > 
> >  	enum vb2_buffer_state state;
> >  	enum cedrus_irq_status status;
> > 
> > +	/*
> > +	 * If cancel_delayed_work returns false it means watchdog already
> > +	 * executed and finished the job.
> > +	 */
> > +	if (!cancel_delayed_work(&dev->watchdog_work))
> > +		return IRQ_HANDLED;
> > +
> > 
> >  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> >  	if (!ctx) {
> >  	
> >  		v4l2_err(&dev->v4l2_dev,
> > 
> > @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > 
> >  	return IRQ_HANDLED;
> >  
> >  }
> > 
> > +void cedrus_watchdog(struct work_struct *work)
> > +{
> > +	struct cedrus_dev *dev;
> > +	struct cedrus_ctx *ctx;
> > +
> > +	dev = container_of(to_delayed_work(work),
> > +			   struct cedrus_dev, watchdog_work);
> > +
> > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > +	if (!ctx)
> > +		return;
> > +
> > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > +	reset_control_reset(dev->rstc);
> 
> I don't think playing with the reset is the right approach here.
> First we don't really know if the reset is shared or not, so this might have
> no effect.

AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus 
and even then, this is considered as HW issue. So, I'm good with using reset 
line. This principle is also taken from Hantro driver.

> Then even if it does, wouldn't this just reset the state of the
> registers to an unconfigured state, making it impossible to decode any
> future frame in the same context?

Being stateless core, all context is held in auxiliary buffers, reference 
frames and controls, which are not reset with pulsing reset line, so no, state 
is not lost. Anyway, if decoding fails, you're generally screwed until next 
key frame. You'll have to deal with decoding issues/artefacts nevertheless.

> 
> Honestly I'm not sure what a good process would be to get back on track
> here so I would be tempted to just do nothing an return errors.
> 
> That's already better than being stuck.

Doing nothing will solve only current job, but HW will still be stuck in 
decoding state. I doubt reprogramming registers and triggering new decoding 
will actually do anything.

I'll check BSP lib sources again. Maybe selecting non-existing decoding mode 
would reset the core. That is already suggested as good thing to do in order 
to put core in low power mode.

IMO we have to do something. Doing nothing will probably just lock up the core 
until next reboot or maybe until trying different decoding mode.

Anyway, I have to find another way to cause decoding job to time out. Currently 
I'm doing this with IOMMU on H6, but that brings down several other things, 
which requires reboot anyway.

Best regards,
Jernej

> 
> Paul
> 
> > +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx-
>fh.m2m_ctx,
> > +					 VB2_BUF_STATE_ERROR);
> > +}
> > +
> > 
> >  int cedrus_hw_suspend(struct device *device)
> >  {
> >  
> >  	struct cedrus_dev *dev = dev_get_drvdata(device);
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h index
> > 45f641f0bfa2..7c92f00e36da 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
> > 
> >  int cedrus_hw_probe(struct cedrus_dev *dev);
> >  void cedrus_hw_remove(struct cedrus_dev *dev);
> > 
> > +void cedrus_watchdog(struct work_struct *work);
> > +
> > 
> >  #endif





_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
  2022-02-05  7:40     ` Jernej Škrabec
@ 2022-02-05  8:41       ` Jernej Škrabec
  -1 siblings, 0 replies; 14+ messages in thread
From: Jernej Škrabec @ 2022-02-05  8:41 UTC (permalink / raw)
  To: Paul Kocialkowski
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

Dne sobota, 05. februar 2022 ob 08:40:21 CET je Jernej Škrabec napisal(a):
> Dne petek, 04. februar 2022 ob 10:09:56 CET je Paul Kocialkowski napisal(a):
> > Hi Jernej,
> > 
> > On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> > > Currently, if job is not completed for whatever reason, userspace
> > > application can hang on ioctl and thus become unkillable.
> > > 
> > > In order to prevent that, implement watchdog, which will complete job
> > > after 2 seconds with error state.
> > > 
> > > Concept is borrowed from hantro driver.
> > 
> > Good idea to implement a watchdog here, thanks!
> > See comments below.
> > 
> > > Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > > ---
> > > 
> > >  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
> > >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
> > >  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
> > >  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
> > >  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
> > >  5 files changed, 36 insertions(+)
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > > 4a4b714b0f26..68b3dcdb5df3 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device
> > > *pdev)
> > > 
> > >  	mutex_init(&dev->dev_mutex);
> > > 
> > > +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> > > +
> > > 
> > >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> > >  	if (ret) {
> > >  	
> > >  		dev_err(&pdev->dev, "Failed to register V4L2
> 
> device\n");
> 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > > c345f2984041..3bc094eb497f 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > @@ -24,6 +24,7 @@
> > > 
> > >  #include <linux/iopoll.h>
> > >  #include <linux/platform_device.h>
> > > 
> > > +#include <linux/workqueue.h>
> > > 
> > >  #define CEDRUS_NAME			"cedrus"
> > > 
> > > @@ -194,6 +195,8 @@ struct cedrus_dev {
> > > 
> > >  	struct reset_control	*rstc;
> > >  	
> > >  	unsigned int		capabilities;
> > > 
> > > +
> > > +	struct delayed_work	watchdog_work;
> > > 
> > >  };
> > >  
> > >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > > a16c1422558f..9c7200299465 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
> > > 
> > >  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
> > >  	
> > >  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> > > 
> > > +
> > > +	/* Start the watchdog timer. */
> > > +	schedule_delayed_work(&dev->watchdog_work,
> > > +			      msecs_to_jiffies(2000));
> > > 
> > >  }
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > > 2d7663726467..a6470a89851e 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > 
> > >  	enum vb2_buffer_state state;
> > >  	enum cedrus_irq_status status;
> > > 
> > > +	/*
> > > +	 * If cancel_delayed_work returns false it means watchdog already
> > > +	 * executed and finished the job.
> > > +	 */
> > > +	if (!cancel_delayed_work(&dev->watchdog_work))
> > > +		return IRQ_HANDLED;
> > > +
> > > 
> > >  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > >  	if (!ctx) {
> > >  	
> > >  		v4l2_err(&dev->v4l2_dev,
> > > 
> > > @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > 
> > >  	return IRQ_HANDLED;
> > >  
> > >  }
> > > 
> > > +void cedrus_watchdog(struct work_struct *work)
> > > +{
> > > +	struct cedrus_dev *dev;
> > > +	struct cedrus_ctx *ctx;
> > > +
> > > +	dev = container_of(to_delayed_work(work),
> > > +			   struct cedrus_dev, watchdog_work);
> > > +
> > > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > +	if (!ctx)
> > > +		return;
> > > +
> > > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > > +	reset_control_reset(dev->rstc);
> > 
> > I don't think playing with the reset is the right approach here.
> > First we don't really know if the reset is shared or not, so this might
> > have no effect.
> 
> AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus
> and even then, this is considered as HW issue. So, I'm good with using
> reset line. This principle is also taken from Hantro driver.
> 
> > Then even if it does, wouldn't this just reset the state of the
> > registers to an unconfigured state, making it impossible to decode any
> > future frame in the same context?
> 
> Being stateless core, all context is held in auxiliary buffers, reference
> frames and controls, which are not reset with pulsing reset line, so no,
> state is not lost. Anyway, if decoding fails, you're generally screwed
> until next key frame. You'll have to deal with decoding issues/artefacts
> nevertheless.
> > Honestly I'm not sure what a good process would be to get back on track
> > here so I would be tempted to just do nothing an return errors.
> > 
> > That's already better than being stuck.
> 
> Doing nothing will solve only current job, but HW will still be stuck in
> decoding state. I doubt reprogramming registers and triggering new decoding
> will actually do anything.
> 
> I'll check BSP lib sources again. Maybe selecting non-existing decoding mode
> would reset the core. That is already suggested as good thing to do in
> order to put core in low power mode.

BSP kernel driver also pulses reset line:
https://github.com/orangepi-xunlong/linux-orangepi/blob/orange-pi-4.9-sun50iw9/drivers/media/cedar-ve/cedar_ve.c#L848-L851

I still think this is the way to go.

Best regards,
Jernej

> 
> IMO we have to do something. Doing nothing will probably just lock up the
> core until next reboot or maybe until trying different decoding mode.
> 
> Anyway, I have to find another way to cause decoding job to time out.
> Currently I'm doing this with IOMMU on H6, but that brings down several
> other things, which requires reboot anyway.
> 
> Best regards,
> Jernej
> 
> > Paul
> > 
> > > +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx-
> >
> >fh.m2m_ctx,
> >
> > > +					 VB2_BUF_STATE_ERROR);
> > > +}
> > > +
> > > 
> > >  int cedrus_hw_suspend(struct device *device)
> > >  {
> > >  
> > >  	struct cedrus_dev *dev = dev_get_drvdata(device);
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h index
> > > 45f641f0bfa2..7c92f00e36da 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
> > > 
> > >  int cedrus_hw_probe(struct cedrus_dev *dev);
> > >  void cedrus_hw_remove(struct cedrus_dev *dev);
> > > 
> > > +void cedrus_watchdog(struct work_struct *work);
> > > +
> > > 
> > >  #endif





^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-05  8:41       ` Jernej Škrabec
  0 siblings, 0 replies; 14+ messages in thread
From: Jernej Škrabec @ 2022-02-05  8:41 UTC (permalink / raw)
  To: Paul Kocialkowski
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

Dne sobota, 05. februar 2022 ob 08:40:21 CET je Jernej Škrabec napisal(a):
> Dne petek, 04. februar 2022 ob 10:09:56 CET je Paul Kocialkowski napisal(a):
> > Hi Jernej,
> > 
> > On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> > > Currently, if job is not completed for whatever reason, userspace
> > > application can hang on ioctl and thus become unkillable.
> > > 
> > > In order to prevent that, implement watchdog, which will complete job
> > > after 2 seconds with error state.
> > > 
> > > Concept is borrowed from hantro driver.
> > 
> > Good idea to implement a watchdog here, thanks!
> > See comments below.
> > 
> > > Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > > ---
> > > 
> > >  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
> > >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
> > >  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
> > >  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
> > >  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
> > >  5 files changed, 36 insertions(+)
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > > 4a4b714b0f26..68b3dcdb5df3 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device
> > > *pdev)
> > > 
> > >  	mutex_init(&dev->dev_mutex);
> > > 
> > > +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> > > +
> > > 
> > >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> > >  	if (ret) {
> > >  	
> > >  		dev_err(&pdev->dev, "Failed to register V4L2
> 
> device\n");
> 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > > c345f2984041..3bc094eb497f 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > @@ -24,6 +24,7 @@
> > > 
> > >  #include <linux/iopoll.h>
> > >  #include <linux/platform_device.h>
> > > 
> > > +#include <linux/workqueue.h>
> > > 
> > >  #define CEDRUS_NAME			"cedrus"
> > > 
> > > @@ -194,6 +195,8 @@ struct cedrus_dev {
> > > 
> > >  	struct reset_control	*rstc;
> > >  	
> > >  	unsigned int		capabilities;
> > > 
> > > +
> > > +	struct delayed_work	watchdog_work;
> > > 
> > >  };
> > >  
> > >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > > a16c1422558f..9c7200299465 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
> > > 
> > >  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
> > >  	
> > >  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> > > 
> > > +
> > > +	/* Start the watchdog timer. */
> > > +	schedule_delayed_work(&dev->watchdog_work,
> > > +			      msecs_to_jiffies(2000));
> > > 
> > >  }
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > > 2d7663726467..a6470a89851e 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > 
> > >  	enum vb2_buffer_state state;
> > >  	enum cedrus_irq_status status;
> > > 
> > > +	/*
> > > +	 * If cancel_delayed_work returns false it means watchdog already
> > > +	 * executed and finished the job.
> > > +	 */
> > > +	if (!cancel_delayed_work(&dev->watchdog_work))
> > > +		return IRQ_HANDLED;
> > > +
> > > 
> > >  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > >  	if (!ctx) {
> > >  	
> > >  		v4l2_err(&dev->v4l2_dev,
> > > 
> > > @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > 
> > >  	return IRQ_HANDLED;
> > >  
> > >  }
> > > 
> > > +void cedrus_watchdog(struct work_struct *work)
> > > +{
> > > +	struct cedrus_dev *dev;
> > > +	struct cedrus_ctx *ctx;
> > > +
> > > +	dev = container_of(to_delayed_work(work),
> > > +			   struct cedrus_dev, watchdog_work);
> > > +
> > > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > +	if (!ctx)
> > > +		return;
> > > +
> > > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > > +	reset_control_reset(dev->rstc);
> > 
> > I don't think playing with the reset is the right approach here.
> > First we don't really know if the reset is shared or not, so this might
> > have no effect.
> 
> AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus
> and even then, this is considered as HW issue. So, I'm good with using
> reset line. This principle is also taken from Hantro driver.
> 
> > Then even if it does, wouldn't this just reset the state of the
> > registers to an unconfigured state, making it impossible to decode any
> > future frame in the same context?
> 
> Being stateless core, all context is held in auxiliary buffers, reference
> frames and controls, which are not reset with pulsing reset line, so no,
> state is not lost. Anyway, if decoding fails, you're generally screwed
> until next key frame. You'll have to deal with decoding issues/artefacts
> nevertheless.
> > Honestly I'm not sure what a good process would be to get back on track
> > here so I would be tempted to just do nothing an return errors.
> > 
> > That's already better than being stuck.
> 
> Doing nothing will solve only current job, but HW will still be stuck in
> decoding state. I doubt reprogramming registers and triggering new decoding
> will actually do anything.
> 
> I'll check BSP lib sources again. Maybe selecting non-existing decoding mode
> would reset the core. That is already suggested as good thing to do in
> order to put core in low power mode.

BSP kernel driver also pulses reset line:
https://github.com/orangepi-xunlong/linux-orangepi/blob/orange-pi-4.9-sun50iw9/drivers/media/cedar-ve/cedar_ve.c#L848-L851

I still think this is the way to go.

Best regards,
Jernej

> 
> IMO we have to do something. Doing nothing will probably just lock up the
> core until next reboot or maybe until trying different decoding mode.
> 
> Anyway, I have to find another way to cause decoding job to time out.
> Currently I'm doing this with IOMMU on H6, but that brings down several
> other things, which requires reboot anyway.
> 
> Best regards,
> Jernej
> 
> > Paul
> > 
> > > +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx-
> >
> >fh.m2m_ctx,
> >
> > > +					 VB2_BUF_STATE_ERROR);
> > > +}
> > > +
> > > 
> > >  int cedrus_hw_suspend(struct device *device)
> > >  {
> > >  
> > >  	struct cedrus_dev *dev = dev_get_drvdata(device);
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h index
> > > 45f641f0bfa2..7c92f00e36da 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
> > > 
> > >  int cedrus_hw_probe(struct cedrus_dev *dev);
> > >  void cedrus_hw_remove(struct cedrus_dev *dev);
> > > 
> > > +void cedrus_watchdog(struct work_struct *work);
> > > +
> > > 
> > >  #endif





_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
  2022-02-05  7:40     ` Jernej Škrabec
@ 2022-02-05 10:28       ` Maxime Ripard
  -1 siblings, 0 replies; 14+ messages in thread
From: Maxime Ripard @ 2022-02-05 10:28 UTC (permalink / raw)
  To: Jernej Škrabec
  Cc: Paul Kocialkowski, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

On Sat, Feb 05, 2022 at 08:40:21AM +0100, Jernej Škrabec wrote:
> > > +void cedrus_watchdog(struct work_struct *work)
> > > +{
> > > +	struct cedrus_dev *dev;
> > > +	struct cedrus_ctx *ctx;
> > > +
> > > +	dev = container_of(to_delayed_work(work),
> > > +			   struct cedrus_dev, watchdog_work);
> > > +
> > > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > +	if (!ctx)
> > > +		return;
> > > +
> > > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > > +	reset_control_reset(dev->rstc);
> > 
> > I don't think playing with the reset is the right approach here.
> > First we don't really know if the reset is shared or not, so this might have
> > no effect.
> 
> AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus 
> and even then, this is considered as HW issue. So, I'm good with using reset 
> line. This principle is also taken from Hantro driver.

Also, we got the reset handle through reset_control_get_exclusive, so
we're guaranteed to be the only user.

Maxime

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-05 10:28       ` Maxime Ripard
  0 siblings, 0 replies; 14+ messages in thread
From: Maxime Ripard @ 2022-02-05 10:28 UTC (permalink / raw)
  To: Jernej Škrabec
  Cc: Paul Kocialkowski, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

On Sat, Feb 05, 2022 at 08:40:21AM +0100, Jernej Škrabec wrote:
> > > +void cedrus_watchdog(struct work_struct *work)
> > > +{
> > > +	struct cedrus_dev *dev;
> > > +	struct cedrus_ctx *ctx;
> > > +
> > > +	dev = container_of(to_delayed_work(work),
> > > +			   struct cedrus_dev, watchdog_work);
> > > +
> > > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > +	if (!ctx)
> > > +		return;
> > > +
> > > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > > +	reset_control_reset(dev->rstc);
> > 
> > I don't think playing with the reset is the right approach here.
> > First we don't really know if the reset is shared or not, so this might have
> > no effect.
> 
> AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus 
> and even then, this is considered as HW issue. So, I'm good with using reset 
> line. This principle is also taken from Hantro driver.

Also, we got the reset handle through reset_control_get_exclusive, so
we're guaranteed to be the only user.

Maxime

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
  2022-02-01 18:33 ` Jernej Skrabec
@ 2022-02-06 13:09   ` Ezequiel Garcia
  -1 siblings, 0 replies; 14+ messages in thread
From: Ezequiel Garcia @ 2022-02-06 13:09 UTC (permalink / raw)
  To: Jernej Skrabec
  Cc: Maxime Ripard, Paul Kocialkowski, Mauro Carvalho Chehab,
	Chen-Yu Tsai, linux-media, open list:STAGING SUBSYSTEM,
	linux-arm-kernel, linux-sunxi, Linux Kernel Mailing List

Hello,

On Tue, 1 Feb 2022 at 15:34, Jernej Skrabec <jernej.skrabec@gmail.com> wrote:
>
> Currently, if job is not completed for whatever reason, userspace
> application can hang on ioctl and thus become unkillable.
>
> In order to prevent that, implement watchdog, which will complete job
> after 2 seconds with error state.
>
> Concept is borrowed from hantro driver.
>
> Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>

This has been very useful developing Hantro,
to get some indication of hardware being programmed wrongly:

Patch looks correct.

Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>


> ---
>  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
>  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
>  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
>  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
>  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
>  5 files changed, 36 insertions(+)
>
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
> index 4a4b714b0f26..68b3dcdb5df3 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
>
>         mutex_init(&dev->dev_mutex);
>
> +       INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> +
>         ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
>         if (ret) {
>                 dev_err(&pdev->dev, "Failed to register V4L2 device\n");
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
> index c345f2984041..3bc094eb497f 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> @@ -24,6 +24,7 @@
>
>  #include <linux/iopoll.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>
>  #define CEDRUS_NAME                    "cedrus"
>
> @@ -194,6 +195,8 @@ struct cedrus_dev {
>         struct reset_control    *rstc;
>
>         unsigned int            capabilities;
> +
> +       struct delayed_work     watchdog_work;
>  };
>
>  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> index a16c1422558f..9c7200299465 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
>                 v4l2_ctrl_request_complete(src_req, &ctx->hdl);
>
>         dev->dec_ops[ctx->current_codec]->trigger(ctx);
> +
> +       /* Start the watchdog timer. */
> +       schedule_delayed_work(&dev->watchdog_work,
> +                             msecs_to_jiffies(2000));
>  }
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> index 2d7663726467..a6470a89851e 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>         enum vb2_buffer_state state;
>         enum cedrus_irq_status status;
>
> +       /*
> +        * If cancel_delayed_work returns false it means watchdog already
> +        * executed and finished the job.
> +        */
> +       if (!cancel_delayed_work(&dev->watchdog_work))
> +               return IRQ_HANDLED;
> +
>         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
>         if (!ctx) {
>                 v4l2_err(&dev->v4l2_dev,
> @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>         return IRQ_HANDLED;
>  }
>
> +void cedrus_watchdog(struct work_struct *work)
> +{
> +       struct cedrus_dev *dev;
> +       struct cedrus_ctx *ctx;
> +
> +       dev = container_of(to_delayed_work(work),
> +                          struct cedrus_dev, watchdog_work);
> +
> +       ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> +       if (!ctx)
> +               return;
> +
> +       v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> +       reset_control_reset(dev->rstc);
> +       v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
> +                                        VB2_BUF_STATE_ERROR);
> +}
> +
>  int cedrus_hw_suspend(struct device *device)
>  {
>         struct cedrus_dev *dev = dev_get_drvdata(device);
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> index 45f641f0bfa2..7c92f00e36da 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
>  int cedrus_hw_probe(struct cedrus_dev *dev);
>  void cedrus_hw_remove(struct cedrus_dev *dev);
>
> +void cedrus_watchdog(struct work_struct *work);
> +
>  #endif
> --
> 2.35.1
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-06 13:09   ` Ezequiel Garcia
  0 siblings, 0 replies; 14+ messages in thread
From: Ezequiel Garcia @ 2022-02-06 13:09 UTC (permalink / raw)
  To: Jernej Skrabec
  Cc: Maxime Ripard, Paul Kocialkowski, Mauro Carvalho Chehab,
	Chen-Yu Tsai, linux-media, open list:STAGING SUBSYSTEM,
	linux-arm-kernel, linux-sunxi, Linux Kernel Mailing List

Hello,

On Tue, 1 Feb 2022 at 15:34, Jernej Skrabec <jernej.skrabec@gmail.com> wrote:
>
> Currently, if job is not completed for whatever reason, userspace
> application can hang on ioctl and thus become unkillable.
>
> In order to prevent that, implement watchdog, which will complete job
> after 2 seconds with error state.
>
> Concept is borrowed from hantro driver.
>
> Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>

This has been very useful developing Hantro,
to get some indication of hardware being programmed wrongly:

Patch looks correct.

Reviewed-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>


> ---
>  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
>  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
>  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
>  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
>  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
>  5 files changed, 36 insertions(+)
>
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
> index 4a4b714b0f26..68b3dcdb5df3 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device *pdev)
>
>         mutex_init(&dev->dev_mutex);
>
> +       INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> +
>         ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
>         if (ret) {
>                 dev_err(&pdev->dev, "Failed to register V4L2 device\n");
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
> index c345f2984041..3bc094eb497f 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> @@ -24,6 +24,7 @@
>
>  #include <linux/iopoll.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>
>  #define CEDRUS_NAME                    "cedrus"
>
> @@ -194,6 +195,8 @@ struct cedrus_dev {
>         struct reset_control    *rstc;
>
>         unsigned int            capabilities;
> +
> +       struct delayed_work     watchdog_work;
>  };
>
>  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> index a16c1422558f..9c7200299465 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
>                 v4l2_ctrl_request_complete(src_req, &ctx->hdl);
>
>         dev->dec_ops[ctx->current_codec]->trigger(ctx);
> +
> +       /* Start the watchdog timer. */
> +       schedule_delayed_work(&dev->watchdog_work,
> +                             msecs_to_jiffies(2000));
>  }
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> index 2d7663726467..a6470a89851e 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>         enum vb2_buffer_state state;
>         enum cedrus_irq_status status;
>
> +       /*
> +        * If cancel_delayed_work returns false it means watchdog already
> +        * executed and finished the job.
> +        */
> +       if (!cancel_delayed_work(&dev->watchdog_work))
> +               return IRQ_HANDLED;
> +
>         ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
>         if (!ctx) {
>                 v4l2_err(&dev->v4l2_dev,
> @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
>         return IRQ_HANDLED;
>  }
>
> +void cedrus_watchdog(struct work_struct *work)
> +{
> +       struct cedrus_dev *dev;
> +       struct cedrus_ctx *ctx;
> +
> +       dev = container_of(to_delayed_work(work),
> +                          struct cedrus_dev, watchdog_work);
> +
> +       ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> +       if (!ctx)
> +               return;
> +
> +       v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> +       reset_control_reset(dev->rstc);
> +       v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx,
> +                                        VB2_BUF_STATE_ERROR);
> +}
> +
>  int cedrus_hw_suspend(struct device *device)
>  {
>         struct cedrus_dev *dev = dev_get_drvdata(device);
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> index 45f641f0bfa2..7c92f00e36da 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
>  int cedrus_hw_probe(struct cedrus_dev *dev);
>  void cedrus_hw_remove(struct cedrus_dev *dev);
>
> +void cedrus_watchdog(struct work_struct *work);
> +
>  #endif
> --
> 2.35.1
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
  2022-02-05  8:41       ` Jernej Škrabec
@ 2022-02-07  8:33         ` Paul Kocialkowski
  -1 siblings, 0 replies; 14+ messages in thread
From: Paul Kocialkowski @ 2022-02-07  8:33 UTC (permalink / raw)
  To: Jernej Škrabec
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel


[-- Attachment #1.1: Type: text/plain, Size: 8607 bytes --]

Hi,

On Sat 05 Feb 22, 09:41, Jernej Škrabec wrote:
> Dne sobota, 05. februar 2022 ob 08:40:21 CET je Jernej Škrabec napisal(a):
> > Dne petek, 04. februar 2022 ob 10:09:56 CET je Paul Kocialkowski napisal(a):
> > > Hi Jernej,
> > > 
> > > On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> > > > Currently, if job is not completed for whatever reason, userspace
> > > > application can hang on ioctl and thus become unkillable.
> > > > 
> > > > In order to prevent that, implement watchdog, which will complete job
> > > > after 2 seconds with error state.
> > > > 
> > > > Concept is borrowed from hantro driver.
> > > 
> > > Good idea to implement a watchdog here, thanks!
> > > See comments below.
> > > 
> > > > Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > > > ---
> > > > 
> > > >  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
> > > >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
> > > >  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
> > > >  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
> > > >  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
> > > >  5 files changed, 36 insertions(+)
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > > > 4a4b714b0f26..68b3dcdb5df3 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > > @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device
> > > > *pdev)
> > > > 
> > > >  	mutex_init(&dev->dev_mutex);
> > > > 
> > > > +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> > > > +
> > > > 
> > > >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> > > >  	if (ret) {
> > > >  	
> > > >  		dev_err(&pdev->dev, "Failed to register V4L2
> > 
> > device\n");
> > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > > > c345f2984041..3bc094eb497f 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > @@ -24,6 +24,7 @@
> > > > 
> > > >  #include <linux/iopoll.h>
> > > >  #include <linux/platform_device.h>
> > > > 
> > > > +#include <linux/workqueue.h>
> > > > 
> > > >  #define CEDRUS_NAME			"cedrus"
> > > > 
> > > > @@ -194,6 +195,8 @@ struct cedrus_dev {
> > > > 
> > > >  	struct reset_control	*rstc;
> > > >  	
> > > >  	unsigned int		capabilities;
> > > > 
> > > > +
> > > > +	struct delayed_work	watchdog_work;
> > > > 
> > > >  };
> > > >  
> > > >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > > > a16c1422558f..9c7200299465 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > > @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
> > > > 
> > > >  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
> > > >  	
> > > >  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> > > > 
> > > > +
> > > > +	/* Start the watchdog timer. */
> > > > +	schedule_delayed_work(&dev->watchdog_work,
> > > > +			      msecs_to_jiffies(2000));
> > > > 
> > > >  }
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > > > 2d7663726467..a6470a89851e 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > > @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > > 
> > > >  	enum vb2_buffer_state state;
> > > >  	enum cedrus_irq_status status;
> > > > 
> > > > +	/*
> > > > +	 * If cancel_delayed_work returns false it means watchdog already
> > > > +	 * executed and finished the job.
> > > > +	 */
> > > > +	if (!cancel_delayed_work(&dev->watchdog_work))
> > > > +		return IRQ_HANDLED;
> > > > +
> > > > 
> > > >  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > >  	if (!ctx) {
> > > >  	
> > > >  		v4l2_err(&dev->v4l2_dev,
> > > > 
> > > > @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > > 
> > > >  	return IRQ_HANDLED;
> > > >  
> > > >  }
> > > > 
> > > > +void cedrus_watchdog(struct work_struct *work)
> > > > +{
> > > > +	struct cedrus_dev *dev;
> > > > +	struct cedrus_ctx *ctx;
> > > > +
> > > > +	dev = container_of(to_delayed_work(work),
> > > > +			   struct cedrus_dev, watchdog_work);
> > > > +
> > > > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > > +	if (!ctx)
> > > > +		return;
> > > > +
> > > > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > > > +	reset_control_reset(dev->rstc);
> > > 
> > > I don't think playing with the reset is the right approach here.
> > > First we don't really know if the reset is shared or not, so this might
> > > have no effect.
> > 
> > AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus
> > and even then, this is considered as HW issue. So, I'm good with using
> > reset line. This principle is also taken from Hantro driver.
> > 
> > > Then even if it does, wouldn't this just reset the state of the
> > > registers to an unconfigured state, making it impossible to decode any
> > > future frame in the same context?
> > 
> > Being stateless core, all context is held in auxiliary buffers, reference
> > frames and controls, which are not reset with pulsing reset line, so no,
> > state is not lost. Anyway, if decoding fails, you're generally screwed
> > until next key frame. You'll have to deal with decoding issues/artefacts
> > nevertheless.

Ah right, it's true that we fully configure the hardware on each slice
so indeed, it should be able to bring it back up on the next one.

> > > Honestly I'm not sure what a good process would be to get back on track
> > > here so I would be tempted to just do nothing an return errors.
> > > 
> > > That's already better than being stuck.
> > 
> > Doing nothing will solve only current job, but HW will still be stuck in
> > decoding state. I doubt reprogramming registers and triggering new decoding
> > will actually do anything.
> > 
> > I'll check BSP lib sources again. Maybe selecting non-existing decoding mode
> > would reset the core. That is already suggested as good thing to do in
> > order to put core in low power mode.
> 
> BSP kernel driver also pulses reset line:
> https://github.com/orangepi-xunlong/linux-orangepi/blob/orange-pi-4.9-sun50iw9/drivers/media/cedar-ve/cedar_ve.c#L848-L851
> 
> I still think this is the way to go.

Okay so that's probably the right thing to do then, my comment doesn't stand!

So the change looks good to me at this point:
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>

Thanks!

Paul

> Best regards,
> Jernej
> 
> > 
> > IMO we have to do something. Doing nothing will probably just lock up the
> > core until next reboot or maybe until trying different decoding mode.
> > 
> > Anyway, I have to find another way to cause decoding job to time out.
> > Currently I'm doing this with IOMMU on H6, but that brings down several
> > other things, which requires reboot anyway.
> > 
> > Best regards,
> > Jernej
> > 
> > > Paul
> > > 
> > > > +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx-
> > >
> > >fh.m2m_ctx,
> > >
> > > > +					 VB2_BUF_STATE_ERROR);
> > > > +}
> > > > +
> > > > 
> > > >  int cedrus_hw_suspend(struct device *device)
> > > >  {
> > > >  
> > > >  	struct cedrus_dev *dev = dev_get_drvdata(device);
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h index
> > > > 45f641f0bfa2..7c92f00e36da 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > > @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
> > > > 
> > > >  int cedrus_hw_probe(struct cedrus_dev *dev);
> > > >  void cedrus_hw_remove(struct cedrus_dev *dev);
> > > > 
> > > > +void cedrus_watchdog(struct work_struct *work);
> > > > +
> > > > 
> > > >  #endif
> 
> 
> 
> 

-- 
Paul Kocialkowski, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

[-- Attachment #2: Type: text/plain, Size: 176 bytes --]

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] media: cedrus: Add watchdog for job completion
@ 2022-02-07  8:33         ` Paul Kocialkowski
  0 siblings, 0 replies; 14+ messages in thread
From: Paul Kocialkowski @ 2022-02-07  8:33 UTC (permalink / raw)
  To: Jernej Škrabec
  Cc: mripard, mchehab, wens, linux-media, linux-staging,
	linux-arm-kernel, linux-sunxi, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 8607 bytes --]

Hi,

On Sat 05 Feb 22, 09:41, Jernej Škrabec wrote:
> Dne sobota, 05. februar 2022 ob 08:40:21 CET je Jernej Škrabec napisal(a):
> > Dne petek, 04. februar 2022 ob 10:09:56 CET je Paul Kocialkowski napisal(a):
> > > Hi Jernej,
> > > 
> > > On Tue 01 Feb 22, 19:33, Jernej Skrabec wrote:
> > > > Currently, if job is not completed for whatever reason, userspace
> > > > application can hang on ioctl and thus become unkillable.
> > > > 
> > > > In order to prevent that, implement watchdog, which will complete job
> > > > after 2 seconds with error state.
> > > > 
> > > > Concept is borrowed from hantro driver.
> > > 
> > > Good idea to implement a watchdog here, thanks!
> > > See comments below.
> > > 
> > > > Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
> > > > ---
> > > > 
> > > >  drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 ++
> > > >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  3 +++
> > > >  .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +++
> > > >  .../staging/media/sunxi/cedrus/cedrus_hw.c    | 25 +++++++++++++++++++
> > > >  .../staging/media/sunxi/cedrus/cedrus_hw.h    |  2 ++
> > > >  5 files changed, 36 insertions(+)
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus.c index
> > > > 4a4b714b0f26..68b3dcdb5df3 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
> > > > @@ -439,6 +439,8 @@ static int cedrus_probe(struct platform_device
> > > > *pdev)
> > > > 
> > > >  	mutex_init(&dev->dev_mutex);
> > > > 
> > > > +	INIT_DELAYED_WORK(&dev->watchdog_work, cedrus_watchdog);
> > > > +
> > > > 
> > > >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> > > >  	if (ret) {
> > > >  	
> > > >  		dev_err(&pdev->dev, "Failed to register V4L2
> > 
> > device\n");
> > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > > > c345f2984041..3bc094eb497f 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > @@ -24,6 +24,7 @@
> > > > 
> > > >  #include <linux/iopoll.h>
> > > >  #include <linux/platform_device.h>
> > > > 
> > > > +#include <linux/workqueue.h>
> > > > 
> > > >  #define CEDRUS_NAME			"cedrus"
> > > > 
> > > > @@ -194,6 +195,8 @@ struct cedrus_dev {
> > > > 
> > > >  	struct reset_control	*rstc;
> > > >  	
> > > >  	unsigned int		capabilities;
> > > > 
> > > > +
> > > > +	struct delayed_work	watchdog_work;
> > > > 
> > > >  };
> > > >  
> > > >  extern struct cedrus_dec_ops cedrus_dec_ops_mpeg2;
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c index
> > > > a16c1422558f..9c7200299465 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
> > > > @@ -97,4 +97,8 @@ void cedrus_device_run(void *priv)
> > > > 
> > > >  		v4l2_ctrl_request_complete(src_req, &ctx->hdl);
> > > >  	
> > > >  	dev->dec_ops[ctx->current_codec]->trigger(ctx);
> > > > 
> > > > +
> > > > +	/* Start the watchdog timer. */
> > > > +	schedule_delayed_work(&dev->watchdog_work,
> > > > +			      msecs_to_jiffies(2000));
> > > > 
> > > >  }
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index
> > > > 2d7663726467..a6470a89851e 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c
> > > > @@ -118,6 +118,13 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > > 
> > > >  	enum vb2_buffer_state state;
> > > >  	enum cedrus_irq_status status;
> > > > 
> > > > +	/*
> > > > +	 * If cancel_delayed_work returns false it means watchdog already
> > > > +	 * executed and finished the job.
> > > > +	 */
> > > > +	if (!cancel_delayed_work(&dev->watchdog_work))
> > > > +		return IRQ_HANDLED;
> > > > +
> > > > 
> > > >  	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > >  	if (!ctx) {
> > > >  	
> > > >  		v4l2_err(&dev->v4l2_dev,
> > > > 
> > > > @@ -143,6 +150,24 @@ static irqreturn_t cedrus_irq(int irq, void *data)
> > > > 
> > > >  	return IRQ_HANDLED;
> > > >  
> > > >  }
> > > > 
> > > > +void cedrus_watchdog(struct work_struct *work)
> > > > +{
> > > > +	struct cedrus_dev *dev;
> > > > +	struct cedrus_ctx *ctx;
> > > > +
> > > > +	dev = container_of(to_delayed_work(work),
> > > > +			   struct cedrus_dev, watchdog_work);
> > > > +
> > > > +	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
> > > > +	if (!ctx)
> > > > +		return;
> > > > +
> > > > +	v4l2_err(&dev->v4l2_dev, "frame processing timed out!\n");
> > > > +	reset_control_reset(dev->rstc);
> > > 
> > > I don't think playing with the reset is the right approach here.
> > > First we don't really know if the reset is shared or not, so this might
> > > have no effect.
> > 
> > AFAIK only few reset lines are shared in all Allwinner SoC, never for Cedrus
> > and even then, this is considered as HW issue. So, I'm good with using
> > reset line. This principle is also taken from Hantro driver.
> > 
> > > Then even if it does, wouldn't this just reset the state of the
> > > registers to an unconfigured state, making it impossible to decode any
> > > future frame in the same context?
> > 
> > Being stateless core, all context is held in auxiliary buffers, reference
> > frames and controls, which are not reset with pulsing reset line, so no,
> > state is not lost. Anyway, if decoding fails, you're generally screwed
> > until next key frame. You'll have to deal with decoding issues/artefacts
> > nevertheless.

Ah right, it's true that we fully configure the hardware on each slice
so indeed, it should be able to bring it back up on the next one.

> > > Honestly I'm not sure what a good process would be to get back on track
> > > here so I would be tempted to just do nothing an return errors.
> > > 
> > > That's already better than being stuck.
> > 
> > Doing nothing will solve only current job, but HW will still be stuck in
> > decoding state. I doubt reprogramming registers and triggering new decoding
> > will actually do anything.
> > 
> > I'll check BSP lib sources again. Maybe selecting non-existing decoding mode
> > would reset the core. That is already suggested as good thing to do in
> > order to put core in low power mode.
> 
> BSP kernel driver also pulses reset line:
> https://github.com/orangepi-xunlong/linux-orangepi/blob/orange-pi-4.9-sun50iw9/drivers/media/cedar-ve/cedar_ve.c#L848-L851
> 
> I still think this is the way to go.

Okay so that's probably the right thing to do then, my comment doesn't stand!

So the change looks good to me at this point:
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>

Thanks!

Paul

> Best regards,
> Jernej
> 
> > 
> > IMO we have to do something. Doing nothing will probably just lock up the
> > core until next reboot or maybe until trying different decoding mode.
> > 
> > Anyway, I have to find another way to cause decoding job to time out.
> > Currently I'm doing this with IOMMU on H6, but that brings down several
> > other things, which requires reboot anyway.
> > 
> > Best regards,
> > Jernej
> > 
> > > Paul
> > > 
> > > > +	v4l2_m2m_buf_done_and_job_finish(ctx->dev->m2m_dev, ctx-
> > >
> > >fh.m2m_ctx,
> > >
> > > > +					 VB2_BUF_STATE_ERROR);
> > > > +}
> > > > +
> > > > 
> > > >  int cedrus_hw_suspend(struct device *device)
> > > >  {
> > > >  
> > > >  	struct cedrus_dev *dev = dev_get_drvdata(device);
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h index
> > > > 45f641f0bfa2..7c92f00e36da 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.h
> > > > @@ -28,4 +28,6 @@ int cedrus_hw_resume(struct device *device);
> > > > 
> > > >  int cedrus_hw_probe(struct cedrus_dev *dev);
> > > >  void cedrus_hw_remove(struct cedrus_dev *dev);
> > > > 
> > > > +void cedrus_watchdog(struct work_struct *work);
> > > > +
> > > > 
> > > >  #endif
> 
> 
> 
> 

-- 
Paul Kocialkowski, Bootlin
Embedded Linux and kernel engineering
https://bootlin.com

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2022-02-07  8:48 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-01 18:33 [PATCH] media: cedrus: Add watchdog for job completion Jernej Skrabec
2022-02-01 18:33 ` Jernej Skrabec
2022-02-04  9:09 ` Paul Kocialkowski
2022-02-04  9:09   ` Paul Kocialkowski
2022-02-05  7:40   ` Jernej Škrabec
2022-02-05  7:40     ` Jernej Škrabec
2022-02-05  8:41     ` Jernej Škrabec
2022-02-05  8:41       ` Jernej Škrabec
2022-02-07  8:33       ` Paul Kocialkowski
2022-02-07  8:33         ` Paul Kocialkowski
2022-02-05 10:28     ` Maxime Ripard
2022-02-05 10:28       ` Maxime Ripard
2022-02-06 13:09 ` Ezequiel Garcia
2022-02-06 13:09   ` Ezequiel Garcia

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.