linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
@ 2021-10-29 21:48 Tadeusz Struk
  2021-10-29 23:59 ` John Stultz
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Tadeusz Struk @ 2021-10-29 21:48 UTC (permalink / raw)
  To: Stanimir Varbanov, Andy Gross
  Cc: Bjorn Andersson, Mauro Carvalho Chehab, Lee Jones, Amit Pundir,
	John Stultz, Tadeusz Struk, linux-media, linux-arm-msm,
	linux-kernel

Venus video encode/decode hardware driver consists of three modules.
The parent module venus-core, and two sub modules venus-enc and venus-dec.
The venus-core module allocates a common structure that is used by the
enc/dec modules, loads the firmware, and performs some common hardware
initialization. Since the three modules are loaded one after the other,
and their probe functions can run in parallel it is possible that
the venc_probe and vdec_probe functions can finish before the core
venus_probe function, which then can fail when, for example it
fails to load the firmware. In this case the subsequent call to venc_open
causes an Oops as it tries to dereference already uninitialized structures
through dev->parent and the system crashes in __pm_runtime_resume() as in
the trace below:

[   26.064835][  T485] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[   26.270914][  T485] Hardware name: Thundercomm Dragonboard 845c (DT)
[   26.285019][  T485] pc : __pm_runtime_resume+0x34/0x178
[   26.286374][  T213] lt9611 10-003b: hdmi cable connected
[   26.290285][  T485] lr : venc_open+0xc0/0x278 [venus_enc]
[   26.290326][  T485] Call trace:
[   26.290328][  T485]  __pm_runtime_resume+0x34/0x178
[   26.290330][  T485]  venc_open+0xc0/0x278 [venus_enc]
[   26.290335][  T485]  v4l2_open+0x184/0x294
[   26.290340][  T485]  chrdev_open+0x468/0x5c8
[   26.290344][  T485]  do_dentry_open+0x260/0x54c
[   26.290349][  T485]  path_openat+0xbe8/0xd5c
[   26.290352][  T485]  do_filp_open+0xb8/0x168
[   26.290354][  T485]  do_sys_openat2+0xa4/0x1e8
[   26.290357][  T485]  __arm64_compat_sys_openat+0x70/0x9c
[   26.290359][  T485]  invoke_syscall+0x60/0x170
[   26.290363][  T485]  el0_svc_common+0xb8/0xf8
[   26.290365][  T485]  do_el0_svc_compat+0x20/0x30
[   26.290367][  T485]  el0_svc_compat+0x24/0x84
[   26.290372][  T485]  el0t_32_sync_handler+0x7c/0xbc
[   26.290374][  T485]  el0t_32_sync+0x1b8/0x1bc
[   26.290381][  T485] ---[ end trace 04ca7c088b4c1a9c ]---
[   26.290383][  T485] Kernel panic - not syncing: Oops: Fatal exception

This can be fixed by synchronizing the three probe functions and
only allowing the venc_probe() and vdec_probe() to pass when venus_probe()
returns success.

Changes in v2:
- Change locking from mutex_lock to mutex_trylock
  in venc_probe and vdec_probe to avoid potential deadlock.

Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
---
 drivers/media/platform/qcom/venus/core.c |  6 ++++++
 drivers/media/platform/qcom/venus/core.h |  2 ++
 drivers/media/platform/qcom/venus/vdec.c | 24 +++++++++++++++++++++---
 drivers/media/platform/qcom/venus/venc.c | 24 +++++++++++++++++++++---
 4 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
index 91b15842c555..18f3e3a9823f 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -317,6 +317,7 @@ static int venus_probe(struct platform_device *pdev)
 
 	INIT_LIST_HEAD(&core->instances);
 	mutex_init(&core->lock);
+	mutex_init(&core->sync_lock);
 	INIT_DELAYED_WORK(&core->work, venus_sys_error_handler);
 
 	ret = devm_request_threaded_irq(dev, core->irq, hfi_isr, hfi_isr_thread,
@@ -331,6 +332,8 @@ static int venus_probe(struct platform_device *pdev)
 
 	venus_assign_register_offsets(core);
 
+	mutex_lock(&core->sync_lock);
+
 	ret = v4l2_device_register(dev, &core->v4l2_dev);
 	if (ret)
 		goto err_core_deinit;
@@ -377,6 +380,7 @@ static int venus_probe(struct platform_device *pdev)
 		goto err_dev_unregister;
 	}
 
+	mutex_unlock(&core->sync_lock);
 	venus_dbgfs_init(core);
 
 	return 0;
@@ -392,6 +396,7 @@ static int venus_probe(struct platform_device *pdev)
 	hfi_destroy(core);
 err_core_deinit:
 	hfi_core_deinit(core, false);
+	mutex_unlock(&core->sync_lock);
 err_core_put:
 	if (core->pm_ops->core_put)
 		core->pm_ops->core_put(core);
@@ -428,6 +433,7 @@ static int venus_remove(struct platform_device *pdev)
 
 	mutex_destroy(&core->pm_lock);
 	mutex_destroy(&core->lock);
+	mutex_destroy(&core->sync_lock);
 	venus_dbgfs_deinit(core);
 
 	return ret;
diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
index 5ec851115eca..3f80dc26febb 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -119,6 +119,7 @@ struct venus_format {
  * @use_tz:	a flag that suggests presence of trustzone
  * @fw:		structure of firmware parameters
  * @lock:	a lock for this strucure
+ * @sync_lock	a lock for probe sync between venus_core and venus_enc/dec
  * @instances:	a list_head of all instances
  * @insts_count:	num of instances
  * @state:	the state of the venus core
@@ -176,6 +177,7 @@ struct venus_core {
 		size_t mem_size;
 	} fw;
 	struct mutex lock;
+	struct mutex sync_lock;
 	struct list_head instances;
 	atomic_t insts_count;
 	unsigned int state;
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index 198e47eb63f4..959e43bb6c00 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -1659,17 +1659,32 @@ static int vdec_probe(struct platform_device *pdev)
 	if (!core)
 		return -EPROBE_DEFER;
 
+	/* Sync and wait on the venus core to initialize first.
+	 * If we manage to acquire the sync_lock here it means
+	 * that the venus_probe() finished running */
+	ret = mutex_trylock(&core->sync_lock);
+	if (!ret) {
+		return -EPROBE_DEFER;
+	} else {
+		if (core->state != CORE_INIT) {
+			ret = -ENODEV;
+			goto err_core_unlock;
+		}
+	}
+
 	platform_set_drvdata(pdev, core);
 
 	if (core->pm_ops->vdec_get) {
 		ret = core->pm_ops->vdec_get(dev);
 		if (ret)
-			return ret;
+			goto err_core_unlock;
 	}
 
 	vdev = video_device_alloc();
-	if (!vdev)
-		return -ENOMEM;
+	if (!vdev) {
+		ret = -ENOMEM;
+		goto err_core_unlock;
+	}
 
 	strscpy(vdev->name, "qcom-venus-decoder", sizeof(vdev->name));
 	vdev->release = video_device_release;
@@ -1690,11 +1705,14 @@ static int vdec_probe(struct platform_device *pdev)
 	pm_runtime_set_autosuspend_delay(dev, 2000);
 	pm_runtime_use_autosuspend(dev);
 	pm_runtime_enable(dev);
+	mutex_unlock(&core->sync_lock);
 
 	return 0;
 
 err_vdev_release:
 	video_device_release(vdev);
+err_core_unlock:
+	mutex_unlock(&core->sync_lock);
 	return ret;
 }
 
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index bc1c42dd53c0..11ec7bff5e3f 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -1338,17 +1338,32 @@ static int venc_probe(struct platform_device *pdev)
 	if (!core)
 		return -EPROBE_DEFER;
 
+	/* Sync and wait on the venus core to initialize first.
+	 * If we manage to acquire the sync_lock here it means
+	 * that the venus_probe() finished running */
+	ret = mutex_trylock(&core->sync_lock);
+	if (!ret) {
+		return -EPROBE_DEFER;
+	} else {
+		if (core->state != CORE_INIT) {
+			ret = -ENODEV;
+			goto err_core_unlock;
+		}
+	}
+
 	platform_set_drvdata(pdev, core);
 
 	if (core->pm_ops->venc_get) {
 		ret = core->pm_ops->venc_get(dev);
 		if (ret)
-			return ret;
+			goto err_core_unlock;
 	}
 
 	vdev = video_device_alloc();
-	if (!vdev)
-		return -ENOMEM;
+	if (!vdev) {
+		ret = -ENOMEM;
+		goto err_core_unlock;
+	}
 
 	strscpy(vdev->name, "qcom-venus-encoder", sizeof(vdev->name));
 	vdev->release = video_device_release;
@@ -1367,11 +1382,14 @@ static int venc_probe(struct platform_device *pdev)
 
 	video_set_drvdata(vdev, core);
 	pm_runtime_enable(dev);
+	mutex_unlock(&core->sync_lock);
 
 	return 0;
 
 err_vdev_release:
 	video_device_release(vdev);
+err_core_unlock:
+	mutex_unlock(&core->sync_lock);
 	return ret;
 }
 
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-10-29 21:48 [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec Tadeusz Struk
@ 2021-10-29 23:59 ` John Stultz
  2021-11-11  0:28 ` John Stultz
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: John Stultz @ 2021-10-29 23:59 UTC (permalink / raw)
  To: Tadeusz Struk
  Cc: Stanimir Varbanov, Andy Gross, Bjorn Andersson,
	Mauro Carvalho Chehab, Lee Jones, Amit Pundir, linux-media,
	linux-arm-msm, linux-kernel

On Fri, Oct 29, 2021 at 2:48 PM Tadeusz Struk <tadeusz.struk@linaro.org> wrote:
>
> Venus video encode/decode hardware driver consists of three modules.
> The parent module venus-core, and two sub modules venus-enc and venus-dec.
> The venus-core module allocates a common structure that is used by the
> enc/dec modules, loads the firmware, and performs some common hardware
> initialization. Since the three modules are loaded one after the other,
> and their probe functions can run in parallel it is possible that
> the venc_probe and vdec_probe functions can finish before the core
> venus_probe function, which then can fail when, for example it
> fails to load the firmware. In this case the subsequent call to venc_open
> causes an Oops as it tries to dereference already uninitialized structures
> through dev->parent and the system crashes in __pm_runtime_resume() as in
> the trace below:
>
> [   26.064835][  T485] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [   26.270914][  T485] Hardware name: Thundercomm Dragonboard 845c (DT)
> [   26.285019][  T485] pc : __pm_runtime_resume+0x34/0x178
> [   26.286374][  T213] lt9611 10-003b: hdmi cable connected
> [   26.290285][  T485] lr : venc_open+0xc0/0x278 [venus_enc]
> [   26.290326][  T485] Call trace:
> [   26.290328][  T485]  __pm_runtime_resume+0x34/0x178
> [   26.290330][  T485]  venc_open+0xc0/0x278 [venus_enc]
> [   26.290335][  T485]  v4l2_open+0x184/0x294
> [   26.290340][  T485]  chrdev_open+0x468/0x5c8
> [   26.290344][  T485]  do_dentry_open+0x260/0x54c
> [   26.290349][  T485]  path_openat+0xbe8/0xd5c
> [   26.290352][  T485]  do_filp_open+0xb8/0x168
> [   26.290354][  T485]  do_sys_openat2+0xa4/0x1e8
> [   26.290357][  T485]  __arm64_compat_sys_openat+0x70/0x9c
> [   26.290359][  T485]  invoke_syscall+0x60/0x170
> [   26.290363][  T485]  el0_svc_common+0xb8/0xf8
> [   26.290365][  T485]  do_el0_svc_compat+0x20/0x30
> [   26.290367][  T485]  el0_svc_compat+0x24/0x84
> [   26.290372][  T485]  el0t_32_sync_handler+0x7c/0xbc
> [   26.290374][  T485]  el0t_32_sync+0x1b8/0x1bc
> [   26.290381][  T485] ---[ end trace 04ca7c088b4c1a9c ]---
> [   26.290383][  T485] Kernel panic - not syncing: Oops: Fatal exception
>
> This can be fixed by synchronizing the three probe functions and
> only allowing the venc_probe() and vdec_probe() to pass when venus_probe()
> returns success.
>
> Changes in v2:
> - Change locking from mutex_lock to mutex_trylock
>   in venc_probe and vdec_probe to avoid potential deadlock.
>
> Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>

This works for me, and avoids the deadlock I was hitting with the
earlier version of the patch!

Tested-by: John Stultz <john.stultz@linaro.org>

thanks so much!
-john

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-10-29 21:48 [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec Tadeusz Struk
  2021-10-29 23:59 ` John Stultz
@ 2021-11-11  0:28 ` John Stultz
  2021-11-23 22:57 ` Peter Collingbourne
  2021-11-24  3:31 ` Bjorn Andersson
  3 siblings, 0 replies; 9+ messages in thread
From: John Stultz @ 2021-11-11  0:28 UTC (permalink / raw)
  To: Tadeusz Struk
  Cc: Stanimir Varbanov, Andy Gross, Bjorn Andersson,
	Mauro Carvalho Chehab, Lee Jones, Amit Pundir, linux-media,
	linux-arm-msm, linux-kernel

On Fri, Oct 29, 2021 at 2:48 PM Tadeusz Struk <tadeusz.struk@linaro.org> wrote:
>
> Venus video encode/decode hardware driver consists of three modules.
> The parent module venus-core, and two sub modules venus-enc and venus-dec.
> The venus-core module allocates a common structure that is used by the
> enc/dec modules, loads the firmware, and performs some common hardware
> initialization. Since the three modules are loaded one after the other,
> and their probe functions can run in parallel it is possible that
> the venc_probe and vdec_probe functions can finish before the core
> venus_probe function, which then can fail when, for example it
> fails to load the firmware. In this case the subsequent call to venc_open
> causes an Oops as it tries to dereference already uninitialized structures
> through dev->parent and the system crashes in __pm_runtime_resume() as in
> the trace below:
>
> [   26.064835][  T485] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [   26.270914][  T485] Hardware name: Thundercomm Dragonboard 845c (DT)
> [   26.285019][  T485] pc : __pm_runtime_resume+0x34/0x178
> [   26.286374][  T213] lt9611 10-003b: hdmi cable connected
> [   26.290285][  T485] lr : venc_open+0xc0/0x278 [venus_enc]
> [   26.290326][  T485] Call trace:
> [   26.290328][  T485]  __pm_runtime_resume+0x34/0x178
> [   26.290330][  T485]  venc_open+0xc0/0x278 [venus_enc]
> [   26.290335][  T485]  v4l2_open+0x184/0x294
> [   26.290340][  T485]  chrdev_open+0x468/0x5c8
> [   26.290344][  T485]  do_dentry_open+0x260/0x54c
> [   26.290349][  T485]  path_openat+0xbe8/0xd5c
> [   26.290352][  T485]  do_filp_open+0xb8/0x168
> [   26.290354][  T485]  do_sys_openat2+0xa4/0x1e8
> [   26.290357][  T485]  __arm64_compat_sys_openat+0x70/0x9c
> [   26.290359][  T485]  invoke_syscall+0x60/0x170
> [   26.290363][  T485]  el0_svc_common+0xb8/0xf8
> [   26.290365][  T485]  do_el0_svc_compat+0x20/0x30
> [   26.290367][  T485]  el0_svc_compat+0x24/0x84
> [   26.290372][  T485]  el0t_32_sync_handler+0x7c/0xbc
> [   26.290374][  T485]  el0t_32_sync+0x1b8/0x1bc
> [   26.290381][  T485] ---[ end trace 04ca7c088b4c1a9c ]---
> [   26.290383][  T485] Kernel panic - not syncing: Oops: Fatal exception
>
> This can be fixed by synchronizing the three probe functions and
> only allowing the venc_probe() and vdec_probe() to pass when venus_probe()
> returns success.
>
> Changes in v2:
> - Change locking from mutex_lock to mutex_trylock
>   in venc_probe and vdec_probe to avoid potential deadlock.
>
> Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>

Just wanted to ping folks on this patch, as it does resolve a frequent
crash that we see on db845c/RB3 and RB5 hardware, so it would be nice
to see it land & backported to -stable.

thanks
-john

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-10-29 21:48 [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec Tadeusz Struk
  2021-10-29 23:59 ` John Stultz
  2021-11-11  0:28 ` John Stultz
@ 2021-11-23 22:57 ` Peter Collingbourne
  2021-11-24  3:31 ` Bjorn Andersson
  3 siblings, 0 replies; 9+ messages in thread
From: Peter Collingbourne @ 2021-11-23 22:57 UTC (permalink / raw)
  To: Tadeusz Struk
  Cc: Stanimir Varbanov, Andy Gross, Bjorn Andersson,
	Mauro Carvalho Chehab, Lee Jones, Amit Pundir, John Stultz,
	linux-media, linux-arm-msm, linux-kernel

On Fri, Oct 29, 2021 at 02:48:33PM -0700, Tadeusz Struk wrote:
> Venus video encode/decode hardware driver consists of three modules.
> The parent module venus-core, and two sub modules venus-enc and venus-dec.
> The venus-core module allocates a common structure that is used by the
> enc/dec modules, loads the firmware, and performs some common hardware
> initialization. Since the three modules are loaded one after the other,
> and their probe functions can run in parallel it is possible that
> the venc_probe and vdec_probe functions can finish before the core
> venus_probe function, which then can fail when, for example it
> fails to load the firmware. In this case the subsequent call to venc_open
> causes an Oops as it tries to dereference already uninitialized structures
> through dev->parent and the system crashes in __pm_runtime_resume() as in
> the trace below:
> 
> [   26.064835][  T485] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [   26.270914][  T485] Hardware name: Thundercomm Dragonboard 845c (DT)
> [   26.285019][  T485] pc : __pm_runtime_resume+0x34/0x178
> [   26.286374][  T213] lt9611 10-003b: hdmi cable connected
> [   26.290285][  T485] lr : venc_open+0xc0/0x278 [venus_enc]
> [   26.290326][  T485] Call trace:
> [   26.290328][  T485]  __pm_runtime_resume+0x34/0x178
> [   26.290330][  T485]  venc_open+0xc0/0x278 [venus_enc]
> [   26.290335][  T485]  v4l2_open+0x184/0x294
> [   26.290340][  T485]  chrdev_open+0x468/0x5c8
> [   26.290344][  T485]  do_dentry_open+0x260/0x54c
> [   26.290349][  T485]  path_openat+0xbe8/0xd5c
> [   26.290352][  T485]  do_filp_open+0xb8/0x168
> [   26.290354][  T485]  do_sys_openat2+0xa4/0x1e8
> [   26.290357][  T485]  __arm64_compat_sys_openat+0x70/0x9c
> [   26.290359][  T485]  invoke_syscall+0x60/0x170
> [   26.290363][  T485]  el0_svc_common+0xb8/0xf8
> [   26.290365][  T485]  do_el0_svc_compat+0x20/0x30
> [   26.290367][  T485]  el0_svc_compat+0x24/0x84
> [   26.290372][  T485]  el0t_32_sync_handler+0x7c/0xbc
> [   26.290374][  T485]  el0t_32_sync+0x1b8/0x1bc
> [   26.290381][  T485] ---[ end trace 04ca7c088b4c1a9c ]---
> [   26.290383][  T485] Kernel panic - not syncing: Oops: Fatal exception
> 
> This can be fixed by synchronizing the three probe functions and
> only allowing the venc_probe() and vdec_probe() to pass when venus_probe()
> returns success.
> 
> Changes in v2:
> - Change locking from mutex_lock to mutex_trylock
>   in venc_probe and vdec_probe to avoid potential deadlock.
> 
> Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>

I've had this patched in, no crashes so far after several reboots.

Tested-by: Peter Collingbourne <pcc@google.com>

Peter

> ---
>  drivers/media/platform/qcom/venus/core.c |  6 ++++++
>  drivers/media/platform/qcom/venus/core.h |  2 ++
>  drivers/media/platform/qcom/venus/vdec.c | 24 +++++++++++++++++++++---
>  drivers/media/platform/qcom/venus/venc.c | 24 +++++++++++++++++++++---
>  4 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
> index 91b15842c555..18f3e3a9823f 100644
> --- a/drivers/media/platform/qcom/venus/core.c
> +++ b/drivers/media/platform/qcom/venus/core.c
> @@ -317,6 +317,7 @@ static int venus_probe(struct platform_device *pdev)
>  
>  	INIT_LIST_HEAD(&core->instances);
>  	mutex_init(&core->lock);
> +	mutex_init(&core->sync_lock);
>  	INIT_DELAYED_WORK(&core->work, venus_sys_error_handler);
>  
>  	ret = devm_request_threaded_irq(dev, core->irq, hfi_isr, hfi_isr_thread,
> @@ -331,6 +332,8 @@ static int venus_probe(struct platform_device *pdev)
>  
>  	venus_assign_register_offsets(core);
>  
> +	mutex_lock(&core->sync_lock);
> +
>  	ret = v4l2_device_register(dev, &core->v4l2_dev);
>  	if (ret)
>  		goto err_core_deinit;
> @@ -377,6 +380,7 @@ static int venus_probe(struct platform_device *pdev)
>  		goto err_dev_unregister;
>  	}
>  
> +	mutex_unlock(&core->sync_lock);
>  	venus_dbgfs_init(core);
>  
>  	return 0;
> @@ -392,6 +396,7 @@ static int venus_probe(struct platform_device *pdev)
>  	hfi_destroy(core);
>  err_core_deinit:
>  	hfi_core_deinit(core, false);
> +	mutex_unlock(&core->sync_lock);
>  err_core_put:
>  	if (core->pm_ops->core_put)
>  		core->pm_ops->core_put(core);
> @@ -428,6 +433,7 @@ static int venus_remove(struct platform_device *pdev)
>  
>  	mutex_destroy(&core->pm_lock);
>  	mutex_destroy(&core->lock);
> +	mutex_destroy(&core->sync_lock);
>  	venus_dbgfs_deinit(core);
>  
>  	return ret;
> diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
> index 5ec851115eca..3f80dc26febb 100644
> --- a/drivers/media/platform/qcom/venus/core.h
> +++ b/drivers/media/platform/qcom/venus/core.h
> @@ -119,6 +119,7 @@ struct venus_format {
>   * @use_tz:	a flag that suggests presence of trustzone
>   * @fw:		structure of firmware parameters
>   * @lock:	a lock for this strucure
> + * @sync_lock	a lock for probe sync between venus_core and venus_enc/dec
>   * @instances:	a list_head of all instances
>   * @insts_count:	num of instances
>   * @state:	the state of the venus core
> @@ -176,6 +177,7 @@ struct venus_core {
>  		size_t mem_size;
>  	} fw;
>  	struct mutex lock;
> +	struct mutex sync_lock;
>  	struct list_head instances;
>  	atomic_t insts_count;
>  	unsigned int state;
> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> index 198e47eb63f4..959e43bb6c00 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -1659,17 +1659,32 @@ static int vdec_probe(struct platform_device *pdev)
>  	if (!core)
>  		return -EPROBE_DEFER;
>  
> +	/* Sync and wait on the venus core to initialize first.
> +	 * If we manage to acquire the sync_lock here it means
> +	 * that the venus_probe() finished running */
> +	ret = mutex_trylock(&core->sync_lock);
> +	if (!ret) {
> +		return -EPROBE_DEFER;
> +	} else {
> +		if (core->state != CORE_INIT) {
> +			ret = -ENODEV;
> +			goto err_core_unlock;
> +		}
> +	}
> +
>  	platform_set_drvdata(pdev, core);
>  
>  	if (core->pm_ops->vdec_get) {
>  		ret = core->pm_ops->vdec_get(dev);
>  		if (ret)
> -			return ret;
> +			goto err_core_unlock;
>  	}
>  
>  	vdev = video_device_alloc();
> -	if (!vdev)
> -		return -ENOMEM;
> +	if (!vdev) {
> +		ret = -ENOMEM;
> +		goto err_core_unlock;
> +	}
>  
>  	strscpy(vdev->name, "qcom-venus-decoder", sizeof(vdev->name));
>  	vdev->release = video_device_release;
> @@ -1690,11 +1705,14 @@ static int vdec_probe(struct platform_device *pdev)
>  	pm_runtime_set_autosuspend_delay(dev, 2000);
>  	pm_runtime_use_autosuspend(dev);
>  	pm_runtime_enable(dev);
> +	mutex_unlock(&core->sync_lock);
>  
>  	return 0;
>  
>  err_vdev_release:
>  	video_device_release(vdev);
> +err_core_unlock:
> +	mutex_unlock(&core->sync_lock);
>  	return ret;
>  }
>  
> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> index bc1c42dd53c0..11ec7bff5e3f 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -1338,17 +1338,32 @@ static int venc_probe(struct platform_device *pdev)
>  	if (!core)
>  		return -EPROBE_DEFER;
>  
> +	/* Sync and wait on the venus core to initialize first.
> +	 * If we manage to acquire the sync_lock here it means
> +	 * that the venus_probe() finished running */
> +	ret = mutex_trylock(&core->sync_lock);
> +	if (!ret) {
> +		return -EPROBE_DEFER;
> +	} else {
> +		if (core->state != CORE_INIT) {
> +			ret = -ENODEV;
> +			goto err_core_unlock;
> +		}
> +	}
> +
>  	platform_set_drvdata(pdev, core);
>  
>  	if (core->pm_ops->venc_get) {
>  		ret = core->pm_ops->venc_get(dev);
>  		if (ret)
> -			return ret;
> +			goto err_core_unlock;
>  	}
>  
>  	vdev = video_device_alloc();
> -	if (!vdev)
> -		return -ENOMEM;
> +	if (!vdev) {
> +		ret = -ENOMEM;
> +		goto err_core_unlock;
> +	}
>  
>  	strscpy(vdev->name, "qcom-venus-encoder", sizeof(vdev->name));
>  	vdev->release = video_device_release;
> @@ -1367,11 +1382,14 @@ static int venc_probe(struct platform_device *pdev)
>  
>  	video_set_drvdata(vdev, core);
>  	pm_runtime_enable(dev);
> +	mutex_unlock(&core->sync_lock);
>  
>  	return 0;
>  
>  err_vdev_release:
>  	video_device_release(vdev);
> +err_core_unlock:
> +	mutex_unlock(&core->sync_lock);
>  	return ret;
>  }
>  
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-10-29 21:48 [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec Tadeusz Struk
                   ` (2 preceding siblings ...)
  2021-11-23 22:57 ` Peter Collingbourne
@ 2021-11-24  3:31 ` Bjorn Andersson
  2021-12-01  4:49   ` John Stultz
  3 siblings, 1 reply; 9+ messages in thread
From: Bjorn Andersson @ 2021-11-24  3:31 UTC (permalink / raw)
  To: Tadeusz Struk
  Cc: Stanimir Varbanov, Andy Gross, Mauro Carvalho Chehab, Lee Jones,
	Amit Pundir, John Stultz, linux-media, linux-arm-msm,
	linux-kernel

On Fri 29 Oct 14:48 PDT 2021, Tadeusz Struk wrote:

> Venus video encode/decode hardware driver consists of three modules.
> The parent module venus-core, and two sub modules venus-enc and venus-dec.
> The venus-core module allocates a common structure that is used by the
> enc/dec modules, loads the firmware, and performs some common hardware
> initialization. Since the three modules are loaded one after the other,
> and their probe functions can run in parallel it is possible that
> the venc_probe and vdec_probe functions can finish before the core
> venus_probe function, which then can fail when, for example it
> fails to load the firmware. In this case the subsequent call to venc_open
> causes an Oops as it tries to dereference already uninitialized structures
> through dev->parent and the system crashes in __pm_runtime_resume() as in
> the trace below:
> 
> [   26.064835][  T485] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [   26.270914][  T485] Hardware name: Thundercomm Dragonboard 845c (DT)
> [   26.285019][  T485] pc : __pm_runtime_resume+0x34/0x178
> [   26.286374][  T213] lt9611 10-003b: hdmi cable connected
> [   26.290285][  T485] lr : venc_open+0xc0/0x278 [venus_enc]
> [   26.290326][  T485] Call trace:
> [   26.290328][  T485]  __pm_runtime_resume+0x34/0x178
> [   26.290330][  T485]  venc_open+0xc0/0x278 [venus_enc]
> [   26.290335][  T485]  v4l2_open+0x184/0x294
> [   26.290340][  T485]  chrdev_open+0x468/0x5c8
> [   26.290344][  T485]  do_dentry_open+0x260/0x54c
> [   26.290349][  T485]  path_openat+0xbe8/0xd5c
> [   26.290352][  T485]  do_filp_open+0xb8/0x168
> [   26.290354][  T485]  do_sys_openat2+0xa4/0x1e8
> [   26.290357][  T485]  __arm64_compat_sys_openat+0x70/0x9c
> [   26.290359][  T485]  invoke_syscall+0x60/0x170
> [   26.290363][  T485]  el0_svc_common+0xb8/0xf8
> [   26.290365][  T485]  do_el0_svc_compat+0x20/0x30
> [   26.290367][  T485]  el0_svc_compat+0x24/0x84
> [   26.290372][  T485]  el0t_32_sync_handler+0x7c/0xbc
> [   26.290374][  T485]  el0t_32_sync+0x1b8/0x1bc
> [   26.290381][  T485] ---[ end trace 04ca7c088b4c1a9c ]---
> [   26.290383][  T485] Kernel panic - not syncing: Oops: Fatal exception
> 
> This can be fixed by synchronizing the three probe functions and
> only allowing the venc_probe() and vdec_probe() to pass when venus_probe()
> returns success.
> 
> Changes in v2:
> - Change locking from mutex_lock to mutex_trylock
>   in venc_probe and vdec_probe to avoid potential deadlock.
> 

Rather than trying to synchronize away the side effects of
of_platform_populate() I think we should stop using it.

I had the very same problem in the qcom_wcnss remoteproc driver and
in below change I got rid of that by manually initializing a struct
device for the child node. In the event that the child probe defer I
would just probe defer the parent as well.

1fcef985c8bd ("remoteproc: qcom: wcnss: Fix race with iris probe")

The change might look a little bit messy, but the end result it much
cleaner than relying on various locks etc.


But in the qcom_wcnss case I have a child _device_ because I need
something to do e.g. regulator_get() on. I fail to see why venc and vdec
are devices in the first place.

Regards,
Bjorn

> Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
> ---
>  drivers/media/platform/qcom/venus/core.c |  6 ++++++
>  drivers/media/platform/qcom/venus/core.h |  2 ++
>  drivers/media/platform/qcom/venus/vdec.c | 24 +++++++++++++++++++++---
>  drivers/media/platform/qcom/venus/venc.c | 24 +++++++++++++++++++++---
>  4 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
> index 91b15842c555..18f3e3a9823f 100644
> --- a/drivers/media/platform/qcom/venus/core.c
> +++ b/drivers/media/platform/qcom/venus/core.c
> @@ -317,6 +317,7 @@ static int venus_probe(struct platform_device *pdev)
>  
>  	INIT_LIST_HEAD(&core->instances);
>  	mutex_init(&core->lock);
> +	mutex_init(&core->sync_lock);
>  	INIT_DELAYED_WORK(&core->work, venus_sys_error_handler);
>  
>  	ret = devm_request_threaded_irq(dev, core->irq, hfi_isr, hfi_isr_thread,
> @@ -331,6 +332,8 @@ static int venus_probe(struct platform_device *pdev)
>  
>  	venus_assign_register_offsets(core);
>  
> +	mutex_lock(&core->sync_lock);
> +
>  	ret = v4l2_device_register(dev, &core->v4l2_dev);
>  	if (ret)
>  		goto err_core_deinit;
> @@ -377,6 +380,7 @@ static int venus_probe(struct platform_device *pdev)
>  		goto err_dev_unregister;
>  	}
>  
> +	mutex_unlock(&core->sync_lock);
>  	venus_dbgfs_init(core);
>  
>  	return 0;
> @@ -392,6 +396,7 @@ static int venus_probe(struct platform_device *pdev)
>  	hfi_destroy(core);
>  err_core_deinit:
>  	hfi_core_deinit(core, false);
> +	mutex_unlock(&core->sync_lock);
>  err_core_put:
>  	if (core->pm_ops->core_put)
>  		core->pm_ops->core_put(core);
> @@ -428,6 +433,7 @@ static int venus_remove(struct platform_device *pdev)
>  
>  	mutex_destroy(&core->pm_lock);
>  	mutex_destroy(&core->lock);
> +	mutex_destroy(&core->sync_lock);
>  	venus_dbgfs_deinit(core);
>  
>  	return ret;
> diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
> index 5ec851115eca..3f80dc26febb 100644
> --- a/drivers/media/platform/qcom/venus/core.h
> +++ b/drivers/media/platform/qcom/venus/core.h
> @@ -119,6 +119,7 @@ struct venus_format {
>   * @use_tz:	a flag that suggests presence of trustzone
>   * @fw:		structure of firmware parameters
>   * @lock:	a lock for this strucure
> + * @sync_lock	a lock for probe sync between venus_core and venus_enc/dec
>   * @instances:	a list_head of all instances
>   * @insts_count:	num of instances
>   * @state:	the state of the venus core
> @@ -176,6 +177,7 @@ struct venus_core {
>  		size_t mem_size;
>  	} fw;
>  	struct mutex lock;
> +	struct mutex sync_lock;
>  	struct list_head instances;
>  	atomic_t insts_count;
>  	unsigned int state;
> diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
> index 198e47eb63f4..959e43bb6c00 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -1659,17 +1659,32 @@ static int vdec_probe(struct platform_device *pdev)
>  	if (!core)
>  		return -EPROBE_DEFER;
>  
> +	/* Sync and wait on the venus core to initialize first.
> +	 * If we manage to acquire the sync_lock here it means
> +	 * that the venus_probe() finished running */
> +	ret = mutex_trylock(&core->sync_lock);
> +	if (!ret) {
> +		return -EPROBE_DEFER;
> +	} else {
> +		if (core->state != CORE_INIT) {
> +			ret = -ENODEV;
> +			goto err_core_unlock;
> +		}
> +	}
> +
>  	platform_set_drvdata(pdev, core);
>  
>  	if (core->pm_ops->vdec_get) {
>  		ret = core->pm_ops->vdec_get(dev);
>  		if (ret)
> -			return ret;
> +			goto err_core_unlock;
>  	}
>  
>  	vdev = video_device_alloc();
> -	if (!vdev)
> -		return -ENOMEM;
> +	if (!vdev) {
> +		ret = -ENOMEM;
> +		goto err_core_unlock;
> +	}
>  
>  	strscpy(vdev->name, "qcom-venus-decoder", sizeof(vdev->name));
>  	vdev->release = video_device_release;
> @@ -1690,11 +1705,14 @@ static int vdec_probe(struct platform_device *pdev)
>  	pm_runtime_set_autosuspend_delay(dev, 2000);
>  	pm_runtime_use_autosuspend(dev);
>  	pm_runtime_enable(dev);
> +	mutex_unlock(&core->sync_lock);
>  
>  	return 0;
>  
>  err_vdev_release:
>  	video_device_release(vdev);
> +err_core_unlock:
> +	mutex_unlock(&core->sync_lock);
>  	return ret;
>  }
>  
> diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
> index bc1c42dd53c0..11ec7bff5e3f 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -1338,17 +1338,32 @@ static int venc_probe(struct platform_device *pdev)
>  	if (!core)
>  		return -EPROBE_DEFER;
>  
> +	/* Sync and wait on the venus core to initialize first.
> +	 * If we manage to acquire the sync_lock here it means
> +	 * that the venus_probe() finished running */
> +	ret = mutex_trylock(&core->sync_lock);
> +	if (!ret) {
> +		return -EPROBE_DEFER;
> +	} else {
> +		if (core->state != CORE_INIT) {
> +			ret = -ENODEV;
> +			goto err_core_unlock;
> +		}
> +	}
> +
>  	platform_set_drvdata(pdev, core);
>  
>  	if (core->pm_ops->venc_get) {
>  		ret = core->pm_ops->venc_get(dev);
>  		if (ret)
> -			return ret;
> +			goto err_core_unlock;
>  	}
>  
>  	vdev = video_device_alloc();
> -	if (!vdev)
> -		return -ENOMEM;
> +	if (!vdev) {
> +		ret = -ENOMEM;
> +		goto err_core_unlock;
> +	}
>  
>  	strscpy(vdev->name, "qcom-venus-encoder", sizeof(vdev->name));
>  	vdev->release = video_device_release;
> @@ -1367,11 +1382,14 @@ static int venc_probe(struct platform_device *pdev)
>  
>  	video_set_drvdata(vdev, core);
>  	pm_runtime_enable(dev);
> +	mutex_unlock(&core->sync_lock);
>  
>  	return 0;
>  
>  err_vdev_release:
>  	video_device_release(vdev);
> +err_core_unlock:
> +	mutex_unlock(&core->sync_lock);
>  	return ret;
>  }
>  
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-11-24  3:31 ` Bjorn Andersson
@ 2021-12-01  4:49   ` John Stultz
  2021-12-09  3:11     ` John Stultz
  0 siblings, 1 reply; 9+ messages in thread
From: John Stultz @ 2021-12-01  4:49 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: Tadeusz Struk, Stanimir Varbanov, Andy Gross,
	Mauro Carvalho Chehab, Lee Jones, Amit Pundir, linux-media,
	linux-arm-msm, linux-kernel

On Tue, Nov 23, 2021 at 7:29 PM Bjorn Andersson
<bjorn.andersson@linaro.org> wrote:
>
> On Fri 29 Oct 14:48 PDT 2021, Tadeusz Struk wrote:
>
> > Venus video encode/decode hardware driver consists of three modules.
> > The parent module venus-core, and two sub modules venus-enc and venus-dec.
> > The venus-core module allocates a common structure that is used by the
> > enc/dec modules, loads the firmware, and performs some common hardware
> > initialization. Since the three modules are loaded one after the other,
> > and their probe functions can run in parallel it is possible that
> > the venc_probe and vdec_probe functions can finish before the core
> > venus_probe function, which then can fail when, for example it
> > fails to load the firmware. In this case the subsequent call to venc_open
> > causes an Oops as it tries to dereference already uninitialized structures
> > through dev->parent and the system crashes in __pm_runtime_resume() as in
> > the trace below:
> >
> > [   26.064835][  T485] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> > [   26.270914][  T485] Hardware name: Thundercomm Dragonboard 845c (DT)
> > [   26.285019][  T485] pc : __pm_runtime_resume+0x34/0x178
> > [   26.286374][  T213] lt9611 10-003b: hdmi cable connected
> > [   26.290285][  T485] lr : venc_open+0xc0/0x278 [venus_enc]
> > [   26.290326][  T485] Call trace:
> > [   26.290328][  T485]  __pm_runtime_resume+0x34/0x178
> > [   26.290330][  T485]  venc_open+0xc0/0x278 [venus_enc]
> > [   26.290335][  T485]  v4l2_open+0x184/0x294
> > [   26.290340][  T485]  chrdev_open+0x468/0x5c8
> > [   26.290344][  T485]  do_dentry_open+0x260/0x54c
> > [   26.290349][  T485]  path_openat+0xbe8/0xd5c
> > [   26.290352][  T485]  do_filp_open+0xb8/0x168
> > [   26.290354][  T485]  do_sys_openat2+0xa4/0x1e8
> > [   26.290357][  T485]  __arm64_compat_sys_openat+0x70/0x9c
> > [   26.290359][  T485]  invoke_syscall+0x60/0x170
> > [   26.290363][  T485]  el0_svc_common+0xb8/0xf8
> > [   26.290365][  T485]  do_el0_svc_compat+0x20/0x30
> > [   26.290367][  T485]  el0_svc_compat+0x24/0x84
> > [   26.290372][  T485]  el0t_32_sync_handler+0x7c/0xbc
> > [   26.290374][  T485]  el0t_32_sync+0x1b8/0x1bc
> > [   26.290381][  T485] ---[ end trace 04ca7c088b4c1a9c ]---
> > [   26.290383][  T485] Kernel panic - not syncing: Oops: Fatal exception
> >
> > This can be fixed by synchronizing the three probe functions and
> > only allowing the venc_probe() and vdec_probe() to pass when venus_probe()
> > returns success.
> >
> > Changes in v2:
> > - Change locking from mutex_lock to mutex_trylock
> >   in venc_probe and vdec_probe to avoid potential deadlock.
> >
>
> Rather than trying to synchronize away the side effects of
> of_platform_populate() I think we should stop using it.
>
> I had the very same problem in the qcom_wcnss remoteproc driver and
> in below change I got rid of that by manually initializing a struct
> device for the child node. In the event that the child probe defer I
> would just probe defer the parent as well.
>
> 1fcef985c8bd ("remoteproc: qcom: wcnss: Fix race with iris probe")
>
> The change might look a little bit messy, but the end result it much
> cleaner than relying on various locks etc.
>
>
> But in the qcom_wcnss case I have a child _device_ because I need
> something to do e.g. regulator_get() on. I fail to see why venc and vdec
> are devices in the first place.

I definitely agree with Bjorn that all this asynchronous component
probing feels overly complicated, and a rework is probably the better
solution.

Though my only question is:  is someone planning to do this rework?

In the meantime, Tadeusz' patch does resolve a *very* frequent boot
crash seen when the venus driver is enabled.
So Stanimir, should we consider merging this as a stop gap until the
larger probe rework is done?

thanks
-john

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-12-01  4:49   ` John Stultz
@ 2021-12-09  3:11     ` John Stultz
  2021-12-13 22:50       ` Stanimir Varbanov
  0 siblings, 1 reply; 9+ messages in thread
From: John Stultz @ 2021-12-09  3:11 UTC (permalink / raw)
  To: Bjorn Andersson, Stanimir Varbanov
  Cc: Tadeusz Struk, Andy Gross, Mauro Carvalho Chehab, Lee Jones,
	Amit Pundir, linux-media, linux-arm-msm, linux-kernel

On Tue, Nov 30, 2021 at 8:49 PM John Stultz <john.stultz@linaro.org> wrote:
> On Tue, Nov 23, 2021 at 7:29 PM Bjorn Andersson
> <bjorn.andersson@linaro.org> wrote:
> > Rather than trying to synchronize away the side effects of
> > of_platform_populate() I think we should stop using it.
> >
> > I had the very same problem in the qcom_wcnss remoteproc driver and
> > in below change I got rid of that by manually initializing a struct
> > device for the child node. In the event that the child probe defer I
> > would just probe defer the parent as well.
> >
> > 1fcef985c8bd ("remoteproc: qcom: wcnss: Fix race with iris probe")
> >
> > The change might look a little bit messy, but the end result it much
> > cleaner than relying on various locks etc.
> >
> >
> > But in the qcom_wcnss case I have a child _device_ because I need
> > something to do e.g. regulator_get() on. I fail to see why venc and vdec
> > are devices in the first place.
>
> I definitely agree with Bjorn that all this asynchronous component
> probing feels overly complicated, and a rework is probably the better
> solution.
>
> Though my only question is:  is someone planning to do this rework?
>
> In the meantime, Tadeusz' patch does resolve a *very* frequent boot
> crash seen when the venus driver is enabled.
> So Stanimir, should we consider merging this as a stop gap until the
> larger probe rework is done?

Stanimir? Does the above sound reasonable?

thanks
-john

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-12-09  3:11     ` John Stultz
@ 2021-12-13 22:50       ` Stanimir Varbanov
  2021-12-13 22:55         ` Stanimir Varbanov
  0 siblings, 1 reply; 9+ messages in thread
From: Stanimir Varbanov @ 2021-12-13 22:50 UTC (permalink / raw)
  To: John Stultz, Bjorn Andersson
  Cc: Tadeusz Struk, Andy Gross, Mauro Carvalho Chehab, Lee Jones,
	Amit Pundir, linux-media, linux-arm-msm, linux-kernel

Hi John,

On 12/9/21 5:11 AM, John Stultz wrote:
> On Tue, Nov 30, 2021 at 8:49 PM John Stultz <john.stultz@linaro.org> wrote:
>> On Tue, Nov 23, 2021 at 7:29 PM Bjorn Andersson
>> <bjorn.andersson@linaro.org> wrote:
>>> Rather than trying to synchronize away the side effects of
>>> of_platform_populate() I think we should stop using it.
>>>
>>> I had the very same problem in the qcom_wcnss remoteproc driver and
>>> in below change I got rid of that by manually initializing a struct
>>> device for the child node. In the event that the child probe defer I
>>> would just probe defer the parent as well.
>>>
>>> 1fcef985c8bd ("remoteproc: qcom: wcnss: Fix race with iris probe")
>>>
>>> The change might look a little bit messy, but the end result it much
>>> cleaner than relying on various locks etc.
>>>
>>>
>>> But in the qcom_wcnss case I have a child _device_ because I need
>>> something to do e.g. regulator_get() on. I fail to see why venc and vdec
>>> are devices in the first place.
>>
>> I definitely agree with Bjorn that all this asynchronous component
>> probing feels overly complicated, and a rework is probably the better
>> solution.
>>
>> Though my only question is:  is someone planning to do this rework?
>>
>> In the meantime, Tadeusz' patch does resolve a *very* frequent boot
>> crash seen when the venus driver is enabled.
>> So Stanimir, should we consider merging this as a stop gap until the
>> larger probe rework is done?
> 
> Stanimir? Does the above sound reasonable?

Apologize for the delay.

I'd like to avoid one more mutex in the driver,  I think some reordering
in the .probe and changing the firmware_request API could help. I'll
spend some time to dig more deeply into the problem.

See untested patch below (I have to simulate firmware load from ufs
partition on Debian).

> 
> thanks
> -john
> 

-- 
regards,
Stan

From 9bfb69026374fa010d36680554e2634d5d435681 Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Tue, 14 Dec 2021 00:45:18 +0200
Subject: [PATCH] venus: WIP: Rework and reorder firmware load

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
---
 drivers/media/platform/qcom/venus/core.c     |  8 +++----
 drivers/media/platform/qcom/venus/core.h     |  2 ++
 drivers/media/platform/qcom/venus/firmware.c | 22 +++++++++++++++++++-
 drivers/media/platform/qcom/venus/vdec.c     |  3 ++-
 drivers/media/platform/qcom/venus/venc.c     |  3 ++-
 5 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.c
b/drivers/media/platform/qcom/venus/core.c
index 877eca125803..7f65b08b2bac 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -344,10 +344,6 @@ static int venus_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err_runtime_disable;

-	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
-	if (ret)
-		goto err_runtime_disable;
-
 	ret = venus_firmware_init(core);
 	if (ret)
 		goto err_of_depopulate;
@@ -372,6 +368,10 @@ static int venus_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_venus_shutdown;

+	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
+	if (ret)
+		goto err_venus_shutdown;
+
 	ret = pm_runtime_put_sync(dev);
 	if (ret) {
 		pm_runtime_get_noresume(dev);
diff --git a/drivers/media/platform/qcom/venus/core.h
b/drivers/media/platform/qcom/venus/core.h
index 7c3bac01cd49..6455efb35168 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -182,6 +182,8 @@ struct venus_core {
 	atomic_t insts_count;
 	unsigned int state;
 	struct completion done;
+	struct completion fwload_done;
+	bool fwload_success;
 	unsigned int error;
 	unsigned long sys_error;
 	wait_queue_head_t sys_err_done;
diff --git a/drivers/media/platform/qcom/venus/firmware.c
b/drivers/media/platform/qcom/venus/firmware.c
index 14b6f1d05991..d523fbeb9d56 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -76,6 +76,14 @@ int venus_set_hw_state(struct venus_core *core, bool
resume)
 	return 0;
 }

+static void firmware_async_load(const struct firmware *fw, void *context)
+{
+	struct venus_core *core = context;
+
+	core->fwload_success = true;
+	complete(&core->fwload_done);
+}
+
 static int venus_load_fw(struct venus_core *core, const char *fwname,
 			 phys_addr_t *mem_phys, size_t *mem_size)
 {
@@ -101,10 +109,22 @@ static int venus_load_fw(struct venus_core *core,
const char *fwname,
 	if (ret)
 		goto err_put_node;

-	ret = request_firmware(&mdt, fwname, dev);
+	init_completion(&core->fwload_done);
+	core->fwload_success = false;
+
+	ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOUEVENT, fwname,
+				      dev, GFP_KERNEL, core,
+				      firmware_async_load);
 	if (ret < 0)
 		goto err_put_node;

+	wait_for_completion(&core->fwload_done);
+
+	if (!core->fwload_success) {
+		ret = -ENOENT;
+		goto err_put_node;
+	}
+
 	fw_size = qcom_mdt_get_size(mdt);
 	if (fw_size < 0) {
 		ret = fw_size;
diff --git a/drivers/media/platform/qcom/venus/vdec.c
b/drivers/media/platform/qcom/venus/vdec.c
index 91da3f509724..0e718d24a3b3 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -1718,6 +1718,8 @@ static int vdec_probe(struct platform_device *pdev)
 	if (!vdev)
 		return -ENOMEM;

+	core->dev_dec = dev;
+
 	strscpy(vdev->name, "qcom-venus-decoder", sizeof(vdev->name));
 	vdev->release = video_device_release;
 	vdev->fops = &vdec_fops;
@@ -1731,7 +1733,6 @@ static int vdec_probe(struct platform_device *pdev)
 		goto err_vdev_release;

 	core->vdev_dec = vdev;
-	core->dev_dec = dev;

 	video_set_drvdata(vdev, core);
 	pm_runtime_set_autosuspend_delay(dev, 2000);
diff --git a/drivers/media/platform/qcom/venus/venc.c
b/drivers/media/platform/qcom/venus/venc.c
index 84bafc3118cc..1b3fb927eb16 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -1448,6 +1448,8 @@ static int venc_probe(struct platform_device *pdev)
 	if (!vdev)
 		return -ENOMEM;

+	core->dev_enc = dev;
+
 	strscpy(vdev->name, "qcom-venus-encoder", sizeof(vdev->name));
 	vdev->release = video_device_release;
 	vdev->fops = &venc_fops;
@@ -1461,7 +1463,6 @@ static int venc_probe(struct platform_device *pdev)
 		goto err_vdev_release;

 	core->vdev_enc = vdev;
-	core->dev_enc = dev;

 	video_set_drvdata(vdev, core);
 	pm_runtime_set_autosuspend_delay(dev, 2000);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec
  2021-12-13 22:50       ` Stanimir Varbanov
@ 2021-12-13 22:55         ` Stanimir Varbanov
  0 siblings, 0 replies; 9+ messages in thread
From: Stanimir Varbanov @ 2021-12-13 22:55 UTC (permalink / raw)
  To: John Stultz, Bjorn Andersson
  Cc: Tadeusz Struk, Andy Gross, Mauro Carvalho Chehab, Lee Jones,
	Amit Pundir, linux-media, linux-arm-msm, linux-kernel



On 12/14/21 12:50 AM, Stanimir Varbanov wrote:
> From 9bfb69026374fa010d36680554e2634d5d435681 Mon Sep 17 00:00:00 2001
> From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
> Date: Tue, 14 Dec 2021 00:45:18 +0200
> Subject: [PATCH] venus: WIP: Rework and reorder firmware load
> 
> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
> ---
>  drivers/media/platform/qcom/venus/core.c     |  8 +++----
>  drivers/media/platform/qcom/venus/core.h     |  2 ++
>  drivers/media/platform/qcom/venus/firmware.c | 22 +++++++++++++++++++-
>  drivers/media/platform/qcom/venus/vdec.c     |  3 ++-
>  drivers/media/platform/qcom/venus/venc.c     |  3 ++-
>  5 files changed, 31 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/media/platform/qcom/venus/core.c
> b/drivers/media/platform/qcom/venus/core.c
> index 877eca125803..7f65b08b2bac 100644
> --- a/drivers/media/platform/qcom/venus/core.c
> +++ b/drivers/media/platform/qcom/venus/core.c
> @@ -344,10 +344,6 @@ static int venus_probe(struct platform_device *pdev)
>  	if (ret < 0)
>  		goto err_runtime_disable;
> 
> -	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
> -	if (ret)
> -		goto err_runtime_disable;
> -
>  	ret = venus_firmware_init(core);
>  	if (ret)
>  		goto err_of_depopulate;
> @@ -372,6 +368,10 @@ static int venus_probe(struct platform_device *pdev)
>  	if (ret)
>  		goto err_venus_shutdown;
> 
> +	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
> +	if (ret)
> +		goto err_venus_shutdown;
> +
>  	ret = pm_runtime_put_sync(dev);
>  	if (ret) {
>  		pm_runtime_get_noresume(dev);
> diff --git a/drivers/media/platform/qcom/venus/core.h
> b/drivers/media/platform/qcom/venus/core.h
> index 7c3bac01cd49..6455efb35168 100644
> --- a/drivers/media/platform/qcom/venus/core.h
> +++ b/drivers/media/platform/qcom/venus/core.h
> @@ -182,6 +182,8 @@ struct venus_core {
>  	atomic_t insts_count;
>  	unsigned int state;
>  	struct completion done;
> +	struct completion fwload_done;
> +	bool fwload_success;
>  	unsigned int error;
>  	unsigned long sys_error;
>  	wait_queue_head_t sys_err_done;
> diff --git a/drivers/media/platform/qcom/venus/firmware.c
> b/drivers/media/platform/qcom/venus/firmware.c
> index 14b6f1d05991..d523fbeb9d56 100644
> --- a/drivers/media/platform/qcom/venus/firmware.c
> +++ b/drivers/media/platform/qcom/venus/firmware.c
> @@ -76,6 +76,14 @@ int venus_set_hw_state(struct venus_core *core, bool
> resume)
>  	return 0;
>  }
> 
> +static void firmware_async_load(const struct firmware *fw, void *context)
> +{
> +	struct venus_core *core = context;
> +
> +	core->fwload_success = true;

this should be

	if (fw)
		core->fwload_success = true;

> +	complete(&core->fwload_done);
> +}
> +
>  static int venus_load_fw(struct venus_core *core, const char *fwname,
>  			 phys_addr_t *mem_phys, size_t *mem_size)
>  {
> @@ -101,10 +109,22 @@ static int venus_load_fw(struct venus_core *core,
> const char *fwname,
>  	if (ret)
>  		goto err_put_node;
> 
> -	ret = request_firmware(&mdt, fwname, dev);
> +	init_completion(&core->fwload_done);
> +	core->fwload_success = false;
> +
> +	ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOUEVENT, fwname,
> +				      dev, GFP_KERNEL, core,
> +				      firmware_async_load);
>  	if (ret < 0)
>  		goto err_put_node;
> 
> +	wait_for_completion(&core->fwload_done);
> +
> +	if (!core->fwload_success) {
> +		ret = -ENOENT;
> +		goto err_put_node;
> +	}
> +
>  	fw_size = qcom_mdt_get_size(mdt);
>  	if (fw_size < 0) {
>  		ret = fw_size;
> diff --git a/drivers/media/platform/qcom/venus/vdec.c
> b/drivers/media/platform/qcom/venus/vdec.c
> index 91da3f509724..0e718d24a3b3 100644
> --- a/drivers/media/platform/qcom/venus/vdec.c
> +++ b/drivers/media/platform/qcom/venus/vdec.c
> @@ -1718,6 +1718,8 @@ static int vdec_probe(struct platform_device *pdev)
>  	if (!vdev)
>  		return -ENOMEM;
> 
> +	core->dev_dec = dev;
> +
>  	strscpy(vdev->name, "qcom-venus-decoder", sizeof(vdev->name));
>  	vdev->release = video_device_release;
>  	vdev->fops = &vdec_fops;
> @@ -1731,7 +1733,6 @@ static int vdec_probe(struct platform_device *pdev)
>  		goto err_vdev_release;
> 
>  	core->vdev_dec = vdev;
> -	core->dev_dec = dev;
> 
>  	video_set_drvdata(vdev, core);
>  	pm_runtime_set_autosuspend_delay(dev, 2000);
> diff --git a/drivers/media/platform/qcom/venus/venc.c
> b/drivers/media/platform/qcom/venus/venc.c
> index 84bafc3118cc..1b3fb927eb16 100644
> --- a/drivers/media/platform/qcom/venus/venc.c
> +++ b/drivers/media/platform/qcom/venus/venc.c
> @@ -1448,6 +1448,8 @@ static int venc_probe(struct platform_device *pdev)
>  	if (!vdev)
>  		return -ENOMEM;
> 
> +	core->dev_enc = dev;
> +
>  	strscpy(vdev->name, "qcom-venus-encoder", sizeof(vdev->name));
>  	vdev->release = video_device_release;
>  	vdev->fops = &venc_fops;
> @@ -1461,7 +1463,6 @@ static int venc_probe(struct platform_device *pdev)
>  		goto err_vdev_release;
> 
>  	core->vdev_enc = vdev;
> -	core->dev_enc = dev;
> 
>  	video_set_drvdata(vdev, core);
>  	pm_runtime_set_autosuspend_delay(dev, 2000);
> -- 2.25.1

-- 
regards,
Stan

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-12-13 22:55 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-29 21:48 [PATCH v2] media: venus: Synchronize probe() between venus_core and enc/dec Tadeusz Struk
2021-10-29 23:59 ` John Stultz
2021-11-11  0:28 ` John Stultz
2021-11-23 22:57 ` Peter Collingbourne
2021-11-24  3:31 ` Bjorn Andersson
2021-12-01  4:49   ` John Stultz
2021-12-09  3:11     ` John Stultz
2021-12-13 22:50       ` Stanimir Varbanov
2021-12-13 22:55         ` Stanimir Varbanov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).