All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V2 1/2] remoteproc: introduce rproc features
@ 2022-03-08  6:48 Peng Fan (OSS)
  2022-03-08  6:48 ` [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash Peng Fan (OSS)
  2022-03-11 18:26 ` [PATCH V2 1/2] remoteproc: introduce rproc features Bjorn Andersson
  0 siblings, 2 replies; 8+ messages in thread
From: Peng Fan (OSS) @ 2022-03-08  6:48 UTC (permalink / raw)
  To: bjorn.andersson, mathieu.poirier, arnaud.pouliquen
  Cc: linux-remoteproc, linux-kernel, peng.fan

From: Peng Fan <peng.fan@nxp.com>

remote processor may support:
 - firmware recovery with help from main processor
 - self recovery without help from main processor
 - iommu
 - etc

Introduce rproc features could simplify code to avoid adding more bool
flags and let us optimize current code.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
---

V2:
 New

 include/linux/remoteproc.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 93a1d0050fbc..51edaf80692c 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -417,6 +417,7 @@ struct rproc_ops {
  *			has attached to it
  * @RPROC_DETACHED:	device has been booted by another entity and waiting
  *			for the core to attach to it
+ * @RPROC_CRASHED_ATTACH_RECOVERY: device has crashed and self recovery
  * @RPROC_LAST:		just keep this one at the end
  *
  * Please note that the values of these states are used as indices
@@ -489,6 +490,11 @@ struct rproc_dump_segment {
 	loff_t offset;
 };
 
+enum rproc_features {
+	RPROC_FEAT_ATTACH_RECOVERY = 0,
+	RPROC_MAX_FEATURES = 32,
+};
+
 /**
  * struct rproc - represents a physical remote processor device
  * @node: list node of this rproc object
@@ -530,6 +536,7 @@ struct rproc_dump_segment {
  * @elf_machine: firmware ELF machine
  * @cdev: character device of the rproc
  * @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release
+ * @features: indicate remoteproc features
  */
 struct rproc {
 	struct list_head node;
@@ -570,8 +577,19 @@ struct rproc {
 	u16 elf_machine;
 	struct cdev cdev;
 	bool cdev_put_on_release;
+	DECLARE_BITMAP(features, RPROC_MAX_FEATURES);
 };
 
+static inline bool rproc_has_feature(struct rproc *rproc, unsigned int feature)
+{
+	return test_bit(feature, rproc->features);
+}
+
+static inline void rproc_set_feature(struct rproc *rproc, unsigned int feature)
+{
+	set_bit(feature, rproc->features);
+}
+
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
-- 
2.30.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash
  2022-03-08  6:48 [PATCH V2 1/2] remoteproc: introduce rproc features Peng Fan (OSS)
@ 2022-03-08  6:48 ` Peng Fan (OSS)
  2022-03-09 18:39   ` Mathieu Poirier
  2022-03-11 18:47   ` Bjorn Andersson
  2022-03-11 18:26 ` [PATCH V2 1/2] remoteproc: introduce rproc features Bjorn Andersson
  1 sibling, 2 replies; 8+ messages in thread
From: Peng Fan (OSS) @ 2022-03-08  6:48 UTC (permalink / raw)
  To: bjorn.andersson, mathieu.poirier, arnaud.pouliquen
  Cc: linux-remoteproc, linux-kernel, peng.fan

From: Peng Fan <peng.fan@nxp.com>

Current logic only support main processor to stop/start the remote
processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
remote processor could do attach recovery after crash and trigger watchdog
reboot. It does not need main processor to load image, or stop/start M4
core.

Introduce two functions: rproc_attach_recovery, rproc_firmware_recovery
for the two cases. Firmware recovery is as before, let main processor to
help recovery, while attach recovery is recover itself withou help.
To attach recovery, we only do detach and attach.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
---

V2:
 use rproc_has_feature in patch 1/2

 drivers/remoteproc/remoteproc_core.c | 67 ++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 69f51acf235e..366fad475898 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1887,6 +1887,50 @@ static int __rproc_detach(struct rproc *rproc)
 	return 0;
 }
 
+static int rproc_attach_recovery(struct rproc *rproc)
+{
+	int ret;
+
+	mutex_unlock(&rproc->lock);
+	ret = rproc_detach(rproc);
+	mutex_lock(&rproc->lock);
+	if (ret)
+		return ret;
+
+	if (atomic_inc_return(&rproc->power) > 1)
+		return 0;
+
+	return rproc_attach(rproc);
+}
+
+static int rproc_firmware_recovery(struct rproc *rproc)
+{
+	const struct firmware *firmware_p;
+	struct device *dev = &rproc->dev;
+	int ret;
+
+	ret = rproc_stop(rproc, true);
+	if (ret)
+		return ret;
+
+	/* generate coredump */
+	rproc->ops->coredump(rproc);
+
+	/* load firmware */
+	ret = request_firmware(&firmware_p, rproc->firmware, dev);
+	if (ret < 0) {
+		dev_err(dev, "request_firmware failed: %d\n", ret);
+		return ret;
+	}
+
+	/* boot the remote processor up again */
+	ret = rproc_start(rproc, firmware_p);
+
+	release_firmware(firmware_p);
+
+	return ret;
+}
+
 /**
  * rproc_trigger_recovery() - recover a remoteproc
  * @rproc: the remote processor
@@ -1901,7 +1945,6 @@ static int __rproc_detach(struct rproc *rproc)
  */
 int rproc_trigger_recovery(struct rproc *rproc)
 {
-	const struct firmware *firmware_p;
 	struct device *dev = &rproc->dev;
 	int ret;
 
@@ -1915,24 +1958,10 @@ int rproc_trigger_recovery(struct rproc *rproc)
 
 	dev_err(dev, "recovering %s\n", rproc->name);
 
-	ret = rproc_stop(rproc, true);
-	if (ret)
-		goto unlock_mutex;
-
-	/* generate coredump */
-	rproc->ops->coredump(rproc);
-
-	/* load firmware */
-	ret = request_firmware(&firmware_p, rproc->firmware, dev);
-	if (ret < 0) {
-		dev_err(dev, "request_firmware failed: %d\n", ret);
-		goto unlock_mutex;
-	}
-
-	/* boot the remote processor up again */
-	ret = rproc_start(rproc, firmware_p);
-
-	release_firmware(firmware_p);
+	if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_RECOVERY))
+		ret = rproc_attach_recovery(rproc);
+	else
+		ret = rproc_firmware_recovery(rproc);
 
 unlock_mutex:
 	mutex_unlock(&rproc->lock);
-- 
2.30.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash
  2022-03-08  6:48 ` [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash Peng Fan (OSS)
@ 2022-03-09 18:39   ` Mathieu Poirier
  2022-03-09 22:05     ` Peng Fan
  2022-03-11 18:47   ` Bjorn Andersson
  1 sibling, 1 reply; 8+ messages in thread
From: Mathieu Poirier @ 2022-03-09 18:39 UTC (permalink / raw)
  To: Peng Fan (OSS)
  Cc: bjorn.andersson, arnaud.pouliquen, linux-remoteproc,
	linux-kernel, peng.fan

On Mon, 7 Mar 2022 at 23:08, Peng Fan (OSS) <peng.fan@oss.nxp.com> wrote:
>
> From: Peng Fan <peng.fan@nxp.com>
>
> Current logic only support main processor to stop/start the remote
> processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
> remote processor could do attach recovery after crash and trigger watchdog
> reboot. It does not need main processor to load image, or stop/start M4
> core.
>
> Introduce two functions: rproc_attach_recovery, rproc_firmware_recovery
> for the two cases. Firmware recovery is as before, let main processor to
> help recovery, while attach recovery is recover itself withou help.
> To attach recovery, we only do detach and attach.
>
> Signed-off-by: Peng Fan <peng.fan@nxp.com>
> ---
>
> V2:
>  use rproc_has_feature in patch 1/2
>
>  drivers/remoteproc/remoteproc_core.c | 67 ++++++++++++++++++++--------
>  1 file changed, 48 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51acf235e..366fad475898 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1887,6 +1887,50 @@ static int __rproc_detach(struct rproc *rproc)
>         return 0;
>  }
>
> +static int rproc_attach_recovery(struct rproc *rproc)
> +{
> +       int ret;
> +
> +       mutex_unlock(&rproc->lock);
> +       ret = rproc_detach(rproc);
> +       mutex_lock(&rproc->lock);
> +       if (ret)
> +               return ret;
> +
> +       if (atomic_inc_return(&rproc->power) > 1)
> +               return 0;
> +
> +       return rproc_attach(rproc);
> +}
> +
> +static int rproc_firmware_recovery(struct rproc *rproc)
> +{
> +       const struct firmware *firmware_p;
> +       struct device *dev = &rproc->dev;
> +       int ret;
> +
> +       ret = rproc_stop(rproc, true);
> +       if (ret)
> +               return ret;
> +
> +       /* generate coredump */
> +       rproc->ops->coredump(rproc);
> +
> +       /* load firmware */
> +       ret = request_firmware(&firmware_p, rproc->firmware, dev);
> +       if (ret < 0) {
> +               dev_err(dev, "request_firmware failed: %d\n", ret);
> +               return ret;
> +       }
> +
> +       /* boot the remote processor up again */
> +       ret = rproc_start(rproc, firmware_p);
> +
> +       release_firmware(firmware_p);
> +
> +       return ret;
> +}
> +
>  /**
>   * rproc_trigger_recovery() - recover a remoteproc
>   * @rproc: the remote processor
> @@ -1901,7 +1945,6 @@ static int __rproc_detach(struct rproc *rproc)
>   */
>  int rproc_trigger_recovery(struct rproc *rproc)
>  {
> -       const struct firmware *firmware_p;
>         struct device *dev = &rproc->dev;
>         int ret;
>
> @@ -1915,24 +1958,10 @@ int rproc_trigger_recovery(struct rproc *rproc)
>
>         dev_err(dev, "recovering %s\n", rproc->name);
>
> -       ret = rproc_stop(rproc, true);
> -       if (ret)
> -               goto unlock_mutex;
> -
> -       /* generate coredump */
> -       rproc->ops->coredump(rproc);
> -
> -       /* load firmware */
> -       ret = request_firmware(&firmware_p, rproc->firmware, dev);
> -       if (ret < 0) {
> -               dev_err(dev, "request_firmware failed: %d\n", ret);
> -               goto unlock_mutex;
> -       }
> -
> -       /* boot the remote processor up again */
> -       ret = rproc_start(rproc, firmware_p);
> -
> -       release_firmware(firmware_p);
> +       if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_RECOVERY))
> +               ret = rproc_attach_recovery(rproc);
> +       else
> +               ret = rproc_firmware_recovery(rproc);

Should I assume this set, which is labeled V2, replaces this other
patch [1] that is also labeled V2, sent out on January 26th?  If so,
why are they both labeled with the same tag and why isn't there a
cover letter to clearly state your intent?  More importantly, why am I
having this conversation with an experienced kernel developer that
should know better?

Any reason I should not move this work to the very bottom of my patch
queue or better yet, simply drop it?

[1]. https://lore.kernel.org/lkml/20220207173456.GA3355405@p14s/t/

>
>  unlock_mutex:
>         mutex_unlock(&rproc->lock);
> --
> 2.30.0
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash
  2022-03-09 18:39   ` Mathieu Poirier
@ 2022-03-09 22:05     ` Peng Fan
  0 siblings, 0 replies; 8+ messages in thread
From: Peng Fan @ 2022-03-09 22:05 UTC (permalink / raw)
  To: Mathieu Poirier, Peng Fan (OSS)
  Cc: bjorn.andersson, arnaud.pouliquen, linux-remoteproc, linux-kernel

> Subject: Re: [PATCH V2 2/2] remoteproc: support attach recovery after rproc
> crash
> 
> On Mon, 7 Mar 2022 at 23:08, Peng Fan (OSS) <peng.fan@oss.nxp.com>
> wrote:
> >
> > From: Peng Fan <peng.fan@nxp.com>
> >
> > Current logic only support main processor to stop/start the remote
> > processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
> > remote processor could do attach recovery after crash and trigger
> > watchdog reboot. It does not need main processor to load image, or
> > stop/start M4 core.
> >
> > Introduce two functions: rproc_attach_recovery,
> > rproc_firmware_recovery for the two cases. Firmware recovery is as
> > before, let main processor to help recovery, while attach recovery is recover
> itself withou help.
> > To attach recovery, we only do detach and attach.
> >
> > Signed-off-by: Peng Fan <peng.fan@nxp.com>
> > ---
> >
> > V2:
> >  use rproc_has_feature in patch 1/2
> >
> >  drivers/remoteproc/remoteproc_core.c | 67
> > ++++++++++++++++++++--------
> >  1 file changed, 48 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/remoteproc/remoteproc_core.c
> > b/drivers/remoteproc/remoteproc_core.c
> > index 69f51acf235e..366fad475898 100644
> > --- a/drivers/remoteproc/remoteproc_core.c
> > +++ b/drivers/remoteproc/remoteproc_core.c
> > @@ -1887,6 +1887,50 @@ static int __rproc_detach(struct rproc *rproc)
> >         return 0;
> >  }
> >
> > +static int rproc_attach_recovery(struct rproc *rproc) {
> > +       int ret;
> > +
> > +       mutex_unlock(&rproc->lock);
> > +       ret = rproc_detach(rproc);
> > +       mutex_lock(&rproc->lock);
> > +       if (ret)
> > +               return ret;
> > +
> > +       if (atomic_inc_return(&rproc->power) > 1)
> > +               return 0;
> > +
> > +       return rproc_attach(rproc);
> > +}
> > +
> > +static int rproc_firmware_recovery(struct rproc *rproc) {
> > +       const struct firmware *firmware_p;
> > +       struct device *dev = &rproc->dev;
> > +       int ret;
> > +
> > +       ret = rproc_stop(rproc, true);
> > +       if (ret)
> > +               return ret;
> > +
> > +       /* generate coredump */
> > +       rproc->ops->coredump(rproc);
> > +
> > +       /* load firmware */
> > +       ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > +       if (ret < 0) {
> > +               dev_err(dev, "request_firmware failed: %d\n", ret);
> > +               return ret;
> > +       }
> > +
> > +       /* boot the remote processor up again */
> > +       ret = rproc_start(rproc, firmware_p);
> > +
> > +       release_firmware(firmware_p);
> > +
> > +       return ret;
> > +}
> > +
> >  /**
> >   * rproc_trigger_recovery() - recover a remoteproc
> >   * @rproc: the remote processor
> > @@ -1901,7 +1945,6 @@ static int __rproc_detach(struct rproc *rproc)
> >   */
> >  int rproc_trigger_recovery(struct rproc *rproc)  {
> > -       const struct firmware *firmware_p;
> >         struct device *dev = &rproc->dev;
> >         int ret;
> >
> > @@ -1915,24 +1958,10 @@ int rproc_trigger_recovery(struct rproc
> > *rproc)
> >
> >         dev_err(dev, "recovering %s\n", rproc->name);
> >
> > -       ret = rproc_stop(rproc, true);
> > -       if (ret)
> > -               goto unlock_mutex;
> > -
> > -       /* generate coredump */
> > -       rproc->ops->coredump(rproc);
> > -
> > -       /* load firmware */
> > -       ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > -       if (ret < 0) {
> > -               dev_err(dev, "request_firmware failed: %d\n", ret);
> > -               goto unlock_mutex;
> > -       }
> > -
> > -       /* boot the remote processor up again */
> > -       ret = rproc_start(rproc, firmware_p);
> > -
> > -       release_firmware(firmware_p);
> > +       if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_RECOVERY))
> > +               ret = rproc_attach_recovery(rproc);
> > +       else
> > +               ret = rproc_firmware_recovery(rproc);
> 
> Should I assume this set, which is labeled V2, replaces this other patch [1]
> that is also labeled V2, sent out on January 26th?  If so, why are they both
> labeled with the same tag and why isn't there a cover letter to clearly state
> your intent?  More importantly, why am I having this conversation with an
> experienced kernel developer that should know better?
> 
> Any reason I should not move this work to the very bottom of my patch queue
> or better yet, simply drop it?

My bad. This patchset should labeled V3. I'll resend the patchset with a cover-letter
and label V3. Thanks for your patience.

Thanks,
Peng.

> 
> [1].
> https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.ke
> rnel.org%2Flkml%2F20220207173456.GA3355405%40p14s%2Ft%2F&amp;da
> ta=04%7C01%7Cpeng.fan%40nxp.com%7Ccb80ecb9fb3348d1222a08da01fc2
> 6cf%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C637824479736
> 071420%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2
> luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=FIdhBvAUP
> NZDPxzEW6wW%2B0GPzoQ7MUm8IbXc7yq%2BP6w%3D&amp;reserved=0
> 
> >
> >  unlock_mutex:
> >         mutex_unlock(&rproc->lock);
> > --
> > 2.30.0
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V2 1/2] remoteproc: introduce rproc features
  2022-03-08  6:48 [PATCH V2 1/2] remoteproc: introduce rproc features Peng Fan (OSS)
  2022-03-08  6:48 ` [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash Peng Fan (OSS)
@ 2022-03-11 18:26 ` Bjorn Andersson
  2022-03-14  7:23   ` Peng Fan
  1 sibling, 1 reply; 8+ messages in thread
From: Bjorn Andersson @ 2022-03-11 18:26 UTC (permalink / raw)
  To: Peng Fan (OSS)
  Cc: mathieu.poirier, arnaud.pouliquen, linux-remoteproc,
	linux-kernel, peng.fan

On Tue 08 Mar 00:48 CST 2022, Peng Fan (OSS) wrote:

> From: Peng Fan <peng.fan@nxp.com>
> 
> remote processor may support:
>  - firmware recovery with help from main processor
>  - self recovery without help from main processor
>  - iommu
>  - etc
> 
> Introduce rproc features could simplify code to avoid adding more bool
> flags and let us optimize current code.
> 
> Signed-off-by: Peng Fan <peng.fan@nxp.com>
> ---
> 
> V2:
>  New
> 
>  include/linux/remoteproc.h | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h

This is the API that other parts of the kernel use to interact with a
struct rproc, what feature flags do you have a need for other parts of
the kernel to be able to query?

> index 93a1d0050fbc..51edaf80692c 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -417,6 +417,7 @@ struct rproc_ops {
>   *			has attached to it
>   * @RPROC_DETACHED:	device has been booted by another entity and waiting
>   *			for the core to attach to it
> + * @RPROC_CRASHED_ATTACH_RECOVERY: device has crashed and self recovery

This seems to belong in the other patch...

>   * @RPROC_LAST:		just keep this one at the end
>   *
>   * Please note that the values of these states are used as indices
> @@ -489,6 +490,11 @@ struct rproc_dump_segment {
>  	loff_t offset;
>  };
>  
> +enum rproc_features {
> +	RPROC_FEAT_ATTACH_RECOVERY = 0,

No need to specify that this is bit 0, and the enum will do that for you.

> +	RPROC_MAX_FEATURES = 32,

You're using DECLARE_BITMAP() so why 32?

Regards,
Bjorn

> +};
> +
>  /**
>   * struct rproc - represents a physical remote processor device
>   * @node: list node of this rproc object
> @@ -530,6 +536,7 @@ struct rproc_dump_segment {
>   * @elf_machine: firmware ELF machine
>   * @cdev: character device of the rproc
>   * @cdev_put_on_release: flag to indicate if remoteproc should be shutdown on @char_dev release
> + * @features: indicate remoteproc features
>   */
>  struct rproc {
>  	struct list_head node;
> @@ -570,8 +577,19 @@ struct rproc {
>  	u16 elf_machine;
>  	struct cdev cdev;
>  	bool cdev_put_on_release;
> +	DECLARE_BITMAP(features, RPROC_MAX_FEATURES);
>  };
>  
> +static inline bool rproc_has_feature(struct rproc *rproc, unsigned int feature)
> +{
> +	return test_bit(feature, rproc->features);
> +}
> +
> +static inline void rproc_set_feature(struct rproc *rproc, unsigned int feature)
> +{
> +	set_bit(feature, rproc->features);
> +}
> +
>  /**
>   * struct rproc_subdev - subdevice tied to a remoteproc
>   * @node: list node related to the rproc subdevs list
> -- 
> 2.30.0
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash
  2022-03-08  6:48 ` [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash Peng Fan (OSS)
  2022-03-09 18:39   ` Mathieu Poirier
@ 2022-03-11 18:47   ` Bjorn Andersson
  2022-03-14  7:31     ` Peng Fan
  1 sibling, 1 reply; 8+ messages in thread
From: Bjorn Andersson @ 2022-03-11 18:47 UTC (permalink / raw)
  To: Peng Fan (OSS)
  Cc: mathieu.poirier, arnaud.pouliquen, linux-remoteproc,
	linux-kernel, peng.fan

On Tue 08 Mar 00:48 CST 2022, Peng Fan (OSS) wrote:

> From: Peng Fan <peng.fan@nxp.com>
> 
> Current logic only support main processor to stop/start the remote
> processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
> remote processor could do attach recovery after crash and trigger watchdog

Does it really do something called "attach recovery and trigger watchdog
reboot"? Doesn't it just reboot itself and Linux needs to detach and
reattach to get something (what?) reset?

> reboot. It does not need main processor to load image, or stop/start M4
> core.
> 
> Introduce two functions: rproc_attach_recovery, rproc_firmware_recovery
> for the two cases. Firmware recovery is as before, let main processor to
> help recovery, while attach recovery is recover itself withou help.
> To attach recovery, we only do detach and attach.
> 
> Signed-off-by: Peng Fan <peng.fan@nxp.com>
> ---
> 
> V2:
>  use rproc_has_feature in patch 1/2
> 
>  drivers/remoteproc/remoteproc_core.c | 67 ++++++++++++++++++++--------
>  1 file changed, 48 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51acf235e..366fad475898 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -1887,6 +1887,50 @@ static int __rproc_detach(struct rproc *rproc)
>  	return 0;
>  }
>  
> +static int rproc_attach_recovery(struct rproc *rproc)
> +{
> +	int ret;
> +
> +	mutex_unlock(&rproc->lock);
> +	ret = rproc_detach(rproc);
> +	mutex_lock(&rproc->lock);
> +	if (ret)
> +		return ret;
> +
> +	if (atomic_inc_return(&rproc->power) > 1)

In the stop/coredump/start path the code _will_ attempt to recover the
remote processor. With rproc_detach() and rproc_attach() fiddling with
the rproc->power refcount this might do something, or it might not do
something. And with the mutex_unlock() it's likely that you're opening
of up for various race conditions inbetween.


PS. Does anyone actually use this refcount, or are we just all holding
our breath for it never going beyond 1?

Regards,
Bjorn

> +		return 0;
> +
> +	return rproc_attach(rproc);
> +}
> +
> +static int rproc_firmware_recovery(struct rproc *rproc)
> +{
> +	const struct firmware *firmware_p;
> +	struct device *dev = &rproc->dev;
> +	int ret;
> +
> +	ret = rproc_stop(rproc, true);
> +	if (ret)
> +		return ret;
> +
> +	/* generate coredump */
> +	rproc->ops->coredump(rproc);
> +
> +	/* load firmware */
> +	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> +	if (ret < 0) {
> +		dev_err(dev, "request_firmware failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	/* boot the remote processor up again */
> +	ret = rproc_start(rproc, firmware_p);
> +
> +	release_firmware(firmware_p);
> +
> +	return ret;
> +}
> +
>  /**
>   * rproc_trigger_recovery() - recover a remoteproc
>   * @rproc: the remote processor
> @@ -1901,7 +1945,6 @@ static int __rproc_detach(struct rproc *rproc)
>   */
>  int rproc_trigger_recovery(struct rproc *rproc)
>  {
> -	const struct firmware *firmware_p;
>  	struct device *dev = &rproc->dev;
>  	int ret;
>  
> @@ -1915,24 +1958,10 @@ int rproc_trigger_recovery(struct rproc *rproc)
>  
>  	dev_err(dev, "recovering %s\n", rproc->name);
>  
> -	ret = rproc_stop(rproc, true);
> -	if (ret)
> -		goto unlock_mutex;
> -
> -	/* generate coredump */
> -	rproc->ops->coredump(rproc);
> -
> -	/* load firmware */
> -	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> -	if (ret < 0) {
> -		dev_err(dev, "request_firmware failed: %d\n", ret);
> -		goto unlock_mutex;
> -	}
> -
> -	/* boot the remote processor up again */
> -	ret = rproc_start(rproc, firmware_p);
> -
> -	release_firmware(firmware_p);
> +	if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_RECOVERY))
> +		ret = rproc_attach_recovery(rproc);
> +	else
> +		ret = rproc_firmware_recovery(rproc);
>  
>  unlock_mutex:
>  	mutex_unlock(&rproc->lock);
> -- 
> 2.30.0
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH V2 1/2] remoteproc: introduce rproc features
  2022-03-11 18:26 ` [PATCH V2 1/2] remoteproc: introduce rproc features Bjorn Andersson
@ 2022-03-14  7:23   ` Peng Fan
  0 siblings, 0 replies; 8+ messages in thread
From: Peng Fan @ 2022-03-14  7:23 UTC (permalink / raw)
  To: Bjorn Andersson, Peng Fan (OSS)
  Cc: mathieu.poirier, arnaud.pouliquen, linux-remoteproc, linux-kernel

> Subject: Re: [PATCH V2 1/2] remoteproc: introduce rproc features
> 
> On Tue 08 Mar 00:48 CST 2022, Peng Fan (OSS) wrote:
> 
> > From: Peng Fan <peng.fan@nxp.com>
> >
> > remote processor may support:
> >  - firmware recovery with help from main processor
> >  - self recovery without help from main processor
> >  - iommu
> >  - etc
> >
> > Introduce rproc features could simplify code to avoid adding more bool
> > flags and let us optimize current code.
> >
> > Signed-off-by: Peng Fan <peng.fan@nxp.com>
> > ---
> >
> > V2:
> >  New
> >
> >  include/linux/remoteproc.h | 18 ++++++++++++++++++
> >  1 file changed, 18 insertions(+)
> >
> > diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> 
> This is the API that other parts of the kernel use to interact with a struct rproc,
> what feature flags do you have a need for other parts of the kernel to be able
> to query?

I no need other parts of kernel to query rproc features.
In my V1 patch to support i.MX8QM/QXP self recovery, I introduced a new bool
in "struct rproc", but it is not preferred. So I think to use a enum flag, after
looking into struct rproc, I see the possibility to use one API rproc_has_feature
to simplify code, such as move has_iommu into 
rproc_has_feature(rproc, RPROC_FEAT_IOMMU), and in future, when need to
add new flag/capability of rproc, we could reuse the rproc_has_feature API.

> 
> > index 93a1d0050fbc..51edaf80692c 100644
> > --- a/include/linux/remoteproc.h
> > +++ b/include/linux/remoteproc.h
> > @@ -417,6 +417,7 @@ struct rproc_ops {
> >   *			has attached to it
> >   * @RPROC_DETACHED:	device has been booted by another entity and
> waiting
> >   *			for the core to attach to it
> > + * @RPROC_CRASHED_ATTACH_RECOVERY: device has crashed and self
> > + recovery
> 
> This seems to belong in the other patch...

I'll move this to patch 2/2

> 
> >   * @RPROC_LAST:		just keep this one at the end
> >   *
> >   * Please note that the values of these states are used as indices @@
> > -489,6 +490,11 @@ struct rproc_dump_segment {
> >  	loff_t offset;
> >  };
> >
> > +enum rproc_features {
> > +	RPROC_FEAT_ATTACH_RECOVERY = 0,
> 
> No need to specify that this is bit 0, and the enum will do that for you.

sure

> 
> > +	RPROC_MAX_FEATURES = 32,
> 
> You're using DECLARE_BITMAP() so why 32?

Just wanna to limit max features to one int, I may mis use this,
will fix in new version.

Thanks,
Peng.

> 
> Regards,
> Bjorn
> 
> > +};
> > +
> >  /**
> >   * struct rproc - represents a physical remote processor device
> >   * @node: list node of this rproc object @@ -530,6 +536,7 @@ struct
> > rproc_dump_segment {
> >   * @elf_machine: firmware ELF machine
> >   * @cdev: character device of the rproc
> >   * @cdev_put_on_release: flag to indicate if remoteproc should be
> > shutdown on @char_dev release
> > + * @features: indicate remoteproc features
> >   */
> >  struct rproc {
> >  	struct list_head node;
> > @@ -570,8 +577,19 @@ struct rproc {
> >  	u16 elf_machine;
> >  	struct cdev cdev;
> >  	bool cdev_put_on_release;
> > +	DECLARE_BITMAP(features, RPROC_MAX_FEATURES);
> >  };
> >
> > +static inline bool rproc_has_feature(struct rproc *rproc, unsigned
> > +int feature) {
> > +	return test_bit(feature, rproc->features); }
> > +
> > +static inline void rproc_set_feature(struct rproc *rproc, unsigned
> > +int feature) {
> > +	set_bit(feature, rproc->features);
> > +}
> > +
> >  /**
> >   * struct rproc_subdev - subdevice tied to a remoteproc
> >   * @node: list node related to the rproc subdevs list
> > --
> > 2.30.0
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash
  2022-03-11 18:47   ` Bjorn Andersson
@ 2022-03-14  7:31     ` Peng Fan
  0 siblings, 0 replies; 8+ messages in thread
From: Peng Fan @ 2022-03-14  7:31 UTC (permalink / raw)
  To: Bjorn Andersson, Peng Fan (OSS)
  Cc: mathieu.poirier, arnaud.pouliquen, linux-remoteproc, linux-kernel

> Subject: Re: [PATCH V2 2/2] remoteproc: support attach recovery after rproc
> crash
> 
> On Tue 08 Mar 00:48 CST 2022, Peng Fan (OSS) wrote:
> 
> > From: Peng Fan <peng.fan@nxp.com>
> >
> > Current logic only support main processor to stop/start the remote
> > processor after rproc crash. However to SoC, such as i.MX8QM/QXP, the
> > remote processor could do attach recovery after crash and trigger
> > watchdog
> 
> Does it really do something called "attach recovery and trigger watchdog
> reboot"? Doesn't it just reboot itself and Linux needs to detach and reattach
> to get something (what?) reset?

I mean the remote processor could re-run without linux to load firmware/stop/
start. Linux side needs to detach/attach to communicate with remote processor.

> 
> > reboot. It does not need main processor to load image, or stop/start
> > M4 core.
> >
> > Introduce two functions: rproc_attach_recovery,
> > rproc_firmware_recovery for the two cases. Firmware recovery is as
> > before, let main processor to help recovery, while attach recovery is recover
> itself withou help.
> > To attach recovery, we only do detach and attach.
> >
> > Signed-off-by: Peng Fan <peng.fan@nxp.com>
> > ---
> >
> > V2:
> >  use rproc_has_feature in patch 1/2
> >
> >  drivers/remoteproc/remoteproc_core.c | 67
> > ++++++++++++++++++++--------
> >  1 file changed, 48 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/remoteproc/remoteproc_core.c
> > b/drivers/remoteproc/remoteproc_core.c
> > index 69f51acf235e..366fad475898 100644
> > --- a/drivers/remoteproc/remoteproc_core.c
> > +++ b/drivers/remoteproc/remoteproc_core.c
> > @@ -1887,6 +1887,50 @@ static int __rproc_detach(struct rproc *rproc)
> >  	return 0;
> >  }
> >
> > +static int rproc_attach_recovery(struct rproc *rproc) {
> > +	int ret;
> > +
> > +	mutex_unlock(&rproc->lock);
> > +	ret = rproc_detach(rproc);
> > +	mutex_lock(&rproc->lock);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (atomic_inc_return(&rproc->power) > 1)
> 
> In the stop/coredump/start path the code _will_ attempt to recover the
> remote processor. With rproc_detach() and rproc_attach() fiddling with the
> rproc->power refcount this might do something, or it might not do something.
> And with the mutex_unlock() it's likely that you're opening of up for various
> race conditions inbetween.

Rproc_boot will inc rproc->power.
Rproc_detach will decrease rproc->power
Rproc_attach not touch rproc->power.

When do attach recovery, the logic is detach->attach. So I add one
inc rproc->power check to avoid count mis-usage.

> 
> 
> PS. Does anyone actually use this refcount, or are we just all holding our
> breath for it never going beyond 1?

I think latter usage.

Thanks,
Peng.

> 
> Regards,
> Bjorn
> 
> > +		return 0;
> > +
> > +	return rproc_attach(rproc);
> > +}
> > +
> > +static int rproc_firmware_recovery(struct rproc *rproc) {
> > +	const struct firmware *firmware_p;
> > +	struct device *dev = &rproc->dev;
> > +	int ret;
> > +
> > +	ret = rproc_stop(rproc, true);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* generate coredump */
> > +	rproc->ops->coredump(rproc);
> > +
> > +	/* load firmware */
> > +	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > +	if (ret < 0) {
> > +		dev_err(dev, "request_firmware failed: %d\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	/* boot the remote processor up again */
> > +	ret = rproc_start(rproc, firmware_p);
> > +
> > +	release_firmware(firmware_p);
> > +
> > +	return ret;
> > +}
> > +
> >  /**
> >   * rproc_trigger_recovery() - recover a remoteproc
> >   * @rproc: the remote processor
> > @@ -1901,7 +1945,6 @@ static int __rproc_detach(struct rproc *rproc)
> >   */
> >  int rproc_trigger_recovery(struct rproc *rproc)  {
> > -	const struct firmware *firmware_p;
> >  	struct device *dev = &rproc->dev;
> >  	int ret;
> >
> > @@ -1915,24 +1958,10 @@ int rproc_trigger_recovery(struct rproc
> > *rproc)
> >
> >  	dev_err(dev, "recovering %s\n", rproc->name);
> >
> > -	ret = rproc_stop(rproc, true);
> > -	if (ret)
> > -		goto unlock_mutex;
> > -
> > -	/* generate coredump */
> > -	rproc->ops->coredump(rproc);
> > -
> > -	/* load firmware */
> > -	ret = request_firmware(&firmware_p, rproc->firmware, dev);
> > -	if (ret < 0) {
> > -		dev_err(dev, "request_firmware failed: %d\n", ret);
> > -		goto unlock_mutex;
> > -	}
> > -
> > -	/* boot the remote processor up again */
> > -	ret = rproc_start(rproc, firmware_p);
> > -
> > -	release_firmware(firmware_p);
> > +	if (rproc_has_feature(rproc, RPROC_FEAT_ATTACH_RECOVERY))
> > +		ret = rproc_attach_recovery(rproc);
> > +	else
> > +		ret = rproc_firmware_recovery(rproc);
> >
> >  unlock_mutex:
> >  	mutex_unlock(&rproc->lock);
> > --
> > 2.30.0
> >

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-03-14  7:32 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-08  6:48 [PATCH V2 1/2] remoteproc: introduce rproc features Peng Fan (OSS)
2022-03-08  6:48 ` [PATCH V2 2/2] remoteproc: support attach recovery after rproc crash Peng Fan (OSS)
2022-03-09 18:39   ` Mathieu Poirier
2022-03-09 22:05     ` Peng Fan
2022-03-11 18:47   ` Bjorn Andersson
2022-03-14  7:31     ` Peng Fan
2022-03-11 18:26 ` [PATCH V2 1/2] remoteproc: introduce rproc features Bjorn Andersson
2022-03-14  7:23   ` Peng Fan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.