All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drivers/cros_ec: Handle CrOS EC panics
@ 2022-12-21 18:55 Rob Barnes
  2022-12-21 19:23 ` Prashant Malani
  2022-12-21 20:07 ` Guenter Roeck
  0 siblings, 2 replies; 15+ messages in thread
From: Rob Barnes @ 2022-12-21 18:55 UTC (permalink / raw)
  To: Guenter Roeck, chrome-platform, linux-kernel
  Cc: dtor, Benson Leung, Rob Barnes

Add handler for CrOS EC panic events. When a panic is reported,
poll EC log then force an orderly shutdown.

This will preserve the EC log leading up to the crash.

Signed-off-by: Rob Barnes <robbarnes@google.com>
---
 drivers/platform/chrome/cros_ec_debugfs.c   | 24 +++++++++++++++++++++
 drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
 include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
 3 files changed, 43 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 21d973fc6be2..31637a4e4cf9 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -49,6 +49,7 @@ struct cros_ec_debugfs {
 	struct delayed_work log_poll_work;
 	/* EC panicinfo */
 	struct debugfs_blob_wrapper panicinfo_blob;
+	struct notifier_block notifier_panic;
 };
 
 /*
@@ -437,6 +438,23 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
 	return ret;
 }
 
+static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
+				       unsigned long queued_during_suspend,
+				       void *_notify)
+{
+	struct cros_ec_debugfs *debug_info =
+		container_of(nb, struct cros_ec_debugfs, notifier_panic);
+
+	if (debug_info->log_buffer.buf) {
+		/* Force log poll work to run immediately */
+		mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
+		/* Block until log poll work finishes */
+		flush_delayed_work(&debug_info->log_poll_work);
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int cros_ec_debugfs_probe(struct platform_device *pd)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
@@ -473,6 +491,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
 	debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
 			   &ec->ec_dev->suspend_timeout_ms);
 
+	debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
+	ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
+					       &debug_info->notifier_panic);
+	if (ret)
+		goto remove_debugfs;
+
 	ec->debug_info = debug_info;
 
 	dev_set_drvdata(&pd->dev, ec);
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 7fc8f82280ac..21958c3b0c28 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -21,6 +21,7 @@
 #include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/reboot.h>
 #include <linux/suspend.h>
 
 #include "cros_ec.h"
@@ -332,6 +333,15 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 
 	if (value == ACPI_NOTIFY_DEVICE_WAKE)
 		pm_system_wakeup();
+
+	if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
+		dev_err(ec_dev->dev,
+			"CrOS EC Panic Reported. Shutdown is imminent!");
+		blocking_notifier_call_chain(&ec_dev->panic_notifier, 0,
+					     ec_dev);
+		/* Begin orderly shutdown. Force shutdown after 1 second. */
+		hw_protection_shutdown("CrOS EC Panic", 1000);
+	}
 }
 
 static int cros_ec_lpc_probe(struct platform_device *pdev)
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index e43107e0bee1..1c4487271836 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -41,6 +41,13 @@
 #define EC_MAX_REQUEST_OVERHEAD		1
 #define EC_MAX_RESPONSE_OVERHEAD	32
 
+/*
+ * EC panic is not covered by the standard (0-F) ACPI notify values.
+ * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
+ * device specific ACPI notify range.
+ */
+#define ACPI_NOTIFY_CROS_EC_PANIC	0xB0
+
 /*
  * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
  */
@@ -176,6 +183,8 @@ struct cros_ec_device {
 	/* The platform devices used by the mfd driver */
 	struct platform_device *ec;
 	struct platform_device *pd;
+
+	struct blocking_notifier_head panic_notifier;
 };
 
 /**
-- 
2.39.0.314.g84b9a713c41-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-21 18:55 [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
@ 2022-12-21 19:23 ` Prashant Malani
  2022-12-21 23:55   ` Rob Barnes
  2022-12-21 20:07 ` Guenter Roeck
  1 sibling, 1 reply; 15+ messages in thread
From: Prashant Malani @ 2022-12-21 19:23 UTC (permalink / raw)
  To: Rob Barnes
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

Hi Rob,

I'd suggest using the commit title tag "platform/chrome: cros_ec: ..."
for commits which are "ChromeOS EC" wide. That's in line with
other recent commits in this directory.

On Wed, Dec 21, 2022 at 10:56 AM Rob Barnes <robbarnes@google.com> wrote:
>
> Add handler for CrOS EC panic events. When a panic is reported,
> poll EC log then force an orderly shutdown.
>
> This will preserve the EC log leading up to the crash.
>
> Signed-off-by: Rob Barnes <robbarnes@google.com>
> ---
>  drivers/platform/chrome/cros_ec_debugfs.c   | 24 +++++++++++++++++++++
>  drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
>  include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
>  3 files changed, 43 insertions(+)
>
> diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> index 21d973fc6be2..31637a4e4cf9 100644
> --- a/drivers/platform/chrome/cros_ec_debugfs.c
> +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> @@ -49,6 +49,7 @@ struct cros_ec_debugfs {
>         struct delayed_work log_poll_work;
>         /* EC panicinfo */
>         struct debugfs_blob_wrapper panicinfo_blob;
> +       struct notifier_block notifier_panic;
>  };
>
>  /*
> @@ -437,6 +438,23 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
>         return ret;
>  }
>
> +static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
> +                                      unsigned long queued_during_suspend,
> +                                      void *_notify)
> +{
> +       struct cros_ec_debugfs *debug_info =
> +               container_of(nb, struct cros_ec_debugfs, notifier_panic);
> +
> +       if (debug_info->log_buffer.buf) {
> +               /* Force log poll work to run immediately */
> +               mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
> +               /* Block until log poll work finishes */
> +               flush_delayed_work(&debug_info->log_poll_work);
> +       }
> +
> +       return NOTIFY_DONE;
> +}
> +
>  static int cros_ec_debugfs_probe(struct platform_device *pd)
>  {
>         struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> @@ -473,6 +491,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
>         debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
>                            &ec->ec_dev->suspend_timeout_ms);
>
> +       debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
> +       ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
> +                                              &debug_info->notifier_panic);
> +       if (ret)
> +               goto remove_debugfs;
> +
>         ec->debug_info = debug_info;
>
>         dev_set_drvdata(&pd->dev, ec);
> diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
> index 7fc8f82280ac..21958c3b0c28 100644
> --- a/drivers/platform/chrome/cros_ec_lpc.c
> +++ b/drivers/platform/chrome/cros_ec_lpc.c
> @@ -21,6 +21,7 @@
>  #include <linux/platform_data/cros_ec_proto.h>
>  #include <linux/platform_device.h>
>  #include <linux/printk.h>
> +#include <linux/reboot.h>
>  #include <linux/suspend.h>
>
>  #include "cros_ec.h"
> @@ -332,6 +333,15 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
>
>         if (value == ACPI_NOTIFY_DEVICE_WAKE)
>                 pm_system_wakeup();
> +
> +       if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
> +               dev_err(ec_dev->dev,
> +                       "CrOS EC Panic Reported. Shutdown is imminent!");
> +               blocking_notifier_call_chain(&ec_dev->panic_notifier, 0,
> +                                            ec_dev);
> +               /* Begin orderly shutdown. Force shutdown after 1 second. */
> +               hw_protection_shutdown("CrOS EC Panic", 1000);

I feel like this patch is doing 2 things: pulling the logs, and then
starting a shutdown.
This should be split into 2 patches.

> +       }
>  }

Line limits are now 100 chars[1], so most of these lines can fit on 1 line.

>
>  static int cros_ec_lpc_probe(struct platform_device *pdev)
> diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
> index e43107e0bee1..1c4487271836 100644
> --- a/include/linux/platform_data/cros_ec_proto.h
> +++ b/include/linux/platform_data/cros_ec_proto.h
> @@ -41,6 +41,13 @@
>  #define EC_MAX_REQUEST_OVERHEAD                1
>  #define EC_MAX_RESPONSE_OVERHEAD       32
>
> +/*
> + * EC panic is not covered by the standard (0-F) ACPI notify values.
> + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
> + * device specific ACPI notify range.
> + */
> +#define ACPI_NOTIFY_CROS_EC_PANIC      0xB0

Can you provide a link (either in the commit, or here in the comment)
to the coreboot/BIOS code which uses this value? I feel this should
be documented in some form that correlates the caller and the callee.

> +
>  /*
>   * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
>   */
> @@ -176,6 +183,8 @@ struct cros_ec_device {
>         /* The platform devices used by the mfd driver */
>         struct platform_device *ec;
>         struct platform_device *pd;
> +
> +       struct blocking_notifier_head panic_notifier;

Any reason we cannot use the existing event_notifier (with value argument)?
It's a system panic, so I doubt that computational overhead for other
notifier block
listeners is a concern.

BR,

-Prashant

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bdc48fa11e46f867ea4d75fa59ee87a7f48be144

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-21 18:55 [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
  2022-12-21 19:23 ` Prashant Malani
@ 2022-12-21 20:07 ` Guenter Roeck
  2022-12-21 23:55   ` Rob Barnes
  1 sibling, 1 reply; 15+ messages in thread
From: Guenter Roeck @ 2022-12-21 20:07 UTC (permalink / raw)
  To: Rob Barnes
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Wed, Dec 21, 2022 at 10:56 AM Rob Barnes <robbarnes@google.com> wrote:
>
> Add handler for CrOS EC panic events. When a panic is reported,
> poll EC log then force an orderly shutdown.
>
> This will preserve the EC log leading up to the crash.
>
> Signed-off-by: Rob Barnes <robbarnes@google.com>
> ---
>  drivers/platform/chrome/cros_ec_debugfs.c   | 24 +++++++++++++++++++++
>  drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
>  include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
>  3 files changed, 43 insertions(+)
>
> diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> index 21d973fc6be2..31637a4e4cf9 100644
> --- a/drivers/platform/chrome/cros_ec_debugfs.c
> +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> @@ -49,6 +49,7 @@ struct cros_ec_debugfs {
>         struct delayed_work log_poll_work;
>         /* EC panicinfo */
>         struct debugfs_blob_wrapper panicinfo_blob;
> +       struct notifier_block notifier_panic;
>  };
>
>  /*
> @@ -437,6 +438,23 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
>         return ret;
>  }
>
> +static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
> +                                      unsigned long queued_during_suspend,
> +                                      void *_notify)
> +{
> +       struct cros_ec_debugfs *debug_info =
> +               container_of(nb, struct cros_ec_debugfs, notifier_panic);
> +
> +       if (debug_info->log_buffer.buf) {
> +               /* Force log poll work to run immediately */
> +               mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
> +               /* Block until log poll work finishes */
> +               flush_delayed_work(&debug_info->log_poll_work);
> +       }
> +
> +       return NOTIFY_DONE;
> +}
> +
>  static int cros_ec_debugfs_probe(struct platform_device *pd)
>  {
>         struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> @@ -473,6 +491,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
>         debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
>                            &ec->ec_dev->suspend_timeout_ms);
>
> +       debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
> +       ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
> +                                              &debug_info->notifier_panic);
> +       if (ret)
> +               goto remove_debugfs;
> +
>         ec->debug_info = debug_info;
>
>         dev_set_drvdata(&pd->dev, ec);
> diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
> index 7fc8f82280ac..21958c3b0c28 100644
> --- a/drivers/platform/chrome/cros_ec_lpc.c
> +++ b/drivers/platform/chrome/cros_ec_lpc.c
> @@ -21,6 +21,7 @@
>  #include <linux/platform_data/cros_ec_proto.h>
>  #include <linux/platform_device.h>
>  #include <linux/printk.h>
> +#include <linux/reboot.h>
>  #include <linux/suspend.h>
>
>  #include "cros_ec.h"
> @@ -332,6 +333,15 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
>
>         if (value == ACPI_NOTIFY_DEVICE_WAKE)
>                 pm_system_wakeup();
> +
> +       if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
> +               dev_err(ec_dev->dev,
> +                       "CrOS EC Panic Reported. Shutdown is imminent!");

dev_emerg() would seem to be more appropriate, given that we'll be
crashing a second later.

> +               blocking_notifier_call_chain(&ec_dev->panic_notifier, 0,
> +                                            ec_dev);
> +               /* Begin orderly shutdown. Force shutdown after 1 second. */
> +               hw_protection_shutdown("CrOS EC Panic", 1000);

That seems to be the wrong API. From its description:

 * Initiate an emergency system shutdown in order to protect hardware from
 * further damage. Usage examples include a thermal protection or a voltage or
 * current regulator failures.

This is an EC crash that apparently can not be handled gracefully.
That has nothing to do with hardware protection. Why not just call
panic() ?

> +       }

It troubles me that this comes last. That means the entire
mkbp_event_supported handling executes first. What troubles me even
more is that 'value' is, with the exception of
ACPI_NOTIFY_DEVICE_WAKE, not checked at all. Presumably the EC sends
some well defined value if it has an event to report. I don't think it
is a good idea to depend on cros_ec_get_next_event() returning
something useful after the EC crashed.

How is this handled today ? Does the EC just reset the AP ?

>  }
>
>  static int cros_ec_lpc_probe(struct platform_device *pdev)
> diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
> index e43107e0bee1..1c4487271836 100644
> --- a/include/linux/platform_data/cros_ec_proto.h
> +++ b/include/linux/platform_data/cros_ec_proto.h
> @@ -41,6 +41,13 @@
>  #define EC_MAX_REQUEST_OVERHEAD                1
>  #define EC_MAX_RESPONSE_OVERHEAD       32
>
> +/*
> + * EC panic is not covered by the standard (0-F) ACPI notify values.
> + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
> + * device specific ACPI notify range.
> + */
> +#define ACPI_NOTIFY_CROS_EC_PANIC      0xB0
> +
>  /*
>   * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
>   */
> @@ -176,6 +183,8 @@ struct cros_ec_device {
>         /* The platform devices used by the mfd driver */
>         struct platform_device *ec;
>         struct platform_device *pd;
> +
> +       struct blocking_notifier_head panic_notifier;
>  };
>
>  /**
> --
> 2.39.0.314.g84b9a713c41-goog
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-21 19:23 ` Prashant Malani
@ 2022-12-21 23:55   ` Rob Barnes
  2022-12-22  0:38     ` Prashant Malani
  0 siblings, 1 reply; 15+ messages in thread
From: Rob Barnes @ 2022-12-21 23:55 UTC (permalink / raw)
  To: Prashant Malani
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Wed, Dec 21, 2022 at 12:23 PM Prashant Malani <pmalani@chromium.org> wrote:
>
> Hi Rob,
>
> I'd suggest using the commit title tag "platform/chrome: cros_ec: ..."
> for commits which are "ChromeOS EC" wide. That's in line with
> other recent commits in this directory.

Ack. Will do.

>
> On Wed, Dec 21, 2022 at 10:56 AM Rob Barnes <robbarnes@google.com> wrote:
> >
> > Add handler for CrOS EC panic events. When a panic is reported,
> > poll EC log then force an orderly shutdown.
> >
> > This will preserve the EC log leading up to the crash.
> >
> > Signed-off-by: Rob Barnes <robbarnes@google.com>
> > ---
> >  drivers/platform/chrome/cros_ec_debugfs.c   | 24 +++++++++++++++++++++
> >  drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
> >  include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
> >  3 files changed, 43 insertions(+)
> >
> > diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> > index 21d973fc6be2..31637a4e4cf9 100644
> > --- a/drivers/platform/chrome/cros_ec_debugfs.c
> > +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> > @@ -49,6 +49,7 @@ struct cros_ec_debugfs {
> >         struct delayed_work log_poll_work;
> >         /* EC panicinfo */
> >         struct debugfs_blob_wrapper panicinfo_blob;
> > +       struct notifier_block notifier_panic;
> >  };
> >
> >  /*
> > @@ -437,6 +438,23 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
> >         return ret;
> >  }
> >
> > +static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
> > +                                      unsigned long queued_during_suspend,
> > +                                      void *_notify)
> > +{
> > +       struct cros_ec_debugfs *debug_info =
> > +               container_of(nb, struct cros_ec_debugfs, notifier_panic);
> > +
> > +       if (debug_info->log_buffer.buf) {
> > +               /* Force log poll work to run immediately */
> > +               mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
> > +               /* Block until log poll work finishes */
> > +               flush_delayed_work(&debug_info->log_poll_work);
> > +       }
> > +
> > +       return NOTIFY_DONE;
> > +}
> > +
> >  static int cros_ec_debugfs_probe(struct platform_device *pd)
> >  {
> >         struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> > @@ -473,6 +491,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
> >         debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
> >                            &ec->ec_dev->suspend_timeout_ms);
> >
> > +       debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
> > +       ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
> > +                                              &debug_info->notifier_panic);
> > +       if (ret)
> > +               goto remove_debugfs;
> > +
> >         ec->debug_info = debug_info;
> >
> >         dev_set_drvdata(&pd->dev, ec);
> > diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
> > index 7fc8f82280ac..21958c3b0c28 100644
> > --- a/drivers/platform/chrome/cros_ec_lpc.c
> > +++ b/drivers/platform/chrome/cros_ec_lpc.c
> > @@ -21,6 +21,7 @@
> >  #include <linux/platform_data/cros_ec_proto.h>
> >  #include <linux/platform_device.h>
> >  #include <linux/printk.h>
> > +#include <linux/reboot.h>
> >  #include <linux/suspend.h>
> >
> >  #include "cros_ec.h"
> > @@ -332,6 +333,15 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
> >
> >         if (value == ACPI_NOTIFY_DEVICE_WAKE)
> >                 pm_system_wakeup();
> > +
> > +       if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
> > +               dev_err(ec_dev->dev,
> > +                       "CrOS EC Panic Reported. Shutdown is imminent!");
> > +               blocking_notifier_call_chain(&ec_dev->panic_notifier, 0,
> > +                                            ec_dev);
> > +               /* Begin orderly shutdown. Force shutdown after 1 second. */
> > +               hw_protection_shutdown("CrOS EC Panic", 1000);
>
> I feel like this patch is doing 2 things: pulling the logs, and then
> starting a shutdown.
> This should be split into 2 patches.

Ack. Will do.

>
> > +       }
> >  }
>
> Line limits are now 100 chars[1], so most of these lines can fit on 1 line.
>
> >
> >  static int cros_ec_lpc_probe(struct platform_device *pdev)
> > diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
> > index e43107e0bee1..1c4487271836 100644
> > --- a/include/linux/platform_data/cros_ec_proto.h
> > +++ b/include/linux/platform_data/cros_ec_proto.h
> > @@ -41,6 +41,13 @@
> >  #define EC_MAX_REQUEST_OVERHEAD                1
> >  #define EC_MAX_RESPONSE_OVERHEAD       32
> >
> > +/*
> > + * EC panic is not covered by the standard (0-F) ACPI notify values.
> > + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
> > + * device specific ACPI notify range.
> > + */
> > +#define ACPI_NOTIFY_CROS_EC_PANIC      0xB0
>
> Can you provide a link (either in the commit, or here in the comment)
> to the coreboot/BIOS code which uses this value? I feel this should
> be documented in some form that correlates the caller and the callee.

Link: https://chromium-review.googlesource.com/c/chromiumos/third_party/coreboot/+/4023535

>
> > +
> >  /*
> >   * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
> >   */
> > @@ -176,6 +183,8 @@ struct cros_ec_device {
> >         /* The platform devices used by the mfd driver */
> >         struct platform_device *ec;
> >         struct platform_device *pd;
> > +
> > +       struct blocking_notifier_head panic_notifier;
>
> Any reason we cannot use the existing event_notifier (with value argument)?
> It's a system panic, so I doubt that computational overhead for other
> notifier block
> listeners is a concern.

The value field is already being used for "queued_during_suspend" in
event_notifier.
>
> BR,
>
> -Prashant
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bdc48fa11e46f867ea4d75fa59ee87a7f48be144

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-21 20:07 ` Guenter Roeck
@ 2022-12-21 23:55   ` Rob Barnes
  2022-12-22  0:08     ` Guenter Roeck
  0 siblings, 1 reply; 15+ messages in thread
From: Rob Barnes @ 2022-12-21 23:55 UTC (permalink / raw)
  To: Guenter Roeck
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Wed, Dec 21, 2022 at 1:07 PM Guenter Roeck <groeck@google.com> wrote:
>
> On Wed, Dec 21, 2022 at 10:56 AM Rob Barnes <robbarnes@google.com> wrote:
> >
> > Add handler for CrOS EC panic events. When a panic is reported,
> > poll EC log then force an orderly shutdown.
> >
> > This will preserve the EC log leading up to the crash.
> >
> > Signed-off-by: Rob Barnes <robbarnes@google.com>
> > ---
> >  drivers/platform/chrome/cros_ec_debugfs.c   | 24 +++++++++++++++++++++
> >  drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
> >  include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
> >  3 files changed, 43 insertions(+)
> >
> > diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> > index 21d973fc6be2..31637a4e4cf9 100644
> > --- a/drivers/platform/chrome/cros_ec_debugfs.c
> > +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> > @@ -49,6 +49,7 @@ struct cros_ec_debugfs {
> >         struct delayed_work log_poll_work;
> >         /* EC panicinfo */
> >         struct debugfs_blob_wrapper panicinfo_blob;
> > +       struct notifier_block notifier_panic;
> >  };
> >
> >  /*
> > @@ -437,6 +438,23 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
> >         return ret;
> >  }
> >
> > +static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
> > +                                      unsigned long queued_during_suspend,
> > +                                      void *_notify)
> > +{
> > +       struct cros_ec_debugfs *debug_info =
> > +               container_of(nb, struct cros_ec_debugfs, notifier_panic);
> > +
> > +       if (debug_info->log_buffer.buf) {
> > +               /* Force log poll work to run immediately */
> > +               mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
> > +               /* Block until log poll work finishes */
> > +               flush_delayed_work(&debug_info->log_poll_work);
> > +       }
> > +
> > +       return NOTIFY_DONE;
> > +}
> > +
> >  static int cros_ec_debugfs_probe(struct platform_device *pd)
> >  {
> >         struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> > @@ -473,6 +491,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
> >         debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
> >                            &ec->ec_dev->suspend_timeout_ms);
> >
> > +       debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
> > +       ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
> > +                                              &debug_info->notifier_panic);
> > +       if (ret)
> > +               goto remove_debugfs;
> > +
> >         ec->debug_info = debug_info;
> >
> >         dev_set_drvdata(&pd->dev, ec);
> > diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
> > index 7fc8f82280ac..21958c3b0c28 100644
> > --- a/drivers/platform/chrome/cros_ec_lpc.c
> > +++ b/drivers/platform/chrome/cros_ec_lpc.c
> > @@ -21,6 +21,7 @@
> >  #include <linux/platform_data/cros_ec_proto.h>
> >  #include <linux/platform_device.h>
> >  #include <linux/printk.h>
> > +#include <linux/reboot.h>
> >  #include <linux/suspend.h>
> >
> >  #include "cros_ec.h"
> > @@ -332,6 +333,15 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
> >
> >         if (value == ACPI_NOTIFY_DEVICE_WAKE)
> >                 pm_system_wakeup();
> > +
> > +       if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
> > +               dev_err(ec_dev->dev,
> > +                       "CrOS EC Panic Reported. Shutdown is imminent!");
>
> dev_emerg() would seem to be more appropriate, given that we'll be
> crashing a second later.

Makes sense. Will do.

>
> > +               blocking_notifier_call_chain(&ec_dev->panic_notifier, 0,
> > +                                            ec_dev);
> > +               /* Begin orderly shutdown. Force shutdown after 1 second. */
> > +               hw_protection_shutdown("CrOS EC Panic", 1000);
>
> That seems to be the wrong API. From its description:
>
>  * Initiate an emergency system shutdown in order to protect hardware from
>  * further damage. Usage examples include a thermal protection or a voltage or
>  * current regulator failures.
>
> This is an EC crash that apparently can not be handled gracefully.
> That has nothing to do with hardware protection. Why not just call
> panic() ?

I'm adding the ability for the EC to somewhat gracefully handle a
class of panics. The EC will enter "system safe mode" aka limp mode.
All tasks are disabled except for a few critical ones. The purpose of
this mode is to give the OS a couple of seconds to extract info about
the EC panic and shutdown as gracefully as possible. EC will still
force a reset after 2 seconds.

Since the EC is in a precarious, non deterministic state and the EC is
at least partially responsible for thermal and power regulation, it is
arguably a hardware protection event.

The primary motivation for allowing the OS to attempt an orderly
shutdown is to sync user data and EC panic data to storage. Will panic
sync storage? Or will that need to be a seperate call?

>
> > +       }
>
> It troubles me that this comes last. That means the entire
> mkbp_event_supported handling executes first. What troubles me even
> more is that 'value' is, with the exception of
> ACPI_NOTIFY_DEVICE_WAKE, not checked at all. Presumably the EC sends
> some well defined value if it has an event to report. I don't think it
> is a good idea to depend on cros_ec_get_next_event() returning
> something useful after the EC crashed.

Could certainly move the ACPI_NOTIFY_CROS_EC_PANIC to the top. I think
'value' will be 0x80 when there's MKBP events. I will validate this
assumption.

> How is this handled today ? Does the EC just reset the AP ?

EC just immediately resets the AP and itself. System safe mode (see
go/ec-system-safe-mode) will allow the EC to run a few seconds after
some types of crashes.



>
> >  }
> >
> >  static int cros_ec_lpc_probe(struct platform_device *pdev)
> > diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
> > index e43107e0bee1..1c4487271836 100644
> > --- a/include/linux/platform_data/cros_ec_proto.h
> > +++ b/include/linux/platform_data/cros_ec_proto.h
> > @@ -41,6 +41,13 @@
> >  #define EC_MAX_REQUEST_OVERHEAD                1
> >  #define EC_MAX_RESPONSE_OVERHEAD       32
> >
> > +/*
> > + * EC panic is not covered by the standard (0-F) ACPI notify values.
> > + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
> > + * device specific ACPI notify range.
> > + */
> > +#define ACPI_NOTIFY_CROS_EC_PANIC      0xB0
> > +
> >  /*
> >   * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
> >   */
> > @@ -176,6 +183,8 @@ struct cros_ec_device {
> >         /* The platform devices used by the mfd driver */
> >         struct platform_device *ec;
> >         struct platform_device *pd;
> > +
> > +       struct blocking_notifier_head panic_notifier;
> >  };
> >
> >  /**
> > --
> > 2.39.0.314.g84b9a713c41-goog
> >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-21 23:55   ` Rob Barnes
@ 2022-12-22  0:08     ` Guenter Roeck
  0 siblings, 0 replies; 15+ messages in thread
From: Guenter Roeck @ 2022-12-22  0:08 UTC (permalink / raw)
  To: Rob Barnes
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Wed, Dec 21, 2022 at 3:56 PM Rob Barnes <robbarnes@google.com> wrote:
>
> On Wed, Dec 21, 2022 at 1:07 PM Guenter Roeck <groeck@google.com> wrote:
> >
> > On Wed, Dec 21, 2022 at 10:56 AM Rob Barnes <robbarnes@google.com> wrote:
> > >
> > > Add handler for CrOS EC panic events. When a panic is reported,
> > > poll EC log then force an orderly shutdown.
> > >
> > > This will preserve the EC log leading up to the crash.
> > >
> > > Signed-off-by: Rob Barnes <robbarnes@google.com>
> > > ---
> > >  drivers/platform/chrome/cros_ec_debugfs.c   | 24 +++++++++++++++++++++
> > >  drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
> > >  include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
> > >  3 files changed, 43 insertions(+)
> > >
> > > diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> > > index 21d973fc6be2..31637a4e4cf9 100644
> > > --- a/drivers/platform/chrome/cros_ec_debugfs.c
> > > +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> > > @@ -49,6 +49,7 @@ struct cros_ec_debugfs {
> > >         struct delayed_work log_poll_work;
> > >         /* EC panicinfo */
> > >         struct debugfs_blob_wrapper panicinfo_blob;
> > > +       struct notifier_block notifier_panic;
> > >  };
> > >
> > >  /*
> > > @@ -437,6 +438,23 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
> > >         return ret;
> > >  }
> > >
> > > +static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
> > > +                                      unsigned long queued_during_suspend,
> > > +                                      void *_notify)
> > > +{
> > > +       struct cros_ec_debugfs *debug_info =
> > > +               container_of(nb, struct cros_ec_debugfs, notifier_panic);
> > > +
> > > +       if (debug_info->log_buffer.buf) {
> > > +               /* Force log poll work to run immediately */
> > > +               mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
> > > +               /* Block until log poll work finishes */
> > > +               flush_delayed_work(&debug_info->log_poll_work);
> > > +       }
> > > +
> > > +       return NOTIFY_DONE;
> > > +}
> > > +
> > >  static int cros_ec_debugfs_probe(struct platform_device *pd)
> > >  {
> > >         struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> > > @@ -473,6 +491,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
> > >         debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
> > >                            &ec->ec_dev->suspend_timeout_ms);
> > >
> > > +       debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
> > > +       ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
> > > +                                              &debug_info->notifier_panic);
> > > +       if (ret)
> > > +               goto remove_debugfs;
> > > +
> > >         ec->debug_info = debug_info;
> > >
> > >         dev_set_drvdata(&pd->dev, ec);
> > > diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
> > > index 7fc8f82280ac..21958c3b0c28 100644
> > > --- a/drivers/platform/chrome/cros_ec_lpc.c
> > > +++ b/drivers/platform/chrome/cros_ec_lpc.c
> > > @@ -21,6 +21,7 @@
> > >  #include <linux/platform_data/cros_ec_proto.h>
> > >  #include <linux/platform_device.h>
> > >  #include <linux/printk.h>
> > > +#include <linux/reboot.h>
> > >  #include <linux/suspend.h>
> > >
> > >  #include "cros_ec.h"
> > > @@ -332,6 +333,15 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
> > >
> > >         if (value == ACPI_NOTIFY_DEVICE_WAKE)
> > >                 pm_system_wakeup();
> > > +
> > > +       if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
> > > +               dev_err(ec_dev->dev,
> > > +                       "CrOS EC Panic Reported. Shutdown is imminent!");
> >
> > dev_emerg() would seem to be more appropriate, given that we'll be
> > crashing a second later.
>
> Makes sense. Will do.
>
> >
> > > +               blocking_notifier_call_chain(&ec_dev->panic_notifier, 0,
> > > +                                            ec_dev);
> > > +               /* Begin orderly shutdown. Force shutdown after 1 second. */
> > > +               hw_protection_shutdown("CrOS EC Panic", 1000);
> >
> > That seems to be the wrong API. From its description:
> >
> >  * Initiate an emergency system shutdown in order to protect hardware from
> >  * further damage. Usage examples include a thermal protection or a voltage or
> >  * current regulator failures.
> >
> > This is an EC crash that apparently can not be handled gracefully.
> > That has nothing to do with hardware protection. Why not just call
> > panic() ?
>
> I'm adding the ability for the EC to somewhat gracefully handle a
> class of panics. The EC will enter "system safe mode" aka limp mode.
> All tasks are disabled except for a few critical ones. The purpose of
> this mode is to give the OS a couple of seconds to extract info about
> the EC panic and shutdown as gracefully as possible. EC will still
> force a reset after 2 seconds.
>
> Since the EC is in a precarious, non deterministic state and the EC is
> at least partially responsible for thermal and power regulation, it is
> arguably a hardware protection event.
>

In my opinion it is an abuse of the API. I guess we have to agree to disagree.

Guenter

> The primary motivation for allowing the OS to attempt an orderly
> shutdown is to sync user data and EC panic data to storage. Will panic
> sync storage? Or will that need to be a seperate call?
>
> >
> > > +       }
> >
> > It troubles me that this comes last. That means the entire
> > mkbp_event_supported handling executes first. What troubles me even
> > more is that 'value' is, with the exception of
> > ACPI_NOTIFY_DEVICE_WAKE, not checked at all. Presumably the EC sends
> > some well defined value if it has an event to report. I don't think it
> > is a good idea to depend on cros_ec_get_next_event() returning
> > something useful after the EC crashed.
>
> Could certainly move the ACPI_NOTIFY_CROS_EC_PANIC to the top. I think
> 'value' will be 0x80 when there's MKBP events. I will validate this
> assumption.
>
> > How is this handled today ? Does the EC just reset the AP ?
>
> EC just immediately resets the AP and itself. System safe mode (see
> go/ec-system-safe-mode) will allow the EC to run a few seconds after
> some types of crashes.
>
>
>
> >
> > >  }
> > >
> > >  static int cros_ec_lpc_probe(struct platform_device *pdev)
> > > diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
> > > index e43107e0bee1..1c4487271836 100644
> > > --- a/include/linux/platform_data/cros_ec_proto.h
> > > +++ b/include/linux/platform_data/cros_ec_proto.h
> > > @@ -41,6 +41,13 @@
> > >  #define EC_MAX_REQUEST_OVERHEAD                1
> > >  #define EC_MAX_RESPONSE_OVERHEAD       32
> > >
> > > +/*
> > > + * EC panic is not covered by the standard (0-F) ACPI notify values.
> > > + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
> > > + * device specific ACPI notify range.
> > > + */
> > > +#define ACPI_NOTIFY_CROS_EC_PANIC      0xB0
> > > +
> > >  /*
> > >   * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
> > >   */
> > > @@ -176,6 +183,8 @@ struct cros_ec_device {
> > >         /* The platform devices used by the mfd driver */
> > >         struct platform_device *ec;
> > >         struct platform_device *pd;
> > > +
> > > +       struct blocking_notifier_head panic_notifier;
> > >  };
> > >
> > >  /**
> > > --
> > > 2.39.0.314.g84b9a713c41-goog
> > >

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-21 23:55   ` Rob Barnes
@ 2022-12-22  0:38     ` Prashant Malani
  2023-01-03 22:52       ` [PATCH v2 0/2] Handle CrOS EC Panics Rob Barnes
  2023-01-03 23:14       ` [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
  0 siblings, 2 replies; 15+ messages in thread
From: Prashant Malani @ 2022-12-22  0:38 UTC (permalink / raw)
  To: Rob Barnes
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Wed, Dec 21, 2022 at 3:55 PM Rob Barnes <robbarnes@google.com> wrote:
>
> On Wed, Dec 21, 2022 at 12:23 PM Prashant Malani <pmalani@chromium.org> wrote:
> >
> >
> > Can you provide a link (either in the commit, or here in the comment)
> > to the coreboot/BIOS code which uses this value? I feel this should
> > be documented in some form that correlates the caller and the callee.
>
> Link: https://chromium-review.googlesource.com/c/chromiumos/third_party/coreboot/+/4023535

Thanks. Please add a code link (for example, I could find [2], but you
could use another code mirror
if there is a canonical one for coreboot) to the commit description,
or in the comment when you send
out v2.

> > Any reason we cannot use the existing event_notifier (with value argument)?
> > It's a system panic, so I doubt that computational overhead for other
> > notifier block
> > listeners is a concern.
>
> The value field is already being used for "queued_during_suspend" in
> event_notifier.

OK, I suppose you could use the data pointer...

It's just I find having a notifier for a single use case overkill(even
2 would be fine); one could get away with exposing a method
in cros_typec_debugfs via a local .h file (it can compile to a stub if
cros_typec_debugfs is not compiled to the kernel);
the LPC code can then just call that method instead of invoking a notifier.

Best regards,

-Prashant

> > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bdc48fa11e46f867ea4d75fa59ee87a7f48be144
[2] https://github.com/coreboot/coreboot/blob/ff6b3af113f84a1957dcdf8a8179a751ce08becf/src/ec/google/chromeec/acpi/ec.asl#L15

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 0/2] Handle CrOS EC Panics
  2022-12-22  0:38     ` Prashant Malani
@ 2023-01-03 22:52       ` Rob Barnes
  2023-01-03 22:52         ` [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic Rob Barnes
  2023-01-03 22:52         ` [PATCH v2 2/2] platform/chrome: cros_ec: Shutdown on EC Panic Rob Barnes
  2023-01-03 23:14       ` [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
  1 sibling, 2 replies; 15+ messages in thread
From: Rob Barnes @ 2023-01-03 22:52 UTC (permalink / raw)
  To: groeck, pmalani; +Cc: chrome-platform, dtor, Rob Barnes

Currently the OS ignores EC panics when they occur.
After reporting a panic, the EC will force a hard reset,
possibly after a short delay. This may cause loss of data.

These patches add a handler for CrOS EC panics. When
a panic is detected the OS will attempt to flush critical
data for debugging purposes and attempt an orderly shutdown.

Changelog since v1:
- Updated commit messages
- Split into two patches
- Moved panic handle before mkbp loop
- Switched to dev_emerg

Rob Barnes (2):
  platform/chrome: cros_ec: Poll EC log on EC panic
  platform/chrome: cros_ec: Shutdown on EC Panic

 drivers/platform/chrome/cros_ec_debugfs.c   | 23 +++++++++++++++++++++
 drivers/platform/chrome/cros_ec_lpc.c       | 10 +++++++++
 include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
 3 files changed, 42 insertions(+)

-- 
2.39.0.314.g84b9a713c41-goog


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic
  2023-01-03 22:52       ` [PATCH v2 0/2] Handle CrOS EC Panics Rob Barnes
@ 2023-01-03 22:52         ` Rob Barnes
  2023-01-03 23:14           ` Brian Norris
  2023-01-03 22:52         ` [PATCH v2 2/2] platform/chrome: cros_ec: Shutdown on EC Panic Rob Barnes
  1 sibling, 1 reply; 15+ messages in thread
From: Rob Barnes @ 2023-01-03 22:52 UTC (permalink / raw)
  To: groeck, pmalani; +Cc: chrome-platform, dtor, Rob Barnes

Add handler for CrOS EC panic events. When a panic is reported,
immediately poll for EC log.

This should result in the log leading to the EC panic being
preserved.

ACPI_NOTIFY_CROS_EC_PANIC is defined in coreboot at
https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/ec/google/chromeec/acpi/ec.asl

Change-Id: I622a5e99b8be922f5c1a8004a863cf94a63ac813
Signed-off-by: Rob Barnes <robbarnes@google.com>
---
 drivers/platform/chrome/cros_ec_debugfs.c   | 23 +++++++++++++++++++++
 drivers/platform/chrome/cros_ec_lpc.c       |  7 +++++++
 include/linux/platform_data/cros_ec_proto.h |  9 ++++++++
 3 files changed, 39 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 21d973fc6be2..34f7b46f8761 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -49,6 +49,7 @@ struct cros_ec_debugfs {
 	struct delayed_work log_poll_work;
 	/* EC panicinfo */
 	struct debugfs_blob_wrapper panicinfo_blob;
+	struct notifier_block notifier_panic;
 };
 
 /*
@@ -437,6 +438,22 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
 	return ret;
 }
 
+static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
+				       unsigned long queued_during_suspend, void *_notify)
+{
+	struct cros_ec_debugfs *debug_info =
+		container_of(nb, struct cros_ec_debugfs, notifier_panic);
+
+	if (debug_info->log_buffer.buf) {
+		/* Force log poll work to run immediately */
+		mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
+		/* Block until log poll work finishes */
+		flush_delayed_work(&debug_info->log_poll_work);
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int cros_ec_debugfs_probe(struct platform_device *pd)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
@@ -473,6 +490,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
 	debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
 			   &ec->ec_dev->suspend_timeout_ms);
 
+	debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
+	ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
+					       &debug_info->notifier_panic);
+	if (ret)
+		goto remove_debugfs;
+
 	ec->debug_info = debug_info;
 
 	dev_set_drvdata(&pd->dev, ec);
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 7fc8f82280ac..5738f1d25091 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -320,6 +320,13 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 
 	ec_dev->last_event_time = cros_ec_get_time_ns();
 
+	if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
+		dev_emerg(ec_dev->dev, "CrOS EC Panic Reported. Shutdown is imminent!");
+		blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev);
+		/* Do not query for other events after a panic is reported */
+		return;
+	}
+
 	if (ec_dev->mkbp_event_supported)
 		do {
 			ret = cros_ec_get_next_event(ec_dev, NULL,
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index e43107e0bee1..7fb2196f99b0 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -41,6 +41,13 @@
 #define EC_MAX_REQUEST_OVERHEAD		1
 #define EC_MAX_RESPONSE_OVERHEAD	32
 
+/*
+ * EC panic is not covered by the standard (0-F) ACPI notify values.
+ * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
+ * device specific ACPI notify range.
+ */
+#define ACPI_NOTIFY_CROS_EC_PANIC 0xB0
+
 /*
  * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
  */
@@ -176,6 +183,8 @@ struct cros_ec_device {
 	/* The platform devices used by the mfd driver */
 	struct platform_device *ec;
 	struct platform_device *pd;
+
+	struct blocking_notifier_head panic_notifier;
 };
 
 /**
-- 
2.39.0.314.g84b9a713c41-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 2/2] platform/chrome: cros_ec: Shutdown on EC Panic
  2023-01-03 22:52       ` [PATCH v2 0/2] Handle CrOS EC Panics Rob Barnes
  2023-01-03 22:52         ` [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic Rob Barnes
@ 2023-01-03 22:52         ` Rob Barnes
  1 sibling, 0 replies; 15+ messages in thread
From: Rob Barnes @ 2023-01-03 22:52 UTC (permalink / raw)
  To: groeck, pmalani; +Cc: chrome-platform, dtor, Rob Barnes

When an EC panic is reported, attempt an orderly shutdown.
Force a shutdown after a brief timeout if the orderly shutdown
fails for any reason.

Using the common hw_protection_shutdown utility function since
an EC panic has the potential to cause hw damage.

This is all best effort. EC should also force a hard reset after a
short timeout.

Change-Id: Ie87e7921431b85e3aba5c0e6c9f0a34e4daa181d
Signed-off-by: Rob Barnes <robbarnes@google.com>
---
 drivers/platform/chrome/cros_ec_lpc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 5738f1d25091..3708fa75feb1 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -21,6 +21,7 @@
 #include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/reboot.h>
 #include <linux/suspend.h>
 
 #include "cros_ec.h"
@@ -323,6 +324,8 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 	if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
 		dev_emerg(ec_dev->dev, "CrOS EC Panic Reported. Shutdown is imminent!");
 		blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev);
+		/* Begin orderly shutdown. Force shutdown after 1 second. */
+		hw_protection_shutdown("CrOS EC Panic", 1000);
 		/* Do not query for other events after a panic is reported */
 		return;
 	}
-- 
2.39.0.314.g84b9a713c41-goog


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic
  2023-01-03 22:52         ` [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic Rob Barnes
@ 2023-01-03 23:14           ` Brian Norris
  2023-01-03 23:17             ` Rob Barnes
  0 siblings, 1 reply; 15+ messages in thread
From: Brian Norris @ 2023-01-03 23:14 UTC (permalink / raw)
  To: Rob Barnes; +Cc: groeck, pmalani, chrome-platform, dtor

On Tue, Jan 3, 2023 at 2:52 PM Rob Barnes <robbarnes@google.com> wrote:
> Change-Id: I622a5e99b8be922f5c1a8004a863cf94a63ac813

Gerrit Change-Ids don't belong in upstream commit messages. You could
either use `patman` (from the U-Boot project) to send your patches (in
which case, the Change-Id can be helpful to patman in constructing
useful Message-Ids, and patman will strip it in the final mail), or
else run checkpatch properly, which would complain (unless you're
using something like `--ignore=GERRIT_CHANGE_ID` on the CLI or in
~/.checkpatch.conf).

Presumably the apply-er can fix this, or else if you have a v3+, might
as well fix it yourself.

Brian

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2022-12-22  0:38     ` Prashant Malani
  2023-01-03 22:52       ` [PATCH v2 0/2] Handle CrOS EC Panics Rob Barnes
@ 2023-01-03 23:14       ` Rob Barnes
  2023-01-03 23:27         ` Prashant Malani
  1 sibling, 1 reply; 15+ messages in thread
From: Rob Barnes @ 2023-01-03 23:14 UTC (permalink / raw)
  To: Prashant Malani
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Wed, Dec 21, 2022 at 5:38 PM Prashant Malani <pmalani@chromium.org> wrote:
>
> On Wed, Dec 21, 2022 at 3:55 PM Rob Barnes <robbarnes@google.com> wrote:
> >
> > On Wed, Dec 21, 2022 at 12:23 PM Prashant Malani <pmalani@chromium.org> wrote:
> > >
> > >
> > > Can you provide a link (either in the commit, or here in the comment)
> > > to the coreboot/BIOS code which uses this value? I feel this should
> > > be documented in some form that correlates the caller and the callee.
> >
> > Link: https://chromium-review.googlesource.com/c/chromiumos/third_party/coreboot/+/4023535
>
> Thanks. Please add a code link (for example, I could find [2], but you
> could use another code mirror
> if there is a canonical one for coreboot) to the commit description,
> or in the comment when you send
> out v2.
>
> > > Any reason we cannot use the existing event_notifier (with value argument)?
> > > It's a system panic, so I doubt that computational overhead for other
> > > notifier block
> > > listeners is a concern.
> >
> > The value field is already being used for "queued_during_suspend" in
> > event_notifier.
>
> OK, I suppose you could use the data pointer...

The data pointer contains ec_dev, so that's not really an option either.

>
> It's just I find having a notifier for a single use case overkill(even
> 2 would be fine); one could get away with exposing a method
> in cros_typec_debugfs via a local .h file (it can compile to a stub if
> cros_typec_debugfs is not compiled to the kernel);
> the LPC code can then just call that method instead of invoking a notifier.

My first implementation did make a direct call to cros_ec_debugfs.c,
but an internal reviewer recommended using an event notifier instead.
So I'm histent to go back to a direct call.

There may be other sub drivers that want to handle EC panics. So I
think keeping this as a separate notifier makes sense given the
constraints.

>
> Best regards,
>
> -Prashant
>
> > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bdc48fa11e46f867ea4d75fa59ee87a7f48be144
> [2] https://github.com/coreboot/coreboot/blob/ff6b3af113f84a1957dcdf8a8179a751ce08becf/src/ec/google/chromeec/acpi/ec.asl#L15

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic
  2023-01-03 23:14           ` Brian Norris
@ 2023-01-03 23:17             ` Rob Barnes
  0 siblings, 0 replies; 15+ messages in thread
From: Rob Barnes @ 2023-01-03 23:17 UTC (permalink / raw)
  To: Brian Norris; +Cc: groeck, pmalani, chrome-platform, dtor

Yep, looks like I made a couple mistakes here. I also sent it as a
reply instead of a new thread. Will send a v3. Thank you.

On Tue, Jan 3, 2023 at 4:15 PM Brian Norris <briannorris@chromium.org> wrote:
>
> On Tue, Jan 3, 2023 at 2:52 PM Rob Barnes <robbarnes@google.com> wrote:
> > Change-Id: I622a5e99b8be922f5c1a8004a863cf94a63ac813
>
> Gerrit Change-Ids don't belong in upstream commit messages. You could
> either use `patman` (from the U-Boot project) to send your patches (in
> which case, the Change-Id can be helpful to patman in constructing
> useful Message-Ids, and patman will strip it in the final mail), or
> else run checkpatch properly, which would complain (unless you're
> using something like `--ignore=GERRIT_CHANGE_ID` on the CLI or in
> ~/.checkpatch.conf).
>
> Presumably the apply-er can fix this, or else if you have a v3+, might
> as well fix it yourself.
>
> Brian

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2023-01-03 23:14       ` [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
@ 2023-01-03 23:27         ` Prashant Malani
  2023-01-04  1:19           ` Rob Barnes
  0 siblings, 1 reply; 15+ messages in thread
From: Prashant Malani @ 2023-01-03 23:27 UTC (permalink / raw)
  To: Rob Barnes
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Tue, Jan 3, 2023 at 3:15 PM Rob Barnes <robbarnes@google.com> wrote:
>
> On Wed, Dec 21, 2022 at 5:38 PM Prashant Malani <pmalani@chromium.org> wrote:
> >
> > On Wed, Dec 21, 2022 at 3:55 PM Rob Barnes <robbarnes@google.com> wrote:
> > >
> > > On Wed, Dec 21, 2022 at 12:23 PM Prashant Malani <pmalani@chromium.org> wrote:
> > > >
>
> >
> > It's just I find having a notifier for a single use case overkill(even
> > 2 would be fine); one could get away with exposing a method
> > in cros_typec_debugfs via a local .h file (it can compile to a stub if
> > cros_typec_debugfs is not compiled to the kernel);
> > the LPC code can then just call that method instead of invoking a notifier.
>
> My first implementation did make a direct call to cros_ec_debugfs.c,
> but an internal reviewer recommended using an event notifier instead.
> So I'm histent to go back to a direct call.
>
> There may be other sub drivers that want to handle EC panics. So I
> think keeping this as a separate notifier makes sense given the
> constraints.

The issue with that reasoning vis-à-vis your implementation is that
the panic notifier is tied to cros_ec_debugfs. What if another
(sub)-driver wants to use the
panic notifier to do something, but that system doesn't have CONFIG_DEBUGFS
enabled?

Having a direct/explicit dependency avoids that issue; LPC depends on
debugfs being compiled
to have the log printed out (a stub is used when debugfs is not
enabled), but nothing else
relies on debugfs for a panic notifier.

BR,

-Prashant

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] drivers/cros_ec: Handle CrOS EC panics
  2023-01-03 23:27         ` Prashant Malani
@ 2023-01-04  1:19           ` Rob Barnes
  0 siblings, 0 replies; 15+ messages in thread
From: Rob Barnes @ 2023-01-04  1:19 UTC (permalink / raw)
  To: Prashant Malani
  Cc: Guenter Roeck, chrome-platform, linux-kernel, dtor, Benson Leung

On Tue, Jan 3, 2023 at 4:28 PM Prashant Malani <pmalani@chromium.org> wrote:
>
> On Tue, Jan 3, 2023 at 3:15 PM Rob Barnes <robbarnes@google.com> wrote:
> >
> > On Wed, Dec 21, 2022 at 5:38 PM Prashant Malani <pmalani@chromium.org> wrote:
> > >
> > > On Wed, Dec 21, 2022 at 3:55 PM Rob Barnes <robbarnes@google.com> wrote:
> > > >
> > > > On Wed, Dec 21, 2022 at 12:23 PM Prashant Malani <pmalani@chromium.org> wrote:
> > > > >
> >
> > >
> > > It's just I find having a notifier for a single use case overkill(even
> > > 2 would be fine); one could get away with exposing a method
> > > in cros_typec_debugfs via a local .h file (it can compile to a stub if
> > > cros_typec_debugfs is not compiled to the kernel);
> > > the LPC code can then just call that method instead of invoking a notifier.
> >
> > My first implementation did make a direct call to cros_ec_debugfs.c,
> > but an internal reviewer recommended using an event notifier instead.
> > So I'm histent to go back to a direct call.
> >
> > There may be other sub drivers that want to handle EC panics. So I
> > think keeping this as a separate notifier makes sense given the
> > constraints.
>
> The issue with that reasoning vis-à-vis your implementation is that
> the panic notifier is tied to cros_ec_debugfs. What if another
> (sub)-driver wants to use the
> panic notifier to do something, but that system doesn't have CONFIG_DEBUGFS
> enabled?

There isn't a dependency between cros_ec_debugfs.c and
`panic_notifier` inside `cros_ec_proto.h`. So (sub)-drivers can handle
EC panics when CONFIG_DEBUGFS is not enabled.

>
> Having a direct/explicit dependency avoids that issue; LPC depends on
> debugfs being compiled
> to have the log printed out (a stub is used when debugfs is not
> enabled), but nothing else
> relies on debugfs for a panic notifier.
>
> BR,
>
> -Prashant

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-01-04  1:20 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-21 18:55 [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
2022-12-21 19:23 ` Prashant Malani
2022-12-21 23:55   ` Rob Barnes
2022-12-22  0:38     ` Prashant Malani
2023-01-03 22:52       ` [PATCH v2 0/2] Handle CrOS EC Panics Rob Barnes
2023-01-03 22:52         ` [PATCH v2 1/2] platform/chrome: cros_ec: Poll EC log on EC panic Rob Barnes
2023-01-03 23:14           ` Brian Norris
2023-01-03 23:17             ` Rob Barnes
2023-01-03 22:52         ` [PATCH v2 2/2] platform/chrome: cros_ec: Shutdown on EC Panic Rob Barnes
2023-01-03 23:14       ` [PATCH] drivers/cros_ec: Handle CrOS EC panics Rob Barnes
2023-01-03 23:27         ` Prashant Malani
2023-01-04  1:19           ` Rob Barnes
2022-12-21 20:07 ` Guenter Roeck
2022-12-21 23:55   ` Rob Barnes
2022-12-22  0:08     ` Guenter Roeck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.