linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] powerpc/powernv: report error messages from opal
       [not found] <1482298544-8418-1-git-send-email-oohall@gmail.com>
@ 2016-12-21  5:52 ` Oliver O'Halloran
  2017-07-05  7:07   ` Stewart Smith
  0 siblings, 1 reply; 6+ messages in thread
From: Oliver O'Halloran @ 2016-12-21  5:52 UTC (permalink / raw)
  To: skiboot; +Cc: linuxppc-dev, Oliver O'Halloran

Recent versions of skiboot will raise an OPAL event (read: interrupt)
when firmware writes an error message to its internal console. In
conjunction they provide an OPAL call that the kernel can use to extract
these messages from the OPAL log to allow them to be written into the
kernel's log buffer where someone will (hopefully) look at them.

For the companion skiboot patches see:

	https://lists.ozlabs.org/pipermail/skiboot/2016-December/005861.html

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
---
 arch/powerpc/include/asm/opal-api.h            |  5 +++-
 arch/powerpc/include/asm/opal.h                |  1 +
 arch/powerpc/platforms/powernv/opal-msglog.c   | 41 ++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 0e2e57bcab50..cb9c0e6afb33 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -167,7 +167,8 @@
 #define OPAL_INT_EOI				124
 #define OPAL_INT_SET_MFRR			125
 #define OPAL_PCI_TCE_KILL			126
-#define OPAL_LAST				126
+#define OPAL_SCRAPE_LOG				128
+#define OPAL_LAST				128
 
 /* Device tree flags */
 
@@ -288,6 +289,7 @@ enum OpalPendingState {
 	OPAL_EVENT_PCI_ERROR	   = 0x200,
 	OPAL_EVENT_DUMP_AVAIL	   = 0x400,
 	OPAL_EVENT_MSG_PENDING	   = 0x800,
+	OPAL_EVENT_LOG_PENDING	   = 0x1000,
 };
 
 enum OpalThreadStatus {
@@ -406,6 +408,7 @@ enum opal_msg_type {
 	OPAL_MSG_DPO		= 5,
 	OPAL_MSG_PRD		= 6,
 	OPAL_MSG_OCC		= 7,
+	OPAL_MSG_LOG		= 8,
 	OPAL_MSG_TYPE_MAX,
 };
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5c7db0f1a708..2b3bd3219fb4 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -232,6 +232,7 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
 int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
 			     uint32_t pe_num, uint32_t tce_size,
 			     uint64_t dma_addr, uint32_t npages);
+int64_t opal_scrape_log(int64_t *offset, char *buf, int64_t len, int64_t *lvl);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
index 39d6ff9e5630..78168f66fb24 100644
--- a/arch/powerpc/platforms/powernv/opal-msglog.c
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -15,6 +15,7 @@
 #include <linux/of.h>
 #include <linux/types.h>
 #include <asm/barrier.h>
+#include <linux/interrupt.h>
 
 /* OPAL in-memory console. Defined in OPAL source at core/console.c */
 struct memcons {
@@ -102,8 +103,36 @@ static struct bin_attribute opal_msglog_attr = {
 	.read = opal_msglog_read
 };
 
+static char *log_levels[] = { "Emergency", "Alert", "Critical", "Error", "Warning" };
+static int64_t offset = -1;
+
+static irqreturn_t opal_print_log(int irq, void *data)
+{
+	int64_t rc, log_lvl;
+	char buffer[320];
+
+	/*
+	 * only print one message per invokation of the IRQ handler
+	 */
+
+	rc = opal_scrape_log(&offset, buffer, sizeof(buffer), &log_lvl);
+
+	if (rc == OPAL_SUCCESS || rc == OPAL_PARTIAL) {
+		log_lvl = be64_to_cpu(log_lvl);
+		if (log_lvl > 4)
+			log_lvl = 4;
+
+		printk_emit(0, log_lvl, NULL, 0, "OPAL %s: %s%s\r\n",
+			log_levels[log_lvl], buffer,
+			rc == OPAL_PARTIAL ? "<truncated>" : "");
+	}
+
+	return IRQ_HANDLED;
+}
+
 void __init opal_msglog_init(void)
 {
+	int virq, rc = -1;
 	u64 mcaddr;
 	struct memcons *mc;
 
@@ -123,6 +152,18 @@ void __init opal_msglog_init(void)
 		return;
 	}
 
+	virq = opal_event_request(ilog2(OPAL_EVENT_LOG_PENDING));
+	if (virq) {
+		rc = request_irq(virq, opal_print_log,
+			IRQF_TRIGGER_HIGH, "opal memcons", NULL);
+
+		if (rc)
+			irq_dispose_mapping(virq);
+	}
+
+	if (!virq || rc)
+		pr_warn("Unable to register OPAL log event handler\n");
+
 	opal_memcons = mc;
 }
 
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3aa40f1b20f5..c59d7da3fd1a 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -312,3 +312,4 @@ OPAL_CALL(opal_int_set_mfrr,			OPAL_INT_SET_MFRR);
 OPAL_CALL_REAL(opal_rm_int_set_mfrr,		OPAL_INT_SET_MFRR);
 OPAL_CALL(opal_pci_tce_kill,			OPAL_PCI_TCE_KILL);
 OPAL_CALL_REAL(opal_rm_pci_tce_kill,		OPAL_PCI_TCE_KILL);
+OPAL_CALL(opal_scrape_log,			OPAL_SCRAPE_LOG);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] powerpc/powernv: report error messages from opal
  2016-12-21  5:52 ` [RFC PATCH] powerpc/powernv: report error messages from opal Oliver O'Halloran
@ 2017-07-05  7:07   ` Stewart Smith
  2017-07-06 10:20     ` [Skiboot] " Michael Ellerman
  0 siblings, 1 reply; 6+ messages in thread
From: Stewart Smith @ 2017-07-05  7:07 UTC (permalink / raw)
  To: Oliver O'Halloran, skiboot; +Cc: Oliver O'Halloran, linuxppc-dev

Oliver O'Halloran <oohall@gmail.com> writes:
> Recent versions of skiboot will raise an OPAL event (read: interrupt)
> when firmware writes an error message to its internal console. In
> conjunction they provide an OPAL call that the kernel can use to extract
> these messages from the OPAL log to allow them to be written into the
> kernel's log buffer where someone will (hopefully) look at them.
>
> For the companion skiboot patches see:
>
> 	https://lists.ozlabs.org/pipermail/skiboot/2016-December/005861.html
>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/include/asm/opal-api.h            |  5 +++-
>  arch/powerpc/include/asm/opal.h                |  1 +
>  arch/powerpc/platforms/powernv/opal-msglog.c   | 41 ++++++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
>  4 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
> index 0e2e57bcab50..cb9c0e6afb33 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -167,7 +167,8 @@
>  #define OPAL_INT_EOI				124
>  #define OPAL_INT_SET_MFRR			125
>  #define OPAL_PCI_TCE_KILL			126
> -#define OPAL_LAST				126
> +#define OPAL_SCRAPE_LOG				128

(another thought, along with the skiboot thoughts), I don't like the
SCRAPE_LOG name so much, as it's more of a "hey linux, here's some log
messages from firmware, possibly before you were
involved"... OPAL_FETCH_LOG ?

-- 
Stewart Smith
OPAL Architect, IBM.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Skiboot] [RFC PATCH] powerpc/powernv: report error messages from opal
  2017-07-05  7:07   ` Stewart Smith
@ 2017-07-06 10:20     ` Michael Ellerman
  2017-07-07  0:28       ` Stewart Smith
  2017-07-07  0:45       ` Oliver
  0 siblings, 2 replies; 6+ messages in thread
From: Michael Ellerman @ 2017-07-06 10:20 UTC (permalink / raw)
  To: Stewart Smith, Oliver O'Halloran, skiboot
  Cc: linuxppc-dev, Oliver O'Halloran

Stewart Smith <stewart@linux.vnet.ibm.com> writes:
> Oliver O'Halloran <oohall@gmail.com> writes:
>> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
>> index 0e2e57bcab50..cb9c0e6afb33 100644
>> --- a/arch/powerpc/include/asm/opal-api.h
>> +++ b/arch/powerpc/include/asm/opal-api.h
>> @@ -167,7 +167,8 @@
>>  #define OPAL_INT_EOI				124
>>  #define OPAL_INT_SET_MFRR			125
>>  #define OPAL_PCI_TCE_KILL			126
>> -#define OPAL_LAST				126
>> +#define OPAL_SCRAPE_LOG				128
>
> (another thought, along with the skiboot thoughts), I don't like the
> SCRAPE_LOG name so much, as it's more of a "hey linux, here's some log
> messages from firmware, possibly before you were
> involved"... OPAL_FETCH_LOG ?

I'm not a huge fan of an interrupt followed by an opal call just to
fetch a single line of log.

Can't we do something more like the existing msglog code, where we have
a ring buffer and then the interrupt just becomes "hey Linux you should
look at your ring buffer".

cheers

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Skiboot] [RFC PATCH] powerpc/powernv: report error messages from opal
  2017-07-06 10:20     ` [Skiboot] " Michael Ellerman
@ 2017-07-07  0:28       ` Stewart Smith
  2017-07-07  0:54         ` Oliver
  2017-07-07  0:45       ` Oliver
  1 sibling, 1 reply; 6+ messages in thread
From: Stewart Smith @ 2017-07-07  0:28 UTC (permalink / raw)
  To: Michael Ellerman, Oliver O'Halloran, skiboot
  Cc: linuxppc-dev, Oliver O'Halloran

Michael Ellerman <mpe@ellerman.id.au> writes:
> Stewart Smith <stewart@linux.vnet.ibm.com> writes:
>> Oliver O'Halloran <oohall@gmail.com> writes:
>>> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
>>> index 0e2e57bcab50..cb9c0e6afb33 100644
>>> --- a/arch/powerpc/include/asm/opal-api.h
>>> +++ b/arch/powerpc/include/asm/opal-api.h
>>> @@ -167,7 +167,8 @@
>>>  #define OPAL_INT_EOI				124
>>>  #define OPAL_INT_SET_MFRR			125
>>>  #define OPAL_PCI_TCE_KILL			126
>>> -#define OPAL_LAST				126
>>> +#define OPAL_SCRAPE_LOG				128
>>
>> (another thought, along with the skiboot thoughts), I don't like the
>> SCRAPE_LOG name so much, as it's more of a "hey linux, here's some log
>> messages from firmware, possibly before you were
>> involved"... OPAL_FETCH_LOG ?
>
> I'm not a huge fan of an interrupt followed by an opal call just to
> fetch a single line of log.
>
> Can't we do something more like the existing msglog code, where we have
> a ring buffer and then the interrupt just becomes "hey Linux you should
> look at your ring buffer".

Yeah... that would probably be a bit better...

Although that would mean we can only ever tell the running kernel what's
in the ring buffer. We couldn't work out a way to (after kexec) resend
important bits of info such as "we garded things out, your OCC didn't
start and we trained everything at really slow speeds"


-- 
Stewart Smith
OPAL Architect, IBM.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Skiboot] [RFC PATCH] powerpc/powernv: report error messages from opal
  2017-07-06 10:20     ` [Skiboot] " Michael Ellerman
  2017-07-07  0:28       ` Stewart Smith
@ 2017-07-07  0:45       ` Oliver
  1 sibling, 0 replies; 6+ messages in thread
From: Oliver @ 2017-07-07  0:45 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Stewart Smith, skiboot list, linuxppc-dev

On Thu, Jul 6, 2017 at 8:20 PM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> Stewart Smith <stewart@linux.vnet.ibm.com> writes:
>> Oliver O'Halloran <oohall@gmail.com> writes:
>>> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
>>> index 0e2e57bcab50..cb9c0e6afb33 100644
>>> --- a/arch/powerpc/include/asm/opal-api.h
>>> +++ b/arch/powerpc/include/asm/opal-api.h
>>> @@ -167,7 +167,8 @@
>>>  #define OPAL_INT_EOI                                124
>>>  #define OPAL_INT_SET_MFRR                   125
>>>  #define OPAL_PCI_TCE_KILL                   126
>>> -#define OPAL_LAST                           126
>>> +#define OPAL_SCRAPE_LOG                             128
>>
>> (another thought, along with the skiboot thoughts), I don't like the
>> SCRAPE_LOG name so much, as it's more of a "hey linux, here's some log
>> messages from firmware, possibly before you were
>> involved"... OPAL_FETCH_LOG ?
>
> I'm not a huge fan of an interrupt followed by an opal call just to
> fetch a single line of log.
>
> Can't we do something more like the existing msglog code, where we have
> a ring buffer and then the interrupt just becomes "hey Linux you should
> look at your ring buffer".

eh... personally I don't see the overhead of the OPAL call as being
important. OPAL errors are infrequent and there's no need to make
fetching OPAL errors fast.

I think we have a bit of a recurring problem where we make random
Skiboot implementation details into part of the OPAL ABI. There is
*no* reason the kernel should care about the skiboot log format so I'd
rather we just kept the details of parsing it inside of Skiboot. I'm
open to suggestions of how to improve the interface, but I think we
should keep it.

oliver

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Skiboot] [RFC PATCH] powerpc/powernv: report error messages from opal
  2017-07-07  0:28       ` Stewart Smith
@ 2017-07-07  0:54         ` Oliver
  0 siblings, 0 replies; 6+ messages in thread
From: Oliver @ 2017-07-07  0:54 UTC (permalink / raw)
  To: Stewart Smith; +Cc: Michael Ellerman, skiboot list, linuxppc-dev

On Fri, Jul 7, 2017 at 10:28 AM, Stewart Smith
<stewart@linux.vnet.ibm.com> wrote:
> Michael Ellerman <mpe@ellerman.id.au> writes:
>> Stewart Smith <stewart@linux.vnet.ibm.com> writes:
>>> Oliver O'Halloran <oohall@gmail.com> writes:
>>>> diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
>>>> index 0e2e57bcab50..cb9c0e6afb33 100644
>>>> --- a/arch/powerpc/include/asm/opal-api.h
>>>> +++ b/arch/powerpc/include/asm/opal-api.h
>>>> @@ -167,7 +167,8 @@
>>>>  #define OPAL_INT_EOI                               124
>>>>  #define OPAL_INT_SET_MFRR                  125
>>>>  #define OPAL_PCI_TCE_KILL                  126
>>>> -#define OPAL_LAST                          126
>>>> +#define OPAL_SCRAPE_LOG                            128
>>>
>>> (another thought, along with the skiboot thoughts), I don't like the
>>> SCRAPE_LOG name so much, as it's more of a "hey linux, here's some log
>>> messages from firmware, possibly before you were
>>> involved"... OPAL_FETCH_LOG ?
>>
>> I'm not a huge fan of an interrupt followed by an opal call just to
>> fetch a single line of log.
>>
>> Can't we do something more like the existing msglog code, where we have
>> a ring buffer and then the interrupt just becomes "hey Linux you should
>> look at your ring buffer".
>
> Yeah... that would probably be a bit better...
>
> Although that would mean we can only ever tell the running kernel what's
> in the ring buffer. We couldn't work out a way to (after kexec) resend
> important bits of info such as "we garded things out, your OCC didn't
> start and we trained everything at really slow speeds"

Isn't this is the use case for BMC based error logs? A ring buffer is
fundamentally a structure for transient data. There's no real way to
hack in permanence other than making sure that making sure it never
fills.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-07-07  0:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1482298544-8418-1-git-send-email-oohall@gmail.com>
2016-12-21  5:52 ` [RFC PATCH] powerpc/powernv: report error messages from opal Oliver O'Halloran
2017-07-05  7:07   ` Stewart Smith
2017-07-06 10:20     ` [Skiboot] " Michael Ellerman
2017-07-07  0:28       ` Stewart Smith
2017-07-07  0:54         ` Oliver
2017-07-07  0:45       ` Oliver

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).