linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Anton Blanchard <anton@samba.org>
To: benh@kernel.crashing.org, paulus@samba.org
Cc: linuxppc-dev@lists.ozlabs.org
Subject: [PATCH 2/6] powerpc/pseries: Parse and handle EPOW interrupts
Date: Thu, 22 Mar 2012 12:49:59 +1100	[thread overview]
Message-ID: <20120322124959.32d84480@kryten> (raw)
In-Reply-To: <20120322124707.3cd3037f@kryten>


We have code to take environmental and power warning (EPOW)
interrupts but it simply prints a terse error message:

EPOW <0x6240040000000b8 0x0 0x0>

which tells us nothing about what happened. Even worse, if we
don't correctly respond to the interrupt we may get terminated
by firmware.

Add code to printk some useful information when we get EPOW events.
We want to make it clear that we have an error, that it was
reported by firmware and that the RTAS error log will have more
detailed information. eg:

	Ambient temperature too high reported by firmware.
	Check RTAS error log for details

Depending on the error encountered, we now issue an immediate or
an orderly power down.

Move initialization of the EPOW interrupt earlier in boot since we
want to respond to them as early as possible.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/platforms/pseries/ras.c
===================================================================
--- linux-build.orig/arch/powerpc/platforms/pseries/ras.c	2012-03-22 12:24:24.980892033 +1100
+++ linux-build/arch/powerpc/platforms/pseries/ras.c	2012-03-22 12:25:06.857659239 +1100
@@ -35,6 +35,8 @@
 #include <linux/random.h>
 #include <linux/sysrq.h>
 #include <linux/bitops.h>
+#include <linux/fs.h>
+#include <linux/reboot.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -95,26 +97,127 @@ static int __init init_ras_IRQ(void)
 
 	return 0;
 }
-__initcall(init_ras_IRQ);
+subsys_initcall(init_ras_IRQ);
 
-/*
- * Handle power subsystem events (EPOW).
- *
- * Presently we just log the event has occurred.  This should be fixed
- * to examine the type of power failure and take appropriate action where
- * the time horizon permits something useful to be done.
- */
+#define EPOW_SHUTDOWN_NORMAL				1
+#define EPOW_SHUTDOWN_ON_UPS				2
+#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
+#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4
+
+static void handle_system_shutdown(char event_modifier)
+{
+	switch (event_modifier) {
+	case EPOW_SHUTDOWN_NORMAL:
+		pr_emerg("Firmware initiated power off");
+		orderly_poweroff(1);
+		break;
+
+	case EPOW_SHUTDOWN_ON_UPS:
+		pr_emerg("Loss of power reported by firmware, system is "
+			"running on UPS/battery");
+		break;
+
+	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
+		pr_emerg("Loss of system critical functions reported by "
+			"firmware");
+		pr_emerg("Check RTAS error log for details");
+		orderly_poweroff(1);
+		break;
+
+	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
+		pr_emerg("Ambient temperature too high reported by firmware");
+		pr_emerg("Check RTAS error log for details");
+		orderly_poweroff(1);
+		break;
+
+	default:
+		pr_err("Unknown power/cooling shutdown event (modifier %d)",
+			event_modifier);
+	}
+}
+
+struct epow_errorlog {
+	unsigned char sensor_value;
+	unsigned char event_modifier;
+	unsigned char extended_modifier;
+	unsigned char reserved;
+	unsigned char platform_reason;
+};
+
+#define EPOW_RESET			0
+#define EPOW_WARN_COOLING		1
+#define EPOW_WARN_POWER			2
+#define EPOW_SYSTEM_SHUTDOWN		3
+#define EPOW_SYSTEM_HALT		4
+#define EPOW_MAIN_ENCLOSURE		5
+#define EPOW_POWER_OFF			7
+
+void rtas_parse_epow_errlog(struct rtas_error_log *log)
+{
+	struct pseries_errorlog *pseries_log;
+	struct epow_errorlog *epow_log;
+	char action_code;
+	char modifier;
+
+	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
+	if (pseries_log == NULL)
+		return;
+
+	epow_log = (struct epow_errorlog *)pseries_log->data;
+	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
+	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */
+
+	switch (action_code) {
+	case EPOW_RESET:
+		pr_err("Non critical power or cooling issue cleared");
+		break;
+
+	case EPOW_WARN_COOLING:
+		pr_err("Non critical cooling issue reported by firmware");
+		pr_err("Check RTAS error log for details");
+		break;
+
+	case EPOW_WARN_POWER:
+		pr_err("Non critical power issue reported by firmware");
+		pr_err("Check RTAS error log for details");
+		break;
+
+	case EPOW_SYSTEM_SHUTDOWN:
+		handle_system_shutdown(epow_log->event_modifier);
+		break;
+
+	case EPOW_SYSTEM_HALT:
+		pr_emerg("Firmware initiated power off");
+		orderly_poweroff(1);
+		break;
+
+	case EPOW_MAIN_ENCLOSURE:
+	case EPOW_POWER_OFF:
+		pr_emerg("Critical power/cooling issue reported by firmware");
+		pr_emerg("Check RTAS error log for details");
+		pr_emerg("Immediate power off");
+		emergency_sync();
+		kernel_power_off();
+		break;
+
+	default:
+		pr_err("Unknown power/cooling event (action code %d)",
+			action_code);
+	}
+}
+
+/* Handle environmental and power warning (EPOW) interrupts. */
 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
 {
-	int status = 0xdeadbeef;
-	int state = 0;
+	int status;
+	int state;
 	int critical;
 
 	status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
 			   EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
 
 	if (state > 3)
-		critical = 1;  /* Time Critical */
+		critical = 1;		/* Time Critical */
 	else
 		critical = 0;
 
@@ -127,14 +230,10 @@ static irqreturn_t ras_epow_interrupt(in
 			   critical, __pa(&ras_log_buf),
 				rtas_get_error_log_max());
 
-	udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
-		    *((unsigned long *)&ras_log_buf), status, state);
-	printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
-	       *((unsigned long *)&ras_log_buf), status, state);
-
-	/* format and print the extended information */
 	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
 
+	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
+
 	spin_unlock(&ras_log_buf_lock);
 	return IRQ_HANDLED;
 }

  reply	other threads:[~2012-03-22  1:49 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-22  1:47 [PATCH 1/6] powerpc: Make function that parses RTAS error logs global Anton Blanchard
2012-03-22  1:49 ` Anton Blanchard [this message]
2012-03-22  1:53 ` [PATCH 3/6] powerpc/pseries: Use rtas_get_sensor in RAS code Anton Blanchard
2012-03-22  1:56   ` [PATCH 4/6] powerpc/pseries: Remove RTAS_POWERMGM_EVENTS Anton Blanchard
2012-03-22  1:58     ` [PATCH 5/6] powerpc/pseries: Clean up ras_error_interrupt code Anton Blanchard
2012-03-22  1:59       ` [PATCH 6/6] powerpc/pseries: Cut down on enthusiastic use of defines in RAS code Anton Blanchard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120322124959.32d84480@kryten \
    --to=anton@samba.org \
    --cc=benh@kernel.crashing.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).