All of lore.kernel.org
 help / color / mirror / Atom feed
From: Don Brace <don.brace@microsemi.com>
To: joseph.szczypek@hpe.com, gerry.morong@microsemi.com,
	john.hall@microsemi.com, jejb@linux.vnet.ibm.com,
	Kevin.Barnett@microsemi.com, Mahesh.Rajashekhara@microsemi.com,
	bader.alisaleh@microsemi.com, hch@infradead.org,
	scott.teel@microsemi.com, Viswas.G@microsemi.com,
	Justin.Lindley@microsemi.com, scott.benesh@microsemi.com,
	POSWALD@suse.com
Cc: linux-scsi@vger.kernel.org
Subject: [PATCH 24/37] smartpqi: add lockup action
Date: Tue, 25 Apr 2017 14:48:18 -0500	[thread overview]
Message-ID: <149314969887.13903.2698872466777093049.stgit@brunhilda> (raw)
In-Reply-To: <149314950730.13903.644081079070695025.stgit@brunhilda>

From: Kevin Barnett <kevin.barnett@hpe.com>

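Add support for a configurable action to take when the controller locks up
and is taken offline. The supported actions are none, reboot, and panic; the
default is none. The action is selected with the lockup_action module
parameter and can be viewed or changed through the lockup_action sysfs host
attribute.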

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c |  121 +++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 2b667b5..132a419 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -24,6 +24,7 @@
 #include <linux/sched.h>
 #include <linux/rtc.h>
 #include <linux/bcd.h>
+#include <linux/reboot.h>
 #include <linux/cciss_ioctl.h>
 #include <linux/blk-mq-pci.h>
 #include <scsi/scsi_host.h>
@@ -60,6 +61,7 @@ MODULE_LICENSE("GPL");
 static char *hpe_branded_controller = "HPE Smart Array Controller";
 static char *microsemi_branded_controller = "Microsemi Smart Family Controller";
 
+static void pqi_perform_lockup_action(void);
 static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info);
 static int pqi_scan_scsi_devices(struct pqi_ctrl_info *ctrl_info);
 static void pqi_scan_start(struct Scsi_Host *shost);
@@ -81,6 +83,32 @@ static struct scsi_transport_template *pqi_sas_transport_template;
 
 static atomic_t pqi_controller_count = ATOMIC_INIT(0);
 
+enum pqi_lockup_action {
+	NONE,
+	REBOOT,
+	PANIC
+};
+
+static enum pqi_lockup_action pqi_lockup_action = NONE;
+
+static struct {
+	enum pqi_lockup_action	action;
+	char			*name;
+} pqi_lockup_actions[] = {
+	{
+		.action = NONE,
+		.name = "none",
+	},
+	{
+		.action = REBOOT,
+		.name = "reboot",
+	},
+	{
+		.action = PANIC,
+		.name = "panic",
+	},
+};
+
 static unsigned int pqi_supported_event_types[] = {
 	PQI_EVENT_TYPE_HOTPLUG,
 	PQI_EVENT_TYPE_HARDWARE,
@@ -96,6 +124,13 @@ module_param_named(disable_device_id_wildcards,
 MODULE_PARM_DESC(disable_device_id_wildcards,
 	"Disable device ID wildcards.");
 
+static char *pqi_lockup_action_param;
+module_param_named(lockup_action,
+	pqi_lockup_action_param, charp, 0644);
+MODULE_PARM_DESC(lockup_action, "Action to take when controller locked up.\n"
+	"\t\tSupported: none, reboot, panic\n"
+	"\t\tDefault: none");
+
 static char *raid_levels[] = {
 	"RAID-0",
 	"RAID-4",
@@ -2735,6 +2770,8 @@ static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
 	ctrl_info->controller_online = false;
 	dev_err(&ctrl_info->pci_dev->dev, "controller offline\n");
 	sis_shutdown_ctrl(ctrl_info);
+	pci_disable_device(ctrl_info->pci_dev);
+	pqi_perform_lockup_action();
 
 	for (i = 0; i < ctrl_info->num_queue_groups; i++) {
 		queue_group = &ctrl_info->queue_groups[i];
@@ -5388,12 +5425,55 @@ static ssize_t pqi_host_rescan_store(struct device *dev,
 	return count;
 }
 
+static ssize_t pqi_lockup_action_show(struct device *dev,
+	struct device_attribute *attr, char *buffer)
+{
+	int count = 0;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(pqi_lockup_actions); i++) {
+		if (pqi_lockup_actions[i].action == pqi_lockup_action)
+			count += snprintf(buffer + count, PAGE_SIZE - count,
+				"[%s] ", pqi_lockup_actions[i].name);
+		else
+			count += snprintf(buffer + count, PAGE_SIZE - count,
+				"%s ", pqi_lockup_actions[i].name);
+	}
+
+	count += snprintf(buffer + count, PAGE_SIZE - count, "\n");
+
+	return count;
+}
+
+static ssize_t pqi_lockup_action_store(struct device *dev,
+	struct device_attribute *attr, const char *buffer, size_t count)
+{
+	unsigned int i;
+	char *action_name;
+	char action_name_buffer[32];
+
+	strlcpy(action_name_buffer, buffer, sizeof(action_name_buffer));
+	action_name = strstrip(action_name_buffer);
+
+	for (i = 0; i < ARRAY_SIZE(pqi_lockup_actions); i++) {
+		if (strcmp(action_name, pqi_lockup_actions[i].name) == 0) {
+			pqi_lockup_action = pqi_lockup_actions[i].action;
+			return count;
+		}
+	}
+
+	return -EINVAL;
+}
+
 static DEVICE_ATTR(version, 0444, pqi_version_show, NULL);
 static DEVICE_ATTR(rescan, 0200, NULL, pqi_host_rescan_store);
+static DEVICE_ATTR(lockup_action, 0644,
+	pqi_lockup_action_show, pqi_lockup_action_store);
 
 static struct device_attribute *pqi_shost_attrs[] = {
 	&dev_attr_version,
 	&dev_attr_rescan,
+	&dev_attr_lockup_action,
 	NULL
 };
 
@@ -6140,6 +6220,21 @@ static void pqi_remove_ctrl(struct pqi_ctrl_info *ctrl_info)
 	pqi_free_ctrl_resources(ctrl_info);
 }
 
+static void pqi_perform_lockup_action(void)
+{
+	switch (pqi_lockup_action) {
+	case PANIC:
+		panic("FATAL: Smart Family Controller lockup detected");
+		break;
+	case REBOOT:
+		emergency_restart();
+		break;
+	case NONE:
+	default:
+		break;
+	}
+}
+
 static void pqi_print_ctrl_info(struct pci_dev *pci_dev,
 	const struct pci_device_id *id)
 {
@@ -6245,6 +6340,30 @@ static void pqi_shutdown(struct pci_dev *pci_dev)
 		"unable to flush controller cache\n");
 }
 
+static void pqi_process_lockup_action_param(void)
+{
+	unsigned int i;
+
+	if (!pqi_lockup_action_param)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(pqi_lockup_actions); i++) {
+		if (strcmp(pqi_lockup_action_param,
+			pqi_lockup_actions[i].name) == 0) {
+			pqi_lockup_action = pqi_lockup_actions[i].action;
+			return;
+		}
+	}
+
+	pr_warn("%s: invalid lockup action setting \"%s\" - supported settings: none, reboot, panic\n",
+		DRIVER_NAME_SHORT, pqi_lockup_action_param);
+}
+
+static void pqi_process_module_params(void)
+{
+	pqi_process_lockup_action_param();
+}
+
 #if defined(CONFIG_PM)
 
 static int pqi_suspend(struct pci_dev *pci_dev, pm_message_t state)
@@ -6552,6 +6671,8 @@ static int __init pqi_init(void)
 	if (!pqi_sas_transport_template)
 		return -ENODEV;
 
+	pqi_process_module_params();
+
 	rc = pci_register_driver(&pqi_pci_driver);
 	if (rc)
 		sas_release_transport(pqi_sas_transport_template);

  parent reply	other threads:[~2017-04-25 19:48 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-25 19:45 [PATCH 00/37] smartpqi updates Don Brace
2017-04-25 19:45 ` [PATCH 01/37] smartpqi: correct remove scsi devices Don Brace
2017-04-25 19:46 ` [PATCH 02/37] smartpqi: cleanup interrupt management Don Brace
2017-04-25 19:54   ` Bart Van Assche
2017-05-03 15:23     ` Don Brace
2017-04-25 19:46 ` [PATCH 03/37] smartpqi: set pci completion timeout Don Brace
2017-04-25 19:46 ` [PATCH 04/37] smartpqi: add in controller checkpoint for controller lockups Don Brace
2017-04-25 19:46 ` [PATCH 05/37] smartpqi: ensure controller is in SIS mode at init Don Brace
2017-04-25 19:57   ` Bart Van Assche
2017-05-03 15:46     ` Don Brace
2017-04-25 19:46 ` [PATCH 06/37] smartpqi: add supporting events Don Brace
2017-04-25 19:46 ` [PATCH 07/37] smartpqi: enhance resets Don Brace
2017-04-25 20:01   ` Bart Van Assche
2017-05-03 19:19     ` Don Brace
2017-04-25 19:46 ` [PATCH 08/37] smartpqi: add suspend and resume support Don Brace
2017-04-25 20:05   ` Bart Van Assche
2017-05-03 19:21     ` Don Brace
2017-04-25 19:46 ` [PATCH 09/37] smartpqi: add heartbeat check Don Brace
2017-04-25 19:46 ` [PATCH 10/37] smartpqi: correct bdma hw bug Don Brace
2017-04-25 19:46 ` [PATCH 11/37] smartpqi: add pqi_wait_for_completion_io Don Brace
2017-04-25 19:47 ` [PATCH 12/37] smartpqi: change functions to inline Don Brace
2017-04-25 20:07   ` Bart Van Assche
2017-05-03 19:32     ` Don Brace
2017-04-25 19:47 ` [PATCH 13/37] smartpqi: make pdev pointer names consistent Don Brace
2017-04-25 19:47 ` [PATCH 14/37] smartpqi: eliminate redundant error messages Don Brace
2017-04-25 19:47 ` [PATCH 15/37] smartpqi: correct BMIC identify physical drive Don Brace
2017-04-25 19:47 ` [PATCH 16/37] smartpqi: minor driver cleanup Don Brace
2017-04-25 20:09   ` Bart Van Assche
2017-05-03 19:43     ` Don Brace
2017-04-25 19:47 ` [PATCH 17/37] smartpqi: add new PCI device IDs Don Brace
2017-04-25 19:47 ` [PATCH 18/37] smartpqi: cleanup messages Don Brace
2017-04-25 20:11   ` Bart Van Assche
2017-05-03 19:47     ` Don Brace
2017-04-25 19:47 ` [PATCH 19/37] smartpqi: update copyright Don Brace
2017-04-25 19:47 ` [PATCH 20/37] smartpqi: add ptraid support Don Brace
2017-04-25 20:13   ` Bart Van Assche
2017-05-03 20:06     ` Don Brace
2017-04-25 19:48 ` [PATCH 21/37] smartpqi: change return value for LUN reset operations Don Brace
2017-04-25 19:48 ` [PATCH 22/37] smartpqi: enhance kdump Don Brace
2017-04-25 19:48 ` [PATCH 23/37] smartpqi: remove qdepth calculations for logical volumes Don Brace
2017-04-25 19:48 ` Don Brace [this message]
2017-04-25 19:48 ` [PATCH 25/37] smartpqi: correct aio error path Don Brace
2017-04-25 19:48 ` [PATCH 26/37] smartpqi: update device offline Don Brace
2017-04-25 19:48 ` [PATCH 27/37] smartpqi: controller offline improvements Don Brace
2017-04-25 19:48 ` [PATCH 28/37] smartpqi: cleanup controller branding Don Brace
2017-04-25 19:48 ` [PATCH 29/37] smartpqi: map more raid errors to SCSI errors Don Brace
2017-04-25 19:48 ` [PATCH 30/37] smartpqi: update timeout on admin commands Don Brace
2017-04-25 19:49 ` [PATCH 31/37] smartpqi: enhance device add and remove messages Don Brace
2017-04-25 19:49 ` [PATCH 32/37] smartpqi: make raid bypass references consistent Don Brace
2017-04-25 19:49 ` [PATCH 33/37] smartpqi: add raid level show Don Brace
2017-04-25 19:49 ` [PATCH 34/37] smartpqi: cleanup list initialization Don Brace
2017-04-25 19:49 ` [PATCH 35/37] smartpqi: add module parameters Don Brace
2017-04-25 19:49 ` [PATCH 36/37] smartpqi: remove writeq/readq function definitions Don Brace
2017-04-26  9:35   ` kbuild test robot
2017-04-25 19:49 ` [PATCH 37/37] smartpqi: bump driver version Don Brace

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=149314969887.13903.2698872466777093049.stgit@brunhilda \
    --to=don.brace@microsemi.com \
    --cc=Justin.Lindley@microsemi.com \
    --cc=Kevin.Barnett@microsemi.com \
    --cc=Mahesh.Rajashekhara@microsemi.com \
    --cc=POSWALD@suse.com \
    --cc=Viswas.G@microsemi.com \
    --cc=bader.alisaleh@microsemi.com \
    --cc=gerry.morong@microsemi.com \
    --cc=hch@infradead.org \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=john.hall@microsemi.com \
    --cc=joseph.szczypek@hpe.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=scott.benesh@microsemi.com \
    --cc=scott.teel@microsemi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.