linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS driver
@ 2010-09-23  2:36 bo yang
  2010-10-08 15:51 ` Tomas Henzl
  0 siblings, 1 reply; 11+ messages in thread
From: bo yang @ 2010-09-23  2:36 UTC (permalink / raw)
  To: linux-scsi, akpm, linux-kernel, James.Bottomley, bo.yang

[-- Attachment #1: Type: text/plain, Size: 1194 bytes --]

This patch is too big to send inline, so I am submitting it as an
attachment.  Please apply the attached file.  Also let me know if it
can't be accepted.

To add Online Controller Reset (OCR) support, the driver needs to do
the following:
a). Reset the controller chips -- Xscale and Gen2.  This changes the
existing function calls and adds the reset functions for these two
chips.
b). During the reset, the driver stores the pending cmds that have not
been returned by the FW in the driver's pending queue.  The driver
re-issues those pending cmds to the FW after the OCR has finished.
c). In the driver's timeout routine, the driver reports the timeout to
the OS as a reset.  Also, the driver's queue routine blocks new cmds
until the OCR has finished.
d). In the driver's ISR routine, if the driver sees a FW state change,
the FW is in the fault state, and the FW supports online controller
reset (OCR), the driver starts the controller reset.
e). In the driver's IOCTL routine, application cmds wait for the OCR
to finish and are then issued to the FW.

Signed-off-by: Bo Yang <bo.yang@lsi.com>

---
 drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
 drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
 2 files changed, 787 insertions(+), 57 deletions(-)

[-- Attachment #2: megasas-ocr.patch --]
[-- Type: application/octet-stream, Size: 39717 bytes --]

diff -rupN old/drivers/scsi/megaraid/megaraid_sas.c new/drivers/scsi/megaraid/megaraid_sas.c
--- old/drivers/scsi/megaraid/megaraid_sas.c	2010-09-17 02:19:46.000000000 -0400
+++ new/drivers/scsi/megaraid/megaraid_sas.c	2010-09-17 02:19:46.000000000 -0400
@@ -62,6 +62,11 @@ MODULE_VERSION(MEGASAS_VERSION);
 MODULE_AUTHOR("megaraidlinux@lsi.com");
 MODULE_DESCRIPTION("LSI MegaRAID SAS Driver");
 
+static int megasas_transition_to_ready(struct megasas_instance *instance);
+static int megasas_get_pd_list(struct megasas_instance *instance);
+static int megasas_issue_init_mfi(struct megasas_instance *instance);
+static int megasas_register_aen(struct megasas_instance *instance,
+				u32 seq_num, u32 class_locale_word);
 /*
  * PCI ID table for all supported controllers
  */
@@ -164,7 +169,7 @@ megasas_return_cmd(struct megasas_instan
 static inline void
 megasas_enable_intr_xscale(struct megasas_register_set __iomem * regs)
 {
-	writel(1, &(regs)->outbound_intr_mask);
+	writel(0, &(regs)->outbound_intr_mask);
 
 	/* Dummy readl to force pci flush */
 	readl(&regs->outbound_intr_mask);
@@ -200,24 +205,27 @@ static int 
 megasas_clear_intr_xscale(struct megasas_register_set __iomem * regs)
 {
 	u32 status;
+	u32 mfiStatus = 0;
 	/*
 	 * Check if it is our interrupt
 	 */
 	status = readl(&regs->outbound_intr_status);
 
-	if (!(status & MFI_OB_INTR_STATUS_MASK)) {
-		return 1;
-	}
+	if (status & MFI_OB_INTR_STATUS_MASK)
+		mfiStatus = MFI_INTR_FLAG_REPLY_MESSAGE;
+	if (status & MFI_XSCALE_OMR0_CHANGE_INTERRUPT)
+		mfiStatus |= MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE;
 
 	/*
 	 * Clear the interrupt by writing back the same value
 	 */
-	writel(status, &regs->outbound_intr_status);
+	if (mfiStatus)
+		writel(status, &regs->outbound_intr_status);
 
 	/* Dummy readl to force pci flush */
 	readl(&regs->outbound_intr_status);
 
-	return 0;
+	return mfiStatus;
 }
 
 /**
@@ -232,8 +240,69 @@ megasas_fire_cmd_xscale(struct megasas_i
 		u32 frame_count,
 		struct megasas_register_set __iomem *regs)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&instance->hba_lock, flags);
 	writel((frame_phys_addr >> 3)|(frame_count),
 	       &(regs)->inbound_queue_port);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_xscale -  For controller reset
+ * @regs:                              MFI register set
+ */
+static int
+megasas_adp_reset_xscale(struct megasas_instance *instance,
+	struct megasas_register_set __iomem *regs)
+{
+	u32 i;
+	u32 pcidata;
+	writel(MFI_ADP_RESET, &regs->inbound_doorbell);
+
+	for (i = 0; i < 3; i++)
+		msleep(1000); /* sleep for 3 secs */
+	pcidata  = 0;
+	pci_read_config_dword(instance->pdev, MFI_1068_PCSR_OFFSET, &pcidata);
+	printk(KERN_NOTICE "pcidata = %x\n", pcidata);
+	if (pcidata & 0x2) {
+		printk(KERN_NOTICE "mfi 1068 offset read=%x\n", pcidata);
+		pcidata &= ~0x2;
+		pci_write_config_dword(instance->pdev,
+				MFI_1068_PCSR_OFFSET, pcidata);
+
+		for (i = 0; i < 2; i++)
+			msleep(1000); /* need to wait 2 secs again */
+
+		pcidata  = 0;
+		pci_read_config_dword(instance->pdev,
+				MFI_1068_FW_HANDSHAKE_OFFSET, &pcidata);
+		printk(KERN_NOTICE "1068 offset handshake read=%x\n", pcidata);
+		if ((pcidata & 0xffff0000) == MFI_1068_FW_READY) {
+			printk(KERN_NOTICE "1068 offset pcidt=%x\n", pcidata);
+			pcidata = 0;
+			pci_write_config_dword(instance->pdev,
+				MFI_1068_FW_HANDSHAKE_OFFSET, pcidata);
+		}
+	}
+	return 0;
+}
+
+/**
+ * megasas_check_reset_xscale -	For controller reset check
+ * @regs:				MFI register set
+ */
+static int
+megasas_check_reset_xscale(struct megasas_instance *instance,
+		struct megasas_register_set __iomem *regs)
+{
+	u32 consumer;
+	consumer = *instance->consumer;
+
+	if ((instance->adprecovery != MEGASAS_HBA_OPERATIONAL) &&
+		(*instance->consumer == MEGASAS_ADPRESET_INPROG_SIGN)) {
+		return 1;
+	}
+	return 0;
 }
 
 static struct megasas_instance_template megasas_instance_template_xscale = {
@@ -243,6 +312,8 @@ static struct megasas_instance_template 
 	.disable_intr = megasas_disable_intr_xscale,
 	.clear_intr = megasas_clear_intr_xscale,
 	.read_fw_status_reg = megasas_read_fw_status_reg_xscale,
+	.adp_reset = megasas_adp_reset_xscale,
+	.check_reset = megasas_check_reset_xscale,
 };
 
 /**
@@ -264,7 +335,7 @@ megasas_enable_intr_ppc(struct megasas_r
 {
 	writel(0xFFFFFFFF, &(regs)->outbound_doorbell_clear);
     
-	writel(~0x80000004, &(regs)->outbound_intr_mask);
+	writel(~0x80000000, &(regs)->outbound_intr_mask);
 
 	/* Dummy readl to force pci flush */
 	readl(&regs->outbound_intr_mask);
@@ -307,7 +378,7 @@ megasas_clear_intr_ppc(struct megasas_re
 	status = readl(&regs->outbound_intr_status);
 
 	if (!(status & MFI_REPLY_1078_MESSAGE_INTERRUPT)) {
-		return 1;
+		return 0;
 	}
 
 	/*
@@ -318,7 +389,7 @@ megasas_clear_intr_ppc(struct megasas_re
 	/* Dummy readl to force pci flush */
 	readl(&regs->outbound_doorbell_clear);
 
-	return 0;
+	return 1;
 }
 /**
  * megasas_fire_cmd_ppc -	Sends command to the FW
@@ -332,10 +403,34 @@ megasas_fire_cmd_ppc(struct megasas_inst
 		u32 frame_count,
 		struct megasas_register_set __iomem *regs)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&instance->hba_lock, flags);
 	writel((frame_phys_addr | (frame_count<<1))|1, 
 			&(regs)->inbound_queue_port);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_ppc -	For controller reset
+ * @regs:				MFI register set
+ */
+static int
+megasas_adp_reset_ppc(struct megasas_instance *instance,
+			struct megasas_register_set __iomem *regs)
+{
+	return 0;
 }
 
+/**
+ * megasas_check_reset_ppc -	For controller reset check
+ * @regs:				MFI register set
+ */
+static int
+megasas_check_reset_ppc(struct megasas_instance *instance,
+			struct megasas_register_set __iomem *regs)
+{
+	return 0;
+}
 static struct megasas_instance_template megasas_instance_template_ppc = {
 	
 	.fire_cmd = megasas_fire_cmd_ppc,
@@ -343,6 +438,8 @@ static struct megasas_instance_template 
 	.disable_intr = megasas_disable_intr_ppc,
 	.clear_intr = megasas_clear_intr_ppc,
 	.read_fw_status_reg = megasas_read_fw_status_reg_ppc,
+	.adp_reset = megasas_adp_reset_ppc,
+	.check_reset = megasas_check_reset_ppc,
 };
 
 /**
@@ -397,7 +494,7 @@ megasas_clear_intr_skinny(struct megasas
 	status = readl(&regs->outbound_intr_status);
 
 	if (!(status & MFI_SKINNY_ENABLE_INTERRUPT_MASK)) {
-		return 1;
+		return 0;
 	}
 
 	/*
@@ -410,7 +507,7 @@ megasas_clear_intr_skinny(struct megasas
 	*/
 	readl(&regs->outbound_intr_status);
 
-	return 0;
+	return 1;
 }
 
 /**
@@ -426,11 +523,33 @@ megasas_fire_cmd_skinny(struct megasas_i
 			struct megasas_register_set __iomem *regs)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&instance->fire_lock, flags);
+	spin_lock_irqsave(&instance->hba_lock, flags);
 	writel(0, &(regs)->inbound_high_queue_port);
 	writel((frame_phys_addr | (frame_count<<1))|1,
 		&(regs)->inbound_low_queue_port);
-	spin_unlock_irqrestore(&instance->fire_lock, flags);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_skinny -	For controller reset
+ * @regs:				MFI register set
+ */
+static int
+megasas_adp_reset_skinny(struct megasas_instance *instance,
+			struct megasas_register_set __iomem *regs)
+{
+	return 0;
+}
+
+/**
+ * megasas_check_reset_skinny -	For controller reset check
+ * @regs:				MFI register set
+ */
+static int
+megasas_check_reset_skinny(struct megasas_instance *instance,
+				struct megasas_register_set __iomem *regs)
+{
+	return 0;
 }
 
 static struct megasas_instance_template megasas_instance_template_skinny = {
@@ -440,6 +559,8 @@ static struct megasas_instance_template 
 	.disable_intr = megasas_disable_intr_skinny,
 	.clear_intr = megasas_clear_intr_skinny,
 	.read_fw_status_reg = megasas_read_fw_status_reg_skinny,
+	.adp_reset = megasas_adp_reset_skinny,
+	.check_reset = megasas_check_reset_skinny,
 };
 
 
@@ -495,23 +616,29 @@ static int
 megasas_clear_intr_gen2(struct megasas_register_set __iomem *regs)
 {
 	u32 status;
+	u32 mfiStatus = 0;
 	/*
 	 * Check if it is our interrupt
 	 */
 	status = readl(&regs->outbound_intr_status);
 
-	if (!(status & MFI_GEN2_ENABLE_INTERRUPT_MASK))
-		return 1;
+	if (status & MFI_GEN2_ENABLE_INTERRUPT_MASK) {
+		mfiStatus = MFI_INTR_FLAG_REPLY_MESSAGE;
+	}
+	if (status & MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT) {
+		mfiStatus |= MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE;
+	}
 
 	/*
 	 * Clear the interrupt by writing back the same value
 	 */
-	writel(status, &regs->outbound_doorbell_clear);
+	if (mfiStatus)
+		writel(status, &regs->outbound_doorbell_clear);
 
 	/* Dummy readl to force pci flush */
 	readl(&regs->outbound_intr_status);
 
-	return 0;
+	return mfiStatus;
 }
 /**
  * megasas_fire_cmd_gen2 -     Sends command to the FW
@@ -525,8 +652,74 @@ megasas_fire_cmd_gen2(struct megasas_ins
 			u32 frame_count,
 			struct megasas_register_set __iomem *regs)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&instance->hba_lock, flags);
 	writel((frame_phys_addr | (frame_count<<1))|1,
 			&(regs)->inbound_queue_port);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+}
+
+/**
+ * megasas_adp_reset_gen2 -	For controller reset
+ * @regs:				MFI register set
+ */
+static int
+megasas_adp_reset_gen2(struct megasas_instance *instance,
+			struct megasas_register_set __iomem *reg_set)
+{
+	u32			retry = 0 ;
+	u32			HostDiag;
+
+	writel(0, &reg_set->seq_offset);
+	writel(4, &reg_set->seq_offset);
+	writel(0xb, &reg_set->seq_offset);
+	writel(2, &reg_set->seq_offset);
+	writel(7, &reg_set->seq_offset);
+	writel(0xd, &reg_set->seq_offset);
+	msleep(1000);
+
+	HostDiag = (u32)readl(&reg_set->host_diag);
+
+	while ( !( HostDiag & DIAG_WRITE_ENABLE) ) {
+		msleep(100);
+		HostDiag = (u32)readl(&reg_set->host_diag);
+		printk(KERN_NOTICE "RESETGEN2: retry=%x, hostdiag=%x\n",
+					retry, HostDiag);
+
+		if (retry++ >= 100)
+			return 1;
+
+	}
+
+	printk(KERN_NOTICE "ADP_RESET_GEN2: HostDiag=%x\n", HostDiag);
+
+	writel((HostDiag | DIAG_RESET_ADAPTER), &reg_set->host_diag);
+
+	ssleep(10);
+
+	HostDiag = (u32)readl(&reg_set->host_diag);
+	while ( ( HostDiag & DIAG_RESET_ADAPTER) ) {
+		msleep(100);
+		HostDiag = (u32)readl(&reg_set->host_diag);
+		printk(KERN_NOTICE "RESET_GEN2: retry=%x, hostdiag=%x\n",
+				retry, HostDiag);
+
+		if (retry++ >= 1000)
+			return 1;
+
+	}
+	return 0;
+}
+
+/**
+ * megasas_check_reset_gen2 -	For controller reset check
+ * @regs:				MFI register set
+ */
+static int
+megasas_check_reset_gen2(struct megasas_instance *instance,
+		struct megasas_register_set __iomem *regs)
+{
+	return 0;
 }
 
 static struct megasas_instance_template megasas_instance_template_gen2 = {
@@ -536,11 +729,13 @@ static struct megasas_instance_template 
 	.disable_intr = megasas_disable_intr_gen2,
 	.clear_intr = megasas_clear_intr_gen2,
 	.read_fw_status_reg = megasas_read_fw_status_reg_gen2,
+	.adp_reset = megasas_adp_reset_gen2,
+	.check_reset = megasas_check_reset_gen2,
 };
 
 /**
 *	This is the end of set of functions & definitions
-* 	specific to ppc (deviceid : 0x60) controllers
+*       specific to gen2 (deviceid : 0x78, 0x79) controllers
 */
 
 /**
@@ -599,8 +794,7 @@ megasas_issue_blocked_cmd(struct megasas
 	instance->instancet->fire_cmd(instance,
 			cmd->frame_phys_addr, 0, instance->reg_set);
 
-	wait_event_timeout(instance->int_cmd_wait_q, (cmd->cmd_status != ENODATA),
-		MEGASAS_INTERNAL_CMD_WAIT_TIME*HZ);
+	wait_event(instance->int_cmd_wait_q, cmd->cmd_status != ENODATA);
 
 	return 0;
 }
@@ -648,8 +842,8 @@ megasas_issue_blocked_abort_cmd(struct m
 	/*
 	 * Wait for this cmd to complete
 	 */
-	wait_event_timeout(instance->abort_cmd_wait_q, (cmd->cmd_status != 0xFF),
-		MEGASAS_INTERNAL_CMD_WAIT_TIME*HZ);
+	wait_event(instance->abort_cmd_wait_q, cmd->cmd_status != 0xFF);
+	cmd->sync_cmd = 0;
 
 	megasas_return_cmd(instance, cmd);
 	return 0;
@@ -1131,14 +1325,22 @@ megasas_queue_command(struct scsi_cmnd *
 	u32 frame_count;
 	struct megasas_cmd *cmd;
 	struct megasas_instance *instance;
+	unsigned long flags;
 
 	instance = (struct megasas_instance *)
 	    scmd->device->host->hostdata;
 
-	/* Don't process if we have already declared adapter dead */
-	if (instance->hw_crit_error)
+	if (instance->issuepend_done == 0)
 		return SCSI_MLQUEUE_HOST_BUSY;
 
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) {
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
+
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
 	scmd->scsi_done = done;
 	scmd->result = 0;
 
@@ -1274,6 +1476,18 @@ static int megasas_slave_alloc(struct sc
 	return 0;
 }
 
+static void megaraid_sas_kill_hba(struct megasas_instance *instance)
+{
+	if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0073SKINNY) ||
+		(instance->pdev->device == PCI_DEVICE_ID_LSI_SAS0071SKINNY)) {
+		writel(MFI_STOP_ADP,
+			&instance->reg_set->reserved_0[0]);
+	} else {
+		writel(MFI_STOP_ADP,
+			&instance->reg_set->inbound_doorbell);
+	}
+}
+
 /**
  * megasas_complete_cmd_dpc	 -	Returns FW's controller structure
  * @instance_addr:			Address of adapter soft state
@@ -1291,7 +1505,7 @@ static void megasas_complete_cmd_dpc(uns
 	unsigned long flags;
 
 	/* If we have already declared adapter dead, donot complete cmds */
-	if (instance->hw_crit_error)
+	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR )
 		return;
 
 	spin_lock_irqsave(&instance->completion_lock, flags);
@@ -1301,6 +1515,11 @@ static void megasas_complete_cmd_dpc(uns
 
 	while (consumer != producer) {
 		context = instance->reply_queue[consumer];
+		if (context >= instance->max_fw_cmds) {
+			printk(KERN_ERR "Unexpected context value %x\n",
+				context);
+			BUG();
+		}
 
 		cmd = instance->cmd_list[context];
 
@@ -1350,7 +1569,76 @@ static void megasas_complete_cmd_dpc(uns
 static int megasas_wait_for_outstanding(struct megasas_instance *instance)
 {
 	int i;
+	u32 reset_index;
 	u32 wait_time = MEGASAS_RESET_WAIT_TIME;
+	u8 adprecovery;
+	unsigned long flags;
+	struct list_head clist_local;
+	struct megasas_cmd *reset_cmd;
+
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	adprecovery = instance->adprecovery;
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+	if (adprecovery != MEGASAS_HBA_OPERATIONAL) {
+
+		INIT_LIST_HEAD(&clist_local);
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		list_splice_init(&instance->internal_reset_pending_q,
+				&clist_local);
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+		printk(KERN_NOTICE "megasas: HBA reset wait ...\n");
+		for (i = 0; i < wait_time; i++) {
+			msleep(1000);
+			spin_lock_irqsave(&instance->hba_lock, flags);
+			adprecovery = instance->adprecovery;
+			spin_unlock_irqrestore(&instance->hba_lock, flags);
+			if (adprecovery == MEGASAS_HBA_OPERATIONAL)
+				break;
+		}
+
+		if (adprecovery != MEGASAS_HBA_OPERATIONAL) {
+			printk(KERN_NOTICE "megasas: reset: Stopping HBA.\n");
+			spin_lock_irqsave(&instance->hba_lock, flags);
+			instance->adprecovery	= MEGASAS_HW_CRITICAL_ERROR;
+			spin_unlock_irqrestore(&instance->hba_lock, flags);
+			return FAILED;
+		}
+
+		reset_index	= 0;
+		while (!list_empty(&clist_local)) {
+			reset_cmd	= list_entry((&clist_local)->next,
+						struct megasas_cmd, list);
+			list_del_init(&reset_cmd->list);
+			if (reset_cmd->scmd) {
+				reset_cmd->scmd->result = DID_RESET << 16;
+				printk(KERN_NOTICE "%d:%p reset [%02x], %#lx\n",
+					reset_index, reset_cmd,
+					reset_cmd->scmd->cmnd[0],
+					reset_cmd->scmd->serial_number);
+
+				reset_cmd->scmd->scsi_done(reset_cmd->scmd);
+				megasas_return_cmd(instance, reset_cmd);
+			} else if (reset_cmd->sync_cmd) {
+				printk(KERN_NOTICE "megasas:%p synch cmds"
+						"reset queue\n",
+						reset_cmd);
+
+				reset_cmd->cmd_status = ENODATA;
+				instance->instancet->fire_cmd(instance,
+						reset_cmd->frame_phys_addr,
+						0, instance->reg_set);
+			} else {
+				printk(KERN_NOTICE "megasas: %p unexpected"
+					"cmds lst\n",
+					reset_cmd);
+			}
+			reset_index++;
+		}
+
+		return SUCCESS;
+	}
 
 	for (i = 0; i < wait_time; i++) {
 
@@ -1373,6 +1661,7 @@ static int megasas_wait_for_outstanding(
 	}
 
 	if (atomic_read(&instance->fw_outstanding)) {
+		printk(KERN_NOTICE "megaraid_sas: pending cmds after reset\n");
 		/*
 		* Send signal to FW to stop processing any pending cmds.
 		* The controller will be taken offline by the OS now.
@@ -1388,10 +1677,14 @@ static int megasas_wait_for_outstanding(
 				&instance->reg_set->inbound_doorbell);
 		}
 		megasas_dump_pending_frames(instance);
-		instance->hw_crit_error = 1;
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		instance->adprecovery	= MEGASAS_HW_CRITICAL_ERROR;
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
 		return FAILED;
 	}
 
+	printk(KERN_NOTICE "megaraid_sas: no pending cmds after reset\n");
+
 	return SUCCESS;
 }
 
@@ -1413,7 +1706,7 @@ static int megasas_generic_reset(struct 
 	scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x retries=%x\n",
 		 scmd->serial_number, scmd->cmnd[0], scmd->retries);
 
-	if (instance->hw_crit_error) {
+	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
 		printk(KERN_ERR "megasas: cannot recover from previous reset "
 		       "failures\n");
 		return FAILED;
@@ -1568,7 +1861,8 @@ megasas_service_aen(struct megasas_insta
 	instance->aen_cmd = NULL;
 	megasas_return_cmd(instance, cmd);
 
-	if (instance->unload == 0) {
+	if ((instance->unload == 0) &&
+		((instance->issuepend_done == 1))) {
 		struct megasas_aen_event *ev;
 		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 		if (!ev) {
@@ -1663,6 +1957,9 @@ megasas_complete_cmd(struct megasas_inst
 	struct megasas_header *hdr = &cmd->frame->hdr;
 	unsigned long flags;
 
+	/* flag for the retry reset */
+	cmd->retry_for_fw_reset = 0;
+
 	if (cmd->scmd)
 		cmd->scmd->SCp.ptr = NULL;
 
@@ -1783,39 +2080,301 @@ megasas_complete_cmd(struct megasas_inst
 }
 
 /**
+ * megasas_issue_pending_cmds_again -	issue all pending cmds
+ *                              	in FW again because of the fw reset
+ * @instance:				Adapter soft state
+ */
+static inline void
+megasas_issue_pending_cmds_again(struct megasas_instance *instance)
+{
+	struct megasas_cmd *cmd;
+	struct list_head clist_local;
+	union megasas_evt_class_locale class_locale;
+	unsigned long flags;
+	u32 seq_num;
+
+	INIT_LIST_HEAD(&clist_local);
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	list_splice_init(&instance->internal_reset_pending_q, &clist_local);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+	while (!list_empty(&clist_local)) {
+		cmd	= list_entry((&clist_local)->next,
+					struct megasas_cmd, list);
+		list_del_init(&cmd->list);
+
+		if (cmd->sync_cmd || cmd->scmd) {
+			printk(KERN_NOTICE "megaraid_sas: command %p, %p:%d"
+				"detected to be pending while HBA reset.\n",
+					cmd, cmd->scmd, cmd->sync_cmd);
+
+			cmd->retry_for_fw_reset++;
+
+			if (cmd->retry_for_fw_reset == 3) {
+				printk(KERN_NOTICE "megaraid_sas: cmd %p, %p:%d"
+					"was tried multiple times during reset."
+					"Shutting down the HBA\n",
+					cmd, cmd->scmd, cmd->sync_cmd);
+				megaraid_sas_kill_hba(instance);
+
+				instance->adprecovery =
+						MEGASAS_HW_CRITICAL_ERROR;
+				return;
+			}
+		}
+
+		if (cmd->sync_cmd == 1) {
+			if (cmd->scmd) {
+				printk(KERN_NOTICE "megaraid_sas: unexpected"
+					"cmd attached to internal command!\n");
+			}
+			printk(KERN_NOTICE "megasas: %p synchronous cmd"
+						"on the internal reset queue,"
+						"issue it again.\n", cmd);
+			cmd->cmd_status = ENODATA;
+			instance->instancet->fire_cmd(instance,
+							cmd->frame_phys_addr ,
+							0, instance->reg_set);
+		} else if (cmd->scmd) {
+			printk(KERN_NOTICE "megasas: %p scsi cmd [%02x],%#lx"
+			"detected on the internal queue, issue again.\n",
+			cmd, cmd->scmd->cmnd[0], cmd->scmd->serial_number);
+
+			atomic_inc(&instance->fw_outstanding);
+			instance->instancet->fire_cmd(instance,
+					cmd->frame_phys_addr,
+					cmd->frame_count-1, instance->reg_set);
+		} else {
+			printk(KERN_NOTICE "megasas: %p unexpected cmd on the"
+				"internal reset defer list while re-issue!!\n",
+				cmd);
+		}
+	}
+
+	if (instance->aen_cmd) {
+		printk(KERN_NOTICE "megaraid_sas: aen_cmd in def process\n");
+		megasas_return_cmd(instance, instance->aen_cmd);
+
+		instance->aen_cmd	= NULL;
+	}
+
+	/*
+	* Initiate AEN (Asynchronous Event Notification)
+	*/
+	seq_num = instance->last_seq_num;
+	class_locale.members.reserved = 0;
+	class_locale.members.locale = MR_EVT_LOCALE_ALL;
+	class_locale.members.class = MR_EVT_CLASS_DEBUG;
+
+	megasas_register_aen(instance, seq_num, class_locale.word);
+}
+
+/**
+ * Move the internal reset pending commands to a deferred queue.
+ *
+ * We move the commands pending at internal reset time to a
+ * pending queue. This queue would be flushed after successful
+ * completion of the internal reset sequence. if the internal reset
+ * did not complete in time, the kernel reset handler would flush
+ * these commands.
+ **/
+static void
+megasas_internal_reset_defer_cmds(struct megasas_instance *instance)
+{
+	struct megasas_cmd *cmd;
+	int i;
+	u32 max_cmd = instance->max_fw_cmds;
+	u32 defer_index;
+	unsigned long flags;
+
+	defer_index     = 0;
+	spin_lock_irqsave(&instance->cmd_pool_lock, flags);
+	for (i = 0; i < max_cmd; i++) {
+		cmd = instance->cmd_list[i];
+		if (cmd->sync_cmd == 1 || cmd->scmd) {
+			printk(KERN_NOTICE "megasas: moving cmd[%d]:%p:%d:%p"
+					"on the defer queue as internal\n",
+				defer_index, cmd, cmd->sync_cmd, cmd->scmd);
+
+			if (!list_empty(&cmd->list)) {
+				printk(KERN_NOTICE "megaraid_sas: ERROR while"
+					" moving this cmd:%p, %d %p, it was"
+					"discovered on some list?\n",
+					cmd, cmd->sync_cmd, cmd->scmd);
+
+				list_del_init(&cmd->list);
+			}
+			defer_index++;
+			list_add_tail(&cmd->list,
+				&instance->internal_reset_pending_q);
+		}
+	}
+	spin_unlock_irqrestore(&instance->cmd_pool_lock, flags);
+}
+
+
+static void
+process_fw_state_change_wq(struct work_struct *work)
+{
+	struct megasas_instance *instance =
+		container_of(work, struct megasas_instance, work_init);
+	u32 wait;
+	unsigned long flags;
+
+	if (instance->adprecovery != MEGASAS_ADPRESET_SM_INFAULT) {
+		printk(KERN_NOTICE "megaraid_sas: error, recovery st %x \n",
+				instance->adprecovery);
+		return ;
+	}
+
+	if (instance->adprecovery == MEGASAS_ADPRESET_SM_INFAULT) {
+		printk(KERN_NOTICE "megaraid_sas: FW detected to be in fault"
+					"state, restarting it...\n");
+
+		instance->instancet->disable_intr(instance->reg_set);
+		atomic_set(&instance->fw_outstanding, 0);
+
+		atomic_set(&instance->fw_reset_no_pci_access, 1);
+		instance->instancet->adp_reset(instance, instance->reg_set);
+		atomic_set(&instance->fw_reset_no_pci_access, 0 );
+
+		printk(KERN_NOTICE "megaraid_sas: FW restarted successfully,"
+					"initiating next stage...\n");
+
+		printk(KERN_NOTICE "megaraid_sas: HBA recovery state machine,"
+					"state 2 starting...\n");
+
+		/*waitting for about 20 second before start the second init*/
+		for (wait = 0; wait < 30; wait++) {
+			msleep(1000);
+		}
+
+		if (megasas_transition_to_ready(instance)) {
+			printk(KERN_NOTICE "megaraid_sas:adapter not ready\n");
+
+			megaraid_sas_kill_hba(instance);
+			instance->adprecovery	= MEGASAS_HW_CRITICAL_ERROR;
+			return ;
+		}
+
+		if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) ||
+			(instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) ||
+			(instance->pdev->device == PCI_DEVICE_ID_LSI_VERDE_ZCR)
+			) {
+			*instance->consumer = *instance->producer;
+		} else {
+			*instance->consumer = 0;
+			*instance->producer = 0;
+		}
+
+		megasas_issue_init_mfi(instance);
+
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		instance->adprecovery	= MEGASAS_HBA_OPERATIONAL;
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+		instance->instancet->enable_intr(instance->reg_set);
+
+		megasas_issue_pending_cmds_again(instance);
+		instance->issuepend_done = 1;
+	}
+	return ;
+}
+
+/**
  * megasas_deplete_reply_queue -	Processes all completed commands
  * @instance:				Adapter soft state
  * @alt_status:				Alternate status to be returned to
  * 					SCSI mid-layer instead of the status
  * 					returned by the FW
+ * Note: this must be called with hba lock held
  */
 static int
-megasas_deplete_reply_queue(struct megasas_instance *instance, u8 alt_status)
+megasas_deplete_reply_queue(struct megasas_instance *instance,
+					u8 alt_status)
 {
-	/*
-	 * Check if it is our interrupt
-	 * Clear the interrupt 
-	 */
-	if(instance->instancet->clear_intr(instance->reg_set))
+	u32 mfiStatus;
+	u32 fw_state;
+
+	if ((mfiStatus = instance->instancet->check_reset(instance,
+					instance->reg_set)) == 1) {
+		return IRQ_HANDLED;
+	}
+
+	if ((mfiStatus = instance->instancet->clear_intr(
+						instance->reg_set)
+						) == 0) {
 		return IRQ_NONE;
+	}
+
+	instance->mfiStatus = mfiStatus;
+
+	if ((mfiStatus & MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE)) {
+		fw_state = instance->instancet->read_fw_status_reg(
+				instance->reg_set) & MFI_STATE_MASK;
+
+		if (fw_state != MFI_STATE_FAULT) {
+			printk(KERN_NOTICE "megaraid_sas: fw state:%x\n",
+						fw_state);
+		}
+
+		if ((fw_state == MFI_STATE_FAULT) &&
+				(instance->disableOnlineCtrlReset == 0)) {
+			printk(KERN_NOTICE "megaraid_sas: wait adp restart\n");
+
+			if ((instance->pdev->device ==
+					PCI_DEVICE_ID_LSI_SAS1064R) ||
+				(instance->pdev->device ==
+					PCI_DEVICE_ID_DELL_PERC5) ||
+				(instance->pdev->device ==
+					PCI_DEVICE_ID_LSI_VERDE_ZCR)) {
+
+				*instance->consumer =
+					MEGASAS_ADPRESET_INPROG_SIGN;
+			}
+
+
+			instance->instancet->disable_intr(instance->reg_set);
+			instance->adprecovery	= MEGASAS_ADPRESET_SM_INFAULT;
+			instance->issuepend_done = 0;
+
+			atomic_set(&instance->fw_outstanding, 0);
+			megasas_internal_reset_defer_cmds(instance);
+
+			printk(KERN_NOTICE "megasas: fwState=%x, stage:%d\n",
+					fw_state, instance->adprecovery);
+
+			schedule_work(&instance->work_init);
+			return IRQ_HANDLED;
+
+		} else {
+			printk(KERN_NOTICE "megasas: fwstate:%x, dis_OCR=%x\n",
+				fw_state, instance->disableOnlineCtrlReset);
+		}
+	}
 
-	if (instance->hw_crit_error)
-		goto out_done;
-        /*
-	 * Schedule the tasklet for cmd completion
-	 */
 	tasklet_schedule(&instance->isr_tasklet);
-out_done:
 	return IRQ_HANDLED;
 }
-
 /**
  * megasas_isr - isr entry point
  */
 static irqreturn_t megasas_isr(int irq, void *devp)
 {
-	return megasas_deplete_reply_queue((struct megasas_instance *)devp,
-					   DID_OK);
+	struct megasas_instance *instance;
+	unsigned long flags;
+	irqreturn_t	rc;
+
+	if (atomic_read(
+		&(((struct megasas_instance *)devp)->fw_reset_no_pci_access)))
+		return IRQ_HANDLED;
+
+	instance = (struct megasas_instance *)devp;
+
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	rc =  megasas_deplete_reply_queue(instance, DID_OK);
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+	return rc;
 }
 
 /**
@@ -1972,7 +2531,7 @@ megasas_transition_to_ready(struct megas
 			       "in %d secs\n", fw_state, max_wait);
 			return -ENODEV;
 		}
-	};
+	}
  	printk(KERN_INFO "megasas: FW now in Ready state\n");
 
 	return 0;
@@ -2054,6 +2613,7 @@ static int megasas_create_frame_pool(str
 	 */
 	sgl_sz = sge_sz * instance->max_num_sge;
 	frame_count = (sgl_sz + MEGAMFI_FRAME_SIZE - 1) / MEGAMFI_FRAME_SIZE;
+	frame_count = 15;
 
 	/*
 	 * We need one extra frame for the MFI command
@@ -2201,6 +2761,7 @@ static int megasas_alloc_cmds(struct meg
 		cmd = instance->cmd_list[i];
 		memset(cmd, 0, sizeof(struct megasas_cmd));
 		cmd->index = i;
+		cmd->scmd = NULL;
 		cmd->instance = instance;
 
 		list_add_tail(&cmd->list, &instance->cmd_pool);
@@ -2368,7 +2929,7 @@ megasas_get_ld_list(struct megasas_insta
 
 	/* the following function will get the instance PD LIST */
 
-	if ((ret == 0) && (ci->ldCount < MAX_LOGICAL_DRIVES)) {
+	if ((ret == 0) && (ci->ldCount <= MAX_LOGICAL_DRIVES)) {
 		memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
 
 		for (ld_index = 0; ld_index < ci->ldCount; ld_index++) {
@@ -2682,6 +3243,21 @@ static int megasas_init_mfi(struct megas
 	if (megasas_issue_init_mfi(instance))
 		goto fail_fw_init;
 
+	instance->fw_support_ieee = 0;
+	instance->fw_support_ieee =
+		(instance->instancet->read_fw_status_reg(reg_set) &
+		0x04000000);
+
+	printk(KERN_NOTICE "megasas_init_mfi: fw_support_ieee=%d",
+			instance->fw_support_ieee);
+
+	if (instance->fw_support_ieee)
+		instance->flag_ieee = 1;
+
+	/** for passthrough
+	* the following function will get the PD LIST.
+	*/
+
 	memset(instance->pd_list, 0 ,
 		(MEGASAS_MAX_PD * sizeof(struct megasas_pd_list)));
 	megasas_get_pd_list(instance);
@@ -2708,6 +3284,8 @@ static int megasas_init_mfi(struct megas
 		max_sectors_2 = ctrl_info->max_request_size;
 
 		tmp_sectors = min_t(u32, max_sectors_1 , max_sectors_2);
+		instance->disableOnlineCtrlReset =
+		ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset;
 	}
 
 	instance->max_sectors_per_req = instance->max_num_sge *
@@ -2929,6 +3507,7 @@ megasas_register_aen(struct megasas_inst
 	dcmd->flags = MFI_FRAME_DIR_READ;
 	dcmd->timeout = 0;
 	dcmd->pad_0 = 0;
+	instance->last_seq_num = seq_num;
 	dcmd->data_xfer_len = sizeof(struct megasas_evt_detail);
 	dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT;
 	dcmd->mbox.w[0] = seq_num;
@@ -3097,6 +3676,7 @@ megasas_probe_one(struct pci_dev *pdev, 
 
 	instance = (struct megasas_instance *)host->hostdata;
 	memset(instance, 0, sizeof(*instance));
+	atomic_set( &instance->fw_reset_no_pci_access, 0 );
 
 	instance->producer = pci_alloc_consistent(pdev, sizeof(u32),
 						  &instance->producer_h);
@@ -3114,6 +3694,9 @@ megasas_probe_one(struct pci_dev *pdev, 
 	megasas_poll_wait_aen = 0;
 	instance->flag_ieee = 0;
 	instance->ev = NULL;
+	instance->issuepend_done = 1;
+	instance->adprecovery = MEGASAS_HBA_OPERATIONAL;
+	megasas_poll_wait_aen = 0;
 
 	instance->evt_detail = pci_alloc_consistent(pdev,
 						    sizeof(struct
@@ -3130,6 +3713,7 @@ megasas_probe_one(struct pci_dev *pdev, 
 	 * Initialize locks and queues
 	 */
 	INIT_LIST_HEAD(&instance->cmd_pool);
+	INIT_LIST_HEAD(&instance->internal_reset_pending_q);
 
 	atomic_set(&instance->fw_outstanding,0);
 
@@ -3137,7 +3721,7 @@ megasas_probe_one(struct pci_dev *pdev, 
 	init_waitqueue_head(&instance->abort_cmd_wait_q);
 
 	spin_lock_init(&instance->cmd_pool_lock);
-	spin_lock_init(&instance->fire_lock);
+	spin_lock_init(&instance->hba_lock);
 	spin_lock_init(&instance->completion_lock);
 	spin_lock_init(&poll_aen_lock);
 
@@ -3162,6 +3746,9 @@ megasas_probe_one(struct pci_dev *pdev, 
 	instance->flag = 0;
 	instance->unload = 1;
 	instance->last_time = 0;
+	instance->disableOnlineCtrlReset = 1;
+
+	INIT_WORK(&instance->work_init, process_fw_state_change_wq);
 
 	/*
 	 * Initialize MFI Firmware
@@ -3253,6 +3840,9 @@ static void megasas_flush_cache(struct m
 	struct megasas_cmd *cmd;
 	struct megasas_dcmd_frame *dcmd;
 
+	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR)
+		return;
+
 	cmd = megasas_get_cmd(instance);
 
 	if (!cmd)
@@ -3290,6 +3880,9 @@ static void megasas_shutdown_controller(
 	struct megasas_cmd *cmd;
 	struct megasas_dcmd_frame *dcmd;
 
+	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR)
+		return;
+
 	cmd = megasas_get_cmd(instance);
 
 	if (!cmd)
@@ -3781,6 +4374,9 @@ static int megasas_mgmt_ioctl_fw(struct 
 	struct megasas_iocpacket *ioc;
 	struct megasas_instance *instance;
 	int error;
+	int i;
+	unsigned long flags;
+	u32 wait_time = MEGASAS_RESET_WAIT_TIME;
 
 	ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
 	if (!ioc)
@@ -3797,8 +4393,8 @@ static int megasas_mgmt_ioctl_fw(struct 
 		goto out_kfree_ioc;
 	}
 
-	if (instance->hw_crit_error == 1) {
-		printk(KERN_DEBUG "Controller in Crit ERROR\n");
+	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
+		printk(KERN_ERR "Controller in crit error\n");
 		error = -ENODEV;
 		goto out_kfree_ioc;
 	}
@@ -3815,6 +4411,35 @@ static int megasas_mgmt_ioctl_fw(struct 
 		error = -ERESTARTSYS;
 		goto out_kfree_ioc;
 	}
+
+	for (i = 0; i < wait_time; i++) {
+
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) {
+			spin_unlock_irqrestore(&instance->hba_lock, flags);
+			break;
+		}
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+		if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) {
+			printk(KERN_NOTICE "megasas: waiting"
+				"for controller reset to finish\n");
+		}
+
+		msleep(1000);
+	}
+
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) {
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+		printk(KERN_ERR "megaraid_sas: timed out while"
+			"waiting for HBA to recover\n");
+		error = -ENODEV;
+		goto out_kfree_ioc;
+	}
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
 	error = megasas_mgmt_fw_ioctl(instance, user_ioc, ioc);
 	up(&instance->ioctl_sem);
 
@@ -3828,6 +4453,9 @@ static int megasas_mgmt_ioctl_aen(struct
 	struct megasas_instance *instance;
 	struct megasas_aen aen;
 	int error;
+	int i;
+	unsigned long flags;
+	u32 wait_time = MEGASAS_RESET_WAIT_TIME;
 
 	if (file->private_data != file) {
 		printk(KERN_DEBUG "megasas: fasync_helper was not "
@@ -3843,14 +4471,42 @@ static int megasas_mgmt_ioctl_aen(struct
 	if (!instance)
 		return -ENODEV;
 
-	if (instance->hw_crit_error == 1) {
-		error = -ENODEV;
+	if (instance->adprecovery == MEGASAS_HW_CRITICAL_ERROR) {
+		return -ENODEV;
 	}
 
 	if (instance->unload == 1) {
 		return -ENODEV;
 	}
 
+	for (i = 0; i < wait_time; i++) {
+
+		spin_lock_irqsave(&instance->hba_lock, flags);
+		if (instance->adprecovery == MEGASAS_HBA_OPERATIONAL) {
+			spin_unlock_irqrestore(&instance->hba_lock,
+						flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+
+		if (!(i % MEGASAS_RESET_NOTICE_INTERVAL)) {
+			printk(KERN_NOTICE "megasas: waiting for"
+				"controller reset to finish\n");
+		}
+
+		msleep(1000);
+	}
+
+	spin_lock_irqsave(&instance->hba_lock, flags);
+	if (instance->adprecovery != MEGASAS_HBA_OPERATIONAL) {
+		spin_unlock_irqrestore(&instance->hba_lock, flags);
+		printk(KERN_ERR "megaraid_sas: timed out while waiting"
+				"for HBA to recover.\n");
+		return -ENODEV;
+	}
+	spin_unlock_irqrestore(&instance->hba_lock, flags);
+
 	mutex_lock(&instance->aen_mutex);
 	error = megasas_register_aen(instance, aen.seq_num,
 				     aen.class_locale_word);
diff -rupN old/drivers/scsi/megaraid/megaraid_sas.h new/drivers/scsi/megaraid/megaraid_sas.h
--- old/drivers/scsi/megaraid/megaraid_sas.h	2010-09-17 02:19:46.000000000 -0400
+++ new/drivers/scsi/megaraid/megaraid_sas.h	2010-09-17 02:19:46.000000000 -0400
@@ -60,6 +60,7 @@
 #define MFI_STATE_READY				0xB0000000
 #define MFI_STATE_OPERATIONAL			0xC0000000
 #define MFI_STATE_FAULT				0xF0000000
+#define  MFI_RESET_REQUIRED			0x00000001
 
 #define MEGAMFI_FRAME_SIZE			64
 
@@ -73,6 +74,12 @@
  * HOTPLUG	: Resume from Hotplug
  * MFI_STOP_ADP	: Send signal to FW to stop processing
  */
+#define WRITE_SEQUENCE_OFFSET		(0x0000000FC) /* I20 */
+#define HOST_DIAGNOSTIC_OFFSET		(0x000000F8)  /* I20 */
+#define DIAG_WRITE_ENABLE			(0x00000080)
+#define DIAG_RESET_ADAPTER			(0x00000004)
+
+#define MFI_ADP_RESET				0x00000040
 #define MFI_INIT_ABORT				0x00000001
 #define MFI_INIT_READY				0x00000002
 #define MFI_INIT_MFIMODE			0x00000004
@@ -402,8 +409,40 @@ struct megasas_ctrl_prop {
 	u16 ecc_bucket_leak_rate;
 	u8 restore_hotspare_on_insertion;
 	u8 expose_encl_devices;
-	u8 reserved[38];
+	u8 maintainPdFailHistory;
+	u8 disallowHostRequestReordering;
+	u8 abortCCOnError;
+	u8 loadBalanceMode;
+	u8 disableAutoDetectBackplane;
+
+	u8 snapVDSpace;
+
+	/*
+	* Add properties that can be controlled by
+	* a bit in the following structure.
+	*/
 
+	struct {
+		u32     copyBackDisabled            : 1;
+		u32     SMARTerEnabled              : 1;
+		u32     prCorrectUnconfiguredAreas  : 1;
+		u32     useFdeOnly                  : 1;
+		u32     disableNCQ                  : 1;
+		u32     SSDSMARTerEnabled           : 1;
+		u32     SSDPatrolReadEnabled        : 1;
+		u32     enableSpinDownUnconfigured  : 1;
+		u32     autoEnhancedImport          : 1;
+		u32     enableSecretKeyControl      : 1;
+		u32     disableOnlineCtrlReset      : 1;
+		u32     allowBootWithPinnedCache    : 1;
+		u32     disableSpinDownHS           : 1;
+		u32     enableJBOD                  : 1;
+		u32     reserved                    :18;
+	} OnOffProperties;
+	u8 autoSnapVDSpace;
+	u8 viewSpace;
+	u16 spinDownTime;
+	u8  reserved[24];
 } __packed;
 
 /*
@@ -704,6 +743,12 @@ struct megasas_ctrl_info {
  */
 #define IS_DMA64				(sizeof(dma_addr_t) == 8)
 
+#define MFI_XSCALE_OMR0_CHANGE_INTERRUPT		0x00000001
+
+#define MFI_INTR_FLAG_REPLY_MESSAGE			0x00000001
+#define MFI_INTR_FLAG_FIRMWARE_STATE_CHANGE		0x00000002
+#define MFI_G2_OUTBOUND_DOORBELL_CHANGE_INTERRUPT	0x00000004
+
 #define MFI_OB_INTR_STATUS_MASK			0x00000002
 #define MFI_POLL_TIMEOUT_SECS			60
 #define MEGASAS_COMPLETION_TIMER_INTERVAL      (HZ/10)
@@ -714,6 +759,9 @@ struct megasas_ctrl_info {
 #define MFI_REPLY_SKINNY_MESSAGE_INTERRUPT	0x40000000
 #define MFI_SKINNY_ENABLE_INTERRUPT_MASK	(0x00000001)
 
+#define MFI_1068_PCSR_OFFSET			0x84
+#define MFI_1068_FW_HANDSHAKE_OFFSET		0x64
+#define MFI_1068_FW_READY			0xDDDD0000
 /*
 * register set for both 1068 and 1078 controllers
 * structure extended for 1078 registers
@@ -755,8 +803,10 @@ struct megasas_register_set {
 	u32 	inbound_high_queue_port ;	/*00C4h*/
 
 	u32 	reserved_5;			/*00C8h*/
-	u32 	index_registers[820];		/*00CCh*/
-
+	u32	res_6[11];			/*CCh*/
+	u32	host_diag;
+	u32	seq_offset;
+	u32 	index_registers[807];		/*0100h*/
 } __attribute__ ((packed));
 
 struct megasas_sge32 {
@@ -1226,11 +1276,12 @@ struct megasas_instance {
 
 	struct megasas_cmd **cmd_list;
 	struct list_head cmd_pool;
+	/* used to sync fire the cmd to fw */
 	spinlock_t cmd_pool_lock;
+	/* used to sync fire the cmd to fw */
+	spinlock_t hba_lock;
 	/* used to synch producer, consumer ptrs in dpc */
 	spinlock_t completion_lock;
-	/* used to sync fire the cmd to fw */
-	spinlock_t fire_lock;
 	struct dma_pool *frame_dma_pool;
 	struct dma_pool *sense_dma_pool;
 
@@ -1247,19 +1298,36 @@ struct megasas_instance {
 
 	struct pci_dev *pdev;
 	u32 unique_id;
+	u32 fw_support_ieee;
 
 	atomic_t fw_outstanding;
-	u32 hw_crit_error;
+	atomic_t fw_reset_no_pci_access;
 
 	struct megasas_instance_template *instancet;
 	struct tasklet_struct isr_tasklet;
+	struct work_struct work_init;
 
 	u8 flag;
 	u8 unload;
 	u8 flag_ieee;
+	u8 issuepend_done;
+	u8 disableOnlineCtrlReset;
+	u8 adprecovery;
 	unsigned long last_time;
+	u32 mfiStatus;
+	u32 last_seq_num;
 
 	struct timer_list io_completion_timer;
+	struct list_head internal_reset_pending_q;
+};
+
+enum {
+	MEGASAS_HBA_OPERATIONAL			= 0,
+	MEGASAS_ADPRESET_SM_INFAULT		= 1,
+	MEGASAS_ADPRESET_SM_FW_RESET_SUCCESS	= 2,
+	MEGASAS_ADPRESET_SM_OPERATIONAL		= 3,
+	MEGASAS_HW_CRITICAL_ERROR		= 4,
+	MEGASAS_ADPRESET_INPROG_SIGN		= 0xDEADDEAD,
 };
 
 struct megasas_instance_template {
@@ -1272,6 +1340,10 @@ struct megasas_instance_template {
 	int (*clear_intr)(struct megasas_register_set __iomem *);
 
 	u32 (*read_fw_status_reg)(struct megasas_register_set __iomem *);
+	int (*adp_reset)(struct megasas_instance *, \
+		struct megasas_register_set __iomem *);
+	int (*check_reset)(struct megasas_instance *, \
+		struct megasas_register_set __iomem *);
 };
 
 #define MEGASAS_IS_LOGICAL(scp)						\
@@ -1291,7 +1363,9 @@ struct megasas_cmd {
 	u32 index;
 	u8 sync_cmd;
 	u8 cmd_status;
-	u16 abort_aen;
+	u8 abort_aen;
+	u8 retry_for_fw_reset;
+
 
 	struct list_head list;
 	struct scsi_cmnd *scmd;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-09-23  2:36 PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive bo yang
@ 2010-10-08 15:51 ` Tomas Henzl
  2010-10-08 16:39   ` James Bottomley
  0 siblings, 1 reply; 11+ messages in thread
From: Tomas Henzl @ 2010-10-08 15:51 UTC (permalink / raw)
  To: bo yang; +Cc: linux-scsi, akpm, linux-kernel, James.Bottomley, bo.yang

On 09/23/2010 04:36 AM, bo yang wrote:
> This patch is too big.  I am using attachment to submit.  Please
> use attached file to apply.  Also let me know if it can't be accepted.
>
> To add the Online controller reset support, driver need to do:
> a). reset the controller chips -- Xscale and Gen2 which will change
> the function calls and add the reset function related to this two
> chips.
> b). during the reset, driver will store the pending cmds which not
> returned by FW to driver's pending queue.  Driver will re-issue those
> pending cmds again to FW after the OCR finished.
> c). In driver's timeout routine, driver will report to OS as reset.
> Also driver's queue routine will block the cmds until the OCR
> finished.
> d). in Driver's ISR routine, if driver get the FW state as state
> change, FW in Failure status and FW support online controller
> reset (OCR), driver will start to do the controller reset.
> e). In driver's IOCTL routine, the application cmds will wait for the
> OCR to finish, then issue the cmds to FW.
>
> Signed-off-by Bo Yang<bo.yang@lsi.com>
>
> ---
>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>  2 files changed, 787 insertions(+), 57 deletions(-)
Hi Bo,
in the workqueue function you sleep for 30s,
it's scheduled here - schedule_work(&instance->work_init);

+process_fw_state_change_wq(struct work_struct *work)
+{
...
+		/*waitting for about 20 second before start the second init*/
+		for (wait = 0; wait < 30; wait++) {
+			msleep(1000);
+		}

- I don't think it is good practice to sleep for such a long time
- your exit function should use some synchronization, for example
  'cancel_work_sync'; without that, if someone rmmods your
  module, it could lead to memory corruption

Tomas




^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-08 15:51 ` Tomas Henzl
@ 2010-10-08 16:39   ` James Bottomley
  2010-10-08 19:28     ` Tomas Henzl
  0 siblings, 1 reply; 11+ messages in thread
From: James Bottomley @ 2010-10-08 16:39 UTC (permalink / raw)
  To: Tomas Henzl; +Cc: bo yang, linux-scsi, akpm, linux-kernel, bo.yang

On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
> On 09/23/2010 04:36 AM, bo yang wrote:
> > This patch is too big.  I am using attachment to submit.  Please
> > use attached file to apply.  Also let me know if it can't be accepted.
> >
> > To add the Online controller reset support, driver need to do:
> > a). reset the controller chips -- Xscale and Gen2 which will change
> > the function calls and add the reset function related to this two
> > chips.
> > b). during the reset, driver will store the pending cmds which not
> > returned by FW to driver's pending queue.  Driver will re-issue those
> > pending cmds again to FW after the OCR finished.
> > c). In driver's timeout routine, driver will report to OS as reset.
> > Also driver's queue routine will block the cmds until the OCR
> > finished.
> > d). in Driver's ISR routine, if driver get the FW state as state
> > change, FW in Failure status and FW support online controller
> > reset (OCR), driver will start to do the controller reset.
> > e). In driver's IOCTL routine, the application cmds will wait for the
> > OCR to finish, then issue the cmds to FW.
> >
> > Signed-off-by Bo Yang<bo.yang@lsi.com>
> >
> > ---
> >  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
> >  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
> >  2 files changed, 787 insertions(+), 57 deletions(-)
> Hi Bo,
> in the workqueue function you sleep for 30s,
> it's scheduled here - schedule_work(&instance->work_init);
> 
> +process_fw_state_change_wq(struct work_struct *work)
> +{
> ...
> +		/*waitting for about 20 second before start the second init*/
> +		for (wait = 0; wait < 30; wait++) {
> +			msleep(1000);
> +		}

this lot should be ssleep(20) if you want a 20 sec sleep.

> 
> - this is not a good practice to sleep for a so long time I think
> - you should use in your exit function some synchronization 
>   for example 'cancel_work_sync', without that if someone rmmods your 
>   module, it could then lead to a memory corruption

Actually flush_scheduled_work() should be fine ... it will force the
module removal to wait for completion ... cancellation can be error
prone, so just forcing the wait sounds easier.

James



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-08 16:39   ` James Bottomley
@ 2010-10-08 19:28     ` Tomas Henzl
  2010-10-09 20:38       ` Tomas Henzl
  0 siblings, 1 reply; 11+ messages in thread
From: Tomas Henzl @ 2010-10-08 19:28 UTC (permalink / raw)
  To: James Bottomley; +Cc: bo yang, linux-scsi, akpm, linux-kernel, bo.yang

On 10/08/2010 06:39 PM, James Bottomley wrote:
> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>   
>> On 09/23/2010 04:36 AM, bo yang wrote:
>>     
>>> This patch is too big.  I am using attachment to submit.  Please
>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>
>>> To add the Online controller reset support, driver need to do:
>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>> the function calls and add the reset function related to this two
>>> chips.
>>> b). during the reset, driver will store the pending cmds which not
>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>> pending cmds again to FW after the OCR finished.
>>> c). In driver's timeout routine, driver will report to OS as reset.
>>> Also driver's queue routine will block the cmds until the OCR
>>> finished.
>>> d). in Driver's ISR routine, if driver get the FW state as state
>>> change, FW in Failure status and FW support online controller
>>> reset (OCR), driver will start to do the controller reset.
>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>> OCR to finish, then issue the cmds to FW.
>>>
>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>
>>> ---
>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>       
>> Hi Bo,
>> in the workqueue function you sleep for 30s,
>> it's scheduled here - schedule_work(&instance->work_init);
>>
>> +process_fw_state_change_wq(struct work_struct *work)
>> +{
>> ...
>> +		/*waitting for about 20 second before start the second init*/
>> +		for (wait = 0; wait < 30; wait++) {
>> +			msleep(1000);
>> +		}
>>     
> this lot should be ssleep(20) if you want a 20 sec sleep.
>   
please do that in every place where you use the
"for (wait = 0; wait < n; wait++) msleep(1000);" construct

>> - this is not a good practice to sleep for a so long time I think
>>     
this long sleep might be ok if the workqueue is used only rarely -
is that the case?

>> - you should use in your exit function some synchronization 
>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>   module, it could then lead to a memory corruption
>>     
> Actually flush_scheduled_work() should be fine ... it will force the
> module removal to wait for completion ... cancellation can be error
> prone, so just forcing the wait sounds easier.
>   
someone told me that cancel_work_sync is safer than flush_scheduled_work,
but I'm not an expert, so ok

Tomas

> James
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-08 19:28     ` Tomas Henzl
@ 2010-10-09 20:38       ` Tomas Henzl
  2010-10-11 12:55         ` Yang, Bo
  0 siblings, 1 reply; 11+ messages in thread
From: Tomas Henzl @ 2010-10-09 20:38 UTC (permalink / raw)
  To: James Bottomley; +Cc: bo yang, linux-scsi, akpm, linux-kernel, bo.yang

On 10/08/2010 09:28 PM, Tomas Henzl wrote:
> On 10/08/2010 06:39 PM, James Bottomley wrote:
>   
>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>   
>>     
>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>     
>>>       
>>>> This patch is too big.  I am using attachment to submit.  Please
>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>
>>>> To add the Online controller reset support, driver need to do:
>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>> the function calls and add the reset function related to this two
>>>> chips.
>>>> b). during the reset, driver will store the pending cmds which not
>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>> pending cmds again to FW after the OCR finished.
>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>> Also driver's queue routine will block the cmds until the OCR
>>>> finished.
>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>> change, FW in Failure status and FW support online controller
>>>> reset (OCR), driver will start to do the controller reset.
>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>> OCR to finish, then issue the cmds to FW.
>>>>
>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>
>>>> ---
>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>       
>>>>         
>>> Hi Bo,
>>> in the workqueue function you sleep for 30s,
>>> it's scheduled here - schedule_work(&instance->work_init);
>>>
>>> +process_fw_state_change_wq(struct work_struct *work)
>>> +{
>>> ...
>>> +		/*waitting for about 20 second before start the second init*/
>>> +		for (wait = 0; wait < 30; wait++) {
>>> +			msleep(1000);
>>> +		}
>>>     
>>>       
>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>   
>>     
> please do that on every place where you use the 
> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>
>   
>>> - this is not a good practice to sleep for a so long time I think
>>>     
>>>       
> this long sleep might might be ok, if the workqueue is used only rarely
> is it so?
>
>   
>>> - you should use in your exit function some synchronization 
>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>   module, it could then lead to a memory corruption
>>>     
>>>       
>> Actually flush_scheduled_work() should be fine ... it will force the
>> module removal to wait for completion ... cancellation can be error
>> prone, so just forcing the wait sounds easier.
>>   
>>     
Another correction - flush_scheduled_work is already present in megasas_detach_one;
it only needs to be moved out of the if statement.


> someone told that cancel_work_sync is safer then flush_scheduled_work
> but I'm not an expert, so ok 
>
> Tomas
>
>   
>> James
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>   
>>     
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-09 20:38       ` Tomas Henzl
@ 2010-10-11 12:55         ` Yang, Bo
  2010-10-11 13:20           ` Tomas Henzl
  0 siblings, 1 reply; 11+ messages in thread
From: Yang, Bo @ 2010-10-11 12:55 UTC (permalink / raw)
  To: Tomas Henzl, James Bottomley; +Cc: bo yang, linux-scsi, akpm, linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 4160 bytes --]

Tomas,

>Another correction - flush_scheduled_work is already present in megass_detach_one
>it only should be moved away from the if statement.

The flush_scheduled_work is for schedule_delayed_work().  We need to flush it and remove.

-----Original Message-----
From: Tomas Henzl [mailto:thenzl@redhat.com] 
Sent: Saturday, October 09, 2010 4:38 PM
To: James Bottomley
Cc: bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Yang, Bo
Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive

On 10/08/2010 09:28 PM, Tomas Henzl wrote:
> On 10/08/2010 06:39 PM, James Bottomley wrote:
>   
>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>   
>>     
>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>     
>>>       
>>>> This patch is too big.  I am using attachment to submit.  Please
>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>
>>>> To add the Online controller reset support, driver need to do:
>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>> the function calls and add the reset function related to this two
>>>> chips.
>>>> b). during the reset, driver will store the pending cmds which not
>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>> pending cmds again to FW after the OCR finished.
>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>> Also driver's queue routine will block the cmds until the OCR
>>>> finished.
>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>> change, FW in Failure status and FW support online controller
>>>> reset (OCR), driver will start to do the controller reset.
>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>> OCR to finish, then issue the cmds to FW.
>>>>
>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>
>>>> ---
>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>       
>>>>         
>>> Hi Bo,
>>> in the workqueue function you sleep for 30s,
>>> it's scheduled here - schedule_work(&instance->work_init);
>>>
>>> +process_fw_state_change_wq(struct work_struct *work)
>>> +{
>>> ...
>>> +		/*waitting for about 20 second before start the second init*/
>>> +		for (wait = 0; wait < 30; wait++) {
>>> +			msleep(1000);
>>> +		}
>>>     
>>>       
>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>   
>>     
> please do that on every place where you use the 
> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>
>   
>>> - this is not a good practice to sleep for a so long time I think
>>>     
>>>       
> this long sleep might might be ok, if the workqueue is used only rarely
> is it so?
>
>   
>>> - you should use in your exit function some synchronization 
>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>   module, it could then lead to a memory corruption
>>>     
>>>       
>> Actually flush_scheduled_work() should be fine ... it will force the
>> module removal to wait for completion ... cancellation can be error
>> prone, so just forcing the wait sounds easier.
>>   
>>     
Another correction - flush_scheduled_work is already present in megass_detach_one
it only should be moved away from the if statement.


> someone told that cancel_work_sync is safer then flush_scheduled_work
> but I'm not an expert, so ok 
>
> Tomas
>
>   
>> James
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>   
>>     
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>   

[-- unreadable trailing bytes (likely the list footer, mangled during decoding) omitted --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-11 12:55         ` Yang, Bo
@ 2010-10-11 13:20           ` Tomas Henzl
  2010-10-11 15:37             ` Yang, Bo
  0 siblings, 1 reply; 11+ messages in thread
From: Tomas Henzl @ 2010-10-11 13:20 UTC (permalink / raw)
  To: Yang, Bo; +Cc: James Bottomley, bo yang, linux-scsi, akpm, linux-kernel

On 10/11/2010 02:55 PM, Yang, Bo wrote:
> Tomas,
>
>   
>> Another correction - flush_scheduled_work is already present in megass_detach_one
>> it only should be moved away from the if statement.
>>     
> The flush_scheduled_work is for schedule_delayed_work().  We need to flush it and remove.
>   
I'm not saying it should be removed. I think moving it out of the 'if' block
makes it work for both ev->hotplug_work and the newly added instance->work_init


diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 55951f4..7773707 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -4088,9 +4088,9 @@ static void __devexit megasas_detach_one(struct pci_dev *pdev)
 		struct megasas_aen_event *ev = instance->ev;
 		cancel_delayed_work(
 			(struct delayed_work *)&ev->hotplug_work);
-		flush_scheduled_work();
-		instance->ev = NULL;
 	}
+	flush_scheduled_work();
+	instance->ev = NULL;
 
 	tasklet_kill(&instance->isr_tasklet);
 
--

> -----Original Message-----
> From: Tomas Henzl [mailto:thenzl@redhat.com] 
> Sent: Saturday, October 09, 2010 4:38 PM
> To: James Bottomley
> Cc: bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Yang, Bo
> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>
> On 10/08/2010 09:28 PM, Tomas Henzl wrote:
>   
>> On 10/08/2010 06:39 PM, James Bottomley wrote:
>>   
>>     
>>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>>   
>>>     
>>>       
>>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>>     
>>>>       
>>>>         
>>>>> This patch is too big.  I am using attachment to submit.  Please
>>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>>
>>>>> To add the Online controller reset support, driver need to do:
>>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>>> the function calls and add the reset function related to this two
>>>>> chips.
>>>>> b). during the reset, driver will store the pending cmds which not
>>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>>> pending cmds again to FW after the OCR finished.
>>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>>> Also driver's queue routine will block the cmds until the OCR
>>>>> finished.
>>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>>> change, FW in Failure status and FW support online controller
>>>>> reset (OCR), driver will start to do the controller reset.
>>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>>> OCR to finish, then issue the cmds to FW.
>>>>>
>>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>>
>>>>> ---
>>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>>       
>>>>>         
>>>>>           
>>>> Hi Bo,
>>>> in the workqueue function you sleep for 30s,
>>>> it's scheduled here - schedule_work(&instance->work_init);
>>>>
>>>> +process_fw_state_change_wq(struct work_struct *work)
>>>> +{
>>>> ...
>>>> +		/*waitting for about 20 second before start the second init*/
>>>> +		for (wait = 0; wait < 30; wait++) {
>>>> +			msleep(1000);
>>>> +		}
>>>>     
>>>>       
>>>>         
>>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>>   
>>>     
>>>       
>> please do that on every place where you use the 
>> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>>
>>   
>>     
>>>> - this is not a good practice to sleep for a so long time I think
>>>>     
>>>>       
>>>>         
>> this long sleep might might be ok, if the workqueue is used only rarely
>> is it so?
>>
>>   
>>     
>>>> - you should use in your exit function some synchronization 
>>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>>   module, it could then lead to a memory corruption
>>>>     
>>>>       
>>>>         
>>> Actually flush_scheduled_work() should be fine ... it will force the
>>> module removal to wait for completion ... cancellation can be error
>>> prone, so just forcing the wait sounds easier.
>>>   
>>>     
>>>       
> Another correction - flush_scheduled_work is already present in megass_detach_one
> it only should be moved away from the if statement.
>
>
>   
>> someone told that cancel_work_sync is safer then flush_scheduled_work
>> but I'm not an expert, so ok 
>>
>> Tomas
>>
>>   
>>     
>>> James
>>>
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>   
>>>     
>>>       
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>   
>>     
> [-- unreadable trailing bytes (likely the list footer, mangled during decoding) omitted --]


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* RE: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-11 13:20           ` Tomas Henzl
@ 2010-10-11 15:37             ` Yang, Bo
  2010-10-12 14:28               ` Tomas Henzl
  0 siblings, 1 reply; 11+ messages in thread
From: Yang, Bo @ 2010-10-11 15:37 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: James Bottomley, bo yang, linux-scsi, akpm, linux-kernel,
	Daftardar, Jayant

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 6191 bytes --]

Tomas,

The reason the driver calls flush_scheduled_work() is that the driver called cancel_delayed_work().  I am not sure the driver needs to call flush_scheduled_work() if the scheduled work is already done, but I will test this change and submit another patch as soon as it works fine.

This change should not be part of this patch; we will submit a new patch after the verification.

Regards,

Bo Yang   
 
-----Original Message-----
From: Tomas Henzl [mailto:thenzl@redhat.com] 
Sent: Monday, October 11, 2010 9:21 AM
To: Yang, Bo
Cc: James Bottomley; bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org
Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive

On 10/11/2010 02:55 PM, Yang, Bo wrote:
> Tomas,
>
>   
>> Another correction - flush_scheduled_work is already present in megass_detach_one
>> it only should be moved away from the if statement.
>>     
> The flush_scheduled_work is for schedule_delayed_work().  We need to flush it and remove.
>   
I'm not saying it should be removed; I think moving it out of the 'if' block
makes it work for both ev->hotplug_work and the newly added instance->work_init


diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 55951f4..7773707 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -4088,9 +4088,9 @@ static void __devexit megasas_detach_one(struct pci_dev *pdev)
 		struct megasas_aen_event *ev = instance->ev;
 		cancel_delayed_work(
 			(struct delayed_work *)&ev->hotplug_work);
-		flush_scheduled_work();
-		instance->ev = NULL;
 	}
+	flush_scheduled_work();
+	instance->ev = NULL;
 
 	tasklet_kill(&instance->isr_tasklet);
 
--

> -----Original Message-----
> From: Tomas Henzl [mailto:thenzl@redhat.com] 
> Sent: Saturday, October 09, 2010 4:38 PM
> To: James Bottomley
> Cc: bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Yang, Bo
> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>
> On 10/08/2010 09:28 PM, Tomas Henzl wrote:
>   
>> On 10/08/2010 06:39 PM, James Bottomley wrote:
>>   
>>     
>>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>>   
>>>     
>>>       
>>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>>     
>>>>       
>>>>         
>>>>> This patch is too big.  I am using attachment to submit.  Please
>>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>>
>>>>> To add the Online controller reset support, driver need to do:
>>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>>> the function calls and add the reset function related to this two
>>>>> chips.
>>>>> b). during the reset, driver will store the pending cmds which not
>>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>>> pending cmds again to FW after the OCR finished.
>>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>>> Also driver's queue routine will block the cmds until the OCR
>>>>> finished.
>>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>>> change, FW in Failure status and FW support online controller
>>>>> reset (OCR), driver will start to do the controller reset.
>>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>>> OCR to finish, then issue the cmds to FW.
>>>>>
>>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>>
>>>>> ---
>>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>>       
>>>>>         
>>>>>           
>>>> Hi Bo,
>>>> in the workqueue function you sleep for 30s,
>>>> it's scheduled here - schedule_work(&instance->work_init);
>>>>
>>>> +process_fw_state_change_wq(struct work_struct *work)
>>>> +{
>>>> ...
>>>> +		/*waitting for about 20 second before start the second init*/
>>>> +		for (wait = 0; wait < 30; wait++) {
>>>> +			msleep(1000);
>>>> +		}
>>>>     
>>>>       
>>>>         
>>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>>   
>>>     
>>>       
>> please do that on every place where you use the 
>> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>>
>>   
>>     
>>>> - this is not a good practice to sleep for a so long time I think
>>>>     
>>>>       
>>>>         
>> this long sleep might might be ok, if the workqueue is used only rarely
>> is it so?
>>
>>   
>>     
>>>> - you should use in your exit function some synchronization 
>>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>>   module, it could then lead to a memory corruption
>>>>     
>>>>       
>>>>         
>>> Actually flush_scheduled_work() should be fine ... it will force the
>>> module removal to wait for completion ... cancellation can be error
>>> prone, so just forcing the wait sounds easier.
>>>   
>>>     
>>>       
> Another correction - flush_scheduled_work is already present in megass_detach_one
> it only should be moved away from the if statement.
>
>
>   
>> someone told that cancel_work_sync is safer then flush_scheduled_work
>> but I'm not an expert, so ok 
>>
>> Tomas
>>
>>   
>>     
>>> James
>>>
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>   
>>>     
>>>       
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>   
>>     
> [-- unreadable mis-encoded trailer bytes omitted --]

[-- unreadable mis-encoded trailer bytes omitted --]

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-11 15:37             ` Yang, Bo
@ 2010-10-12 14:28               ` Tomas Henzl
  2010-10-12 14:57                 ` Yang, Bo
  0 siblings, 1 reply; 11+ messages in thread
From: Tomas Henzl @ 2010-10-12 14:28 UTC (permalink / raw)
  To: Yang, Bo
  Cc: James Bottomley, bo yang, linux-scsi, akpm, linux-kernel,
	Daftardar, Jayant

On 10/11/2010 05:37 PM, Yang, Bo wrote:
> Tomas,
>
> The reason the driver does flush_scheduled_work() is driver did cancel_delayed_work().  I am not sure driver need to call flush_scheduled_work() if the scheduled work already done, but I will test this changes and submit another patch as soon as it works fine.  
>   
The driver needs to call flush_scheduled_work() because in this patch you are adding a new workqueue:
INIT_WORK(&instance->work_init, process_fw_state_change_wq); so calling flush_scheduled_work()
only if a certain condition is met is the problem. If the condition were true for both workqueues
then it would be fine, but I don't think that is the case.
With this patch we could have a started workqueue (with a long 30 sec sleep) still running when the
module is removed, which would probably lead to memory corruption etc.


> This change should not be the part of this patch, we would have the new patch submit after the verification. 
>   
I would prefer to have it in one patch, but if it is easier for you, then do it as you wish.

Tomas


> Regards,
>
> Bo Yang   
>  
> -----Original Message-----
> From: Tomas Henzl [mailto:thenzl@redhat.com] 
> Sent: Monday, October 11, 2010 9:21 AM
> To: Yang, Bo
> Cc: James Bottomley; bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org
> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>
> On 10/11/2010 02:55 PM, Yang, Bo wrote:
>   
>> Tomas,
>>
>>   
>>     
>>> Another correction - flush_scheduled_work is already present in megass_detach_one
>>> it only should be moved away from the if statement.
>>>     
>>>       
>> The flush_scheduled_work is for schedule_delayed_work().  We need to flush it and remove.
>>   
>>     
> I'm not saying it should be removed, I think a move outside from the 'if' block
> makes it work for ev->hotplug_work and for the newly added instance->work_init
>
>
> diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
> index 55951f4..7773707 100644
> --- a/drivers/scsi/megaraid/megaraid_sas.c
> +++ b/drivers/scsi/megaraid/megaraid_sas.c
> @@ -4088,9 +4088,9 @@ static void __devexit megasas_detach_one(struct pci_dev *pdev)
>  		struct megasas_aen_event *ev = instance->ev;
>  		cancel_delayed_work(
>  			(struct delayed_work *)&ev->hotplug_work);
> -		flush_scheduled_work();
> -		instance->ev = NULL;
>  	}
> +	flush_scheduled_work();
> +	instance->ev = NULL;
>  
>  	tasklet_kill(&instance->isr_tasklet);
>  
> --
>
>   
>> -----Original Message-----
>> From: Tomas Henzl [mailto:thenzl@redhat.com] 
>> Sent: Saturday, October 09, 2010 4:38 PM
>> To: James Bottomley
>> Cc: bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Yang, Bo
>> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>>
>> On 10/08/2010 09:28 PM, Tomas Henzl wrote:
>>   
>>     
>>> On 10/08/2010 06:39 PM, James Bottomley wrote:
>>>   
>>>     
>>>       
>>>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>>>   
>>>>     
>>>>       
>>>>         
>>>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>>>> This patch is too big.  I am using attachment to submit.  Please
>>>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>>>
>>>>>> To add the Online controller reset support, driver need to do:
>>>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>>>> the function calls and add the reset function related to this two
>>>>>> chips.
>>>>>> b). during the reset, driver will store the pending cmds which not
>>>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>>>> pending cmds again to FW after the OCR finished.
>>>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>>>> Also driver's queue routine will block the cmds until the OCR
>>>>>> finished.
>>>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>>>> change, FW in Failure status and FW support online controller
>>>>>> reset (OCR), driver will start to do the controller reset.
>>>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>>>> OCR to finish, then issue the cmds to FW.
>>>>>>
>>>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>>>
>>>>>> ---
>>>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>>>       
>>>>>>         
>>>>>>           
>>>>>>             
>>>>> Hi Bo,
>>>>> in the workqueue function you sleep for 30s,
>>>>> it's scheduled here - schedule_work(&instance->work_init);
>>>>>
>>>>> +process_fw_state_change_wq(struct work_struct *work)
>>>>> +{
>>>>> ...
>>>>> +		/*waitting for about 20 second before start the second init*/
>>>>> +		for (wait = 0; wait < 30; wait++) {
>>>>> +			msleep(1000);
>>>>> +		}
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>>>   
>>>>     
>>>>       
>>>>         
>>> please do that on every place where you use the 
>>> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>>>
>>>   
>>>     
>>>       
>>>>> - this is not a good practice to sleep for a so long time I think
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>> this long sleep might might be ok, if the workqueue is used only rarely
>>> is it so?
>>>
>>>   
>>>     
>>>       
>>>>> - you should use in your exit function some synchronization 
>>>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>>>   module, it could then lead to a memory corruption
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>> Actually flush_scheduled_work() should be fine ... it will force the
>>>> module removal to wait for completion ... cancellation can be error
>>>> prone, so just forcing the wait sounds easier.
>>>>   
>>>>     
>>>>       
>>>>         
>> Another correction - flush_scheduled_work is already present in megass_detach_one
>> it only should be moved away from the if statement.
>>
>>
>>   
>>     
>>> someone told that cancel_work_sync is safer then flush_scheduled_work
>>> but I'm not an expert, so ok 
>>>
>>> Tomas
>>>
>>>   
>>>     
>>>       
>>>> James
>>>>
>>>>
>>>>         


^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-12 14:28               ` Tomas Henzl
@ 2010-10-12 14:57                 ` Yang, Bo
  2010-10-12 15:33                   ` Tomas Henzl
  0 siblings, 1 reply; 11+ messages in thread
From: Yang, Bo @ 2010-10-12 14:57 UTC (permalink / raw)
  To: Tomas Henzl
  Cc: James Bottomley, bo yang, linux-scsi, akpm, linux-kernel,
	Daftardar, Jayant

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 7189 bytes --]

Tomas,

This change will not be part of patch 1/5, but we can submit a new patch for this change after our testing team has done the verification.

Also, did you get a chance to look at patches 2/5 to 5/5?  Please give me your feedback.

Thanks,

Bo Yang   
 
-----Original Message-----
From: Tomas Henzl [mailto:thenzl@redhat.com] 
Sent: Tuesday, October 12, 2010 10:29 AM
To: Yang, Bo
Cc: James Bottomley; bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Daftardar, Jayant
Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive

On 10/11/2010 05:37 PM, Yang, Bo wrote:
> Tomas,
>
> The reason the driver does flush_scheduled_work() is driver did cancel_delayed_work().  I am not sure driver need to call flush_scheduled_work() if the scheduled work already done, but I will test this changes and submit another patch as soon as it works fine.  
>   
Driver needs to call flush_scheduled_work() because you are adding in this patch a new workqueue:
INIT_WORK(&instance->work_init, process_fw_state_change_wq); so calling the flush_scheduled_work()
only if a certain condition is met is the problem. If the condition comes true for both workqueues
then it would be fine, but I think that it isn't so.
With this patch we could have a started workqueue (with a long 30 sec sleep) and when the module
is removed then probably a memory corruption etc. 


> This change should not be the part of this patch, we would have the new patch submit after the verification. 
>   
I would prefer to have it in one patch, but if it is for you easier, then do it as you wish.

Tomas


> Regards,
>
> Bo Yang   
>  
> -----Original Message-----
> From: Tomas Henzl [mailto:thenzl@redhat.com] 
> Sent: Monday, October 11, 2010 9:21 AM
> To: Yang, Bo
> Cc: James Bottomley; bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org
> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>
> On 10/11/2010 02:55 PM, Yang, Bo wrote:
>   
>> Tomas,
>>
>>   
>>     
>>> Another correction - flush_scheduled_work is already present in megass_detach_one
>>> it only should be moved away from the if statement.
>>>     
>>>       
>> The flush_scheduled_work is for schedule_delayed_work().  We need to flush it and remove.
>>   
>>     
> I'm not saying it should be removed, I think a move outside from the 'if' block
> makes it work for ev->hotplug_work and for the newly added instance->work_init
>
>
> diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
> index 55951f4..7773707 100644
> --- a/drivers/scsi/megaraid/megaraid_sas.c
> +++ b/drivers/scsi/megaraid/megaraid_sas.c
> @@ -4088,9 +4088,9 @@ static void __devexit megasas_detach_one(struct pci_dev *pdev)
>  		struct megasas_aen_event *ev = instance->ev;
>  		cancel_delayed_work(
>  			(struct delayed_work *)&ev->hotplug_work);
> -		flush_scheduled_work();
> -		instance->ev = NULL;
>  	}
> +	flush_scheduled_work();
> +	instance->ev = NULL;
>  
>  	tasklet_kill(&instance->isr_tasklet);
>  
> --
>
>   
>> -----Original Message-----
>> From: Tomas Henzl [mailto:thenzl@redhat.com] 
>> Sent: Saturday, October 09, 2010 4:38 PM
>> To: James Bottomley
>> Cc: bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Yang, Bo
>> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>>
>> On 10/08/2010 09:28 PM, Tomas Henzl wrote:
>>   
>>     
>>> On 10/08/2010 06:39 PM, James Bottomley wrote:
>>>   
>>>     
>>>       
>>>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>>>   
>>>>     
>>>>       
>>>>         
>>>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>>>> This patch is too big.  I am using attachment to submit.  Please
>>>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>>>
>>>>>> To add the Online controller reset support, driver need to do:
>>>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>>>> the function calls and add the reset function related to this two
>>>>>> chips.
>>>>>> b). during the reset, driver will store the pending cmds which not
>>>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>>>> pending cmds again to FW after the OCR finished.
>>>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>>>> Also driver's queue routine will block the cmds until the OCR
>>>>>> finished.
>>>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>>>> change, FW in Failure status and FW support online controller
>>>>>> reset (OCR), driver will start to do the controller reset.
>>>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>>>> OCR to finish, then issue the cmds to FW.
>>>>>>
>>>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>>>
>>>>>> ---
>>>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>>>       
>>>>>>         
>>>>>>           
>>>>>>             
>>>>> Hi Bo,
>>>>> in the workqueue function you sleep for 30s,
>>>>> it's scheduled here - schedule_work(&instance->work_init);
>>>>>
>>>>> +process_fw_state_change_wq(struct work_struct *work)
>>>>> +{
>>>>> ...
>>>>> +		/*waitting for about 20 second before start the second init*/
>>>>> +		for (wait = 0; wait < 30; wait++) {
>>>>> +			msleep(1000);
>>>>> +		}
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>>>   
>>>>     
>>>>       
>>>>         
>>> please do that on every place where you use the 
>>> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>>>
>>>   
>>>     
>>>       
>>>>> - this is not a good practice to sleep for a so long time I think
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>> this long sleep might might be ok, if the workqueue is used only rarely
>>> is it so?
>>>
>>>   
>>>     
>>>       
>>>>> - you should use in your exit function some synchronization 
>>>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>>>   module, it could then lead to a memory corruption
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>> Actually flush_scheduled_work() should be fine ... it will force the
>>>> module removal to wait for completion ... cancellation can be error
>>>> prone, so just forcing the wait sounds easier.
>>>>   
>>>>     
>>>>       
>>>>         
>> Another correction - flush_scheduled_work is already present in megass_detach_one
>> it only should be moved away from the if statement.
>>
>>
>>   
>>     
>>> someone told that cancel_work_sync is safer then flush_scheduled_work
>>> but I'm not an expert, so ok 
>>>
>>> Tomas
>>>
>>>   
>>>     
>>>       
>>>> James
>>>>
>>>>
>>>>         

[-- unreadable mis-encoded trailer bytes omitted --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
  2010-10-12 14:57                 ` Yang, Bo
@ 2010-10-12 15:33                   ` Tomas Henzl
  0 siblings, 0 replies; 11+ messages in thread
From: Tomas Henzl @ 2010-10-12 15:33 UTC (permalink / raw)
  To: Yang, Bo
  Cc: James Bottomley, bo yang, linux-scsi, akpm, linux-kernel,
	Daftardar, Jayant

On 10/12/2010 04:57 PM, Yang, Bo wrote:
> Tomas,
>
> This change will not be in the part of patch 1/5.  But we can submit another new patch for this change after our testing team did the verification.
>   
I'm fine with either approach.

> Also did you get the chance to look at the patches 2/5 to 5/5?  Just give me the feedback.
>   
I just briefly looked at 2-5/5 and haven't found anything obvious.

> Thanks,
>
> Bo Yang   
>  
> -----Original Message-----
> From: Tomas Henzl [mailto:thenzl@redhat.com] 
> Sent: Tuesday, October 12, 2010 10:29 AM
> To: Yang, Bo
> Cc: James Bottomley; bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Daftardar, Jayant
> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>
> On 10/11/2010 05:37 PM, Yang, Bo wrote:
>   
>> Tomas,
>>
>> The reason the driver does flush_scheduled_work() is driver did cancel_delayed_work().  I am not sure driver need to call flush_scheduled_work() if the scheduled work already done, but I will test this changes and submit another patch as soon as it works fine.  
>>   
>>     
> Driver needs to call flush_scheduled_work() because you are adding in this patch a new workqueue:
> INIT_WORK(&instance->work_init, process_fw_state_change_wq); so calling the flush_scheduled_work()
> only if a certain condition is met is the problem. If the condition comes true for both workqueues
> then it would be fine, but I think that it isn't so.
> With this patch we could have a started workqueue (with a long 30 sec sleep) and when the module
> is removed then probably a memory corruption etc. 
>
>
>   
>> This change should not be the part of this patch, we would have the new patch submit after the verification. 
>>   
>>     
> I would prefer to have it in one patch, but if it is for you easier, then do it as you wish.
>
> Tomas
>
>
>   
>> Regards,
>>
>> Bo Yang   
>>  
>> -----Original Message-----
>> From: Tomas Henzl [mailto:thenzl@redhat.com] 
>> Sent: Monday, October 11, 2010 9:21 AM
>> To: Yang, Bo
>> Cc: James Bottomley; bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org
>> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>>
>> On 10/11/2010 02:55 PM, Yang, Bo wrote:
>>   
>>     
>>> Tomas,
>>>
>>>   
>>>     
>>>       
>>>> Another correction - flush_scheduled_work is already present in megass_detach_one
>>>> it only should be moved away from the if statement.
>>>>     
>>>>       
>>>>         
>>> The flush_scheduled_work is for schedule_delayed_work().  We need to flush it and remove.
>>>   
>>>     
>>>       
>> I'm not saying it should be removed, I think a move outside from the 'if' block
>> makes it work for ev->hotplug_work and for the newly added instance->work_init
>>
>>
>> diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
>> index 55951f4..7773707 100644
>> --- a/drivers/scsi/megaraid/megaraid_sas.c
>> +++ b/drivers/scsi/megaraid/megaraid_sas.c
>> @@ -4088,9 +4088,9 @@ static void __devexit megasas_detach_one(struct pci_dev *pdev)
>>  		struct megasas_aen_event *ev = instance->ev;
>>  		cancel_delayed_work(
>>  			(struct delayed_work *)&ev->hotplug_work);
>> -		flush_scheduled_work();
>> -		instance->ev = NULL;
>>  	}
>> +	flush_scheduled_work();
>> +	instance->ev = NULL;
>>  
>>  	tasklet_kill(&instance->isr_tasklet);
>>  
>> --
>>
>>   
>>     
>>> -----Original Message-----
>>> From: Tomas Henzl [mailto:thenzl@redhat.com] 
>>> Sent: Saturday, October 09, 2010 4:38 PM
>>> To: James Bottomley
>>> Cc: bo yang; linux-scsi@vger.kernel.org; akpm@osdl.org; linux-kernel@vger.kernel.org; Yang, Bo
>>> Subject: Re: PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive
>>>
>>> On 10/08/2010 09:28 PM, Tomas Henzl wrote:
>>>   
>>>     
>>>       
>>>> On 10/08/2010 06:39 PM, James Bottomley wrote:
>>>>   
>>>>     
>>>>       
>>>>         
>>>>> On Fri, 2010-10-08 at 17:51 +0200, Tomas Henzl wrote:
>>>>>   
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>>>> On 09/23/2010 04:36 AM, bo yang wrote:
>>>>>>     
>>>>>>       
>>>>>>         
>>>>>>           
>>>>>>             
>>>>>>> This patch is too big.  I am using attachment to submit.  Please
>>>>>>> use attached file to apply.  Also let me know if it can't be accepted.
>>>>>>>
>>>>>>> To add the Online controller reset support, driver need to do:
>>>>>>> a). reset the controller chips -- Xscale and Gen2 which will change
>>>>>>> the function calls and add the reset function related to this two
>>>>>>> chips.
>>>>>>> b). during the reset, driver will store the pending cmds which not
>>>>>>> returned by FW to driver's pending queue.  Driver will re-issue those
>>>>>>> pending cmds again to FW after the OCR finished.
>>>>>>> c). In driver's timeout routine, driver will report to OS as reset.
>>>>>>> Also driver's queue routine will block the cmds until the OCR
>>>>>>> finished.
>>>>>>> d). in Driver's ISR routine, if driver get the FW state as state
>>>>>>> change, FW in Failure status and FW support online controller
>>>>>>> reset (OCR), driver will start to do the controller reset.
>>>>>>> e). In driver's IOCTL routine, the application cmds will wait for the
>>>>>>> OCR to finish, then issue the cmds to FW.
>>>>>>>
>>>>>>> Signed-off-by Bo Yang<bo.yang@lsi.com>
>>>>>>>
>>>>>>> ---
>>>>>>>  drivers/scsi/megaraid/megaraid_sas.c |  756 ++++++++++++++++++++++++++++++++---
>>>>>>>  drivers/scsi/megaraid/megaraid_sas.h |   88 +++-
>>>>>>>  2 files changed, 787 insertions(+), 57 deletions(-)
>>>>>>>       
>>>>>>>         
>>>>>>>           
>>>>>>>             
>>>>>>>               
>>>>>> Hi Bo,
>>>>>> in the workqueue function you sleep for 30s,
>>>>>> it's scheduled here - schedule_work(&instance->work_init);
>>>>>>
>>>>>> +process_fw_state_change_wq(struct work_struct *work)
>>>>>> +{
>>>>>> ...
>>>>>> +		/*waitting for about 20 second before start the second init*/
>>>>>> +		for (wait = 0; wait < 30; wait++) {
>>>>>> +			msleep(1000);
>>>>>> +		}
>>>>>>     
>>>>>>       
>>>>>>         
>>>>>>           
>>>>>>             
>>>>> this lot should be ssleep(20) if you want a 20 sec sleep.
>>>>>   
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>>> please do that on every place where you use the 
>>>> "for (wait = 0; wait < n; wait++) msleep(1000);" construction
>>>>
>>>>   
>>>>     
>>>>       
>>>>         
>>>>>> - this is not a good practice to sleep for a so long time I think
>>>>>>     
>>>>>>       
>>>>>>         
>>>>>>           
>>>>>>             
>>>> this long sleep might might be ok, if the workqueue is used only rarely
>>>> is it so?
>>>>
>>>>   
>>>>     
>>>>       
>>>>         
>>>>>> - you should use in your exit function some synchronization 
>>>>>>   for example 'cancel_work_sync', without that if someone rmmods your 
>>>>>>   module, it could then lead to a memory corruption
>>>>>>     
>>>>>>       
>>>>>>         
>>>>>>           
>>>>>>             
>>>>> Actually flush_scheduled_work() should be fine ... it will force the
>>>>> module removal to wait for completion ... cancellation can be error
>>>>> prone, so just forcing the wait sounds easier.
>>>>>   
>>>>>     
>>>>>       
>>>>>         
>>>>>           
>>> Another correction - flush_scheduled_work is already present in megass_detach_one
>>> it only should be moved away from the if statement.
>>>
>>>
>>>   
>>>     
>>>       
>>>> someone told that cancel_work_sync is safer then flush_scheduled_work
>>>> but I'm not an expert, so ok 
>>>>
>>>> Tomas
>>>>
>>>>   
>>>>     
>>>>       
>>>>         
>>>>> James
>>>>>
>>>>>
>>>>>         
>>>>>           


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-10-12 15:33 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-23  2:36 PATCH 1/5] scsi: megaraid_sas - Add Online Controller Reset to MegaRAID SAS drive bo yang
2010-10-08 15:51 ` Tomas Henzl
2010-10-08 16:39   ` James Bottomley
2010-10-08 19:28     ` Tomas Henzl
2010-10-09 20:38       ` Tomas Henzl
2010-10-11 12:55         ` Yang, Bo
2010-10-11 13:20           ` Tomas Henzl
2010-10-11 15:37             ` Yang, Bo
2010-10-12 14:28               ` Tomas Henzl
2010-10-12 14:57                 ` Yang, Bo
2010-10-12 15:33                   ` Tomas Henzl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).