All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
@ 2009-03-10 15:17 Mike Miller (OS Dev)
  2009-03-10 15:46 ` Mike Miller (OS Dev)
  2009-03-10 15:50 ` James Bottomley
  0 siblings, 2 replies; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-10 15:17 UTC (permalink / raw)
  To: Andrew Morton, Jens Axboe
  Cc: James Bottomley, LKML-SCSI, coldwell, hare, mikem

Patch 1 of 1

This is yet another go at the patch to detect changes on the MSA2012.
I hope I've addressed all concerns. This patch rearranges some of the code
so we also have coverage in the sg and the ioctl paths as well as the main
data path.

The MSA2012 cannot inform the driver of configuration changes since all
management is out of band. This is a departure from any storage we have
supported in the past. We need some way to detect changes on the topology so
we implement this kernel thread. In some instances there's nothing we can do
from the driver (like LUN failure) so just print out a message. In the case
where logical volumes are added or deleted we call rebuild_lun_table to
refresh the driver's view of the world.

Changelog:
1. Do the completion(hba[i]->rescan_wait) before calling kthread_stop,
this resolves the issue of waiting for the timeout on rmmod
2. Added a new function called check_ioctl_unit_attention to cover UA's
from the ioctl patch
3. I preserved the wait_for_completion_timeout to avoid call traces
caused by /proc/sys/kernel/hung_task_timeout_secs expiring
4. Moved the call to check_for_unit_attention to evaluate_target_status
since it's already called from complete_command
5. Add retry_cmd as an argument to evaluate_target_status
6. Added *rescan_wait to the controller info struct
7. Changed wait_for_completion_timeout to wait_for_completion_interruptible.
This will allow kthread_should_stop to stop the thread immediately with no race.

Please consider this for inclusion.

Signed-off-by: Mike Miller <mike.miller@hp.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2cb67b..35d7ad7 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -51,6 +51,7 @@
 #include <scsi/scsi_ioctl.h>
 #include <linux/cdrom.h>
 #include <linux/scatterlist.h>
+#include <linux/kthread.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
 #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
@@ -186,6 +187,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
 			   __u8 page_code, int cmd_type);
 
 static void fail_all_cmds(unsigned long ctlr);
+static int scan_thread(ctlr_info_t *h);
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
 
 #ifdef CONFIG_PROC_FS
 static void cciss_procinit(int i);
@@ -735,6 +738,12 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
+{
+	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
+		(void)check_for_unit_attention(host, c);
+}
 /*
  * ioctl
  */
@@ -1029,6 +1038,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					 iocommand.buf_size,
 					 PCI_DMA_BIDIRECTIONAL);
 
+			check_ioctl_unit_attention(host, c);
+
 			/* Copy the error information out */
 			iocommand.error_info = *(c->err_info);
 			if (copy_to_user
@@ -1180,6 +1191,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					(dma_addr_t) temp64.val, buff_size[i],
 					PCI_DMA_BIDIRECTIONAL);
 			}
+			check_ioctl_unit_attention(host, c);
 			/* Copy the error information out */
 			ioc->error_info = *(c->err_info);
 			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
@@ -2585,12 +2597,14 @@ static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
 		((driver_byte & 0xff) << 24);
 }
 
-static inline int evaluate_target_status(CommandList_struct *cmd)
+static inline int evaluate_target_status(ctlr_info_t *h,
+			CommandList_struct *cmd, int *retry_cmd)
 {
 	unsigned char sense_key;
 	unsigned char status_byte, msg_byte, host_byte, driver_byte;
 	int error_value;
 
+	*retry_cmd = 0;
 	/* If we get in here, it means we got "target status", that is, scsi status */
 	status_byte = cmd->err_info->ScsiStatus;
 	driver_byte = DRIVER_OK;
@@ -2618,6 +2632,11 @@ static inline int evaluate_target_status(CommandList_struct *cmd)
 	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
 		error_value = 0;
 
+	if (check_for_unit_attention(h, cmd)) {
+		*retry_cmd = !blk_pc_request(cmd->rq);
+		return 0;
+	}
+
 	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
 		if (error_value != 0)
 			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
@@ -2657,7 +2676,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 
 	switch (cmd->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		rq->errors = evaluate_target_status(cmd);
+		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
 		if (blk_fs_request(cmd->rq)) {
@@ -3008,6 +3027,62 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int scan_thread(ctlr_info_t *h)
+{
+	int rc;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	h->rescan_wait = &wait;
+
+	while (!kthread_should_stop()) {
+		rc = wait_for_completion_interruptible(&wait);
+		if (!rc)
+			continue;
+		else
+			rebuild_lun_table(h, 0);
+	}
+	return 0;
+}
+
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
+{
+	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
+		return 0;
+
+	switch (c->err_info->SenseInfo[12]) {
+	case STATE_CHANGED:
+		printk(KERN_WARNING "cciss%d: a state change "
+			"detected, command retried\n", h->ctlr);
+		return 1;
+	break;
+	case LUN_FAILED:
+		printk(KERN_WARNING "cciss%d: LUN failure "
+			"detected, action required\n", h->ctlr);
+		return 1;
+	break;
+	case REPORT_LUNS_CHANGED:
+		printk(KERN_WARNING "cciss%d: report LUN data "
+			"changed\n", h->ctlr);
+		if (h->rescan_wait)
+			complete(h->rescan_wait);
+		return 1;
+	break;
+	case POWER_OR_RESET:
+		printk(KERN_WARNING "cciss%d: a power on "
+			"or device reset detected\n", h->ctlr);
+		return 1;
+	break;
+	case UNIT_ATTENTION_CLEARED:
+		printk(KERN_WARNING "cciss%d: unit attention "
+		    "cleared by another initiator\n", h->ctlr);
+		return 1;
+	break;
+	default:
+		printk(KERN_WARNING "cciss%d: unknown "
+			"unit attention detected\n", h->ctlr);
+				return 1;
+	}
+}
+
 /*
  *  We cannot read the structure directly, for portability we must use
  *   the io functions.
@@ -3600,6 +3675,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	int rc;
 	int dac, return_code;
 	InquiryData_struct *inq_buff = NULL;
+	char cciss_scan[14];
 
 	if (reset_devices) {
 		/* Reset the controller with a PCI power-cycle */
@@ -3751,6 +3827,12 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	hba[i]->busy_initializing = 0;
 
 	rebuild_lun_table(hba[i], 1);
+	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
+	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
+				cciss_scan);
+	if (IS_ERR(hba[i]->cciss_scan_thread))
+		return PTR_ERR(hba[i]->cciss_scan_thread);
+
 	return 1;
 
 clean4:
@@ -3826,6 +3908,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		printk(KERN_ERR "cciss: Unable to remove device \n");
 		return;
 	}
+
 	tmp_ptr = pci_get_drvdata(pdev);
 	i = tmp_ptr->ctlr;
 	if (hba[i] == NULL) {
@@ -3834,6 +3917,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		return;
 	}
 
+	kthread_stop(hba[i]->cciss_scan_thread);
+	complete(hba[i]->rescan_wait);
+
 	remove_proc_entry(hba[i]->devname, proc_cciss);
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 15e2b84..703e080 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -121,6 +121,8 @@ struct ctlr_info
 	struct sendcmd_reject_list scsi_rejects;
 #endif
 	unsigned char alive;
+	struct completion *rescan_wait;
+	struct task_struct *cciss_scan_thread;
 };
 
 /*  Defining the diffent access_menthods */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 24e22de..40b1b92 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -25,6 +25,29 @@
 #define CMD_TIMEOUT             0x000B
 #define CMD_UNABORTABLE		0x000C
 
+/* Unit Attentions ASC's as defined for the MSA2012sa */
+#define POWER_OR_RESET			0x29
+#define STATE_CHANGED			0x2a
+#define UNIT_ATTENTION_CLEARED		0x2f
+#define LUN_FAILED			0x3e
+#define REPORT_LUNS_CHANGED		0x3f
+
+/* Unit Attentions ASCQ's as defined for the MSA2012sa */
+
+	/* These ASCQ's defined for ASC = POWER_OR_RESET */
+#define POWER_ON_RESET			0x00
+#define POWER_ON_REBOOT			0x01
+#define SCSI_BUS_RESET			0x02
+#define MSA_TARGET_RESET		0x03
+#define CONTROLLER_FAILOVER		0x04
+#define TRANSCEIVER_SE			0x05
+#define TRANSCEIVER_LVD			0x06
+
+	/* These ASCQ's defined for ASC = STATE_CHANGED */
+#define RESERVATION_PREEMPTED		0x03
+#define ASYM_ACCESS_CHANGED		0x06
+#define LUN_CAPACITY_CHANGED		0x09
+
 //transfer direction
 #define XFER_NONE               0x00
 #define XFER_WRITE              0x01

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 15:17 [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012 Mike Miller (OS Dev)
@ 2009-03-10 15:46 ` Mike Miller (OS Dev)
  2009-03-10 15:50 ` James Bottomley
  1 sibling, 0 replies; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-10 15:46 UTC (permalink / raw)
  To: Andrew Morton, Jens Axboe; +Cc: James Bottomley, LKML-SCSI, coldwell, hare

On Tue, Mar 10, 2009 at 10:17:38AM -0500, Mike Miller (OS Dev) wrote:
> Patch 1 of 1
> 
> This is yet another go at the patch to detect changes on the MSA2012.
> I hope I've addressed all concerns. This patch rearranges some of the code
> so we also have coverage in the sg and the ioctl paths as well as the main
> data path.
> 
> The MSA2012 cannot inform the driver of configuration changes since all
> management is out of band. This is a departure from any storage we have
> supported in the past. We need some way to detect changes on the topology so
> we implement this kernel thread. In some instances there's nothing we can do
> from the driver (like LUN failure) so just print out a message. In the case
> where logical volumes are added or deleted we call rebuild_lun_table to
> refresh the driver's view of the world.
> 
> Changelog:
> 1. Do the completion(hba[i]->rescan_wait) before calling kthread_stop,
> this resolves the issue of waiting for the timeout on rmmod
> 2. Added a new function called check_ioctl_unit_attention to cover UA's
> from the ioctl patch
> 3. I preserved the wait_for_completion_timeout to avoid call traces
> caused by /proc/sys/kernel/hung_task_timeout_secs expiring
> 4. Moved the call to check_for_unit_attention to evaluate_target_status
> since it's already called from complete_command
> 5. Add retry_cmd as an argument to evaluate_target_status
> 6. Added *rescan_wait to the controller info struct
> 7. Changed wait_for_completion_timeout to wait_for_completion_interruptible.
> This will allow kthread_should_stop to stop the thread immediately with no race.
> 

Andrew,
Did you have a comment? I don't see any, unless I've missed them.

-- mikem


> Please consider this for inclusion.
> 
> Signed-off-by: Mike Miller <mike.miller@hp.com>
> 
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index d2cb67b..35d7ad7 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -51,6 +51,7 @@
>  #include <scsi/scsi_ioctl.h>
>  #include <linux/cdrom.h>
>  #include <linux/scatterlist.h>
> +#include <linux/kthread.h>
>  
>  #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
>  #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
> @@ -186,6 +187,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
>  			   __u8 page_code, int cmd_type);
>  
>  static void fail_all_cmds(unsigned long ctlr);
> +static int scan_thread(ctlr_info_t *h);
> +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
>  
>  #ifdef CONFIG_PROC_FS
>  static void cciss_procinit(int i);
> @@ -735,6 +738,12 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>  	return 0;
>  }
>  
> +static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
> +{
> +	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
> +			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
> +		(void)check_for_unit_attention(host, c);
> +}
>  /*
>   * ioctl
>   */
> @@ -1029,6 +1038,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
>  					 iocommand.buf_size,
>  					 PCI_DMA_BIDIRECTIONAL);
>  
> +			check_ioctl_unit_attention(host, c);
> +
>  			/* Copy the error information out */
>  			iocommand.error_info = *(c->err_info);
>  			if (copy_to_user
> @@ -1180,6 +1191,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
>  					(dma_addr_t) temp64.val, buff_size[i],
>  					PCI_DMA_BIDIRECTIONAL);
>  			}
> +			check_ioctl_unit_attention(host, c);
>  			/* Copy the error information out */
>  			ioc->error_info = *(c->err_info);
>  			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
> @@ -2585,12 +2597,14 @@ static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
>  		((driver_byte & 0xff) << 24);
>  }
>  
> -static inline int evaluate_target_status(CommandList_struct *cmd)
> +static inline int evaluate_target_status(ctlr_info_t *h,
> +			CommandList_struct *cmd, int *retry_cmd)
>  {
>  	unsigned char sense_key;
>  	unsigned char status_byte, msg_byte, host_byte, driver_byte;
>  	int error_value;
>  
> +	*retry_cmd = 0;
>  	/* If we get in here, it means we got "target status", that is, scsi status */
>  	status_byte = cmd->err_info->ScsiStatus;
>  	driver_byte = DRIVER_OK;
> @@ -2618,6 +2632,11 @@ static inline int evaluate_target_status(CommandList_struct *cmd)
>  	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
>  		error_value = 0;
>  
> +	if (check_for_unit_attention(h, cmd)) {
> +		*retry_cmd = !blk_pc_request(cmd->rq);
> +		return 0;
> +	}
> +
>  	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
>  		if (error_value != 0)
>  			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
> @@ -2657,7 +2676,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
>  
>  	switch (cmd->err_info->CommandStatus) {
>  	case CMD_TARGET_STATUS:
> -		rq->errors = evaluate_target_status(cmd);
> +		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
>  		break;
>  	case CMD_DATA_UNDERRUN:
>  		if (blk_fs_request(cmd->rq)) {
> @@ -3008,6 +3027,62 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
>  	return IRQ_HANDLED;
>  }
>  
> +static int scan_thread(ctlr_info_t *h)
> +{
> +	int rc;
> +	DECLARE_COMPLETION_ONSTACK(wait);
> +	h->rescan_wait = &wait;
> +
> +	while (!kthread_should_stop()) {
> +		rc = wait_for_completion_interruptible(&wait);
> +		if (!rc)
> +			continue;
> +		else
> +			rebuild_lun_table(h, 0);
> +	}
> +	return 0;
> +}
> +
> +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
> +{
> +	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
> +		return 0;
> +
> +	switch (c->err_info->SenseInfo[12]) {
> +	case STATE_CHANGED:
> +		printk(KERN_WARNING "cciss%d: a state change "
> +			"detected, command retried\n", h->ctlr);
> +		return 1;
> +	break;
> +	case LUN_FAILED:
> +		printk(KERN_WARNING "cciss%d: LUN failure "
> +			"detected, action required\n", h->ctlr);
> +		return 1;
> +	break;
> +	case REPORT_LUNS_CHANGED:
> +		printk(KERN_WARNING "cciss%d: report LUN data "
> +			"changed\n", h->ctlr);
> +		if (h->rescan_wait)
> +			complete(h->rescan_wait);
> +		return 1;
> +	break;
> +	case POWER_OR_RESET:
> +		printk(KERN_WARNING "cciss%d: a power on "
> +			"or device reset detected\n", h->ctlr);
> +		return 1;
> +	break;
> +	case UNIT_ATTENTION_CLEARED:
> +		printk(KERN_WARNING "cciss%d: unit attention "
> +		    "cleared by another initiator\n", h->ctlr);
> +		return 1;
> +	break;
> +	default:
> +		printk(KERN_WARNING "cciss%d: unknown "
> +			"unit attention detected\n", h->ctlr);
> +				return 1;
> +	}
> +}
> +
>  /*
>   *  We cannot read the structure directly, for portability we must use
>   *   the io functions.
> @@ -3600,6 +3675,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
>  	int rc;
>  	int dac, return_code;
>  	InquiryData_struct *inq_buff = NULL;
> +	char cciss_scan[14];
>  
>  	if (reset_devices) {
>  		/* Reset the controller with a PCI power-cycle */
> @@ -3751,6 +3827,12 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
>  	hba[i]->busy_initializing = 0;
>  
>  	rebuild_lun_table(hba[i], 1);
> +	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
> +	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
> +				cciss_scan);
> +	if (IS_ERR(hba[i]->cciss_scan_thread))
> +		return PTR_ERR(hba[i]->cciss_scan_thread);
> +
>  	return 1;
>  
>  clean4:
> @@ -3826,6 +3908,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
>  		printk(KERN_ERR "cciss: Unable to remove device \n");
>  		return;
>  	}
> +
>  	tmp_ptr = pci_get_drvdata(pdev);
>  	i = tmp_ptr->ctlr;
>  	if (hba[i] == NULL) {
> @@ -3834,6 +3917,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
>  		return;
>  	}
>  
> +	kthread_stop(hba[i]->cciss_scan_thread);
> +	complete(hba[i]->rescan_wait);
> +
>  	remove_proc_entry(hba[i]->devname, proc_cciss);
>  	unregister_blkdev(hba[i]->major, hba[i]->devname);
>  
> diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
> index 15e2b84..703e080 100644
> --- a/drivers/block/cciss.h
> +++ b/drivers/block/cciss.h
> @@ -121,6 +121,8 @@ struct ctlr_info
>  	struct sendcmd_reject_list scsi_rejects;
>  #endif
>  	unsigned char alive;
> +	struct completion *rescan_wait;
> +	struct task_struct *cciss_scan_thread;
>  };
>  
>  /*  Defining the diffent access_menthods */
> diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
> index 24e22de..40b1b92 100644
> --- a/drivers/block/cciss_cmd.h
> +++ b/drivers/block/cciss_cmd.h
> @@ -25,6 +25,29 @@
>  #define CMD_TIMEOUT             0x000B
>  #define CMD_UNABORTABLE		0x000C
>  
> +/* Unit Attentions ASC's as defined for the MSA2012sa */
> +#define POWER_OR_RESET			0x29
> +#define STATE_CHANGED			0x2a
> +#define UNIT_ATTENTION_CLEARED		0x2f
> +#define LUN_FAILED			0x3e
> +#define REPORT_LUNS_CHANGED		0x3f
> +
> +/* Unit Attentions ASCQ's as defined for the MSA2012sa */
> +
> +	/* These ASCQ's defined for ASC = POWER_OR_RESET */
> +#define POWER_ON_RESET			0x00
> +#define POWER_ON_REBOOT			0x01
> +#define SCSI_BUS_RESET			0x02
> +#define MSA_TARGET_RESET		0x03
> +#define CONTROLLER_FAILOVER		0x04
> +#define TRANSCEIVER_SE			0x05
> +#define TRANSCEIVER_LVD			0x06
> +
> +	/* These ASCQ's defined for ASC = STATE_CHANGED */
> +#define RESERVATION_PREEMPTED		0x03
> +#define ASYM_ACCESS_CHANGED		0x06
> +#define LUN_CAPACITY_CHANGED		0x09
> +
>  //transfer direction
>  #define XFER_NONE               0x00
>  #define XFER_WRITE              0x01

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 15:17 [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012 Mike Miller (OS Dev)
  2009-03-10 15:46 ` Mike Miller (OS Dev)
@ 2009-03-10 15:50 ` James Bottomley
  2009-03-10 16:34   ` Mike Miller (OS Dev)
  1 sibling, 1 reply; 16+ messages in thread
From: James Bottomley @ 2009-03-10 15:50 UTC (permalink / raw)
  To: Mike Miller (OS Dev); +Cc: Andrew Morton, Jens Axboe, LKML-SCSI, coldwell, hare

On Tue, 2009-03-10 at 10:17 -0500, Mike Miller (OS Dev) wrote:
> Patch 1 of 1
> 
> This is yet another go at the patch to detect changes on the MSA2012.
> I hope I've addressed all concerns. This patch rearranges some of the
> code
> so we also have coverage in the sg and the ioctl paths as well as the
> main
> data path.
> 
> The MSA2012 cannot inform the driver of configuration changes since
> all
> management is out of band. This is a departure from any storage we
> have
> supported in the past. We need some way to detect changes on the
> topology so
> we implement this kernel thread. In some instances there's nothing we
> can do
> from the driver (like LUN failure) so just print out a message. In the
> case
> where logical volumes are added or deleted we call rebuild_lun_table
> to
> refresh the driver's view of the world.
> 
> Changelog:
> 1. Do the completion(hba[i]->rescan_wait) before calling kthread_stop,
> this resolves the issue of waiting for the timeout on rmmod
> 2. Added a new function called check_ioctl_unit_attention to cover
> UA's
> from the ioctl patch
> 3. I preserved the wait_for_completion_timeout to avoid call traces
> caused by /proc/sys/kernel/hung_task_timeout_secs expiring
> 4. Moved the call to check_for_unit_attention to
> evaluate_target_status
> since it's already called from complete_command
> 5. Add retry_cmd as an argument to evaluate_target_status
> 6. Added *rescan_wait to the controller info struct
> 7. Changed wait_for_completion_timeout to
> wait_for_completion_interruptible.
> This will allow kthread_should_stop to stop the thread immediately
> with no race.
> 
> Please consider this for inclusion.
> 
> Signed-off-by: Mike Miller <mike.miller@hp.com>
[...]
> @@ -3834,6 +3917,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
>  		return;
>  	}
>  
> +	kthread_stop(hba[i]->cciss_scan_thread);
> +	complete(hba[i]->rescan_wait);

This complete is superfluous (the kthread_stop will wake the
wait_for_completion_interruptible() and if it doesn't, you won't get to
the complete() because kthread_stop() waits for the thread to die).

Otherwise, the rest of this looks fine to me.

James



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 15:50 ` James Bottomley
@ 2009-03-10 16:34   ` Mike Miller (OS Dev)
  2009-03-10 22:39     ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-10 16:34 UTC (permalink / raw)
  To: James Bottomley; +Cc: Andrew Morton, Jens Axboe, LKML-SCSI, coldwell, hare

On Tue, Mar 10, 2009 at 10:50:41AM -0500, James Bottomley wrote:
> On Tue, 2009-03-10 at 10:17 -0500, Mike Miller (OS Dev) wrote:
> > Patch 1 of 1
> > 
> > This is yet another go at the patch to detect changes on the MSA2012.
> > I hope I've addressed all concerns. This patch rearranges some of the
> > code
> > so we also have coverage in the sg and the ioctl paths as well as the
> > main
> > data path.
> > 
> > The MSA2012 cannot inform the driver of configuration changes since
> > all
> > management is out of band. This is a departure from any storage we
> > have
> > supported in the past. We need some way to detect changes on the
> > topology so
> > we implement this kernel thread. In some instances there's nothing we
> > can do
> > from the driver (like LUN failure) so just print out a message. In the
> > case
> > where logical volumes are added or deleted we call rebuild_lun_table
> > to
> > refresh the driver's view of the world.
> > 
> > Changelog:
> > 1. Do the completion(hba[i]->rescan_wait) before calling kthread_stop,
> > this resolves the issue of waiting for the timeout on rmmod
> > 2. Added a new function called check_ioctl_unit_attention to cover
> > UA's
> > from the ioctl patch
> > 3. I preserved the wait_for_completion_timeout to avoid call traces
> > caused by /proc/sys/kernel/hung_task_timeout_secs expiring
> > 4. Moved the call to check_for_unit_attention to
> > evaluate_target_status
> > since it's already called from complete_command
> > 5. Add retry_cmd as an argument to evaluate_target_status
> > 6. Added *rescan_wait to the controller info struct
> > 7. Changed wait_for_completion_timeout to
> > wait_for_completion_interruptible.
> > This will allow kthread_should_stop to stop the thread immediately
> > with no race.
> > 
> > Please consider this for inclusion.
> > 
> > Signed-off-by: Mike Miller <mike.miller@hp.com>
> [...]
> > @@ -3834,6 +3917,9 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
> >  		return;
> >  	}
> >  
> > +	kthread_stop(hba[i]->cciss_scan_thread);
> > +	complete(hba[i]->rescan_wait);
> 
> This complete is superfluous (the kthread_stop will wake the
> wait_for_completion_interruptible() and if it doesn't, you won't get to
> the complete() because kthread_stop() waits for the thread to die).
> 
> Otherwise, the rest of this looks fine to me.
> 
> James
> 
> 
Changelog:
removed the complete per James' comment

Signed-off-by: Mike Miller <mike.miller@hp.com>
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2cb67b..8e470e5 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -51,6 +51,7 @@
 #include <scsi/scsi_ioctl.h>
 #include <linux/cdrom.h>
 #include <linux/scatterlist.h>
+#include <linux/kthread.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
 #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
@@ -186,6 +187,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
 			   __u8 page_code, int cmd_type);
 
 static void fail_all_cmds(unsigned long ctlr);
+static int scan_thread(ctlr_info_t *h);
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
 
 #ifdef CONFIG_PROC_FS
 static void cciss_procinit(int i);
@@ -735,6 +738,12 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
+{
+	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
+		(void)check_for_unit_attention(host, c);
+}
 /*
  * ioctl
  */
@@ -1029,6 +1038,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					 iocommand.buf_size,
 					 PCI_DMA_BIDIRECTIONAL);
 
+			check_ioctl_unit_attention(host, c);
+
 			/* Copy the error information out */
 			iocommand.error_info = *(c->err_info);
 			if (copy_to_user
@@ -1180,6 +1191,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					(dma_addr_t) temp64.val, buff_size[i],
 					PCI_DMA_BIDIRECTIONAL);
 			}
+			check_ioctl_unit_attention(host, c);
 			/* Copy the error information out */
 			ioc->error_info = *(c->err_info);
 			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
@@ -2585,12 +2597,14 @@ static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
 		((driver_byte & 0xff) << 24);
 }
 
-static inline int evaluate_target_status(CommandList_struct *cmd)
+static inline int evaluate_target_status(ctlr_info_t *h,
+			CommandList_struct *cmd, int *retry_cmd)
 {
 	unsigned char sense_key;
 	unsigned char status_byte, msg_byte, host_byte, driver_byte;
 	int error_value;
 
+	*retry_cmd = 0;
 	/* If we get in here, it means we got "target status", that is, scsi status */
 	status_byte = cmd->err_info->ScsiStatus;
 	driver_byte = DRIVER_OK;
@@ -2618,6 +2632,11 @@ static inline int evaluate_target_status(CommandList_struct *cmd)
 	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
 		error_value = 0;
 
+	if (check_for_unit_attention(h, cmd)) {
+		*retry_cmd = !blk_pc_request(cmd->rq);
+		return 0;
+	}
+
 	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
 		if (error_value != 0)
 			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
@@ -2657,7 +2676,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 
 	switch (cmd->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		rq->errors = evaluate_target_status(cmd);
+		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
 		if (blk_fs_request(cmd->rq)) {
@@ -3008,6 +3027,62 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int scan_thread(ctlr_info_t *h)
+{
+	int rc;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	h->rescan_wait = &wait;
+
+	while (!kthread_should_stop()) {
+		rc = wait_for_completion_interruptible(&wait);
+		if (!rc)
+			continue;
+		else
+			rebuild_lun_table(h, 0);
+	}
+	return 0;
+}
+
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
+{
+	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
+		return 0;
+
+	switch (c->err_info->SenseInfo[12]) {
+	case STATE_CHANGED:
+		printk(KERN_WARNING "cciss%d: a state change "
+			"detected, command retried\n", h->ctlr);
+		return 1;
+	break;
+	case LUN_FAILED:
+		printk(KERN_WARNING "cciss%d: LUN failure "
+			"detected, action required\n", h->ctlr);
+		return 1;
+	break;
+	case REPORT_LUNS_CHANGED:
+		printk(KERN_WARNING "cciss%d: report LUN data "
+			"changed\n", h->ctlr);
+		if (h->rescan_wait)
+			complete(h->rescan_wait);
+		return 1;
+	break;
+	case POWER_OR_RESET:
+		printk(KERN_WARNING "cciss%d: a power on "
+			"or device reset detected\n", h->ctlr);
+		return 1;
+	break;
+	case UNIT_ATTENTION_CLEARED:
+		printk(KERN_WARNING "cciss%d: unit attention "
+		    "cleared by another initiator\n", h->ctlr);
+		return 1;
+	break;
+	default:
+		printk(KERN_WARNING "cciss%d: unknown "
+			"unit attention detected\n", h->ctlr);
+				return 1;
+	}
+}
+
 /*
  *  We cannot read the structure directly, for portability we must use
  *   the io functions.
@@ -3600,6 +3675,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	int rc;
 	int dac, return_code;
 	InquiryData_struct *inq_buff = NULL;
+	char cciss_scan[14];
 
 	if (reset_devices) {
 		/* Reset the controller with a PCI power-cycle */
@@ -3751,6 +3827,12 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	hba[i]->busy_initializing = 0;
 
 	rebuild_lun_table(hba[i], 1);
+	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
+	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
+				cciss_scan);
+	if (IS_ERR(hba[i]->cciss_scan_thread))
+		return PTR_ERR(hba[i]->cciss_scan_thread);
+
 	return 1;
 
 clean4:
@@ -3826,6 +3908,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		printk(KERN_ERR "cciss: Unable to remove device \n");
 		return;
 	}
+
 	tmp_ptr = pci_get_drvdata(pdev);
 	i = tmp_ptr->ctlr;
 	if (hba[i] == NULL) {
@@ -3834,6 +3917,8 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		return;
 	}
 
+	kthread_stop(hba[i]->cciss_scan_thread);
+
 	remove_proc_entry(hba[i]->devname, proc_cciss);
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 15e2b84..703e080 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -121,6 +121,8 @@ struct ctlr_info
 	struct sendcmd_reject_list scsi_rejects;
 #endif
 	unsigned char alive;
+	struct completion *rescan_wait;
+	struct task_struct *cciss_scan_thread;
 };
 
 /*  Defining the diffent access_menthods */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 24e22de..40b1b92 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -25,6 +25,29 @@
 #define CMD_TIMEOUT             0x000B
 #define CMD_UNABORTABLE		0x000C
 
+/* Unit Attentions ASC's as defined for the MSA2012sa */
+#define POWER_OR_RESET			0x29
+#define STATE_CHANGED			0x2a
+#define UNIT_ATTENTION_CLEARED		0x2f
+#define LUN_FAILED			0x3e
+#define REPORT_LUNS_CHANGED		0x3f
+
+/* Unit Attentions ASCQ's as defined for the MSA2012sa */
+
+	/* These ASCQ's defined for ASC = POWER_OR_RESET */
+#define POWER_ON_RESET			0x00
+#define POWER_ON_REBOOT			0x01
+#define SCSI_BUS_RESET			0x02
+#define MSA_TARGET_RESET		0x03
+#define CONTROLLER_FAILOVER		0x04
+#define TRANSCEIVER_SE			0x05
+#define TRANSCEIVER_LVD			0x06
+
+	/* These ASCQ's defined for ASC = STATE_CHANGED */
+#define RESERVATION_PREEMPTED		0x03
+#define ASYM_ACCESS_CHANGED		0x06
+#define LUN_CAPACITY_CHANGED		0x09
+
 //transfer direction
 #define XFER_NONE               0x00
 #define XFER_WRITE              0x01

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 16:34   ` Mike Miller (OS Dev)
@ 2009-03-10 22:39     ` Andrew Morton
  2009-03-11 14:31       ` Mike Miller (OS Dev)
                         ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Andrew Morton @ 2009-03-10 22:39 UTC (permalink / raw)
  To: Mike Miller (OS Dev)
  Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare


Confused.

> +static int scan_thread(ctlr_info_t *h)
> +{
> +	int rc;
> +	DECLARE_COMPLETION_ONSTACK(wait);
> +	h->rescan_wait = &wait;
> +
> +	while (!kthread_should_stop()) {
> +		rc = wait_for_completion_interruptible(&wait);
> +		if (!rc)
> +			continue;
> +		else
> +			rebuild_lun_table(h, 0);
> +	}
> +	return 0;
> +}

afacit this thread won't ever do anything,

> +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
> +{
> +	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
> +		return 0;
> +
> +	switch (c->err_info->SenseInfo[12]) {
> +	case STATE_CHANGED:
> +		printk(KERN_WARNING "cciss%d: a state change "
> +			"detected, command retried\n", h->ctlr);
> +		return 1;
> +	break;
> +	case LUN_FAILED:
> +		printk(KERN_WARNING "cciss%d: LUN failure "
> +			"detected, action required\n", h->ctlr);
> +		return 1;
> +	break;
> +	case REPORT_LUNS_CHANGED:
> +		printk(KERN_WARNING "cciss%d: report LUN data "
> +			"changed\n", h->ctlr);
> +		if (h->rescan_wait)
> +			complete(h->rescan_wait);

because this will cause the wait_for_completion_interruptible() to return 0.

> +		return 1;
> +	break;
> +	case POWER_OR_RESET:
> +		printk(KERN_WARNING "cciss%d: a power on "
> +			"or device reset detected\n", h->ctlr);
> +		return 1;
> +	break;
> +	case UNIT_ATTENTION_CLEARED:
> +		printk(KERN_WARNING "cciss%d: unit attention "
> +		    "cleared by another initiator\n", h->ctlr);
> +		return 1;
> +	break;
> +	default:
> +		printk(KERN_WARNING "cciss%d: unknown "
> +			"unit attention detected\n", h->ctlr);
> +				return 1;
> +	}
> +}


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 22:39     ` Andrew Morton
@ 2009-03-11 14:31       ` Mike Miller (OS Dev)
  2009-03-11 15:21       ` Mike Miller (OS Dev)
  2009-03-11 16:17       ` Mike Miller (OS Dev)
  2 siblings, 0 replies; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-11 14:31 UTC (permalink / raw)
  To: Andrew Morton; +Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Tue, Mar 10, 2009 at 03:39:33PM -0700, Andrew Morton wrote:
> 
> Confused.

I was too. I copied myself and that's what I saw. Sorry to bother everyone.

-- mikem

> 
> > +static int scan_thread(ctlr_info_t *h)
> > +{
> > +	int rc;
> > +	DECLARE_COMPLETION_ONSTACK(wait);
> > +	h->rescan_wait = &wait;
> > +
> > +	while (!kthread_should_stop()) {
> > +		rc = wait_for_completion_interruptible(&wait);
> > +		if (!rc)
> > +			continue;
> > +		else
> > +			rebuild_lun_table(h, 0);
> > +	}
> > +	return 0;
> > +}
> 
> afacit this thread won't ever do anything,
> 
> > +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
> > +{
> > +	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
> > +		return 0;
> > +
> > +	switch (c->err_info->SenseInfo[12]) {
> > +	case STATE_CHANGED:
> > +		printk(KERN_WARNING "cciss%d: a state change "
> > +			"detected, command retried\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case LUN_FAILED:
> > +		printk(KERN_WARNING "cciss%d: LUN failure "
> > +			"detected, action required\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case REPORT_LUNS_CHANGED:
> > +		printk(KERN_WARNING "cciss%d: report LUN data "
> > +			"changed\n", h->ctlr);
> > +		if (h->rescan_wait)
> > +			complete(h->rescan_wait);
> 
> because this will cause the wait_for_completion_interruptible() to return 0.
> 
> > +		return 1;
> > +	break;
> > +	case POWER_OR_RESET:
> > +		printk(KERN_WARNING "cciss%d: a power on "
> > +			"or device reset detected\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case UNIT_ATTENTION_CLEARED:
> > +		printk(KERN_WARNING "cciss%d: unit attention "
> > +		    "cleared by another initiator\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	default:
> > +		printk(KERN_WARNING "cciss%d: unknown "
> > +			"unit attention detected\n", h->ctlr);
> > +				return 1;
> > +	}
> > +}
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 22:39     ` Andrew Morton
  2009-03-11 14:31       ` Mike Miller (OS Dev)
@ 2009-03-11 15:21       ` Mike Miller (OS Dev)
  2009-03-11 16:17       ` Mike Miller (OS Dev)
  2 siblings, 0 replies; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-11 15:21 UTC (permalink / raw)
  To: Andrew Morton; +Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Tue, Mar 10, 2009 at 03:39:33PM -0700, Andrew Morton wrote:
> 
> Confused.
> 
> > +static int scan_thread(ctlr_info_t *h)
> > +{
> > +	int rc;
> > +	DECLARE_COMPLETION_ONSTACK(wait);
> > +	h->rescan_wait = &wait;
> > +
> > +	while (!kthread_should_stop()) {
> > +		rc = wait_for_completion_interruptible(&wait);
> > +		if (!rc)
> > +			continue;
> > +		else
> > +			rebuild_lun_table(h, 0);
> > +	}
> > +	return 0;
> > +}
> 
> afacit this thread won't ever do anything,
> 

I'm fixing and testing.

> > +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
> > +{
> > +	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
> > +		return 0;
> > +
> > +	switch (c->err_info->SenseInfo[12]) {
> > +	case STATE_CHANGED:
> > +		printk(KERN_WARNING "cciss%d: a state change "
> > +			"detected, command retried\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case LUN_FAILED:
> > +		printk(KERN_WARNING "cciss%d: LUN failure "
> > +			"detected, action required\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case REPORT_LUNS_CHANGED:
> > +		printk(KERN_WARNING "cciss%d: report LUN data "
> > +			"changed\n", h->ctlr);
> > +		if (h->rescan_wait)
> > +			complete(h->rescan_wait);
> 
> because this will cause the wait_for_completion_interruptible() to return 0.
> 
> > +		return 1;
> > +	break;
> > +	case POWER_OR_RESET:
> > +		printk(KERN_WARNING "cciss%d: a power on "
> > +			"or device reset detected\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case UNIT_ATTENTION_CLEARED:
> > +		printk(KERN_WARNING "cciss%d: unit attention "
> > +		    "cleared by another initiator\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	default:
> > +		printk(KERN_WARNING "cciss%d: unknown "
> > +			"unit attention detected\n", h->ctlr);
> > +				return 1;
> > +	}
> > +}
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-10 22:39     ` Andrew Morton
  2009-03-11 14:31       ` Mike Miller (OS Dev)
  2009-03-11 15:21       ` Mike Miller (OS Dev)
@ 2009-03-11 16:17       ` Mike Miller (OS Dev)
  2009-03-11 22:14         ` Andrew Morton
  2 siblings, 1 reply; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-11 16:17 UTC (permalink / raw)
  To: Andrew Morton; +Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Tue, Mar 10, 2009 at 03:39:33PM -0700, Andrew Morton wrote:
> 
> Confused.
> 
> > +static int scan_thread(ctlr_info_t *h)
> > +{
> > +	int rc;
> > +	DECLARE_COMPLETION_ONSTACK(wait);
> > +	h->rescan_wait = &wait;
> > +
> > +	while (!kthread_should_stop()) {
> > +		rc = wait_for_completion_interruptible(&wait);
> > +		if (!rc)
> > +			continue;
> > +		else
> > +			rebuild_lun_table(h, 0);
> > +	}
> > +	return 0;
> > +}
> 
> afacit this thread won't ever do anything,
> 
> > +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
> > +{
> > +	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
> > +		return 0;
> > +
> > +	switch (c->err_info->SenseInfo[12]) {
> > +	case STATE_CHANGED:
> > +		printk(KERN_WARNING "cciss%d: a state change "
> > +			"detected, command retried\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case LUN_FAILED:
> > +		printk(KERN_WARNING "cciss%d: LUN failure "
> > +			"detected, action required\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case REPORT_LUNS_CHANGED:
> > +		printk(KERN_WARNING "cciss%d: report LUN data "
> > +			"changed\n", h->ctlr);
> > +		if (h->rescan_wait)
> > +			complete(h->rescan_wait);
> 
> because this will cause the wait_for_completion_interruptible() to return 0.
> 
> > +		return 1;
> > +	break;
> > +	case POWER_OR_RESET:
> > +		printk(KERN_WARNING "cciss%d: a power on "
> > +			"or device reset detected\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	case UNIT_ATTENTION_CLEARED:
> > +		printk(KERN_WARNING "cciss%d: unit attention "
> > +		    "cleared by another initiator\n", h->ctlr);
> > +		return 1;
> > +	break;
> > +	default:
> > +		printk(KERN_WARNING "cciss%d: unknown "
> > +			"unit attention detected\n", h->ctlr);
> > +				return 1;
> > +	}
> > +}
> 
Changed the logic in the while loop. The thread works, whenever I change my
config the next IO to the storage returns the unit attention
LUN_DATA_CHANGED. The thread fires off and either adds or removes the
logical volume.

Signed-off-by: Mike Miller <mike.miller@hp.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2cb67b..819352c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -51,6 +51,7 @@
 #include <scsi/scsi_ioctl.h>
 #include <linux/cdrom.h>
 #include <linux/scatterlist.h>
+#include <linux/kthread.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
 #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
@@ -186,6 +187,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
 			   __u8 page_code, int cmd_type);
 
 static void fail_all_cmds(unsigned long ctlr);
+static int scan_thread(ctlr_info_t *h);
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
 
 #ifdef CONFIG_PROC_FS
 static void cciss_procinit(int i);
@@ -735,6 +738,12 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
+{
+	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
+		(void)check_for_unit_attention(host, c);
+}
 /*
  * ioctl
  */
@@ -1029,6 +1038,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					 iocommand.buf_size,
 					 PCI_DMA_BIDIRECTIONAL);
 
+			check_ioctl_unit_attention(host, c);
+
 			/* Copy the error information out */
 			iocommand.error_info = *(c->err_info);
 			if (copy_to_user
@@ -1180,6 +1191,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					(dma_addr_t) temp64.val, buff_size[i],
 					PCI_DMA_BIDIRECTIONAL);
 			}
+			check_ioctl_unit_attention(host, c);
 			/* Copy the error information out */
 			ioc->error_info = *(c->err_info);
 			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
@@ -2585,12 +2597,14 @@ static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
 		((driver_byte & 0xff) << 24);
 }
 
-static inline int evaluate_target_status(CommandList_struct *cmd)
+static inline int evaluate_target_status(ctlr_info_t *h,
+			CommandList_struct *cmd, int *retry_cmd)
 {
 	unsigned char sense_key;
 	unsigned char status_byte, msg_byte, host_byte, driver_byte;
 	int error_value;
 
+	*retry_cmd = 0;
 	/* If we get in here, it means we got "target status", that is, scsi status */
 	status_byte = cmd->err_info->ScsiStatus;
 	driver_byte = DRIVER_OK;
@@ -2618,6 +2632,11 @@ static inline int evaluate_target_status(CommandList_struct *cmd)
 	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
 		error_value = 0;
 
+	if (check_for_unit_attention(h, cmd)) {
+		*retry_cmd = !blk_pc_request(cmd->rq);
+		return 0;
+	}
+
 	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
 		if (error_value != 0)
 			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
@@ -2657,7 +2676,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 
 	switch (cmd->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		rq->errors = evaluate_target_status(cmd);
+		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
 		if (blk_fs_request(cmd->rq)) {
@@ -3008,6 +3027,60 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int scan_thread(ctlr_info_t *h)
+{
+	int rc;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	h->rescan_wait = &wait;
+
+	while (!kthread_should_stop()) {
+		rc = wait_for_completion_interruptible(&wait);
+		if (!rc)
+			rebuild_lun_table(h, 0);
+	}
+	return 0;
+}
+
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
+{
+	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
+		return 0;
+
+	switch (c->err_info->SenseInfo[12]) {
+	case STATE_CHANGED:
+		printk(KERN_WARNING "cciss%d: a state change "
+			"detected, command retried\n", h->ctlr);
+		return 1;
+	break;
+	case LUN_FAILED:
+		printk(KERN_WARNING "cciss%d: LUN failure "
+			"detected, action required\n", h->ctlr);
+		return 1;
+	break;
+	case REPORT_LUNS_CHANGED:
+		printk(KERN_WARNING "cciss%d: report LUN data "
+			"changed\n", h->ctlr);
+		if (h->rescan_wait)
+			complete(h->rescan_wait);
+		return 1;
+	break;
+	case POWER_OR_RESET:
+		printk(KERN_WARNING "cciss%d: a power on "
+			"or device reset detected\n", h->ctlr);
+		return 1;
+	break;
+	case UNIT_ATTENTION_CLEARED:
+		printk(KERN_WARNING "cciss%d: unit attention "
+		    "cleared by another initiator\n", h->ctlr);
+		return 1;
+	break;
+	default:
+		printk(KERN_WARNING "cciss%d: unknown "
+			"unit attention detected\n", h->ctlr);
+				return 1;
+	}
+}
+
 /*
  *  We cannot read the structure directly, for portability we must use
  *   the io functions.
@@ -3600,6 +3673,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	int rc;
 	int dac, return_code;
 	InquiryData_struct *inq_buff = NULL;
+	char cciss_scan[14];
 
 	if (reset_devices) {
 		/* Reset the controller with a PCI power-cycle */
@@ -3751,6 +3825,12 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	hba[i]->busy_initializing = 0;
 
 	rebuild_lun_table(hba[i], 1);
+	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
+	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
+				cciss_scan);
+	if (IS_ERR(hba[i]->cciss_scan_thread))
+		return PTR_ERR(hba[i]->cciss_scan_thread);
+
 	return 1;
 
 clean4:
@@ -3826,6 +3906,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		printk(KERN_ERR "cciss: Unable to remove device \n");
 		return;
 	}
+
 	tmp_ptr = pci_get_drvdata(pdev);
 	i = tmp_ptr->ctlr;
 	if (hba[i] == NULL) {
@@ -3834,6 +3915,8 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		return;
 	}
 
+	kthread_stop(hba[i]->cciss_scan_thread);
+
 	remove_proc_entry(hba[i]->devname, proc_cciss);
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 15e2b84..703e080 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -121,6 +121,8 @@ struct ctlr_info
 	struct sendcmd_reject_list scsi_rejects;
 #endif
 	unsigned char alive;
+	struct completion *rescan_wait;
+	struct task_struct *cciss_scan_thread;
 };
 
 /*  Defining the diffent access_menthods */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 24e22de..40b1b92 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -25,6 +25,29 @@
 #define CMD_TIMEOUT             0x000B
 #define CMD_UNABORTABLE		0x000C
 
+/* Unit Attentions ASC's as defined for the MSA2012sa */
+#define POWER_OR_RESET			0x29
+#define STATE_CHANGED			0x2a
+#define UNIT_ATTENTION_CLEARED		0x2f
+#define LUN_FAILED			0x3e
+#define REPORT_LUNS_CHANGED		0x3f
+
+/* Unit Attentions ASCQ's as defined for the MSA2012sa */
+
+	/* These ASCQ's defined for ASC = POWER_OR_RESET */
+#define POWER_ON_RESET			0x00
+#define POWER_ON_REBOOT			0x01
+#define SCSI_BUS_RESET			0x02
+#define MSA_TARGET_RESET		0x03
+#define CONTROLLER_FAILOVER		0x04
+#define TRANSCEIVER_SE			0x05
+#define TRANSCEIVER_LVD			0x06
+
+	/* These ASCQ's defined for ASC = STATE_CHANGED */
+#define RESERVATION_PREEMPTED		0x03
+#define ASYM_ACCESS_CHANGED		0x06
+#define LUN_CAPACITY_CHANGED		0x09
+
 //transfer direction
 #define XFER_NONE               0x00
 #define XFER_WRITE              0x01

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-11 16:17       ` Mike Miller (OS Dev)
@ 2009-03-11 22:14         ` Andrew Morton
  2009-03-12 16:26           ` Mike Miller (OS Dev)
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2009-03-11 22:14 UTC (permalink / raw)
  To: Mike Miller (OS Dev)
  Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Wed, 11 Mar 2009 11:17:33 -0500
"Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
>

We haven't finished with you yet ;)

> Changed the logic in the while loop. The thread works, whenever I change my
> config the next IO to the storage returns the unit attention
> LUN_DATA_CHANGED. The thread fires off and either adds or removes the
> logical volume.
> 
> Signed-off-by: Mike Miller <mike.miller@hp.com>

That's not a suitable changelog for the patch.  Please maintain
changelogs alongside the patch, update them (if needed) with each
iteration and resend the full changelog each time.

> +static int scan_thread(ctlr_info_t *h)
> +{
> +	int rc;
> +	DECLARE_COMPLETION_ONSTACK(wait);
> +	h->rescan_wait = &wait;
> +
> +	while (!kthread_should_stop()) {
> +		rc = wait_for_completion_interruptible(&wait);
> +		if (!rc)
> +			rebuild_lun_table(h, 0);
> +	}
> +	return 0;
> +}

This will run rebuild_lun_table() in the case where the thread is being
asked to terminate.  Seems a bit peculiar, although hopefully harmless.

> +	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
> +	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
> +				cciss_scan);

kthread_run() takes printf-style arguments, so this can be

	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
					 "cciss_scan%02d", i);

and cciss_scan[] is removed.


And that void* cast of scan_thread is a bit grubby.  It would be
better to be more honest to the type system and do

static int scan_thread(void *data)
{
	ctlr_info_t *h = data;
	...
}



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-11 22:14         ` Andrew Morton
@ 2009-03-12 16:26           ` Mike Miller (OS Dev)
  2009-06-15 20:39             ` Mike Miller (OS Dev)
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-03-12 16:26 UTC (permalink / raw)
  To: Andrew Morton
  Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare, mikem

On Wed, Mar 11, 2009 at 03:14:36PM -0700, Andrew Morton wrote:
> On Wed, 11 Mar 2009 11:17:33 -0500
> "Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
> >
> 
> We haven't finished with you yet ;)
> 
> > Changed the logic in the while loop. The thread works, whenever I change my
> > config the next IO to the storage returns the unit attention
> > LUN_DATA_CHANGED. The thread fires off and either adds or removes the
> > logical volume.
> > 
> > Signed-off-by: Mike Miller <mike.miller@hp.com>
> 
> That's not a suitable changelog for the patch.  Please maintain
> changelogs alongside the patch, update them (if needed) with each
> iteration and resend the full changelog each time.
> 
> > +static int scan_thread(ctlr_info_t *h)
> > +{
> > +	int rc;
> > +	DECLARE_COMPLETION_ONSTACK(wait);
> > +	h->rescan_wait = &wait;
> > +
> > +	while (!kthread_should_stop()) {
> > +		rc = wait_for_completion_interruptible(&wait);
> > +		if (!rc)
> > +			rebuild_lun_table(h, 0);
> > +	}
> > +	return 0;
> > +}
> 
> This will run rebuild_lun_table() in the case where the thread is being
> asked to terminate.  Seems a bit peculiar, although hopefully harmless.
> 
> > +	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
> > +	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
> > +				cciss_scan);
> 
> kthread_run() takes printf-style arguments, so this can be
> 
> 	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
> 					 "cciss_scan%02d", i);
> 
> and cciss_scan[] is removed.
> 
> 
> And that void* cast of scan_thread is a bit grubby.  It would be
> better to be more honest to the type system and do
> 
> static int scan_thread(void *data)
> {
> 	ctlr_info_t *h = data;
> 	...
> }
> 
> 
OK Andrew,
I think I've covered everything. But if I haven't, I'm confident you will
let me know. ;)

This is yet another go at the patch to detect changes on the MSA2012.
I hope I've addressed all concerns. This patch rearranges some of the code
so we also have coverage in the sg and the ioctl paths as well as the main
data path.

The MSA2012 cannot inform the driver of configuration changes since all
management is out of band. This is a departure from any storage we have
supported in the past. We need some way to detect changes on the topology so
we implement this kernel thread. In some instances there's nothing we can do
from the driver (like LUN failure) so just print out a message. In the case
where logical volumes are added or deleted we call rebuild_lun_table to
refresh the driver's view of the world.

Changelog:
1. Do the completion(hba[i]->rescan_wait) before calling kthread_stop,
this resolves the issue of waiting for the timeout on rmmod
2. Added a new function called check_ioctl_unit_attention to cover UA's
from the ioctl patch
3. I preserved the wait_for_completion_timeout to avoid call traces
caused by /proc/sys/kernel/hung_task_timeout_secs expiring
4. Moved the call to check_for_unit_attention to evaluate_target_status
since it's already called from complete_command
5. Add retry_cmd as an argument to evaluate_target_status
6. Added *rescan_wait to the controller info struct
7. Changed logic in while loop in scanthread() to use
wait_for_completion_interruptible
8. Changed the while loop back to an infinite for loop, added a test for
kthread_should_stop to exit
9. Changed argument to scan_thread to void *data
10. Eliminated cciss_scan array by using printk format in call to
kthread_run

Please consider this for inclusion.

Signed-off-by: Mike Miller <mike.miller@hp.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2cb67b..b2e05f6 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -51,6 +51,7 @@
 #include <scsi/scsi_ioctl.h>
 #include <linux/cdrom.h>
 #include <linux/scatterlist.h>
+#include <linux/kthread.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
 #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
@@ -186,6 +187,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
 			   __u8 page_code, int cmd_type);
 
 static void fail_all_cmds(unsigned long ctlr);
+static int scan_thread(void *data);
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
 
 #ifdef CONFIG_PROC_FS
 static void cciss_procinit(int i);
@@ -735,6 +738,12 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return 0;
 }
 
+static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
+{
+	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
+			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
+		(void)check_for_unit_attention(host, c);
+}
 /*
  * ioctl
  */
@@ -1029,6 +1038,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					 iocommand.buf_size,
 					 PCI_DMA_BIDIRECTIONAL);
 
+			check_ioctl_unit_attention(host, c);
+
 			/* Copy the error information out */
 			iocommand.error_info = *(c->err_info);
 			if (copy_to_user
@@ -1180,6 +1191,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 					(dma_addr_t) temp64.val, buff_size[i],
 					PCI_DMA_BIDIRECTIONAL);
 			}
+			check_ioctl_unit_attention(host, c);
 			/* Copy the error information out */
 			ioc->error_info = *(c->err_info);
 			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
@@ -2585,12 +2597,14 @@ static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
 		((driver_byte & 0xff) << 24);
 }
 
-static inline int evaluate_target_status(CommandList_struct *cmd)
+static inline int evaluate_target_status(ctlr_info_t *h,
+			CommandList_struct *cmd, int *retry_cmd)
 {
 	unsigned char sense_key;
 	unsigned char status_byte, msg_byte, host_byte, driver_byte;
 	int error_value;
 
+	*retry_cmd = 0;
 	/* If we get in here, it means we got "target status", that is, scsi status */
 	status_byte = cmd->err_info->ScsiStatus;
 	driver_byte = DRIVER_OK;
@@ -2618,6 +2632,11 @@ static inline int evaluate_target_status(CommandList_struct *cmd)
 	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
 		error_value = 0;
 
+	if (check_for_unit_attention(h, cmd)) {
+		*retry_cmd = !blk_pc_request(cmd->rq);
+		return 0;
+	}
+
 	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
 		if (error_value != 0)
 			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
@@ -2657,7 +2676,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
 
 	switch (cmd->err_info->CommandStatus) {
 	case CMD_TARGET_STATUS:
-		rq->errors = evaluate_target_status(cmd);
+		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
 		if (blk_fs_request(cmd->rq)) {
@@ -3008,6 +3027,63 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int scan_thread(void *data)
+{
+	ctlr_info_t *h = data;
+	int rc;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	h->rescan_wait = &wait;
+
+	for (;;) {
+		rc = wait_for_completion_interruptible(&wait);
+		if (kthread_should_stop())
+			break;
+		if (!rc)
+			rebuild_lun_table(h, 0);
+	}
+	return 0;
+}
+
+static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
+{
+	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
+		return 0;
+
+	switch (c->err_info->SenseInfo[12]) {
+	case STATE_CHANGED:
+		printk(KERN_WARNING "cciss%d: a state change "
+			"detected, command retried\n", h->ctlr);
+		return 1;
+	break;
+	case LUN_FAILED:
+		printk(KERN_WARNING "cciss%d: LUN failure "
+			"detected, action required\n", h->ctlr);
+		return 1;
+	break;
+	case REPORT_LUNS_CHANGED:
+		printk(KERN_WARNING "cciss%d: report LUN data "
+			"changed\n", h->ctlr);
+		if (h->rescan_wait)
+			complete(h->rescan_wait);
+		return 1;
+	break;
+	case POWER_OR_RESET:
+		printk(KERN_WARNING "cciss%d: a power on "
+			"or device reset detected\n", h->ctlr);
+		return 1;
+	break;
+	case UNIT_ATTENTION_CLEARED:
+		printk(KERN_WARNING "cciss%d: unit attention "
+		    "cleared by another initiator\n", h->ctlr);
+		return 1;
+	break;
+	default:
+		printk(KERN_WARNING "cciss%d: unknown "
+			"unit attention detected\n", h->ctlr);
+				return 1;
+	}
+}
+
 /*
  *  We cannot read the structure directly, for portability we must use
  *   the io functions.
@@ -3751,6 +3827,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	hba[i]->busy_initializing = 0;
 
 	rebuild_lun_table(hba[i], 1);
+	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
+				"cciss_scan%02d", i);
+	if (IS_ERR(hba[i]->cciss_scan_thread))
+		return PTR_ERR(hba[i]->cciss_scan_thread);
+
 	return 1;
 
 clean4:
@@ -3826,6 +3907,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		printk(KERN_ERR "cciss: Unable to remove device \n");
 		return;
 	}
+
 	tmp_ptr = pci_get_drvdata(pdev);
 	i = tmp_ptr->ctlr;
 	if (hba[i] == NULL) {
@@ -3834,6 +3916,8 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		return;
 	}
 
+	kthread_stop(hba[i]->cciss_scan_thread);
+
 	remove_proc_entry(hba[i]->devname, proc_cciss);
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 15e2b84..703e080 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -121,6 +121,8 @@ struct ctlr_info
 	struct sendcmd_reject_list scsi_rejects;
 #endif
 	unsigned char alive;
+	struct completion *rescan_wait;
+	struct task_struct *cciss_scan_thread;
 };
 
 /*  Defining the diffent access_menthods */
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 24e22de..40b1b92 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -25,6 +25,29 @@
 #define CMD_TIMEOUT             0x000B
 #define CMD_UNABORTABLE		0x000C
 
+/* Unit Attentions ASC's as defined for the MSA2012sa */
+#define POWER_OR_RESET			0x29
+#define STATE_CHANGED			0x2a
+#define UNIT_ATTENTION_CLEARED		0x2f
+#define LUN_FAILED			0x3e
+#define REPORT_LUNS_CHANGED		0x3f
+
+/* Unit Attentions ASCQ's as defined for the MSA2012sa */
+
+	/* These ASCQ's defined for ASC = POWER_OR_RESET */
+#define POWER_ON_RESET			0x00
+#define POWER_ON_REBOOT			0x01
+#define SCSI_BUS_RESET			0x02
+#define MSA_TARGET_RESET		0x03
+#define CONTROLLER_FAILOVER		0x04
+#define TRANSCEIVER_SE			0x05
+#define TRANSCEIVER_LVD			0x06
+
+	/* These ASCQ's defined for ASC = STATE_CHANGED */
+#define RESERVATION_PREEMPTED		0x03
+#define ASYM_ACCESS_CHANGED		0x06
+#define LUN_CAPACITY_CHANGED		0x09
+
 //transfer direction
 #define XFER_NONE               0x00
 #define XFER_WRITE              0x01


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-03-12 16:26           ` Mike Miller (OS Dev)
@ 2009-06-15 20:39             ` Mike Miller (OS Dev)
  2009-06-15 21:25               ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-06-15 20:39 UTC (permalink / raw)
  To: Andrew Morton; +Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Thu, Mar 12, 2009 at 11:26:53AM -0500, Mike Miller (OS Dev) wrote:

I'm getting reports that the driver hangs in the scan thread during rmmod. I
call kthread_stop in cciss_remove_one. Do I also neesd to call complete()?
During my testing everything seemed to working OK.

-- mikem

> On Wed, Mar 11, 2009 at 03:14:36PM -0700, Andrew Morton wrote:
> > On Wed, 11 Mar 2009 11:17:33 -0500
> > "Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
> > >
> > 
> > We haven't finished with you yet ;)
> > 
> > > Changed the logic in the while loop. The thread works, whenever I change my
> > > config the next IO to the storage returns the unit attention
> > > LUN_DATA_CHANGED. The thread fires off and either adds or removes the
> > > logical volume.
> > > 
> > > Signed-off-by: Mike Miller <mike.miller@hp.com>
> > 
> > That's not a suitable changelog for the patch.  Please maintain
> > changelogs alongside the patch, update them (if needed) with each
> > iteration and resend the full changelog each time.
> > 
> > > +static int scan_thread(ctlr_info_t *h)
> > > +{
> > > +	int rc;
> > > +	DECLARE_COMPLETION_ONSTACK(wait);
> > > +	h->rescan_wait = &wait;
> > > +
> > > +	while (!kthread_should_stop()) {
> > > +		rc = wait_for_completion_interruptible(&wait);
> > > +		if (!rc)
> > > +			rebuild_lun_table(h, 0);
> > > +	}
> > > +	return 0;
> > > +}
> > 
> > This will run rebuild_lun_table() in the case where the thread is being
> > asked to terminate.  Seems a bit peculiar, although hopefully harmless.
> > 
> > > +	snprintf(cciss_scan, 14, "cciss_scan%02d", i);
> > > +	hba[i]->cciss_scan_thread = kthread_run((void *)scan_thread, hba[i],
> > > +				cciss_scan);
> > 
> > kthread_run() takes printf-style arguments, so this can be
> > 
> > 	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
> > 					 "cciss_scan%02d", i);
> > 
> > and cciss_scan[] is removed.
> > 
> > 
> > And that void* cast of scan_thread is a bit grubby.  It would be
> > better to be more honest to the type system and do
> > 
> > static int scan_thread(void *data)
> > {
> > 	ctlr_info_t *h = data;
> > 	...
> > }
> > 
> > 
> OK Andrew,
> I think I've covered everything. But if I haven't, I'm confident you will
> let me know. ;)
> 
> This is yet another go at the patch to detect changes on the MSA2012.
> I hope I've addressed all concerns. This patch rearranges some of the code
> so we also have coverage in the sg and the ioctl paths as well as the main
> data path.
> 
> The MSA2012 cannot inform the driver of configuration changes since all
> management is out of band. This is a departure from any storage we have
> supported in the past. We need some way to detect changes on the topology so
> we implement this kernel thread. In some instances there's nothing we can do
> from the driver (like LUN failure) so just print out a message. In the case
> where logical volumes are added or deleted we call rebuild_lun_table to
> refresh the driver's view of the world.
> 
> Changelog:
> 1. Do the completion(hba[i]->rescan_wait) before calling kthread_stop,
> this resolves the issue of waiting for the timeout on rmmod
> 2. Added a new function called check_ioctl_unit_attention to cover UA's
> from the ioctl patch
> 3. I preserved the wait_for_completion_timeout to avoid call traces
> caused by /proc/sys/kernel/hung_task_timeout_secs expiring
> 4. Moved the call to check_for_unit_attention to evaluate_target_status
> since it's already called from complete_command
> 5. Add retry_cmd as an argument to evaluate_target_status
> 6. Added *rescan_wait to the controller info struct
> 7. Changed logic in while loop in scanthread() to use
> wait_for_completion_interruptible
> 8. Changed the while loop back to an infinite for loop, added a test for
> kthread_should_stop to exit
> 9. Changed argument to scan_thread to void *data
> 10. Eliminated cciss_scan array by using printk format in call to
> kthread_run
> 
> Please consider this for inclusion.
> 
> Signed-off-by: Mike Miller <mike.miller@hp.com>
> 
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index d2cb67b..b2e05f6 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -51,6 +51,7 @@
>  #include <scsi/scsi_ioctl.h>
>  #include <linux/cdrom.h>
>  #include <linux/scatterlist.h>
> +#include <linux/kthread.h>
>  
>  #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
>  #define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
> @@ -186,6 +187,8 @@ static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
>  			   __u8 page_code, int cmd_type);
>  
>  static void fail_all_cmds(unsigned long ctlr);
> +static int scan_thread(void *data);
> +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
>  
>  #ifdef CONFIG_PROC_FS
>  static void cciss_procinit(int i);
> @@ -735,6 +738,12 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>  	return 0;
>  }
>  
> +static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
> +{
> +	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
> +			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
> +		(void)check_for_unit_attention(host, c);
> +}
>  /*
>   * ioctl
>   */
> @@ -1029,6 +1038,8 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
>  					 iocommand.buf_size,
>  					 PCI_DMA_BIDIRECTIONAL);
>  
> +			check_ioctl_unit_attention(host, c);
> +
>  			/* Copy the error information out */
>  			iocommand.error_info = *(c->err_info);
>  			if (copy_to_user
> @@ -1180,6 +1191,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
>  					(dma_addr_t) temp64.val, buff_size[i],
>  					PCI_DMA_BIDIRECTIONAL);
>  			}
> +			check_ioctl_unit_attention(host, c);
>  			/* Copy the error information out */
>  			ioc->error_info = *(c->err_info);
>  			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
> @@ -2585,12 +2597,14 @@ static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
>  		((driver_byte & 0xff) << 24);
>  }
>  
> -static inline int evaluate_target_status(CommandList_struct *cmd)
> +static inline int evaluate_target_status(ctlr_info_t *h,
> +			CommandList_struct *cmd, int *retry_cmd)
>  {
>  	unsigned char sense_key;
>  	unsigned char status_byte, msg_byte, host_byte, driver_byte;
>  	int error_value;
>  
> +	*retry_cmd = 0;
>  	/* If we get in here, it means we got "target status", that is, scsi status */
>  	status_byte = cmd->err_info->ScsiStatus;
>  	driver_byte = DRIVER_OK;
> @@ -2618,6 +2632,11 @@ static inline int evaluate_target_status(CommandList_struct *cmd)
>  	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
>  		error_value = 0;
>  
> +	if (check_for_unit_attention(h, cmd)) {
> +		*retry_cmd = !blk_pc_request(cmd->rq);
> +		return 0;
> +	}
> +
>  	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
>  		if (error_value != 0)
>  			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
> @@ -2657,7 +2676,7 @@ static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
>  
>  	switch (cmd->err_info->CommandStatus) {
>  	case CMD_TARGET_STATUS:
> -		rq->errors = evaluate_target_status(cmd);
> +		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
>  		break;
>  	case CMD_DATA_UNDERRUN:
>  		if (blk_fs_request(cmd->rq)) {
> @@ -3008,6 +3027,63 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
>  	return IRQ_HANDLED;
>  }
>  
> +static int scan_thread(void *data)
> +{
> +	ctlr_info_t *h = data;
> +	int rc;
> +	DECLARE_COMPLETION_ONSTACK(wait);
> +	h->rescan_wait = &wait;
> +
> +	for (;;) {
> +		rc = wait_for_completion_interruptible(&wait);
> +		if (kthread_should_stop())
> +			break;
> +		if (!rc)
> +			rebuild_lun_table(h, 0);
> +	}
> +	return 0;
> +}
> +
> +static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
> +{
> +	if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
> +		return 0;
> +
> +	switch (c->err_info->SenseInfo[12]) {
> +	case STATE_CHANGED:
> +		printk(KERN_WARNING "cciss%d: a state change "
> +			"detected, command retried\n", h->ctlr);
> +		return 1;
> +	break;
> +	case LUN_FAILED:
> +		printk(KERN_WARNING "cciss%d: LUN failure "
> +			"detected, action required\n", h->ctlr);
> +		return 1;
> +	break;
> +	case REPORT_LUNS_CHANGED:
> +		printk(KERN_WARNING "cciss%d: report LUN data "
> +			"changed\n", h->ctlr);
> +		if (h->rescan_wait)
> +			complete(h->rescan_wait);
> +		return 1;
> +	break;
> +	case POWER_OR_RESET:
> +		printk(KERN_WARNING "cciss%d: a power on "
> +			"or device reset detected\n", h->ctlr);
> +		return 1;
> +	break;
> +	case UNIT_ATTENTION_CLEARED:
> +		printk(KERN_WARNING "cciss%d: unit attention "
> +		    "cleared by another initiator\n", h->ctlr);
> +		return 1;
> +	break;
> +	default:
> +		printk(KERN_WARNING "cciss%d: unknown "
> +			"unit attention detected\n", h->ctlr);
> +				return 1;
> +	}
> +}
> +
>  /*
>   *  We cannot read the structure directly, for portability we must use
>   *   the io functions.
> @@ -3751,6 +3827,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
>  	hba[i]->busy_initializing = 0;
>  
>  	rebuild_lun_table(hba[i], 1);
> +	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
> +				"cciss_scan%02d", i);
> +	if (IS_ERR(hba[i]->cciss_scan_thread))
> +		return PTR_ERR(hba[i]->cciss_scan_thread);
> +
>  	return 1;
>  
>  clean4:
> @@ -3826,6 +3907,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
>  		printk(KERN_ERR "cciss: Unable to remove device \n");
>  		return;
>  	}
> +
>  	tmp_ptr = pci_get_drvdata(pdev);
>  	i = tmp_ptr->ctlr;
>  	if (hba[i] == NULL) {
> @@ -3834,6 +3916,8 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
>  		return;
>  	}
>  
> +	kthread_stop(hba[i]->cciss_scan_thread);
> +
>  	remove_proc_entry(hba[i]->devname, proc_cciss);
>  	unregister_blkdev(hba[i]->major, hba[i]->devname);
>  
> diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
> index 15e2b84..703e080 100644
> --- a/drivers/block/cciss.h
> +++ b/drivers/block/cciss.h
> @@ -121,6 +121,8 @@ struct ctlr_info
>  	struct sendcmd_reject_list scsi_rejects;
>  #endif
>  	unsigned char alive;
> +	struct completion *rescan_wait;
> +	struct task_struct *cciss_scan_thread;
>  };
>  
>  /*  Defining the diffent access_menthods */
> diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
> index 24e22de..40b1b92 100644
> --- a/drivers/block/cciss_cmd.h
> +++ b/drivers/block/cciss_cmd.h
> @@ -25,6 +25,29 @@
>  #define CMD_TIMEOUT             0x000B
>  #define CMD_UNABORTABLE		0x000C
>  
> +/* Unit Attentions ASC's as defined for the MSA2012sa */
> +#define POWER_OR_RESET			0x29
> +#define STATE_CHANGED			0x2a
> +#define UNIT_ATTENTION_CLEARED		0x2f
> +#define LUN_FAILED			0x3e
> +#define REPORT_LUNS_CHANGED		0x3f
> +
> +/* Unit Attentions ASCQ's as defined for the MSA2012sa */
> +
> +	/* These ASCQ's defined for ASC = POWER_OR_RESET */
> +#define POWER_ON_RESET			0x00
> +#define POWER_ON_REBOOT			0x01
> +#define SCSI_BUS_RESET			0x02
> +#define MSA_TARGET_RESET		0x03
> +#define CONTROLLER_FAILOVER		0x04
> +#define TRANSCEIVER_SE			0x05
> +#define TRANSCEIVER_LVD			0x06
> +
> +	/* These ASCQ's defined for ASC = STATE_CHANGED */
> +#define RESERVATION_PREEMPTED		0x03
> +#define ASYM_ACCESS_CHANGED		0x06
> +#define LUN_CAPACITY_CHANGED		0x09
> +
>  //transfer direction
>  #define XFER_NONE               0x00
>  #define XFER_WRITE              0x01
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-06-15 20:39             ` Mike Miller (OS Dev)
@ 2009-06-15 21:25               ` Andrew Morton
  2009-06-15 22:17                 ` Mike Miller (OS Dev)
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2009-06-15 21:25 UTC (permalink / raw)
  To: Mike Miller (OS Dev)
  Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Mon, 15 Jun 2009 15:39:55 -0500
"Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:

> I'm getting reports that the driver hangs in the scan thread during rmmod. I
> call kthread_stop in cciss_remove_one. Do I also neesd to call complete()?

Yep.

static int scan_thread(void *data)
{
	ctlr_info_t *h = data;
	int rc;
	DECLARE_COMPLETION_ONSTACK(wait);
	h->rescan_wait = &wait;

	for (;;) {
		rc = wait_for_completion_interruptible(&wait);
		if (kthread_should_stop())
			break;
		if (!rc)
			rebuild_lun_table(h, 0);
	}
	return 0;
}

Two things will cause that wait_for_completion_interruptible() to
return: a complete() and a signal_pending().  But this is a kernel
thread, and kernel threads start out with all signals blocked.

> During my testing everything seemed to working OK.

That's odd.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-06-15 21:25               ` Andrew Morton
@ 2009-06-15 22:17                 ` Mike Miller (OS Dev)
  2009-06-15 22:41                   ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-06-15 22:17 UTC (permalink / raw)
  To: Andrew Morton; +Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Mon, Jun 15, 2009 at 02:25:50PM -0700, Andrew Morton wrote:
> On Mon, 15 Jun 2009 15:39:55 -0500
> "Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
> 
> > I'm getting reports that the driver hangs in the scan thread during rmmod. I
> > call kthread_stop in cciss_remove_one. Do I also neesd to call complete()?
> 
> Yep.
> 
> static int scan_thread(void *data)
> {
> 	ctlr_info_t *h = data;
> 	int rc;
> 	DECLARE_COMPLETION_ONSTACK(wait);
> 	h->rescan_wait = &wait;
> 
> 	for (;;) {
> 		rc = wait_for_completion_interruptible(&wait);
> 		if (kthread_should_stop())
> 			break;
> 		if (!rc)
> 			rebuild_lun_table(h, 0);
> 	}
> 	return 0;
> }
> 
> Two things will cause that wait_for_completion_interruptible() to
> return: a complete() and a signal_pending().  But this is a kernel
> thread, and kernel threads start out with all signals blocked.
> 
> > During my testing everything seemed to working OK.
> 
> That's odd.

Yeah, I guess so. Here's a patch to fix that problem. I add a call to
complete to cciss_remove_one to stop the thread for rmmod. Please consider
this for inclusion. Or should I submit separately from this mail?

Signed-off-by: Mike Miller <mike.miller@hp.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4d4d5e0..e51a0b2 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3935,6 +3935,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 		return;
 	}
 
+	complete(hba[i]->rescan_wait);
 	kthread_stop(hba[i]->cciss_scan_thread);
 
 	remove_proc_entry(hba[i]->devname, proc_cciss);

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-06-15 22:17                 ` Mike Miller (OS Dev)
@ 2009-06-15 22:41                   ` Andrew Morton
  2009-06-16 19:07                     ` Mike Miller (OS Dev)
  0 siblings, 1 reply; 16+ messages in thread
From: Andrew Morton @ 2009-06-15 22:41 UTC (permalink / raw)
  To: Mike Miller (OS Dev)
  Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Mon, 15 Jun 2009 17:17:35 -0500
"Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:

> On Mon, Jun 15, 2009 at 02:25:50PM -0700, Andrew Morton wrote:
> > On Mon, 15 Jun 2009 15:39:55 -0500
> > "Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
> > 
> > > I'm getting reports that the driver hangs in the scan thread during rmmod. I
> > > call kthread_stop in cciss_remove_one. Do I also neesd to call complete()?
> > 
> > Yep.
> > 
> > static int scan_thread(void *data)
> > {
> > 	ctlr_info_t *h = data;
> > 	int rc;
> > 	DECLARE_COMPLETION_ONSTACK(wait);
> > 	h->rescan_wait = &wait;
> > 
> > 	for (;;) {
> > 		rc = wait_for_completion_interruptible(&wait);
> > 		if (kthread_should_stop())
> > 			break;
> > 		if (!rc)
> > 			rebuild_lun_table(h, 0);
> > 	}
> > 	return 0;
> > }
> > 
> > Two things will cause that wait_for_completion_interruptible() to
> > return: a complete() and a signal_pending().  But this is a kernel
> > thread, and kernel threads start out with all signals blocked.
> > 
> > > During my testing everything seemed to working OK.
> > 
> > That's odd.
> 
> Yeah, I guess so. Here's a patch to fix that problem. I add a call to
> complete to cciss_remove_one to stop the thread for rmmod. Please consider
> this for inclusion. Or should I submit separately from this mail?

Well it's a bit crappy - someone need to cook up a suitable title and
write a suitable changelog.  I'd prefer that it be you ;)

<writes a title and changelog.  Sigh>

> Signed-off-by: Mike Miller <mike.miller@hp.com>
> 
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index 4d4d5e0..e51a0b2 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -3935,6 +3935,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
>  		return;
>  	}
>  
> +	complete(hba[i]->rescan_wait);
>  	kthread_stop(hba[i]->cciss_scan_thread);
>  
>  	remove_proc_entry(hba[i]->devname, proc_cciss);

Has this been confirmed to fix things?

This is needed in 2.6.30.x, yes?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-06-15 22:41                   ` Andrew Morton
@ 2009-06-16 19:07                     ` Mike Miller (OS Dev)
  2009-06-16 19:17                       ` Andrew Morton
  0 siblings, 1 reply; 16+ messages in thread
From: Mike Miller (OS Dev) @ 2009-06-16 19:07 UTC (permalink / raw)
  To: Andrew Morton; +Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Mon, Jun 15, 2009 at 03:41:50PM -0700, Andrew Morton wrote:
> On Mon, 15 Jun 2009 17:17:35 -0500
> "Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
> 
> > On Mon, Jun 15, 2009 at 02:25:50PM -0700, Andrew Morton wrote:
> > > On Mon, 15 Jun 2009 15:39:55 -0500
> > > "Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:
> > > 
> > > > I'm getting reports that the driver hangs in the scan thread during rmmod. I
> > > > call kthread_stop in cciss_remove_one. Do I also neesd to call complete()?
> > > 
> > > Yep.
> > > 
> > > static int scan_thread(void *data)
> > > {
> > > 	ctlr_info_t *h = data;
> > > 	int rc;
> > > 	DECLARE_COMPLETION_ONSTACK(wait);
> > > 	h->rescan_wait = &wait;
> > > 
> > > 	for (;;) {
> > > 		rc = wait_for_completion_interruptible(&wait);
> > > 		if (kthread_should_stop())
> > > 			break;
> > > 		if (!rc)
> > > 			rebuild_lun_table(h, 0);
> > > 	}
> > > 	return 0;
> > > }
> > > 
> > > Two things will cause that wait_for_completion_interruptible() to
> > > return: a complete() and a signal_pending().  But this is a kernel
> > > thread, and kernel threads start out with all signals blocked.
> > > 
> > > > During my testing everything seemed to working OK.
> > > 
> > > That's odd.
> > 
> > Yeah, I guess so. Here's a patch to fix that problem. I add a call to
> > complete to cciss_remove_one to stop the thread for rmmod. Please consider
> > this for inclusion. Or should I submit separately from this mail?
> 
> Well it's a bit crappy - someone need to cook up a suitable title and
> write a suitable changelog.  I'd prefer that it be you ;)
> 
> <writes a title and changelog.  Sigh>
> 
> > Signed-off-by: Mike Miller <mike.miller@hp.com>
> > 
> > diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> > index 4d4d5e0..e51a0b2 100644
> > --- a/drivers/block/cciss.c
> > +++ b/drivers/block/cciss.c
> > @@ -3935,6 +3935,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
> >  		return;
> >  	}
> >  
> > +	complete(hba[i]->rescan_wait);
> >  	kthread_stop(hba[i]->cciss_scan_thread);
> >  
> >  	remove_proc_entry(hba[i]->devname, proc_cciss);
> 
> Has this been confirmed to fix things?

Apparently, not. I'm still having issues. I'm sure this worked before my
patch was finally accepted. That's why I made the thread interruptible. Now
I see an article on LWN saying that kthread_stop doesn't use signals. It
says call kthread_should_stop periodically, which I do. How else to stop a
thread???

http://lwn.net/Articles/118935/


> 
> This is needed in 2.6.30.x, yes?

Yes, this is needed in 2.6.30.x. I'm working it. Will submit a proper patch
with changelog when complete.


Sorry for the inconvenience.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012
  2009-06-16 19:07                     ` Mike Miller (OS Dev)
@ 2009-06-16 19:17                       ` Andrew Morton
  0 siblings, 0 replies; 16+ messages in thread
From: Andrew Morton @ 2009-06-16 19:17 UTC (permalink / raw)
  To: Mike Miller (OS Dev)
  Cc: James.Bottomley, jens.axboe, linux-scsi, coldwell, hare

On Tue, 16 Jun 2009 14:07:43 -0500
"Mike Miller (OS Dev)" <mikem@beardog.cca.cpqcorp.net> wrote:

> > > diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> > > index 4d4d5e0..e51a0b2 100644
> > > --- a/drivers/block/cciss.c
> > > +++ b/drivers/block/cciss.c
> > > @@ -3935,6 +3935,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
> > >  		return;
> > >  	}
> > >  
> > > +	complete(hba[i]->rescan_wait);
> > >  	kthread_stop(hba[i]->cciss_scan_thread);
> > >  
> > >  	remove_proc_entry(hba[i]->devname, proc_cciss);
> > 
> > Has this been confirmed to fix things?
> 
> Apparently, not. I'm still having issues. I'm sure this worked before my
> patch was finally accepted. That's why I made the thread interruptible. Now
> I see an article on LWN saying that kthread_stop doesn't use signals. It
> says call kthread_should_stop periodically, which I do. How else to stop a
> thread???

Presumably your thread is blocked elsewhere, so it is failing to poll
kthread_should_stop().

When the thread is stuck, do an `echo t > /proc/sysrq-trigger' and find
the thread's stack trace and work out where it's blocked.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2009-06-16 19:17 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-10 15:17 [PATCH 1/1] cciss: resubmit kernel scan thread for MSA2012 Mike Miller (OS Dev)
2009-03-10 15:46 ` Mike Miller (OS Dev)
2009-03-10 15:50 ` James Bottomley
2009-03-10 16:34   ` Mike Miller (OS Dev)
2009-03-10 22:39     ` Andrew Morton
2009-03-11 14:31       ` Mike Miller (OS Dev)
2009-03-11 15:21       ` Mike Miller (OS Dev)
2009-03-11 16:17       ` Mike Miller (OS Dev)
2009-03-11 22:14         ` Andrew Morton
2009-03-12 16:26           ` Mike Miller (OS Dev)
2009-06-15 20:39             ` Mike Miller (OS Dev)
2009-06-15 21:25               ` Andrew Morton
2009-06-15 22:17                 ` Mike Miller (OS Dev)
2009-06-15 22:41                   ` Andrew Morton
2009-06-16 19:07                     ` Mike Miller (OS Dev)
2009-06-16 19:17                       ` Andrew Morton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.