All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
@ 2005-06-04  1:19 Mike Christie
  2005-06-04 16:07 ` James Bottomley
  2005-06-07 18:07 ` Jens Axboe
  0 siblings, 2 replies; 41+ messages in thread
From: Mike Christie @ 2005-06-04  1:19 UTC (permalink / raw)
  To: device-mapper development, linux-scsi, axboe

The following patches should enable the use of scatter lists for all
REQ_BLOCK_PC requests and cleanup some code duplication or dangerous
memory allocations in the dm-multipath hw handlers and
block/scsi_ioctl.c.

REQ_BLOCK_PC scatter list usage only required converting the old
sg_scsi_ioctl code to do bio backed requests since the current block
layer SG_IO code will always use requests with bios. But while
converting the old ioctl and removing some dangerous (GFP_KERNEL in
failover path) memory allocations from a dm-multipath hw_handler (and
while updating the LSI one) I was able to seperate some common code into
two new functions: blk_rq_map_kern() and bio_map_kern. These functions
are similar to their blk/bio*user cousins where they allocate requests
and bios and setup the data pointers except they work on kernel buffers.

The goal is next convert the scsi 'special' requests to use these
functions, so scsi will be able to use block layer functions for scatter
lists setup for all requests. And then hopefully one day we will not
need those stinking "if (sc->use_sg)" paths all over our scsi drivers.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-04  1:19 [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers Mike Christie
@ 2005-06-04 16:07 ` James Bottomley
  2005-06-05  7:15   ` Mike Christie
  2005-06-07 12:10   ` Christoph Hellwig
  2005-06-07 18:07 ` Jens Axboe
  1 sibling, 2 replies; 41+ messages in thread
From: James Bottomley @ 2005-06-04 16:07 UTC (permalink / raw)
  To: Mike Christie; +Cc: device-mapper development, Jens Axboe, linux-scsi

On Fri, 2005-06-03 at 18:19 -0700, Mike Christie wrote:
> The goal is next convert the scsi 'special' requests to use these
> functions, so scsi will be able to use block layer functions for scatter
> lists setup for all requests. And then hopefully one day we will not
> need those stinking "if (sc->use_sg)" paths all over our scsi drivers.

Here's the proof of concept for this one.  It converts scsi_wait_req to
do correct REQ_BLOCK_PC submission (and works nicely in my setup).

The final goal should be to eliminate struct scsi_request, but that
can't be done until the character submission paths of sg and st are also
modified.

There's some loss of functionality to this: retries are no longer
controllable (except by setting REQ_FASTFAIL) and the wait_req API needs
to be altered, but it looks very nice.

Thanks,

James

--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -238,23 +238,6 @@ void scsi_do_req(struct scsi_request *sr
 }
 EXPORT_SYMBOL(scsi_do_req);
 
-static void scsi_wait_done(struct scsi_cmnd *cmd)
-{
-	struct request *req = cmd->request;
-	struct request_queue *q = cmd->device->request_queue;
-	unsigned long flags;
-
-	req->rq_status = RQ_SCSI_DONE;	/* Busy, but indicate request done */
-
-	spin_lock_irqsave(q->queue_lock, flags);
-	if (blk_rq_tagged(req))
-		blk_queue_end_tag(q, req);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-
-	if (req->waiting)
-		complete(req->waiting);
-}
-
 /* This is the end routine we get to if a command was never attached
  * to the request.  Simply complete the request without changing
  * rq_status; this will cause a DRIVER_ERROR. */
@@ -269,19 +252,36 @@ void scsi_wait_req(struct scsi_request *
 		   unsigned bufflen, int timeout, int retries)
 {
 	DECLARE_COMPLETION(wait);
-	
-	sreq->sr_request->waiting = &wait;
-	sreq->sr_request->rq_status = RQ_SCSI_BUSY;
-	sreq->sr_request->end_io = scsi_wait_req_end_io;
-	scsi_do_req(sreq, cmnd, buffer, bufflen, scsi_wait_done,
-			timeout, retries);
+	struct request *req;
+
+	if (bufflen)
+		req = blk_rq_map_kern(sreq->sr_device->request_queue,
+				      sreq->sr_data_direction == DMA_TO_DEVICE,
+				      buffer, bufflen, __GFP_WAIT);
+	else
+		req = blk_get_request(sreq->sr_device->request_queue, READ,
+				      __GFP_WAIT);
+	req->flags |= REQ_NOMERGE;
+	req->waiting = &wait;
+	req->end_io = scsi_wait_req_end_io;
+	req->cmd_len = COMMAND_SIZE(((u8 *)cmnd)[0]);
+	req->sense = sreq->sr_sense_buffer;
+	req->sense_len = 0;
+	memcpy(req->cmd, cmnd, req->cmd_len);
+	req->timeout = timeout;
+	req->flags |= REQ_BLOCK_PC;
+	req->rq_disk = NULL;
+	blk_insert_request(sreq->sr_device->request_queue, req,
+			   sreq->sr_data_direction == DMA_TO_DEVICE, NULL);
 	wait_for_completion(&wait);
 	sreq->sr_request->waiting = NULL;
-	if (sreq->sr_request->rq_status != RQ_SCSI_DONE)
+	sreq->sr_result = req->errors;
+	if (req->errors)
 		sreq->sr_result |= (DRIVER_ERROR << 24);
 
-	__scsi_release_request(sreq);
+	blk_put_request(req);
 }
+
 EXPORT_SYMBOL(scsi_wait_req);
 
 /*
@@ -889,11 +889,12 @@ void scsi_io_completion(struct scsi_cmnd
 		return;
 	}
 	if (result) {
-		printk(KERN_INFO "SCSI error : <%d %d %d %d> return code "
-		       "= 0x%x\n", cmd->device->host->host_no,
-		       cmd->device->channel,
-		       cmd->device->id,
-		       cmd->device->lun, result);
+		if (!(req->flags & REQ_SPECIAL))
+			printk(KERN_INFO "SCSI error : <%d %d %d %d> return code "
+			       "= 0x%x\n", cmd->device->host->host_no,
+			       cmd->device->channel,
+			       cmd->device->id,
+			       cmd->device->lun, result);
 
 		if (driver_byte(result) & DRIVER_SENSE)
 			scsi_print_sense("", cmd);
@@ -1026,6 +1027,12 @@ static int scsi_issue_flush_fn(request_q
 	return -EOPNOTSUPP;
 }
 
+static void scsi_generic_done(struct scsi_cmnd *cmd)
+{
+	BUG_ON(!blk_pc_request(cmd->request));
+	scsi_io_completion(cmd, cmd->result == 0 ? cmd->bufflen : 0, 0);
+}
+
 static int scsi_prep_fn(struct request_queue *q, struct request *req)
 {
 	struct scsi_device *sdev = q->queuedata;
@@ -1067,7 +1074,7 @@ static int scsi_prep_fn(struct request_q
 	 * these two cases differently.  We differentiate by looking
 	 * at request->cmd, as this tells us the real story.
 	 */
-	if (req->flags & REQ_SPECIAL) {
+	if (req->flags & REQ_SPECIAL && req->special) {
 		struct scsi_request *sreq = req->special;
 
 		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
@@ -1079,7 +1086,7 @@ static int scsi_prep_fn(struct request_q
 			cmd = req->special;
 	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
 
-		if(unlikely(specials_only)) {
+		if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) {
 			if(specials_only == SDEV_QUIESCE ||
 					specials_only == SDEV_BLOCK)
 				return BLKPREP_DEFER;
@@ -1148,11 +1155,26 @@ static int scsi_prep_fn(struct request_q
 		/*
 		 * Initialize the actual SCSI command for this request.
 		 */
-		drv = *(struct scsi_driver **)req->rq_disk->private_data;
-		if (unlikely(!drv->init_command(cmd))) {
-			scsi_release_buffers(cmd);
-			scsi_put_command(cmd);
-			return BLKPREP_KILL;
+		if (req->rq_disk) {
+			drv = *(struct scsi_driver **)req->rq_disk->private_data;
+			if (unlikely(!drv->init_command(cmd))) {
+				scsi_release_buffers(cmd);
+				scsi_put_command(cmd);
+				return BLKPREP_KILL;
+			}
+		} else {
+			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
+			if (rq_data_dir(req) == WRITE)
+				cmd->sc_data_direction = DMA_TO_DEVICE;
+			else if (req->data_len)
+				cmd->sc_data_direction = DMA_FROM_DEVICE;
+			else
+				cmd->sc_data_direction = DMA_NONE;
+			
+			cmd->transfersize = req->data_len;
+			cmd->allowed = 3;
+			cmd->timeout_per_command = req->timeout;
+			cmd->done = scsi_generic_done;
 		}
 	}
 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-04 16:07 ` James Bottomley
@ 2005-06-05  7:15   ` Mike Christie
  2005-06-05  9:41     ` [dm-devel] " christophe varoqui
                       ` (2 more replies)
  2005-06-07 12:10   ` Christoph Hellwig
  1 sibling, 3 replies; 41+ messages in thread
From: Mike Christie @ 2005-06-05  7:15 UTC (permalink / raw)
  To: James Bottomley; +Cc: device-mapper development, Jens Axboe, linux-scsi

On Sat, 2005-06-04 at 09:07, James Bottomley wrote:
> On Fri, 2005-06-03 at 18:19 -0700, Mike Christie wrote:
> > The goal is next convert the scsi 'special' requests to use these
> > functions, so scsi will be able to use block layer functions for scatter
> > lists setup for all requests. And then hopefully one day we will not
> > need those stinking "if (sc->use_sg)" paths all over our scsi drivers.
> 
> Here's the proof of concept for this one.  It converts scsi_wait_req to
> do correct REQ_BLOCK_PC submission (and works nicely in my setup).
> 
> The final goal should be to eliminate struct scsi_request, but that
> can't be done until the character submission paths of sg and st are also
> modified.

inlined below is a patch built over yours + my patch set (I put all my
patches here http://www.cs.wisc.edu/~michaelc/block/use-sg/v3/) that
converts scsi_scan.c and removes its scsi_request usage. I cheated and
added a new function that is basically your scsi_wait_req but it uses
the block layer's blk_execute_rq function instead of blk_insert_request.
Also to send block pc commands at scan time without the special bit set
I had to modify the scsi_prep_fn. This is just a RFC/test-patch so oh
well. There is the larger problem of handling queue limits to handle.

> 
> There's some loss of functionality to this: retries are no longer
> controllable (except by setting REQ_FASTFAIL) and the wait_req API needs
> to be altered, but it looks very nice.


diff -aurp git/drivers/block/ll_rw_blk.c git.work/drivers/block/ll_rw_blk.c
--- git/drivers/block/ll_rw_blk.c	2005-06-04 20:25:10.000000000 -0700
+++ git.work/drivers/block/ll_rw_blk.c	2005-06-04 23:42:13.000000000 -0700
@@ -2236,14 +2236,16 @@ EXPORT_SYMBOL(blk_rq_map_kern);
  * @q:		queue to insert the request in
  * @bd_disk:	matching gendisk
  * @rq:		request to insert
+ * @at_head:    insert request at head or tail of queue
  *
  * Description:
  *    Insert a fully prepared request at the back of the io scheduler queue
  *    for execution.
  */
 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
-		   struct request *rq)
+		   struct request *rq, int at_head)
 {
+	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 	DECLARE_COMPLETION(wait);
 	char sense[SCSI_SENSE_BUFFERSIZE];
 	int err = 0;
@@ -2265,7 +2267,7 @@ int blk_execute_rq(request_queue_t *q, s
 	rq->flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
 	rq->end_io = blk_end_sync_rq;
-	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+	elv_add_request(q, rq, where, 1);
 	generic_unplug_device(q);
 	wait_for_completion(&wait);
 	rq->waiting = NULL;
@@ -2333,7 +2335,7 @@ int blkdev_scsi_issue_flush_fn(request_q
 	rq->data_len = 0;
 	rq->timeout = 60 * HZ;
 
-	ret = blk_execute_rq(q, disk, rq);
+	ret = blk_execute_rq(q, disk, rq, 0);
 
 	if (ret && error_sector)
 		*error_sector = rq->sector;
diff -aurp git/drivers/block/scsi_ioctl.c git.work/drivers/block/scsi_ioctl.c
--- git/drivers/block/scsi_ioctl.c	2005-06-04 20:25:10.000000000 -0700
+++ git.work/drivers/block/scsi_ioctl.c	2005-06-04 23:32:24.000000000 -0700
@@ -298,7 +298,7 @@ static int sg_io(struct file *file, requ
 	 * (if he doesn't check that is his problem).
 	 * N.B. a non-zero SCSI status is _not_ necessarily an error.
 	 */
-	blk_execute_rq(q, bd_disk, rq);
+	blk_execute_rq(q, bd_disk, rq, 0);
 
 	/* write to all output members */
 	hdr->status = 0xff & rq->errors;
@@ -415,7 +415,7 @@ static int sg_scsi_ioctl(struct file *fi
 	rq->data_len = bytes;
 	rq->flags |= REQ_BLOCK_PC;
 
-	blk_execute_rq(q, bd_disk, rq);
+	blk_execute_rq(q, bd_disk, rq, 0);
 	err = rq->errors & 0xff;	/* only 8 bit SCSI status */
 	if (err) {
 		if (rq->sense_len && rq->sense) {
@@ -569,7 +569,7 @@ int scsi_cmd_ioctl(struct file *file, st
 			rq->cmd[0] = GPCMD_START_STOP_UNIT;
 			rq->cmd[4] = 0x02 + (close != 0);
 			rq->cmd_len = 6;
-			err = blk_execute_rq(q, bd_disk, rq);
+			err = blk_execute_rq(q, bd_disk, rq, 0);
 			blk_put_request(rq);
 			break;
 		default:
diff -aurp git/drivers/cdrom/cdrom.c git.work/drivers/cdrom/cdrom.c
--- git/drivers/cdrom/cdrom.c	2005-06-04 20:04:45.000000000 -0700
+++ git.work/drivers/cdrom/cdrom.c	2005-06-04 23:32:54.000000000 -0700
@@ -2132,7 +2132,7 @@ static int cdrom_read_cdda_bpc(struct cd
 		if (rq->bio)
 			blk_queue_bounce(q, &rq->bio);
 
-		if (blk_execute_rq(q, cdi->disk, rq)) {
+		if (blk_execute_rq(q, cdi->disk, rq, 0)) {
 			struct request_sense *s = rq->sense;
 			ret = -EIO;
 			cdi->last_sense = s->sense_key;
diff -aurp git/drivers/ide/ide-disk.c git.work/drivers/ide/ide-disk.c
--- git/drivers/ide/ide-disk.c	2005-06-04 20:04:57.000000000 -0700
+++ git.work/drivers/ide/ide-disk.c	2005-06-04 23:32:42.000000000 -0700
@@ -750,7 +750,7 @@ static int idedisk_issue_flush(request_q
 
 	idedisk_prepare_flush(q, rq);
 
-	ret = blk_execute_rq(q, disk, rq);
+	ret = blk_execute_rq(q, disk, rq, 0);
 
 	/*
 	 * if we failed and caller wants error offset, get it
diff -aurp git/drivers/scsi/scsi_lib.c git.work/drivers/scsi/scsi_lib.c
--- git/drivers/scsi/scsi_lib.c	2005-06-04 20:25:14.000000000 -0700
+++ git.work/drivers/scsi/scsi_lib.c	2005-06-04 23:57:59.000000000 -0700
@@ -284,6 +284,59 @@ void scsi_wait_req(struct scsi_request *
 
 EXPORT_SYMBOL(scsi_wait_req);
 
+/**
+ * scsi_execute_req - insert request and wait for the result
+ * @sdev:	scsi device
+ * @cmd:	scsi command
+ * @data_direction: data direction
+ * @buffer:	data buffer
+ * @bufflen:	len of buffer
+ * @sense:	optional sense buffer
+ * @timeout:	request timeout in seconds
+ * @retries:	number of times to retry request
+ *
+ * scsi_execute_req returns the req->errors value which is the
+ * the scsi_cmnd result field.
+ **/
+int scsi_execute_req(struct scsi_device *sdev, unsigned char *cmd,
+		     int data_direction, void *buffer, unsigned bufflen,
+		     unsigned char *sense, int timeout, int retries)
+{
+	struct request *req;
+
+	if (bufflen)
+		req = blk_rq_map_kern(sdev->request_queue,
+				      data_direction == DMA_TO_DEVICE,
+				      buffer, bufflen, __GFP_WAIT);
+	else
+		req = blk_get_request(sdev->request_queue, READ, __GFP_WAIT);
+	if (!req)
+		/*
+		 * if we could not map the buffer within the request queue's
+		 * limits we return DRIVER_ERROR.
+		 *
+		 * TODO: should caller check limits and send multiple
+		 * requests or should we handle it.
+		 */
+		return DRIVER_ERROR << 24;
+
+	req->cmd_len = COMMAND_SIZE(cmd[0]);
+	memcpy(req->cmd, cmd, req->cmd_len);
+	req->sense = sense;
+	req->sense_len = 0;
+	req->timeout = timeout;
+	req->flags |= REQ_BLOCK_PC;
+
+	/*
+	 * head injection *required* here otherwise quiesce won't work
+	 */
+	blk_execute_rq(req->q, NULL, req, 1);
+	blk_put_request(req);
+	return req->errors;
+}
+
+EXPORT_SYMBOL(scsi_execute_req);
+
 /*
  * Function:    scsi_init_cmd_errh()
  *
@@ -1049,7 +1102,8 @@ static int scsi_prep_fn(struct request_q
 		       sdev->host->host_no, sdev->id, sdev->lun);
 		return BLKPREP_KILL;
 	}
-	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
+	if (unlikely(sdev->sdev_state != SDEV_RUNNING) &&
+	    unlikely(sdev->sdev_state != SDEV_CREATED)) {
 		/* OK, we're not in a running state don't prep
 		 * user commands */
 		if (sdev->sdev_state == SDEV_DEL) {
diff -aurp git/drivers/scsi/scsi_scan.c git.work/drivers/scsi/scsi_scan.c
--- git/drivers/scsi/scsi_scan.c	2005-06-04 20:05:53.000000000 -0700
+++ git.work/drivers/scsi/scsi_scan.c	2005-06-04 23:15:59.000000000 -0700
@@ -111,15 +111,14 @@ MODULE_PARM_DESC(inq_timeout, 
 
 /**
  * scsi_unlock_floptical - unlock device via a special MODE SENSE command
- * @sreq:	used to send the command
+ * @sdev:	scsi device to send command to
  * @result:	area to store the result of the MODE SENSE
  *
  * Description:
- *     Send a vendor specific MODE SENSE (not a MODE SELECT) command using
- *     @sreq to unlock a device, storing the (unused) results into result.
+ *     Send a vendor specific MODE SENSE (not a MODE SELECT) command.
  *     Called for BLIST_KEY devices.
  **/
-static void scsi_unlock_floptical(struct scsi_request *sreq,
+static void scsi_unlock_floptical(struct scsi_device *sdev,
 				  unsigned char *result)
 {
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -129,11 +128,10 @@ static void scsi_unlock_floptical(struct
 	scsi_cmd[1] = 0;
 	scsi_cmd[2] = 0x2e;
 	scsi_cmd[3] = 0;
-	scsi_cmd[4] = 0x2a;	/* size */
+	scsi_cmd[4] = 0x2a;     /* size */
 	scsi_cmd[5] = 0;
-	sreq->sr_cmd_len = 0;
-	sreq->sr_data_direction = DMA_FROM_DEVICE;
-	scsi_wait_req(sreq, scsi_cmd, result, 0x2a /* size */, SCSI_TIMEOUT, 3);
+	scsi_execute_req(sdev, scsi_cmd, DMA_FROM_DEVICE, result, 0x2a, NULL,
+			 SCSI_TIMEOUT, 3);
 }
 
 /**
@@ -419,26 +417,26 @@ void scsi_target_reap(struct scsi_target
 
 /**
  * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY
- * @sreq:	used to send the INQUIRY
+ * @sdev:	scsi_device to probe
  * @inq_result:	area to store the INQUIRY result
+ * @result_len: len of inq_result
  * @bflags:	store any bflags found here
  *
  * Description:
- *     Probe the lun associated with @sreq using a standard SCSI INQUIRY;
+ *     Probe the lun associated with @req using a standard SCSI INQUIRY;
  *
- *     If the INQUIRY is successful, sreq->sr_result is zero and: the
+ *     If the INQUIRY is successful, zero is returned and the
  *     INQUIRY data is in @inq_result; the scsi_level and INQUIRY length
- *     are copied to the Scsi_Device at @sreq->sr_device (sdev);
- *     any flags value is stored in *@bflags.
+ *     are copied to the Scsi_Device any flags value is stored in *@bflags.
  **/
-static void scsi_probe_lun(struct scsi_request *sreq, char *inq_result,
-			   int *bflags)
+static int scsi_probe_lun(struct scsi_device *sdev, char *inq_result,
+			  int result_len, int *bflags)
 {
-	struct scsi_device *sdev = sreq->sr_device;	/* a bit ugly */
+	char sense[SCSI_SENSE_BUFFERSIZE];
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
 	int first_inquiry_len, try_inquiry_len, next_inquiry_len;
 	int response_len = 0;
-	int pass, count;
+	int pass, count, result;
 	struct scsi_sense_hdr sshdr;
 
 	*bflags = 0;
@@ -461,28 +459,28 @@ static void scsi_probe_lun(struct scsi_r
 		memset(scsi_cmd, 0, 6);
 		scsi_cmd[0] = INQUIRY;
 		scsi_cmd[4] = (unsigned char) try_inquiry_len;
-		sreq->sr_cmd_len = 0;
-		sreq->sr_data_direction = DMA_FROM_DEVICE;
 
+		memset(sense, 0, sizeof(sense));
 		memset(inq_result, 0, try_inquiry_len);
-		scsi_wait_req(sreq, (void *) scsi_cmd, (void *) inq_result,
-				try_inquiry_len,
-				HZ/2 + HZ*scsi_inq_timeout, 3);
+
+		result = scsi_execute_req(sdev,  scsi_cmd, DMA_FROM_DEVICE,
+					  inq_result, try_inquiry_len, sense,
+					  HZ / 2 + HZ * scsi_inq_timeout, 3);
 
 		SCSI_LOG_SCAN_BUS(3, printk(KERN_INFO "scsi scan: INQUIRY %s "
 				"with code 0x%x\n",
-				sreq->sr_result ? "failed" : "successful",
-				sreq->sr_result));
+				result ? "failed" : "successful", result));
 
-		if (sreq->sr_result) {
+		if (result) {
 			/*
 			 * not-ready to ready transition [asc/ascq=0x28/0x0]
 			 * or power-on, reset [asc/ascq=0x29/0x0], continue.
 			 * INQUIRY should not yield UNIT_ATTENTION
 			 * but many buggy devices do so anyway. 
 			 */
-			if ((driver_byte(sreq->sr_result) & DRIVER_SENSE) &&
-			    scsi_request_normalize_sense(sreq, &sshdr)) {
+			if ((driver_byte(result) & DRIVER_SENSE) &&
+			    scsi_normalize_sense(sense, sizeof(sense),
+						 &sshdr)) {
 				if ((sshdr.sense_key == UNIT_ATTENTION) &&
 				    ((sshdr.asc == 0x28) ||
 				     (sshdr.asc == 0x29)) &&
@@ -493,7 +491,7 @@ static void scsi_probe_lun(struct scsi_r
 		break;
 	}
 
-	if (sreq->sr_result == 0) {
+	if (result == 0) {
 		response_len = (unsigned char) inq_result[4] + 5;
 		if (response_len > 255)
 			response_len = first_inquiry_len;	/* sanity */
@@ -542,8 +540,8 @@ static void scsi_probe_lun(struct scsi_r
 
 	/* If the last transfer attempt got an error, assume the
 	 * peripheral doesn't exist or is dead. */
-	if (sreq->sr_result)
-		return;
+	if (result)
+		return -EIO;
 
 	/* Don't report any more data than the device says is valid */
 	sdev->inquiry_len = min(try_inquiry_len, response_len);
@@ -579,7 +577,7 @@ static void scsi_probe_lun(struct scsi_r
 	    (sdev->scsi_level == 1 && (inq_result[3] & 0x0f) == 1))
 		sdev->scsi_level++;
 
-	return;
+	return 0;
 }
 
 /**
@@ -785,9 +783,8 @@ static int scsi_probe_and_add_lun(struct
 				  void *hostdata)
 {
 	struct scsi_device *sdev;
-	struct scsi_request *sreq;
 	unsigned char *result;
-	int bflags, res = SCSI_SCAN_NO_RESPONSE;
+	int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
 	/*
@@ -816,16 +813,13 @@ static int scsi_probe_and_add_lun(struct
 	sdev = scsi_alloc_sdev(starget, lun, hostdata);
 	if (!sdev)
 		goto out;
-	sreq = scsi_allocate_request(sdev, GFP_ATOMIC);
-	if (!sreq)
-		goto out_free_sdev;
-	result = kmalloc(256, GFP_ATOMIC |
+
+	result = kmalloc(result_len, GFP_ATOMIC |
 			((shost->unchecked_isa_dma) ? __GFP_DMA : 0));
 	if (!result)
-		goto out_free_sreq;
+		goto out_free_sdev;
 
-	scsi_probe_lun(sreq, result, &bflags);
-	if (sreq->sr_result)
+	if (scsi_probe_lun(sdev, result, result_len, &bflags))
 		goto out_free_result;
 
 	/*
@@ -853,7 +847,7 @@ static int scsi_probe_and_add_lun(struct
 	if (res == SCSI_SCAN_LUN_PRESENT) {
 		if (bflags & BLIST_KEY) {
 			sdev->lockable = 0;
-			scsi_unlock_floptical(sreq, result);
+			scsi_unlock_floptical(sdev, result);
 		}
 		if (bflagsp)
 			*bflagsp = bflags;
@@ -861,8 +855,6 @@ static int scsi_probe_and_add_lun(struct
 
  out_free_result:
 	kfree(result);
- out_free_sreq:
-	scsi_release_request(sreq);
  out_free_sdev:
 	if (res == SCSI_SCAN_LUN_PRESENT) {
 		if (sdevp) {
@@ -1018,13 +1010,14 @@ static int scsi_report_lun_scan(struct s
 				int rescan)
 {
 	char devname[64];
+	char sense[SCSI_SENSE_BUFFERSIZE];
 	unsigned char scsi_cmd[MAX_COMMAND_SIZE];
 	unsigned int length;
 	unsigned int lun;
 	unsigned int num_luns;
 	unsigned int retries;
+	int result;
 	struct scsi_lun *lunp, *lun_data;
-	struct scsi_request *sreq;
 	u8 *data;
 	struct scsi_sense_hdr sshdr;
 	struct scsi_target *starget = scsi_target(sdev);
@@ -1042,10 +1035,6 @@ static int scsi_report_lun_scan(struct s
 	if (bflags & BLIST_NOLUN)
 		return 0;
 
-	sreq = scsi_allocate_request(sdev, GFP_ATOMIC);
-	if (!sreq)
-		goto out;
-
 	sprintf(devname, "host %d channel %d id %d",
 		sdev->host->host_no, sdev->channel, sdev->id);
 
@@ -1063,7 +1052,7 @@ static int scsi_report_lun_scan(struct s
 	lun_data = kmalloc(length, GFP_ATOMIC |
 			   (sdev->host->unchecked_isa_dma ? __GFP_DMA : 0));
 	if (!lun_data)
-		goto out_release_request;
+		goto out;
 
 	scsi_cmd[0] = REPORT_LUNS;
 
@@ -1082,8 +1071,6 @@ static int scsi_report_lun_scan(struct s
 
 	scsi_cmd[10] = 0;	/* reserved */
 	scsi_cmd[11] = 0;	/* control */
-	sreq->sr_cmd_len = 0;
-	sreq->sr_data_direction = DMA_FROM_DEVICE;
 
 	/*
 	 * We can get a UNIT ATTENTION, for example a power on/reset, so
@@ -1099,29 +1086,30 @@ static int scsi_report_lun_scan(struct s
 		SCSI_LOG_SCAN_BUS(3, printk (KERN_INFO "scsi scan: Sending"
 				" REPORT LUNS to %s (try %d)\n", devname,
 				retries));
-		scsi_wait_req(sreq, scsi_cmd, lun_data, length,
-				SCSI_TIMEOUT + 4*HZ, 3);
+
+		memset(sense, 0, sizeof(sense));
+		result = scsi_execute_req(sdev, scsi_cmd, DMA_FROM_DEVICE,
+					  lun_data, length, sense,
+					  SCSI_TIMEOUT + 4 * HZ, 3);
+
 		SCSI_LOG_SCAN_BUS(3, printk (KERN_INFO "scsi scan: REPORT LUNS"
-				" %s (try %d) result 0x%x\n", sreq->sr_result
-				?  "failed" : "successful", retries,
-				sreq->sr_result));
-		if (sreq->sr_result == 0)
+				" %s (try %d) result 0x%x\n", result
+				?  "failed" : "successful", retries, result));
+		if (result == 0)
 			break;
-		else if (scsi_request_normalize_sense(sreq, &sshdr)) {
+		else if (scsi_normalize_sense(sense, sizeof(sense), &sshdr)) {
 			if (sshdr.sense_key != UNIT_ATTENTION)
 				break;
 		}
 	}
 
-	if (sreq->sr_result) {
+	if (result) {
 		/*
 		 * The device probably does not support a REPORT LUN command
 		 */
 		kfree(lun_data);
-		scsi_release_request(sreq);
 		return 1;
 	}
-	scsi_release_request(sreq);
 
 	/*
 	 * Get the length from the first four bytes of lun_data.
@@ -1195,8 +1183,6 @@ static int scsi_report_lun_scan(struct s
 	kfree(lun_data);
 	return 0;
 
- out_release_request:
-	scsi_release_request(sreq);
  out:
 	/*
 	 * We are out of memory, don't try scanning any further.
diff -aurp git/include/linux/blkdev.h git.work/include/linux/blkdev.h
--- git/include/linux/blkdev.h	2005-06-04 20:25:10.000000000 -0700
+++ git.work/include/linux/blkdev.h	2005-06-04 23:31:22.000000000 -0700
@@ -562,7 +562,8 @@ extern struct request *blk_rq_map_user(r
 extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
 extern struct request *blk_rq_map_kern(request_queue_t *, int, void *,
 					unsigned int, unsigned int);
-extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *);
+extern int blk_execute_rq(request_queue_t *, struct gendisk *,
+			  struct request *, int);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
diff -aurp git/include/scsi/scsi_request.h git.work/include/scsi/scsi_request.h
--- git/include/scsi/scsi_request.h	2005-06-04 20:07:53.000000000 -0700
+++ git.work/include/scsi/scsi_request.h	2005-06-04 23:16:31.000000000 -0700
@@ -54,6 +54,9 @@ extern void scsi_do_req(struct scsi_requ
 			void *buffer, unsigned bufflen,
 			void (*done) (struct scsi_cmnd *),
 			int timeout, int retries);
+extern int scsi_execute_req(struct scsi_device *sdev, unsigned char *cmd,
+			    int data_direction, void *buffer, unsigned bufflen,
+			    unsigned char *sense, int timeout, int retries);
 
 struct scsi_mode_data {
 	__u32	length;

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [dm-devel] Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-05  7:15   ` Mike Christie
@ 2005-06-05  9:41     ` christophe varoqui
  2005-06-06 13:31       ` Lars Marowsky-Bree
  2005-06-05 14:40     ` James Bottomley
  2005-06-06 19:02     ` Patrick Mansfield
  2 siblings, 1 reply; 41+ messages in thread
From: christophe varoqui @ 2005-06-05  9:41 UTC (permalink / raw)
  To: device-mapper development; +Cc: James Bottomley, Jens Axboe, linux-scsi

While the focus is in this area, I'd like to mention multipath tools
userland checkers will need to insert SG request on queue head (Ed
Gaggin will explain this at OLS). I guess the API is lacking there, and
would like to know if this feature is on your TODOs ...

Regards,


> inlined below is a patch built over yours + my patch set (I put all my
> patches here http://www.cs.wisc.edu/~michaelc/block/use-sg/v3/) that
> converts scsi_scan.c and removes its scsi_request usage. I cheated and
> added a new function that is basically your scsi_wait_req but it uses
> the block layer's blk_execute_rq function instead of blk_insert_request.
> Also to send block pc commands at scan time without the special bit set
> I had to modify the scsi_prep_fn. This is just a RFC/test-patch so oh
> well. There is the larger problem of handling queue limits to handle.
> 
> > 
> > There's some loss of functionality to this: retries are no longer
> > controllable (except by setting REQ_FASTFAIL) and the wait_req API needs
> > to be altered, but it looks very nice.
> 

-- 
christophe varoqui <christophe.varoqui@free.fr>



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-05  7:15   ` Mike Christie
  2005-06-05  9:41     ` [dm-devel] " christophe varoqui
@ 2005-06-05 14:40     ` James Bottomley
  2005-06-05 19:11       ` James Bottomley
  2005-06-06 19:02     ` Patrick Mansfield
  2 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-05 14:40 UTC (permalink / raw)
  To: Mike Christie; +Cc: device-mapper development, Jens Axboe, linux-scsi

On Sun, 2005-06-05 at 00:15 -0700, Mike Christie wrote:
> +int scsi_execute_req(struct scsi_device *sdev, unsigned char *cmd,
> +                    int data_direction, void *buffer, unsigned
> bufflen,
> +                    unsigned char *sense, int timeout, int retries)


Actually, not quite; there are two problems with this

1. We may not be able to compute the command length for vendor specific
commands and the group 3 (variable length command 0x7f). 
2. some requests want to set other flags (REQ_FAILFAST mostly).

However, extra arguments should accommodate these.

This:

> +       if (!req)

Should be if (IS_ERR(req))

Also, you can't do this (use after free):

> +       blk_put_request(req);
> +       return req->errors;

Finally, there's coming up with a replacement API for scsi_do_req that
returns via the end_io callback ... since that doesn't do a wait/wake,
perhaps this should be the core API upon which the others are built?

James

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-05 14:40     ` James Bottomley
@ 2005-06-05 19:11       ` James Bottomley
  2005-06-06  5:43         ` Douglas Gilbert
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-05 19:11 UTC (permalink / raw)
  To: Mike Christie; +Cc: device-mapper development, linux-scsi, Jens Axboe

On Sun, 2005-06-05 at 09:40 -0500, James Bottomley wrote:
> Finally, there's coming up with a replacement API for scsi_do_req that
> returns via the end_io callback ... since that doesn't do a wait/wake,
> perhaps this should be the core API upon which the others are built?

OK, well, the API is easy ... it's attached.  Converting sg and st to
use it is quite another matter.  sg in particular is a nasty tangle when
it comes to doing its own sg mapping, which the block layer will now do
for it.

James
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -177,6 +177,23 @@ int scsi_queue_insert(struct scsi_cmnd *
 	return 0;
 }
 
+struct scsi_do_req_cb {
+	void (*done)(void *, int);
+	void *data;
+};
+
+static void scsi_do_req_end_io(struct request *req)
+{
+	int result = req->errors;
+	struct scsi_do_req_cb *cb = req->end_io_data;
+	
+	BUG_ON(cb == NULL);
+	if (result)
+		result |= DRIVER_ERROR << 24;
+	blk_put_request(req);
+	cb->done(cb->data, result);
+	kfree(cb);
+}
 /*
  * Function:    scsi_do_req
  *
@@ -203,38 +220,49 @@ int scsi_queue_insert(struct scsi_cmnd *
  *		now inject requests on the *head* of the device queue
  *		rather than the tail.
  */
-void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
-		 void *buffer, unsigned bufflen,
-		 void (*done)(struct scsi_cmnd *),
-		 int timeout, int retries)
+void scsi_do_command(struct scsi_device *sdev, const void *cmnd, int cmd_len,
+		     void *buffer, unsigned bufflen, void *data,
+		     void (*done)(void *, int),
+		     char *sense_buffer, int timeout, int retries,
+		     enum dma_data_direction dir)
 {
-	/*
-	 * If the upper level driver is reusing these things, then
-	 * we should release the low-level block now.  Another one will
-	 * be allocated later when this request is getting queued.
-	 */
-	__scsi_release_request(sreq);
+	struct scsi_do_req_cb *cb = kmalloc(sizeof(struct scsi_do_req_cb),
+						   GFP_KERNEL);
+	struct request *req;
 
-	/*
-	 * Our own function scsi_done (which marks the host as not busy,
-	 * disables the timeout counter, etc) will be called by us or by the
-	 * scsi_hosts[host].queuecommand() function needs to also call
-	 * the completion function for the high level driver.
-	 */
-	memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
-	sreq->sr_bufflen = bufflen;
-	sreq->sr_buffer = buffer;
-	sreq->sr_allowed = retries;
-	sreq->sr_done = done;
-	sreq->sr_timeout_per_command = timeout;
+	if (!cb)
+		goto error_out;
+	if (bufflen) {
+		req = blk_rq_map_kern(sdev->request_queue,
+				      dir == DMA_TO_DEVICE,
+				      buffer, bufflen, __GFP_WAIT);
+		if (IS_ERR(req))
+			goto error_out;
+		blk_queue_bounce(sdev->request_queue, &req->bio);
+	} else
+		req = blk_get_request(sdev->request_queue, READ, __GFP_WAIT);
 
-	if (sreq->sr_cmd_len == 0)
-		sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
+	req->waiting = NULL;
+	req->end_io = scsi_do_req_end_io;
+	if (cmd_len == 0)
+		req->cmd_len = COMMAND_SIZE(((u8 *)cmnd)[0]);
+	else
+		req->cmd_len = cmd_len;
+	req->sense = sense_buffer;
+	req->sense_len = 0;
+	memcpy(req->cmd, cmnd, req->cmd_len);
+	req->timeout = timeout;
+	req->flags |= REQ_BLOCK_PC;
+	/* If we have no rq_disk, the ULD won't get to prepare the command */
+	req->rq_disk = NULL;
+	blk_insert_request(sdev->request_queue, req,
+			   1, NULL);
+	return;
 
-	/*
-	 * head injection *required* here otherwise quiesce won't work
-	 */
-	scsi_insert_special_req(sreq, 1);
+ error_out:
+	kfree(cb);
+	done(data, DRIVER_ERROR << 24);
+	return;
 }
 EXPORT_SYMBOL(scsi_do_req);
 



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-05 19:11       ` James Bottomley
@ 2005-06-06  5:43         ` Douglas Gilbert
  2005-06-06 14:19           ` James Bottomley
  0 siblings, 1 reply; 41+ messages in thread
From: Douglas Gilbert @ 2005-06-06  5:43 UTC (permalink / raw)
  To: James Bottomley
  Cc: Mike Christie, device-mapper development, linux-scsi, Jens Axboe

James Bottomley wrote:
> On Sun, 2005-06-05 at 09:40 -0500, James Bottomley wrote:
> 
>>Finally, there's coming up with a replacement API for scsi_do_req that
>>returns via the end_io callback ... since that doesn't do a wait/wake,
>>perhaps this should be the core API upon which the others are built?
> 
> 
> OK, well, the API is easy ... it's attached.  Converting sg and st to
> use it is quite another matter.  sg in particular is a nasty tangle when
> it comes to doing its own sg mapping, which the block layer will now do
> for it.
> 
> James
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -177,6 +177,23 @@ int scsi_queue_insert(struct scsi_cmnd *
>  	return 0;
>  }
>  
> +struct scsi_do_req_cb {
> +	void (*done)(void *, int);
> +	void *data;
> +};
> +
> +static void scsi_do_req_end_io(struct request *req)
> +{
> +	int result = req->errors;
> +	struct scsi_do_req_cb *cb = req->end_io_data;
> +	
> +	BUG_ON(cb == NULL);
> +	if (result)
> +		result |= DRIVER_ERROR << 24;
> +	blk_put_request(req);
> +	cb->done(cb->data, result);
> +	kfree(cb);
> +}
>  /*
>   * Function:    scsi_do_req
>   *
> @@ -203,38 +220,49 @@ int scsi_queue_insert(struct scsi_cmnd *
>   *		now inject requests on the *head* of the device queue
>   *		rather than the tail.
>   */
> -void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
> -		 void *buffer, unsigned bufflen,
> -		 void (*done)(struct scsi_cmnd *),
> -		 int timeout, int retries)
> +void scsi_do_command(struct scsi_device *sdev, const void *cmnd, int cmd_len,
> +		     void *buffer, unsigned bufflen, void *data,
> +		     void (*done)(void *, int),
> +		     char *sense_buffer, int timeout, int retries,
> +		     enum dma_data_direction dir)

James,
There is no max_length on sense_buffer. How does one
know how much, if any, sense data was written (returned).
resid is a very useful return parameter.
Perhaps the *done() callback should have
two more arguments (and scsi_do_command()
one more argument). And 'buffer' and 'bufflen' have
not changed in name but I assume they have in nature.

Additionally perhaps it is time to start thinking about
a clean pass through. Something that can pass a packet
command (like sg can at the moment) but to a scsi_host
(or a transport host). An additional argument would be
needed to pass addressing information to the LLD. I'm
thinking about the SAS Serial Management Protocol (SMP)
that has nothing to do with the block layer or the
SCSI mid level. We want to bind to a SAS host (initiator
port) and supply a SAS address (of the SMP target
which is most likely an expander).

Doug Gilbert

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-05  9:41     ` [dm-devel] " christophe varoqui
@ 2005-06-06 13:31       ` Lars Marowsky-Bree
  2005-06-07  0:04         ` Michael Christie
  0 siblings, 1 reply; 41+ messages in thread
From: Lars Marowsky-Bree @ 2005-06-06 13:31 UTC (permalink / raw)
  To: device-mapper development; +Cc: James Bottomley, Jens Axboe, linux-scsi

On 2005-06-05T11:41:46, christophe varoqui <christophe.varoqui@free.fr> wrote:

> While the focus is in this area, I'd like to mention multipath tools
> userland checkers will need to insert SG request on queue head (Ed
> Gaggin will explain this at OLS). I guess the API is lacking there, and
> would like to know if this feature is on your TODOs ...

It's on the TODO list. It's just not done yet. Essentially we need to be
able to set BW_RW_FAILFAST from user-space for SGIO.

This is Novell bugzilla #81694 and RHAT bugzilla #154436.


Sincerely,
    Lars Marowsky-Brée <lmb@suse.de>

-- 
High Availability & Clustering
SUSE Labs, Research and Development
SUSE LINUX Products GmbH - A Novell Business	 -- Charles Darwin
"Ignorance more frequently begets confidence than does knowledge"

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-06  5:43         ` Douglas Gilbert
@ 2005-06-06 14:19           ` James Bottomley
  2005-06-07 13:08             ` Douglas Gilbert
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-06 14:19 UTC (permalink / raw)
  To: Douglas Gilbert
  Cc: Mike Christie, device-mapper development, linux-scsi, Jens Axboe

Well ... while I've got your attention, the only complexity I see to
converting sg is the hd->iovec_count != 0 case, which is doing vectored
SG from user land.

This can be implemented (basically you do multiple bio attachment to a
request), and probably in the block layer (since block SG_IO would then
pick up the capability).

How important is this functionality?  Are there any applications using
it?

On Mon, 2005-06-06 at 15:43 +1000, Douglas Gilbert wrote:
> There is no max_length on sense_buffer. How does one
> know how much, if any, sense data was written (returned).

Well, given the way most initiators work we can't do variable length
sense buffer sizes; it's limited by SCSI_SENSE_BUFFERSIZE, which seems
to be a good fixed value assumption.  The reason we don't have this is
because the block layer doesn't.  It also assumes SCSI_SENSE_BUFFERSIZE
is the correct maximum.

> resid is a very useful return parameter.

Yes, I spotted that too ... already in my version II ...

> Perhaps the *done() callback should have
> two more arguments (and scsi_do_command()
> one more argument). And 'buffer' and 'bufflen' have
> not changed in name but I assume they have in nature.
> 
> Additionally perhaps it is time to start thinking about
> a clean pass through. Something that can pass a packet
> command (like sg can at the moment) but to a scsi_host
> (or a transport host). An additional argument would be
> needed to pass addressing information to the LLD. I'm
> thinking about the SAS Serial Management Protocol (SMP)
> that has nothing to do with the block layer or the
> SCSI mid level. We want to bind to a SAS host (initiator
> port) and supply a SAS address (of the SMP target
> which is most likely an expander).

Well, pretty much the block layer SG_IO is a clean passthrough.  The
direction I think it makes most sense to move in is towards services
provided by the block layer (even if that means additions to it) rather
than away from them.  Even for arbitrary SAS expanders, we can detect
and bind to them; once that's done we have a connection and a management
application can read and set sysfs parameters for the object.

James

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-05  7:15   ` Mike Christie
  2005-06-05  9:41     ` [dm-devel] " christophe varoqui
  2005-06-05 14:40     ` James Bottomley
@ 2005-06-06 19:02     ` Patrick Mansfield
  2005-06-07 15:26       ` Michael Christie
  2 siblings, 1 reply; 41+ messages in thread
From: Patrick Mansfield @ 2005-06-06 19:02 UTC (permalink / raw)
  To: Mike Christie
  Cc: James Bottomley, device-mapper development, Jens Axboe, linux-scsi

On Sun, Jun 05, 2005 at 12:15:27AM -0700, Mike Christie wrote:
> On Sat, 2005-06-04 at 09:07, James Bottomley wrote:

> > Here's the proof of concept for this one.  It converts scsi_wait_req to
> > do correct REQ_BLOCK_PC submission (and works nicely in my setup).

That is nice ... I didn't realize Mike's change allowed that.

> > There's some loss of functionality to this: retries are no longer
> > controllable (except by setting REQ_FASTFAIL) and the wait_req API needs
> > to be altered, but it looks very nice.

Mike your patch did not replace the retries.

Why can't the retries be passed down via a new blk req->retries?

Or just have scsi_wait_req() do the retries for us (it can sleep). 

It is hard to look at the scsi IO completion paths and see exactly what we
do and don't (well should and should not) retry. That is we call
scsi_retry_command() conditionally based on "retries" but can repeatedly
call scsi_requeue_command() in scsi_io_completion().

-- Patrick Mansfield

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-06 13:31       ` Lars Marowsky-Bree
@ 2005-06-07  0:04         ` Michael Christie
  2005-06-07  7:01           ` [dm-devel] " Lars Marowsky-Bree
  0 siblings, 1 reply; 41+ messages in thread
From: Michael Christie @ 2005-06-07  0:04 UTC (permalink / raw)
  To: device-mapper development, Lars Marowsky-Bree
  Cc: James Bottomley, Jens Axboe, linux-scsi

Quoting Lars Marowsky-Bree <lmb@suse.de>:

> On 2005-06-05T11:41:46, christophe varoqui <christophe.varoqui@free.fr>
> wrote:
> 
> > While the focus is in this area, I'd like to mention multipath tools
> > userland checkers will need to insert SG request on queue head (Ed
> > Gaggin will explain this at OLS). I guess the API is lacking there, and
> > would like to know if this feature is on your TODOs ...
> 
> It's on the TODO list. It's just not done yet. Essentially we need to be
> able to set BW_RW_FAILFAST from user-space for SGIO.
> 

Isn't this a seperate issue? It looks like the RHAT bugzilla asks to be able to
insert directly at the head of the queue so you do you not have to wait for
possibly a hundred IO to complete before the test IO. Setting failfast just
means in case of en error it will be returned faster (no wasting time
retying).


> This is Novell bugzilla #81694 and RHAT bugzilla #154436.
> 

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [dm-devel] Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07  0:04         ` Michael Christie
@ 2005-06-07  7:01           ` Lars Marowsky-Bree
  0 siblings, 0 replies; 41+ messages in thread
From: Lars Marowsky-Bree @ 2005-06-07  7:01 UTC (permalink / raw)
  To: Michael Christie, device-mapper development
  Cc: James Bottomley, Jens Axboe, linux-scsi

On 2005-06-06T19:04:09, Michael Christie <michaelc@cs.wisc.edu> wrote:

> > It's on the TODO list. It's just not done yet. Essentially we need
> > to be able to set BW_RW_FAILFAST from user-space for SGIO.
> > 
> Isn't this a seperate issue? It looks like the RHAT bugzilla asks to
> be able to insert directly at the head of the queue so you do you not
> have to wait for possibly a hundred IO to complete before the test IO.
> Setting failfast just means in case of en error it will be returned
> faster (no wasting time retying).

Uhm, sorry, you're right. We actually want/need to be able to set both,
but it would sort of end up being the same mechanism (additional flags
to SGIO) and so I have it mentally lumped together ;-)


Sincerely,
    Lars Marowsky-Brée <lmb@suse.de>

-- 
High Availability & Clustering
SUSE Labs, Research and Development
SUSE LINUX Products GmbH - A Novell Business	 -- Charles Darwin
"Ignorance more frequently begets confidence than does knowledge"

-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-04 16:07 ` James Bottomley
  2005-06-05  7:15   ` Mike Christie
@ 2005-06-07 12:10   ` Christoph Hellwig
  2005-06-07 12:20     ` James Bottomley
  1 sibling, 1 reply; 41+ messages in thread
From: Christoph Hellwig @ 2005-06-07 12:10 UTC (permalink / raw)
  To: James Bottomley
  Cc: Mike Christie, device-mapper development, linux-scsi, Jens Axboe

On Sat, Jun 04, 2005 at 11:07:14AM -0500, James Bottomley wrote:
> +	if (bufflen)
> +		req = blk_rq_map_kern(sreq->sr_device->request_queue,
> +				      sreq->sr_data_direction == DMA_TO_DEVICE,
> +				      buffer, bufflen, __GFP_WAIT);
> +	else
> +		req = blk_get_request(sreq->sr_device->request_queue, READ,
> +				      __GFP_WAIT);

shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
blk_get_request?  It's not exactly a criticial fastpath and that would make life
easier for the callers.

> +		if (req->rq_disk) {
> +			drv = *(struct scsi_driver **)req->rq_disk->private_data;
> +			if (unlikely(!drv->init_command(cmd))) {
> +				scsi_release_buffers(cmd);
> +				scsi_put_command(cmd);
> +				return BLKPREP_KILL;
> +			}
> +		} else {
> +			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
> +			if (rq_data_dir(req) == WRITE)
> +				cmd->sc_data_direction = DMA_TO_DEVICE;
> +			else if (req->data_len)
> +				cmd->sc_data_direction = DMA_FROM_DEVICE;
> +			else
> +				cmd->sc_data_direction = DMA_NONE;
> +			
> +			cmd->transfersize = req->data_len;
> +			cmd->allowed = 3;
> +			cmd->timeout_per_command = req->timeout;

most of this could probably be done in the midlayer always instead of the
upper drivers.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 12:10   ` Christoph Hellwig
@ 2005-06-07 12:20     ` James Bottomley
  2005-06-07 15:36       ` Michael Christie
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-07 12:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Mike Christie, device-mapper development, linux-scsi, Jens Axboe

On Tue, 2005-06-07 at 13:10 +0100, Christoph Hellwig wrote:
> shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
> blk_get_request?  It's not exactly a criticial fastpath and that would make life
> easier for the callers.

Yes ... and it should probably do bio bouncing as well, since there's
nothing special we have to do on completion to clean it up.

I also think we might need a blk_rq_kern_iovec call that would take a
vector of user I/O's and map it to a multiple bio request.  This would
allow me to strip all of the user mapping out of sg, and also allow the
block layer SG_IO to pick up this API (currently it returns -ENOTSUPP if
you try it).

> > +		if (req->rq_disk) {
> > +			drv = *(struct scsi_driver **)req->rq_disk->private_data;
> > +			if (unlikely(!drv->init_command(cmd))) {
> > +				scsi_release_buffers(cmd);
> > +				scsi_put_command(cmd);
> > +				return BLKPREP_KILL;
> > +			}
> > +		} else {
> > +			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
> > +			if (rq_data_dir(req) == WRITE)
> > +				cmd->sc_data_direction = DMA_TO_DEVICE;
> > +			else if (req->data_len)
> > +				cmd->sc_data_direction = DMA_FROM_DEVICE;
> > +			else
> > +				cmd->sc_data_direction = DMA_NONE;
> > +			
> > +			cmd->transfersize = req->data_len;
> > +			cmd->allowed = 3;
> > +			cmd->timeout_per_command = req->timeout;
> 
> most of this could probably be done in the midlayer always instead of the
> upper drivers.

I don't think so ... this is all struct scsi_cmnd handling.  SCSI needs
to allocate and process its internal command structures.

However, I think we have a case for the number of retries being carried
by the generic request structure.

James

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-06 14:19           ` James Bottomley
@ 2005-06-07 13:08             ` Douglas Gilbert
  2005-06-07 13:34               ` Tony Battersby
  2005-06-07 15:59               ` [PATCH RFC 0/4] use scatter lists for all block pc " James Bottomley
  0 siblings, 2 replies; 41+ messages in thread
From: Douglas Gilbert @ 2005-06-07 13:08 UTC (permalink / raw)
  To: James Bottomley
  Cc: LIRANS, Mike Christie, device-mapper development, linux-scsi, Jens Axboe

James Bottomley wrote:
> Well ... while I've got your attention, the only complexity I see to
> converting sg is the hd->iovec_count != 0 case, which is doing vectored
> SG from user land.
> 
> This can be implemented (basically you do multiple bio attachment to a
> request), and probably in the block layer (since block SG_IO would then
> pick up the capability).
> 
> How important is this functionality?  Are there any applications using
> it?

I don't think it is important and I'm not aware of any
real world applications that are using it. Naturally,
I will find out if that is actually true after it
gets removed.

> On Mon, 2005-06-06 at 15:43 +1000, Douglas Gilbert wrote:
> 
>>There is no max_length on sense_buffer. How does one
>>know how much, if any, sense data was written (returned).
> 
> 
> Well, given the way most initiators work we can't do variable length
> sense buffer sizes; it's limited by SCSI_SENSE_BUFFERSIZE, which seems
> to be a good fixed value assumption.  The reason we don't have this is
> because the block layer doesn't.  It also assumes SCSI_SENSE_BUFFERSIZE
> is the correct maximum.

I believe that I am guilty for SCSI_SENSE_BUFFERSIZE.
But implementation details shouldn't necessarily limit
new interface functions. Again OSD could be a problem
here as it uses descriptor sense format with larger
descriptors (than SPC-3). Perhaps Liran might comment
on whether the current 96 bytes is sufficient. [According
to SPC-3 there is still a limit of 252 bytes.]

Also scsi_do_command() could drop the direction
argument and have an "in" pair (pointer and length)
and an "out" pair. Then it could cope with OSD and
advanced SBC-2 commands.

>>resid is a very useful return parameter.
> 
> 
> Yes, I spotted that too ... already in my version II ...
> 
> 
>>Perhaps the *done() callback should have
>>two more arguments (and scsi_do_command()
>>one more argument). And 'buffer' and 'bufflen' have
>>not changed in name but I assume they have in nature.
>>
>>Additionally perhaps it is time to start thinking about
>>a clean pass through. Something that can pass a packet
>>command (like sg can at the moment) but to a scsi_host
>>(or a transport host). An additional argument would be
>>needed to pass addressing information to the LLD. I'm
>>thinking about the SAS Serial Management Protocol (SMP)
>>that has nothing to do with the block layer or the
>>SCSI mid level. We want to bind to a SAS host (initiator
>>port) and supply a SAS address (of the SMP target
>>which is most likely an expander).
> 
> 
> Well, pretty much the block layer SG_IO is a clean passthrough.

Depending on the driver which controls the device node
that has been opened, there are various policy decisions
to maneouver (e.g. sr and st require O_NONBLOCK on
open() otherwise they will wait until media is loaded,
making the SCSI Test Unit Ready command pointless). Also
there are state machines in some drivers, that
can either be confused by the pass through or
vice versa (e.g. try using the sg_prevent utility in
sg3_utils via a sr device node, then
try again with a sg device node). Should maximum
transfer size limits that apply to the READ/WRITE
SBC commands also apply to READ/WRITE_BUFFER SPC
commands?

As always "clean" is the debatable word.

   The
> direction I think it makes most sense to move in is towards services
> provided by the block layer (even if that means additions to it) rather
> than away from them.  Even for arbitrary SAS expanders, we can detect
> and bind to them; once that's done we have a connection and a management
> application can read and set sysfs parameters for the object.

The SAS SMP protocol is for discovering what is out
there. In the extreme case no block device has been found.
There is just a SAS HBA on a PCI bus with sysfs information
telling us about some phys and what they are attached
to. If the attached device type is an expander that
implements a SMP target then we need to issue SMP
frames (commands) through a local phy to the SAS address
of the expander. I'm not sure we need scatter-gather,
command merging, tagged queues or pseudo block devices
to accomplish this.

Doug Gilbert

^ permalink raw reply	[flat|nested] 41+ messages in thread

* RE: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 13:08             ` Douglas Gilbert
@ 2005-06-07 13:34               ` Tony Battersby
  2005-06-07 16:34                 ` James Bottomley
  2005-06-07 15:59               ` [PATCH RFC 0/4] use scatter lists for all block pc " James Bottomley
  1 sibling, 1 reply; 41+ messages in thread
From: Tony Battersby @ 2005-06-07 13:34 UTC (permalink / raw)
  To: dougg, 'James Bottomley'
  Cc: 'Mike Christie', 'device-mapper development',
	'linux-scsi', 'Jens Axboe',
	LIRANS

> James Bottomley wrote:
> > Well ... while I've got your attention, the only complexity I see to
> > converting sg is the hd->iovec_count != 0 case, which is doing
vectored
> > SG from user land.
> >
> > This can be implemented (basically you do multiple bio attachment to
a
> > request), and probably in the block layer (since block SG_IO would
then
> > pick up the capability).
> >
> > How important is this functionality?  Are there any applications
using
> > it?
>
> I don't think it is important and I'm not aware of any
> real world applications that are using it. Naturally,
> I will find out if that is actually true after it
> gets removed.

My company (Cybernetics) sells several products that run embedded Linux
which use the userspace scatter-gather capability of the SG driver.
They are still running the 2.4 kernel series, but we will probably
switch to 2.6 at some point.

Anthony J. Battersby
Cybernetics


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-06 19:02     ` Patrick Mansfield
@ 2005-06-07 15:26       ` Michael Christie
  2005-06-07 18:23         ` Patrick Mansfield
  0 siblings, 1 reply; 41+ messages in thread
From: Michael Christie @ 2005-06-07 15:26 UTC (permalink / raw)
  To: Patrick Mansfield
  Cc: James Bottomley, device-mapper development, Jens Axboe, linux-scsi

Quoting Patrick Mansfield <patmans@us.ibm.com>:

> On Sun, Jun 05, 2005 at 12:15:27AM -0700, Mike Christie wrote:
> > On Sat, 2005-06-04 at 09:07, James Bottomley wrote:
> 
> > > Here's the proof of concept for this one.  It converts scsi_wait_req to
> > > do correct REQ_BLOCK_PC submission (and works nicely in my setup).
> 
> That is nice ... I didn't realize Mike's change allowed that.
> 
> > > There's some loss of functionality to this: retries are no longer
> > > controllable (except by setting REQ_FASTFAIL) and the wait_req API needs
> > > to be altered, but it looks very nice.
> 
> Mike your patch did not replace the retries.
> 
> Why can't the retries be passed down via a new blk req->retries?
> 

Is that all that is needed for dm-multipath? I thought this would be a good time
to add some code to allow the finer grained control of retries that could also
be used by dm and md. For example do you only want to retry certain failures X
numbeer of times?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 12:20     ` James Bottomley
@ 2005-06-07 15:36       ` Michael Christie
  2005-06-07 15:45         ` [dm-devel] " Michael Christie
  2005-06-07 18:09         ` Jens Axboe
  0 siblings, 2 replies; 41+ messages in thread
From: Michael Christie @ 2005-06-07 15:36 UTC (permalink / raw)
  To: James Bottomley
  Cc: Christoph Hellwig, device-mapper development, Jens Axboe, linux-scsi

Quoting James Bottomley <James.Bottomley@SteelEye.com>:

> On Tue, 2005-06-07 at 13:10 +0100, Christoph Hellwig wrote:
> > shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
> > blk_get_request?  It's not exactly a criticial fastpath and that would make
> life
> > easier for the callers.
> 
> Yes ... and it should probably do bio bouncing as well, since there's
> nothing special we have to do on completion to clean it up.

ok. I just made it like the existing blk_rq_map_user which made the caller do
those things.

> 
> I also think we might need a blk_rq_kern_iovec call that would take a
> vector of user I/O's and map it to a multiple bio request.  This would

Does it need to be a multiple bio request? A single bio should be able to handle
 a request's segments and sectors limits.
 
Will the user assure that the iovec will fit in a single request to handle a
case where the iovec is greater than the phys or hw segment limits though?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [dm-devel] Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 15:36       ` Michael Christie
@ 2005-06-07 15:45         ` Michael Christie
  2005-06-07 16:26           ` Kai Makisara
  2005-06-07 18:09         ` Jens Axboe
  1 sibling, 1 reply; 41+ messages in thread
From: Michael Christie @ 2005-06-07 15:45 UTC (permalink / raw)
  To: Michael Christie
  Cc: James Bottomley, Christoph Hellwig, device-mapper development,
	Jens Axboe, linux-scsi

Quoting Michael Christie <michaelc@cs.wisc.edu>:

> Quoting James Bottomley <James.Bottomley@SteelEye.com>:
> 
> > On Tue, 2005-06-07 at 13:10 +0100, Christoph Hellwig wrote:
> > > shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
> > > blk_get_request?  It's not exactly a criticial fastpath and that would
> make
> > life
> > > easier for the callers.
> > 
> > Yes ... and it should probably do bio bouncing as well, since there's
> > nothing special we have to do on completion to clean it up.
> 
> ok. I just made it like the existing blk_rq_map_user which made the caller
> do
> those things.
> 
> > 
> > I also think we might need a blk_rq_kern_iovec call that would take a
> > vector of user I/O's and map it to a multiple bio request.  This would
> 
> Does it need to be a multiple bio request? A single bio should be able to
> handle
>  a request's segments and sectors limits.
>  
> Will the user assure that the iovec will fit in a single request to handle a
> case where the iovec is greater than the phys or hw segment limits though?
> 

scsi_do/wait_req could do the checking and submit mutliple requests for sg I
mean.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 13:08             ` Douglas Gilbert
  2005-06-07 13:34               ` Tony Battersby
@ 2005-06-07 15:59               ` James Bottomley
  2005-06-07 18:07                 ` Jens Axboe
  1 sibling, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-07 15:59 UTC (permalink / raw)
  To: Douglas Gilbert
  Cc: Mike Christie, device-mapper development, linux-scsi, Jens Axboe, LIRANS

On Tue, 2005-06-07 at 23:08 +1000, Douglas Gilbert wrote:
> I don't think it is important and I'm not aware of any
> real world applications that are using it. Naturally,
> I will find out if that is actually true after it
> gets removed.

Heh, true.  OK, let's do this: I'll remove it from the sg driver and
make sg use the do_command (or it's successor) interface.  If an actual
user for the iovecs does turn up, I'll add it to the block layer (it's a
fairly well understood multi-bio request setup) and make sg and also the
block layer SG_IO use it.

> I believe that I am guilty for SCSI_SENSE_BUFFERSIZE.
> But implementation details shouldn't necessarily limit
> new interface functions. Again OSD could be a problem
> here as it uses descriptor sense format with larger
> descriptors (than SPC-3). Perhaps Liran might comment
> on whether the current 96 bytes is sufficient. [According
> to SPC-3 there is still a limit of 252 bytes.]
> 
> Also scsi_do_command() could drop the direction
> argument and have an "in" pair (pointer and length)
> and an "out" pair. Then it could cope with OSD and
> advanced SBC-2 commands.

The basic trouble is that to use variable length sense buffer sizes we
need to modify the way block layer requests code sense data.  This
should be doable ... I'll look into it.

> Depending on the driver which controls the device node
> that has been opened, there are various policy decisions
> to maneouver (e.g. sr and st require O_NONBLOCK on
> open() otherwise they will wait until media is loaded,
> making the SCSI Test Unit Ready command pointless). Also
> there are state machines in some drivers, that
> can either be confused by the pass through or
> vice versa (e.g. try using the sg_prevent utility in
> sg3_utils via a sr device node, then
> try again with a sg device node). Should maximum
> transfer size limits that apply to the READ/WRITE
> SBC commands also apply to READ/WRITE_BUFFER SPC
> commands?

Well, but the former are only semantic ... and they can be fixed.  I'm
not aware that we do actually impose limits on READ/WRITE (at least not
when we receive them as packet commands).

> The SAS SMP protocol is for discovering what is out
> there. In the extreme case no block device has been found.
> There is just a SAS HBA on a PCI bus with sysfs information
> telling us about some phys and what they are attached
> to. If the attached device type is an expander that
> implements a SMP target then we need to issue SMP
> frames (commands) through a local phy to the SAS address
> of the expander. I'm not sure we need scatter-gather,
> command merging, tagged queues or pseudo block devices
> to accomplish this.

Yes, but this type of discovery is probably going to be the province of
the SAS class.  It will export an API (similar to the current SCSI sysfs
scanning one) that would allow the user to manipulate discovery.

The question is not whether such a process would make use of all the
features the block layer places at its disposal, but whether it's
capable of being done by the existing block layer infrastructure.  If
the latter is true, there's no need to invent new methods.

James



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [dm-devel] Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 15:45         ` [dm-devel] " Michael Christie
@ 2005-06-07 16:26           ` Kai Makisara
  2005-06-07 19:23             ` James Bottomley
  0 siblings, 1 reply; 41+ messages in thread
From: Kai Makisara @ 2005-06-07 16:26 UTC (permalink / raw)
  To: Michael Christie
  Cc: device-mapper development, James Bottomley, Christoph Hellwig,
	Jens Axboe, linux-scsi

On Tue, 7 Jun 2005, Michael Christie wrote:

> Quoting Michael Christie <michaelc@cs.wisc.edu>:
> 
> > Quoting James Bottomley <James.Bottomley@SteelEye.com>:
> > 
> > > On Tue, 2005-06-07 at 13:10 +0100, Christoph Hellwig wrote:
> > > > shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
> > > > blk_get_request?  It's not exactly a criticial fastpath and that would
> > make
> > > life
> > > > easier for the callers.
> > > 
> > > Yes ... and it should probably do bio bouncing as well, since there's
> > > nothing special we have to do on completion to clean it up.
> > 
> > ok. I just made it like the existing blk_rq_map_user which made the caller
> > do
> > those things.
> > 
> > > 
> > > I also think we might need a blk_rq_kern_iovec call that would take a
> > > vector of user I/O's and map it to a multiple bio request.  This would
> > 
> > Does it need to be a multiple bio request? A single bio should be able to
> > handle
> >  a request's segments and sectors limits.
> >  
> > Will the user assure that the iovec will fit in a single request to handle a
> > case where the iovec is greater than the phys or hw segment limits though?
> > 
> 
> scsi_do/wait_req could do the checking and submit mutliple requests for sg I
> mean.

No, it can't do that. If the user submits one SCSI command, it must result 
in one SCSI command to the device. Otherwise the effect is not what the 
user wants. (You can split a multiple block read/write but this does not 
apply to all commands.)

Michael's question is important. The number of sg segments a HBA supports 
determines the maximum SCSI data size. In some cases (e.g., tape 
reads and writes with multimegabyte blocks) using page size (e.g., 4 kB) 
segments does not allow large enough data size. One solution has been to 
have a kernel space buffer that consists of segments spanning several 
pages. As far as I know, the current bio code requires page size segments. 
It is possible to use chained bios with multimegabyte buffers but the 
user should be sure that the split segments will be merged before the 
request reaches the HBA so that the request fits the HBA sg segment limit.

-- 
Kai

^ permalink raw reply	[flat|nested] 41+ messages in thread

* RE: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 13:34               ` Tony Battersby
@ 2005-06-07 16:34                 ` James Bottomley
  2005-06-07 18:38                   ` [PATCH RFC 0/4] use scatter lists for all blockpc " Tony Battersby
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-07 16:34 UTC (permalink / raw)
  To: tonyb
  Cc: Douglas Gilbert, 'Mike Christie',
	'device-mapper development', 'linux-scsi',
	'Jens Axboe',
	LIRANS

On Tue, 2005-06-07 at 09:34 -0400, Tony Battersby wrote:
> My company (Cybernetics) sells several products that run embedded Linux
> which use the userspace scatter-gather capability of the SG driver.
> They are still running the 2.4 kernel series, but we will probably
> switch to 2.6 at some point.

What do they actually use it for?  As in why is the data scattered even
in the virtual address space?

James



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-04  1:19 [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers Mike Christie
  2005-06-04 16:07 ` James Bottomley
@ 2005-06-07 18:07 ` Jens Axboe
  2005-06-07 19:38   ` James Bottomley
  1 sibling, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2005-06-07 18:07 UTC (permalink / raw)
  To: Mike Christie; +Cc: device-mapper development, linux-scsi

On Fri, Jun 03 2005, Mike Christie wrote:
> The following patches should enable the use of scatter lists for all
> REQ_BLOCK_PC requests and cleanup some code duplication or dangerous
> memory allocations in the dm-multipath hw handlers and
> block/scsi_ioctl.c.
> 
> REQ_BLOCK_PC scatter list usage only required converting the old
> sg_scsi_ioctl code to do bio backed requests since the current block
> layer SG_IO code will always use requests with bios. But while
> converting the old ioctl and removing some dangerous (GFP_KERNEL in
> failover path) memory allocations from a dm-multipath hw_handler (and
> while updating the LSI one) I was able to seperate some common code into
> two new functions: blk_rq_map_kern() and bio_map_kern. These functions
> are similar to their blk/bio*user cousins where they allocate requests
> and bios and setup the data pointers except they work on kernel buffers.

Wonderful stuff, much needed for a long time.

> The goal is next convert the scsi 'special' requests to use these
> functions, so scsi will be able to use block layer functions for scatter
> lists setup for all requests. And then hopefully one day we will not
> need those stinking "if (sc->use_sg)" paths all over our scsi drivers.

I've slowly been doing the same thing in other places in the kernel and
this bit has been talked about between James and I for at least a year
or two.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 15:59               ` [PATCH RFC 0/4] use scatter lists for all block pc " James Bottomley
@ 2005-06-07 18:07                 ` Jens Axboe
  2005-06-07 19:26                   ` James Bottomley
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2005-06-07 18:07 UTC (permalink / raw)
  To: James Bottomley
  Cc: LIRANS, Mike Christie, Douglas Gilbert,
	device-mapper development, linux-scsi

On Tue, Jun 07 2005, James Bottomley wrote:
> On Tue, 2005-06-07 at 23:08 +1000, Douglas Gilbert wrote:
> > I don't think it is important and I'm not aware of any
> > real world applications that are using it. Naturally,
> > I will find out if that is actually true after it
> > gets removed.
> 
> Heh, true.  OK, let's do this: I'll remove it from the sg driver and
> make sg use the do_command (or it's successor) interface.  If an actual
> user for the iovecs does turn up, I'll add it to the block layer (it's a
> fairly well understood multi-bio request setup) and make sg and also the
> block layer SG_IO use it.

Why multi-bio? No one should ever have to build a request with multiple
bio's in one go, that's pointless. The only reason multi-bio requests
exist is because of the file systems not submitting big extents in one
submission. The whole io path would be faster and simpler were it not
for multi-bio requests :-)

A single bio can contain just as much data.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 15:36       ` Michael Christie
  2005-06-07 15:45         ` [dm-devel] " Michael Christie
@ 2005-06-07 18:09         ` Jens Axboe
  2005-06-08 12:46           ` Mike Christie
  1 sibling, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2005-06-07 18:09 UTC (permalink / raw)
  To: Michael Christie
  Cc: James Bottomley, Christoph Hellwig, device-mapper development,
	linux-scsi

On Tue, Jun 07 2005, Michael Christie wrote:
> Quoting James Bottomley <James.Bottomley@SteelEye.com>:
> 
> > On Tue, 2005-06-07 at 13:10 +0100, Christoph Hellwig wrote:
> > > shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
> > > blk_get_request?  It's not exactly a criticial fastpath and that would make
> > life
> > > easier for the callers.
> > 
> > Yes ... and it should probably do bio bouncing as well, since there's
> > nothing special we have to do on completion to clean it up.
> 
> ok. I just made it like the existing blk_rq_map_user which made the caller do
> those things.

I agree with Christoph, makes it nicer for blk_rq_map_user as well. I'll
change the latter if you fix the former.

> > I also think we might need a blk_rq_kern_iovec call that would take a
> > vector of user I/O's and map it to a multiple bio request.  This would
> 
> Does it need to be a multiple bio request? A single bio should be able
> to handle a request's segments and sectors limits.

Indeed.

> Will the user assure that the iovec will fit in a single request to
> handle a case where the iovec is greater than the phys or hw segment
> limits though?

This is where the block layer SG_IO and the SCSI layer sg differ. The
SCSI layer has historically handled these partial completion requests
fine, where the block layer didn't provide functionality for it and the
ide driver didn't cope with it.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 15:26       ` Michael Christie
@ 2005-06-07 18:23         ` Patrick Mansfield
  2005-06-08 15:41           ` Mike Christie
  0 siblings, 1 reply; 41+ messages in thread
From: Patrick Mansfield @ 2005-06-07 18:23 UTC (permalink / raw)
  To: Michael Christie
  Cc: James Bottomley, device-mapper development, Jens Axboe, linux-scsi

On Tue, Jun 07, 2005 at 10:26:16AM -0500, Michael Christie wrote:
> Quoting Patrick Mansfield <patmans@us.ibm.com>:

> > Why can't the retries be passed down via a new blk req->retries?
> > 
> 
> Is that all that is needed for dm-multipath? I thought this would be a good time
> to add some code to allow the finer grained control of retries that could also
> be used by dm and md. For example do you only want to retry certain failures X
> numbeer of times?

I was looking/thinking about the scsi_scan retries not multipath.

Setting retries might help dm-multipath, but I thought multipath was going
to use fast fail.

The "certain failures" is in question, it is not clear if fastfail (or
setting retries to zero) give the right behaviour for use by dm-multipath,
and it is not easy to analyze.

Examples: 

MEDIUM_ERROR gets NEEDS_RETRY but is also handled in sd.c:sd_rw_intr(), it
looks like we retry (if not fast fail), and then sd_rw_intr will do a
partial retry of the error after that. So with fast fail, we never retry
the entire IO.

DID_BUSY_BUSY: should this not be retried for fast fail? It is HBA
driver/hardware specific.

The unconditional retries in scsi_io_completion() all look OK for multipath.

-- Patrick Mansfield

^ permalink raw reply	[flat|nested] 41+ messages in thread

* RE: [PATCH RFC 0/4] use scatter lists for all blockpc requests and simplify hw handlers
  2005-06-07 16:34                 ` James Bottomley
@ 2005-06-07 18:38                   ` Tony Battersby
  2005-06-07 18:43                     ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Tony Battersby @ 2005-06-07 18:38 UTC (permalink / raw)
  To: 'James Bottomley'
  Cc: 'Douglas Gilbert', 'Mike Christie',
	'device-mapper development', 'linux-scsi',
	'Jens Axboe',
	LIRANS

> > My company (Cybernetics) sells several products that run embedded
Linux
> > which use the userspace scatter-gather capability of the SG driver.
> > They are still running the 2.4 kernel series, but we will probably
> > switch to 2.6 at some point.
>
> What do they actually use it for?  As in why is the data scattered
even
> in the virtual address space?

One of our products is a RAID-like controller for SCSI tape drives.
Parallel SCSI or iSCSI in via a target-mode SCSI driver and parallel
SCSI, iSCSI, or FC out via the SG driver, with various functions in
between for data buffering, emulation, striping, mirroring, etc.  The
program that controls everything has a complex internal buffering scheme
that uses userspace scatter-gather for the main read/write I/O path to
the tape drives.

Anthony J. Battersby
Cybernetics


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all blockpc requests and simplify hw handlers
  2005-06-07 18:38                   ` [PATCH RFC 0/4] use scatter lists for all blockpc " Tony Battersby
@ 2005-06-07 18:43                     ` Jens Axboe
  0 siblings, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2005-06-07 18:43 UTC (permalink / raw)
  To: Tony Battersby
  Cc: 'James Bottomley', 'Douglas Gilbert',
	'Mike Christie', 'device-mapper development',
	'linux-scsi',
	LIRANS

On Tue, Jun 07 2005, Tony Battersby wrote:
> > > My company (Cybernetics) sells several products that run embedded
> Linux
> > > which use the userspace scatter-gather capability of the SG driver.
> > > They are still running the 2.4 kernel series, but we will probably
> > > switch to 2.6 at some point.
> >
> > What do they actually use it for?  As in why is the data scattered
> even
> > in the virtual address space?
> 
> One of our products is a RAID-like controller for SCSI tape drives.
> Parallel SCSI or iSCSI in via a target-mode SCSI driver and parallel
> SCSI, iSCSI, or FC out via the SG driver, with various functions in
> between for data buffering, emulation, striping, mirroring, etc.  The
> program that controls everything has a complex internal buffering scheme
> that uses userspace scatter-gather for the main read/write I/O path to
> the tape drives.

That sounds like a valid use. Either way, I don't think we should remove
such a feature. It's a sane feature, and people might be using it -
there's no excuse for removing it.
Should we kill readv next? :)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 16:26           ` Kai Makisara
@ 2005-06-07 19:23             ` James Bottomley
  0 siblings, 0 replies; 41+ messages in thread
From: James Bottomley @ 2005-06-07 19:23 UTC (permalink / raw)
  To: Kai Makisara
  Cc: Christoph Hellwig, Michael Christie, device-mapper development,
	linux-scsi, Jens Axboe

On Tue, 2005-06-07 at 19:26 +0300, Kai Makisara wrote:
> No, it can't do that. If the user submits one SCSI command, it must result 
> in one SCSI command to the device. Otherwise the effect is not what the 
> user wants. (You can split a multiple block read/write but this does not 
> apply to all commands.)

Yes, I agree ... we have to have a single request.

> Michael's question is important. The number of sg segments a HBA supports 
> determines the maximum SCSI data size. In some cases (e.g., tape 
> reads and writes with multimegabyte blocks) using page size (e.g., 4 kB) 
> segments does not allow large enough data size. One solution has been to 
> have a kernel space buffer that consists of segments spanning several 
> pages. As far as I know, the current bio code requires page size segments. 
> It is possible to use chained bios with multimegabyte buffers but the 
> user should be sure that the split segments will be merged before the 
> request reaches the HBA so that the request fits the HBA sg segment limit.

Well, this isn't actually necessarily the problem.  *Provided* the
underlying driver enables clustering, the block layer will merge
physically contiguous pages in a single bio (so a large buffer will come
out the other side of blk_rq_map_sg as a single scatter/gather entry)

James

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 18:07                 ` Jens Axboe
@ 2005-06-07 19:26                   ` James Bottomley
  2005-06-08  7:09                     ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-07 19:26 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Douglas Gilbert, Mike Christie, device-mapper development,
	linux-scsi, LIRANS

On Tue, 2005-06-07 at 20:07 +0200, Jens Axboe wrote:
> Why multi-bio? No one should ever have to build a request with multiple
> bio's in one go, that's pointless. The only reason multi-bio requests
> exist is because of the file systems not submitting big extents in one
> submission. The whole io path would be faster and simpler were it not
> for multi-bio requests :-)

OK, OK, sorry thinko ... I meant multi-biovec (which can be a single
bio) ... however, I'm not the only one who keeps being confused by
this ...

Incidentally, do I take it you're happy with all of this and I can take
(at least the block pieces) through the SCSI tree?

Mike, I won't taking the dm-multipath patch unless there's agreement
from Alasdair.

James



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 18:07 ` Jens Axboe
@ 2005-06-07 19:38   ` James Bottomley
  2005-06-08  3:00     ` Douglas Gilbert
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-07 19:38 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Mike Christie, device-mapper development, linux-scsi

On Tue, 2005-06-07 at 20:07 +0200, Jens Axboe wrote:
> I've slowly been doing the same thing in other places in the kernel and
> this bit has been talked about between James and I for at least a year
> or two.

Yes, it's long been a dream of mine to eliminate at least struct
scsi_request and just use block struct request for everything in the
SCSI layer.

The most pleasing aspect of this will be getting rid of the dma mapping
duplications in st and sg.

I'm still not sure we have it entirely correct, though.  Something still
needs to be done about the queue alignment constraints, so bio_copy_...
might need to be part of this API.

Also, Jens, this looks wrong (from ll_rw_blk.c around line 2131):

if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))

The alignment surely wants to be only on the start buffer (otherwise
we'll get spurious copies in variable length commands (like inquiries)?

James



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 19:38   ` James Bottomley
@ 2005-06-08  3:00     ` Douglas Gilbert
  2005-06-08 12:59       ` James Bottomley
  0 siblings, 1 reply; 41+ messages in thread
From: Douglas Gilbert @ 2005-06-08  3:00 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jens Axboe, Mike Christie, linux-scsi, Kai.Makisara

James Bottomley wrote:
> On Tue, 2005-06-07 at 20:07 +0200, Jens Axboe wrote:
> 
>>I've slowly been doing the same thing in other places in the kernel and
>>this bit has been talked about between James and I for at least a year
>>or two.
> 
> 
> Yes, it's long been a dream of mine to eliminate at least struct
> scsi_request and just use block struct request for everything in the
> SCSI layer.
> 
> The most pleasing aspect of this will be getting rid of the dma mapping
> duplications in st and sg.

James,
A "char_uld" library that st, sg, ch and the proposed OSD
ULDs could tap into, would cut the duplication. As you can
see, Kai and I have shared implementations in this area but
had no mechanism for sharing the actual code.

An idea I had was to flag what mechanism inserted a
command onto a request queue and when a blk_pc_request()
follows a blk_pc_request() then use FIFO order for
the second one.

Doug Gilbert

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 19:26                   ` James Bottomley
@ 2005-06-08  7:09                     ` Jens Axboe
  0 siblings, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2005-06-08  7:09 UTC (permalink / raw)
  To: James Bottomley
  Cc: LIRANS, Mike Christie, Douglas Gilbert,
	device-mapper development, linux-scsi

On Tue, Jun 07 2005, James Bottomley wrote:
> On Tue, 2005-06-07 at 20:07 +0200, Jens Axboe wrote:
> > Why multi-bio? No one should ever have to build a request with multiple
> > bio's in one go, that's pointless. The only reason multi-bio requests
> > exist is because of the file systems not submitting big extents in one
> > submission. The whole io path would be faster and simpler were it not
> > for multi-bio requests :-)
> 
> OK, OK, sorry thinko ... I meant multi-biovec (which can be a single
> bio) ... however, I'm not the only one who keeps being confused by
> this ...
> 
> Incidentally, do I take it you're happy with all of this and I can take
> (at least the block pieces) through the SCSI tree?

I was planning on trying a block tree out, let me try and merge it first
and look it over if you don't mind.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 18:09         ` Jens Axboe
@ 2005-06-08 12:46           ` Mike Christie
  0 siblings, 0 replies; 41+ messages in thread
From: Mike Christie @ 2005-06-08 12:46 UTC (permalink / raw)
  To: Jens Axboe
  Cc: James Bottomley, Christoph Hellwig, device-mapper development,
	linux-scsi

Jens Axboe wrote:
> On Tue, Jun 07 2005, Michael Christie wrote:
> 
>>Quoting James Bottomley <James.Bottomley@SteelEye.com>:
>>
>>
>>>On Tue, 2005-06-07 at 13:10 +0100, Christoph Hellwig wrote:
>>>
>>>>shouldn't blk_rq_map_kern handle a 0 buffer and do nothing more than
>>>>blk_get_request?  It's not exactly a criticial fastpath and that would make
>>>
>>>life
>>>
>>>>easier for the callers.
>>>
>>>Yes ... and it should probably do bio bouncing as well, since there's
>>>nothing special we have to do on completion to clean it up.
>>
>>ok. I just made it like the existing blk_rq_map_user which made the caller do
>>those things.
> 
> 
> I agree with Christoph, makes it nicer for blk_rq_map_user as well. I'll
> change the latter if you fix the former.

ok will do.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-08  3:00     ` Douglas Gilbert
@ 2005-06-08 12:59       ` James Bottomley
  2005-06-08 14:50         ` Luben Tuikov
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-08 12:59 UTC (permalink / raw)
  To: Douglas Gilbert; +Cc: Jens Axboe, Mike Christie, linux-scsi, Kai.Makisara

On Wed, 2005-06-08 at 13:00 +1000, Douglas Gilbert wrote:
> A "char_uld" library that st, sg, ch and the proposed OSD
> ULDs could tap into, would cut the duplication. As you can
> see, Kai and I have shared implementations in this area but
> had no mechanism for sharing the actual code.

Well, yes, I've been asking for something like this for ages.  However,
with the new block APIs it seems that the character read and write paths
become very short, so it may no longer be worth it.

Also, I really don't think OSD should be a character device.  It's
definitely a block device, it just happens to have a two dimensional
address space instead of a one dimensional one.

> An idea I had was to flag what mechanism inserted a
> command onto a request queue and when a blk_pc_request()
> follows a blk_pc_request() then use FIFO order for
> the second one.

Really, I think it's cleaner for head or tail insertion to be handled at
the time the request is generated.  However, the block queues of
character taps are special; we certainly don't need all the elevator
merging machinery, so perhaps we should have a way of setting them up as
noop elevator?

James



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-08 12:59       ` James Bottomley
@ 2005-06-08 14:50         ` Luben Tuikov
  0 siblings, 0 replies; 41+ messages in thread
From: Luben Tuikov @ 2005-06-08 14:50 UTC (permalink / raw)
  To: James Bottomley
  Cc: Douglas Gilbert, Jens Axboe, Mike Christie, linux-scsi, Kai.Makisara

On 06/08/05 08:59, James Bottomley wrote:
> Also, I really don't think OSD should be a character device.  It's
> definitely a block device, it just happens to have a two dimensional
> address space instead of a one dimensional one.

Very true.  Although _management_ of OSD contents would not be
"block device", but the next layer up. (hint)

> Really, I think it's cleaner for head or tail insertion to be handled at
> the time the request is generated.  However, the block queues of
> character taps are special; we certainly don't need all the elevator
> merging machinery, so perhaps we should have a way of setting them up as
> noop elevator?

Agreed.  Treating block queues of character devices with no-op elevator
would yield very clean and generalized implementation.

	Luben

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-07 18:23         ` Patrick Mansfield
@ 2005-06-08 15:41           ` Mike Christie
  2005-06-09  0:08             ` Patrick Mansfield
  0 siblings, 1 reply; 41+ messages in thread
From: Mike Christie @ 2005-06-08 15:41 UTC (permalink / raw)
  To: Patrick Mansfield
  Cc: James Bottomley, device-mapper development, linux-scsi, Jens Axboe

Patrick Mansfield wrote:
> On Tue, Jun 07, 2005 at 10:26:16AM -0500, Michael Christie wrote:
> 
>>Quoting Patrick Mansfield <patmans@us.ibm.com>:
> 
> 
>>>Why can't the retries be passed down via a new blk req->retries?
>>>
>>
>>Is that all that is needed for dm-multipath? I thought this would be a good time
>>to add some code to allow the finer grained control of retries that could also
>>be used by dm and md. For example do you only want to retry certain failures X
>>numbeer of times?
> 
> 
> I was looking/thinking about the scsi_scan retries not multipath.

And I was hoping the same code/framework can be used for scsi, DM, MD, 
SG_IO and all other block layer drivers.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-08 15:41           ` Mike Christie
@ 2005-06-09  0:08             ` Patrick Mansfield
  2005-06-09  6:18               ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: Patrick Mansfield @ 2005-06-09  0:08 UTC (permalink / raw)
  To: Mike Christie
  Cc: James Bottomley, device-mapper development, Jens Axboe, linux-scsi

On Wed, Jun 08, 2005 at 10:41:48AM -0500, Mike Christie wrote:
> Patrick Mansfield wrote:
> >On Tue, Jun 07, 2005 at 10:26:16AM -0500, Michael Christie wrote:
> >
> >>Quoting Patrick Mansfield <patmans@us.ibm.com>:
> >
> >
> >>>Why can't the retries be passed down via a new blk req->retries?
> >>>
> >>
> >>Is that all that is needed for dm-multipath? I thought this would be a 
> >>good time
> >>to add some code to allow the finer grained control of retries that could 
> >>also
> >>be used by dm and md. For example do you only want to retry certain 
> >>failures X
> >>numbeer of times?
> >
> >
> >I was looking/thinking about the scsi_scan retries not multipath.
> 
> And I was hoping the same code/framework can be used for scsi, DM, MD, 
> SG_IO and all other block layer drivers.

So use a req->retries instead of the fail fast flag?

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-09  0:08             ` Patrick Mansfield
@ 2005-06-09  6:18               ` Jens Axboe
  2005-06-09 11:51                 ` James Bottomley
  0 siblings, 1 reply; 41+ messages in thread
From: Jens Axboe @ 2005-06-09  6:18 UTC (permalink / raw)
  To: Patrick Mansfield
  Cc: James Bottomley, Mike Christie, device-mapper development, linux-scsi

On Wed, Jun 08 2005, Patrick Mansfield wrote:
> On Wed, Jun 08, 2005 at 10:41:48AM -0500, Mike Christie wrote:
> > Patrick Mansfield wrote:
> > >On Tue, Jun 07, 2005 at 10:26:16AM -0500, Michael Christie wrote:
> > >
> > >>Quoting Patrick Mansfield <patmans@us.ibm.com>:
> > >
> > >
> > >>>Why can't the retries be passed down via a new blk req->retries?
> > >>>
> > >>
> > >>Is that all that is needed for dm-multipath? I thought this would be a 
> > >>good time
> > >>to add some code to allow the finer grained control of retries that could 
> > >>also
> > >>be used by dm and md. For example do you only want to retry certain 
> > >>failures X
> > >>numbeer of times?
> > >
> > >
> > >I was looking/thinking about the scsi_scan retries not multipath.
> > 
> > And I was hoping the same code/framework can be used for scsi, DM, MD, 
> > SG_IO and all other block layer drivers.
> 
> So use a req->retries instead of the fail fast flag?

Would be fine. Some drivers are messing with ->errors for that anyways,
so would be nice to clean that up.

It would need to be done _very_ carefully though!

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-09  6:18               ` Jens Axboe
@ 2005-06-09 11:51                 ` James Bottomley
  2005-06-09 11:54                   ` Jens Axboe
  0 siblings, 1 reply; 41+ messages in thread
From: James Bottomley @ 2005-06-09 11:51 UTC (permalink / raw)
  To: Jens Axboe
  Cc: Mike Christie, device-mapper development, linux-scsi, Patrick Mansfield

On Thu, 2005-06-09 at 08:18 +0200, Jens Axboe wrote:
> > So use a req->retries instead of the fail fast flag?
> 
> Would be fine. Some drivers are messing with ->errors for that anyways,
> so would be nice to clean that up.
> 
> It would need to be done _very_ carefully though!

Actually, I think there's still a need for both.  Fail fast implies more
than simply no retry.  In the SCSI case it should eventually mean fail
the command before we begin transport recovery (at the moment, we simply
use the command in transport recovery, however long it takes, and then
return the failure).

James

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers
  2005-06-09 11:51                 ` James Bottomley
@ 2005-06-09 11:54                   ` Jens Axboe
  0 siblings, 0 replies; 41+ messages in thread
From: Jens Axboe @ 2005-06-09 11:54 UTC (permalink / raw)
  To: James Bottomley
  Cc: Mike Christie, device-mapper development, linux-scsi, Patrick Mansfield

On Thu, Jun 09 2005, James Bottomley wrote:
> On Thu, 2005-06-09 at 08:18 +0200, Jens Axboe wrote:
> > > So use a req->retries instead of the fail fast flag?
> > 
> > Would be fine. Some drivers are messing with ->errors for that anyways,
> > so would be nice to clean that up.
> > 
> > It would need to be done _very_ carefully though!
> 
> Actually, I think there's still a need for both.  Fail fast implies more
> than simply no retry.  In the SCSI case it should eventually mean fail
> the command before we begin transport recovery (at the moment, we simply
> use the command in transport recovery, however long it takes, and then
> return the failure).

I tend to agree, FAILFAST does have a use outside of normal retry logic.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2005-06-09 11:54 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-06-04  1:19 [PATCH RFC 0/4] use scatter lists for all block pc requests and simplify hw handlers Mike Christie
2005-06-04 16:07 ` James Bottomley
2005-06-05  7:15   ` Mike Christie
2005-06-05  9:41     ` [dm-devel] " christophe varoqui
2005-06-06 13:31       ` Lars Marowsky-Bree
2005-06-07  0:04         ` Michael Christie
2005-06-07  7:01           ` [dm-devel] " Lars Marowsky-Bree
2005-06-05 14:40     ` James Bottomley
2005-06-05 19:11       ` James Bottomley
2005-06-06  5:43         ` Douglas Gilbert
2005-06-06 14:19           ` James Bottomley
2005-06-07 13:08             ` Douglas Gilbert
2005-06-07 13:34               ` Tony Battersby
2005-06-07 16:34                 ` James Bottomley
2005-06-07 18:38                   ` [PATCH RFC 0/4] use scatter lists for all blockpc " Tony Battersby
2005-06-07 18:43                     ` Jens Axboe
2005-06-07 15:59               ` [PATCH RFC 0/4] use scatter lists for all block pc " James Bottomley
2005-06-07 18:07                 ` Jens Axboe
2005-06-07 19:26                   ` James Bottomley
2005-06-08  7:09                     ` Jens Axboe
2005-06-06 19:02     ` Patrick Mansfield
2005-06-07 15:26       ` Michael Christie
2005-06-07 18:23         ` Patrick Mansfield
2005-06-08 15:41           ` Mike Christie
2005-06-09  0:08             ` Patrick Mansfield
2005-06-09  6:18               ` Jens Axboe
2005-06-09 11:51                 ` James Bottomley
2005-06-09 11:54                   ` Jens Axboe
2005-06-07 12:10   ` Christoph Hellwig
2005-06-07 12:20     ` James Bottomley
2005-06-07 15:36       ` Michael Christie
2005-06-07 15:45         ` [dm-devel] " Michael Christie
2005-06-07 16:26           ` Kai Makisara
2005-06-07 19:23             ` James Bottomley
2005-06-07 18:09         ` Jens Axboe
2005-06-08 12:46           ` Mike Christie
2005-06-07 18:07 ` Jens Axboe
2005-06-07 19:38   ` James Bottomley
2005-06-08  3:00     ` Douglas Gilbert
2005-06-08 12:59       ` James Bottomley
2005-06-08 14:50         ` Luben Tuikov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.