All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] remove scsi_req_map_sg
@ 2008-12-13 16:23 FUJITA Tomonori
  2008-12-13 16:23 ` [PATCH 1/3] " FUJITA Tomonori
  2008-12-15  7:28 ` [PATCH 0/3] remove scsi_req_map_sg Jens Axboe
  0 siblings, 2 replies; 38+ messages in thread
From: FUJITA Tomonori @ 2008-12-13 16:23 UTC (permalink / raw)
  To: James.Bottomley, jens.axboe
  Cc: linux-scsi, fujita.tomonori, Kai.Makisara, osst

This patchset removes scsi_req_map_sg. IOW, this patchset removes
'SCSI-ml does something wrong' comment in the block layer :)

/*
 * Temporary export, until SCSI gets fixed up.
 */
extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
                            struct bio *bio);

This patchset also removes scsi_execute_async and struct
scsi_io_context() and unexport bio_add_pc_page().


This is against scsi-misc and depends on two patchsets:

'removing scsi_execute_async in st' the second half patchset:

http://marc.info/?l=linux-scsi&m=122900494917385&w=2

'removing scsi_execute_async in osst' the patchset:

http://marc.info/?l=linux-scsi&m=122918478521903&w=2




^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH 1/3] remove scsi_req_map_sg
  2008-12-13 16:23 [PATCH 0/3] remove scsi_req_map_sg FUJITA Tomonori
@ 2008-12-13 16:23 ` FUJITA Tomonori
  2008-12-13 16:23   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
  2008-12-15  7:28 ` [PATCH 0/3] remove scsi_req_map_sg Jens Axboe
  1 sibling, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2008-12-13 16:23 UTC (permalink / raw)
  To: James.Bottomley, jens.axboe; +Cc: linux-scsi, fujita.tomonori

No one uses scsi_execute_async with data transfer now. We can remove
scsi_req_map_sg.

Only scsi_eh_lock_door uses scsi_execute_async. scsi_eh_lock_door
doesn't handle sense and the callback. So we can remove
scsi_io_context too.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 drivers/scsi/scsi_error.c  |   34 +++++--
 drivers/scsi/scsi_lib.c    |  203 +-------------------------------------------
 include/scsi/scsi_device.h |    6 --
 3 files changed, 25 insertions(+), 218 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 381838e..29fb11a 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1453,6 +1453,11 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	}
 }
 
+static void eh_lock_door_done(struct request *req, int uptodate)
+{
+	__blk_put_request(req->q, req);
+}
+
 /**
  * scsi_eh_lock_door - Prevent medium removal for the specified device
  * @sdev:	SCSI device to prevent medium removal
@@ -1475,19 +1480,28 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
  */
 static void scsi_eh_lock_door(struct scsi_device *sdev)
 {
-	unsigned char cmnd[MAX_COMMAND_SIZE];
+	struct request *req;
 
-	cmnd[0] = ALLOW_MEDIUM_REMOVAL;
-	cmnd[1] = 0;
-	cmnd[2] = 0;
-	cmnd[3] = 0;
-	cmnd[4] = SCSI_REMOVAL_PREVENT;
-	cmnd[5] = 0;
+	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
+	if (!req)
+		return;
 
-	scsi_execute_async(sdev, cmnd, 6, DMA_NONE, NULL, 0, 0, 10 * HZ,
-			   5, NULL, NULL, GFP_KERNEL);
-}
+	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
+	req->cmd[1] = 0;
+	req->cmd[2] = 0;
+	req->cmd[3] = 0;
+	req->cmd[4] = SCSI_REMOVAL_PREVENT;
+	req->cmd[5] = 0;
 
+	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
+
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_QUIET;
+	req->timeout = 10 * HZ;
+	req->retries = 5;
+
+	blk_execute_rq_nowait(req->q, NULL, req, 1, eh_lock_door_done);
+}
 
 /**
  * scsi_restart_operations - restart io operations to the specified host.
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 111f9e9..537e0c0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -260,196 +260,6 @@ int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 }
 EXPORT_SYMBOL(scsi_execute_req);
 
-struct scsi_io_context {
-	void *data;
-	void (*done)(void *data, char *sense, int result, int resid);
-	char sense[SCSI_SENSE_BUFFERSIZE];
-};
-
-static struct kmem_cache *scsi_io_context_cache;
-
-static void scsi_end_async(struct request *req, int uptodate)
-{
-	struct scsi_io_context *sioc = req->end_io_data;
-
-	if (sioc->done)
-		sioc->done(sioc->data, sioc->sense, req->errors, req->data_len);
-
-	kmem_cache_free(scsi_io_context_cache, sioc);
-	__blk_put_request(req->q, req);
-}
-
-static int scsi_merge_bio(struct request *rq, struct bio *bio)
-{
-	struct request_queue *q = rq->q;
-
-	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-	if (rq_data_dir(rq) == WRITE)
-		bio->bi_rw |= (1 << BIO_RW);
-	blk_queue_bounce(q, &bio);
-
-	return blk_rq_append_bio(q, rq, bio);
-}
-
-static void scsi_bi_endio(struct bio *bio, int error)
-{
-	bio_put(bio);
-}
-
-/**
- * scsi_req_map_sg - map a scatterlist into a request
- * @rq:		request to fill
- * @sgl:	scatterlist
- * @nsegs:	number of elements
- * @bufflen:	len of buffer
- * @gfp:	memory allocation flags
- *
- * scsi_req_map_sg maps a scatterlist into a request so that the
- * request can be sent to the block layer. We do not trust the scatterlist
- * sent to use, as some ULDs use that struct to only organize the pages.
- */
-static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
-			   int nsegs, unsigned bufflen, gfp_t gfp)
-{
-	struct request_queue *q = rq->q;
-	int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	unsigned int data_len = bufflen, len, bytes, off;
-	struct scatterlist *sg;
-	struct page *page;
-	struct bio *bio = NULL;
-	int i, err, nr_vecs = 0;
-
-	for_each_sg(sgl, sg, nsegs, i) {
-		page = sg_page(sg);
-		off = sg->offset;
-		len = sg->length;
-
-		while (len > 0 && data_len > 0) {
-			/*
-			 * sg sends a scatterlist that is larger than
-			 * the data_len it wants transferred for certain
-			 * IO sizes
-			 */
-			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
-			bytes = min(bytes, data_len);
-
-			if (!bio) {
-				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
-				nr_pages -= nr_vecs;
-
-				bio = bio_alloc(gfp, nr_vecs);
-				if (!bio) {
-					err = -ENOMEM;
-					goto free_bios;
-				}
-				bio->bi_end_io = scsi_bi_endio;
-			}
-
-			if (bio_add_pc_page(q, bio, page, bytes, off) !=
-			    bytes) {
-				bio_put(bio);
-				err = -EINVAL;
-				goto free_bios;
-			}
-
-			if (bio->bi_vcnt >= nr_vecs) {
-				err = scsi_merge_bio(rq, bio);
-				if (err) {
-					bio_endio(bio, 0);
-					goto free_bios;
-				}
-				bio = NULL;
-			}
-
-			page++;
-			len -= bytes;
-			data_len -=bytes;
-			off = 0;
-		}
-	}
-
-	rq->buffer = rq->data = NULL;
-	rq->data_len = bufflen;
-	return 0;
-
-free_bios:
-	while ((bio = rq->bio) != NULL) {
-		rq->bio = bio->bi_next;
-		/*
-		 * call endio instead of bio_put incase it was bounced
-		 */
-		bio_endio(bio, 0);
-	}
-
-	return err;
-}
-
-/**
- * scsi_execute_async - insert request
- * @sdev:	scsi device
- * @cmd:	scsi command
- * @cmd_len:	length of scsi cdb
- * @data_direction: DMA_TO_DEVICE, DMA_FROM_DEVICE, or DMA_NONE
- * @buffer:	data buffer (this can be a kernel buffer or scatterlist)
- * @bufflen:	len of buffer
- * @use_sg:	if buffer is a scatterlist this is the number of elements
- * @timeout:	request timeout in seconds
- * @retries:	number of times to retry request
- * @privdata:	data passed to done()
- * @done:	callback function when done
- * @gfp:	memory allocation flags
- */
-int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
-		       int cmd_len, int data_direction, void *buffer, unsigned bufflen,
-		       int use_sg, int timeout, int retries, void *privdata,
-		       void (*done)(void *, char *, int, int), gfp_t gfp)
-{
-	struct request *req;
-	struct scsi_io_context *sioc;
-	int err = 0;
-	int write = (data_direction == DMA_TO_DEVICE);
-
-	sioc = kmem_cache_zalloc(scsi_io_context_cache, gfp);
-	if (!sioc)
-		return DRIVER_ERROR << 24;
-
-	req = blk_get_request(sdev->request_queue, write, gfp);
-	if (!req)
-		goto free_sense;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
-	req->cmd_flags |= REQ_QUIET;
-
-	if (use_sg)
-		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
-	else if (bufflen)
-		err = blk_rq_map_kern(req->q, req, buffer, bufflen, gfp);
-
-	if (err)
-		goto free_req;
-
-	req->cmd_len = cmd_len;
-	memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
-	memcpy(req->cmd, cmd, req->cmd_len);
-	req->sense = sioc->sense;
-	req->sense_len = 0;
-	req->timeout = timeout;
-	req->retries = retries;
-	req->end_io_data = sioc;
-
-	sioc->data = privdata;
-	sioc->done = done;
-
-	blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
-	return 0;
-
-free_req:
-	blk_put_request(req);
-free_sense:
-	kmem_cache_free(scsi_io_context_cache, sioc);
-	return DRIVER_ERROR << 24;
-}
-EXPORT_SYMBOL_GPL(scsi_execute_async);
-
 /*
  * Function:    scsi_init_cmd_errh()
  *
@@ -1882,20 +1692,12 @@ int __init scsi_init_queue(void)
 {
 	int i;
 
-	scsi_io_context_cache = kmem_cache_create("scsi_io_context",
-					sizeof(struct scsi_io_context),
-					0, 0, NULL);
-	if (!scsi_io_context_cache) {
-		printk(KERN_ERR "SCSI: can't init scsi io context cache\n");
-		return -ENOMEM;
-	}
-
 	scsi_sdb_cache = kmem_cache_create("scsi_data_buffer",
 					   sizeof(struct scsi_data_buffer),
 					   0, 0, NULL);
 	if (!scsi_sdb_cache) {
 		printk(KERN_ERR "SCSI: can't init scsi sdb cache\n");
-		goto cleanup_io_context;
+		return -ENOMEM;
 	}
 
 	for (i = 0; i < SG_MEMPOOL_NR; i++) {
@@ -1930,8 +1732,6 @@ cleanup_sdb:
 			kmem_cache_destroy(sgp->slab);
 	}
 	kmem_cache_destroy(scsi_sdb_cache);
-cleanup_io_context:
-	kmem_cache_destroy(scsi_io_context_cache);
 
 	return -ENOMEM;
 }
@@ -1940,7 +1740,6 @@ void scsi_exit_queue(void)
 {
 	int i;
 
-	kmem_cache_destroy(scsi_io_context_cache);
 	kmem_cache_destroy(scsi_sdb_cache);
 
 	for (i = 0; i < SG_MEMPOOL_NR; i++) {
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 01a4c58..d9e5ef0 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -370,12 +370,6 @@ extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			    int data_direction, void *buffer, unsigned bufflen,
 			    struct scsi_sense_hdr *, int timeout, int retries,
 			    int *resid);
-extern int scsi_execute_async(struct scsi_device *sdev,
-			      const unsigned char *cmd, int cmd_len, int data_direction,
-			      void *buffer, unsigned bufflen, int use_sg,
-			      int timeout, int retries, void *privdata,
-			      void (*done)(void *, char *, int, int),
-			      gfp_t gfp);
 
 static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
 {
-- 
1.5.5.GIT


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 2/3] block: unexport blk_rq_append_bio
  2008-12-13 16:23 ` [PATCH 1/3] " FUJITA Tomonori
@ 2008-12-13 16:23   ` FUJITA Tomonori
  2008-12-13 16:23     ` [PATCH 3/3] block: unexport bio_add_pc_page FUJITA Tomonori
  2009-02-10 17:43     ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
  0 siblings, 2 replies; 38+ messages in thread
From: FUJITA Tomonori @ 2008-12-13 16:23 UTC (permalink / raw)
  To: James.Bottomley, jens.axboe; +Cc: linux-scsi, fujita.tomonori

Now we can unexport blk_rq_append_bio and remove 'SCSI-ml does
something wrong' comment in blkdev.h. :)

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 block/blk-map.c        |    5 ++---
 include/linux/blkdev.h |    6 ------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/block/blk-map.c b/block/blk-map.c
index e1bb727..a70f7d8 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -9,8 +9,8 @@
 
 #include "blk.h"
 
-int blk_rq_append_bio(struct request_queue *q, struct request *rq,
-		      struct bio *bio)
+static int blk_rq_append_bio(struct request_queue *q, struct request *rq,
+			     struct bio *bio)
 {
 	if (!rq->bio)
 		blk_rq_bio_prep(q, rq, bio);
@@ -24,7 +24,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(blk_rq_append_bio);
 
 static int __blk_rq_unmap_user(struct bio *bio)
 {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8922c25..bbd481f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -725,12 +725,6 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);
 
 /*
- * Temporary export, until SCSI gets fixed up.
- */
-extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
-			     struct bio *bio);
-
-/*
  * A queue has just exitted congestion.  Note this in the global counter of
  * congested queues, and wake up anyone who was waiting for requests to be
  * put back.
-- 
1.5.5.GIT


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 3/3] block: unexport bio_add_pc_page
  2008-12-13 16:23   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
@ 2008-12-13 16:23     ` FUJITA Tomonori
  2009-02-10 17:43     ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
  1 sibling, 0 replies; 38+ messages in thread
From: FUJITA Tomonori @ 2008-12-13 16:23 UTC (permalink / raw)
  To: James.Bottomley, jens.axboe; +Cc: linux-scsi, fujita.tomonori

We can unexport bio_add_pc_page since scsi_req_map_sg has gone.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
---
 fs/bio.c            |    6 +++---
 include/linux/bio.h |    2 --
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/bio.c b/fs/bio.c
index a482e1f..206209c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -464,8 +464,9 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
  *      smaller than PAGE_SIZE, so it is always possible to add a single
  *      page to an empty bio. This should only be used by REQ_PC bios.
  */
-int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
-		    unsigned int len, unsigned int offset)
+static int bio_add_pc_page(struct request_queue *q, struct bio *bio,
+			   struct page *page, unsigned int len,
+			   unsigned int offset)
 {
 	return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
 }
@@ -1454,7 +1455,6 @@ EXPORT_SYMBOL(__bio_clone);
 EXPORT_SYMBOL(bio_clone);
 EXPORT_SYMBOL(bio_phys_segments);
 EXPORT_SYMBOL(bio_add_page);
-EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
 EXPORT_SYMBOL(bio_map_user);
 EXPORT_SYMBOL(bio_unmap_user);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6a64209..87e7f77 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -351,8 +351,6 @@ extern struct bio *bio_clone(struct bio *, gfp_t);
 extern void bio_init(struct bio *);
 
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
-extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
-			   unsigned int, unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
 extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
 extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
-- 
1.5.5.GIT


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH 0/3] remove scsi_req_map_sg
  2008-12-13 16:23 [PATCH 0/3] remove scsi_req_map_sg FUJITA Tomonori
  2008-12-13 16:23 ` [PATCH 1/3] " FUJITA Tomonori
@ 2008-12-15  7:28 ` Jens Axboe
  2008-12-18  8:14   ` FUJITA Tomonori
  1 sibling, 1 reply; 38+ messages in thread
From: Jens Axboe @ 2008-12-15  7:28 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, linux-scsi, Kai.Makisara, osst

On Sun, Dec 14 2008, FUJITA Tomonori wrote:
> This patchset removes scsi_req_map_sg. IOW, this patchset removes
> 'SCSI-ml does something wrong' comment in the block layer :)
> 
> /*
>  * Temporary export, until SCSI gets fixed up.
>  */
> extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
>                             struct bio *bio);
> 
> This patchset also removes scsi_execute_async and struct
> scsi_io_context() and unexport bio_add_pc_page().
> 
> 
> This is against scsi-misc and depends on two patchsets:
> 
> 'removing scsi_execute_async in st' the second half patchset:
> 
> http://marc.info/?l=linux-scsi&m=122900494917385&w=2
> 
> 'removing scsi_execute_async in osst' the patchset:
> 
> http://marc.info/?l=linux-scsi&m=122918478521903&w=2

You can add my Acked-by to all three patches, nice work Tomo!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 0/3] remove scsi_req_map_sg
  2008-12-15  7:28 ` [PATCH 0/3] remove scsi_req_map_sg Jens Axboe
@ 2008-12-18  8:14   ` FUJITA Tomonori
  0 siblings, 0 replies; 38+ messages in thread
From: FUJITA Tomonori @ 2008-12-18  8:14 UTC (permalink / raw)
  To: jens.axboe
  Cc: fujita.tomonori, James.Bottomley, linux-scsi, Kai.Makisara, osst

On Mon, 15 Dec 2008 08:28:17 +0100
Jens Axboe <jens.axboe@oracle.com> wrote:

> On Sun, Dec 14 2008, FUJITA Tomonori wrote:
> > This patchset removes scsi_req_map_sg. IOW, this patchset removes
> > 'SCSI-ml does something wrong' comment in the block layer :)
> > 
> > /*
> >  * Temporary export, until SCSI gets fixed up.
> >  */
> > extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
> >                             struct bio *bio);
> > 
> > This patchset also removes scsi_execute_async and struct
> > scsi_io_context() and unexport bio_add_pc_page().
> > 
> > 
> > This is against scsi-misc and depends on two patchsets:
> > 
> > 'removing scsi_execute_async in st' the second half patchset:
> > 
> > http://marc.info/?l=linux-scsi&m=122900494917385&w=2
> > 
> > 'removing scsi_execute_async in osst' the patchset:
> > 
> > http://marc.info/?l=linux-scsi&m=122918478521903&w=2
> 
> You can add my Acked-by to all three patches, nice work Tomo!

Thanks,

I also need your ACKs on the #1-3 patches in the following patchset to
push the nice work. :)

http://marc.info/?l=linux-scsi&m=122958057719487&w=2

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2008-12-13 16:23   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
  2008-12-13 16:23     ` [PATCH 3/3] block: unexport bio_add_pc_page FUJITA Tomonori
@ 2009-02-10 17:43     ` James Bottomley
  2009-02-10 18:19       ` James Bottomley
  1 sibling, 1 reply; 38+ messages in thread
From: James Bottomley @ 2009-02-10 17:43 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: jens.axboe, linux-scsi, Boaz Harrosh

On Sun, 2008-12-14 at 01:23 +0900, FUJITA Tomonori wrote:
> Now we can unexport blk_rq_append_bio and remove 'SCSI-ml does
> something wrong' comment in blkdev.h. :)
> 
> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
> ---
>  block/blk-map.c        |    5 ++---
>  include/linux/blkdev.h |    6 ------
>  2 files changed, 2 insertions(+), 9 deletions(-)
> 
> diff --git a/block/blk-map.c b/block/blk-map.c
> index e1bb727..a70f7d8 100644
> --- a/block/blk-map.c
> +++ b/block/blk-map.c
> @@ -9,8 +9,8 @@
>  
>  #include "blk.h"
>  
> -int blk_rq_append_bio(struct request_queue *q, struct request *rq,
> -		      struct bio *bio)
> +static int blk_rq_append_bio(struct request_queue *q, struct request *rq,
> +			     struct bio *bio)

There's a current barrier to this: osd_initiator has also become a
consumer of blk_rq_append_bio().

It seems to be emulating block internals, so I think the fix is twofold:

     1. adjust blk_rq_map_kern to call blk_rq_append_bio() instead of
        blk_rq_prep_bio() (with an extra failure path).
     2. make osd_initiator simply call it for additions.

I can code up a patch to see if it works.

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-10 17:43     ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
@ 2009-02-10 18:19       ` James Bottomley
  2009-02-11  0:21         ` FUJITA Tomonori
  2009-02-11  8:17         ` Boaz Harrosh
  0 siblings, 2 replies; 38+ messages in thread
From: James Bottomley @ 2009-02-10 18:19 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: jens.axboe, linux-scsi, Boaz Harrosh

On Tue, 2009-02-10 at 17:43 +0000, James Bottomley wrote:
> There's a current barrier to this: osd_initiator has also become a
> consumer of blk_rq_append_bio().
> 
> It seems to be emulating block internals, so I think the fix is twofold:
> 
>      1. adjust blk_rq_map_kern to call blk_rq_append_bio() instead of
>         blk_rq_prep_bio() (with an extra failure path).
>      2. make osd_initiator simply call it for additions.
> 
> I can code up a patch to see if it works.

So this is the patch to allow blk_rq_map_kern to append to requests,
which is what I think is needed.

Unfortunately unwinding osd_initator's bio dependence and putting it
back on data buffers looks to be a bit of a longer chore.

James

---

diff --git a/block/blk-map.c b/block/blk-map.c
index 25d15ff..1a1e26d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -289,6 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	int reading = rq_data_dir(rq) == READ;
 	int do_copy = 0;
 	struct bio *bio;
+	int ret;
 
 	if (len > (q->max_hw_sectors << 9))
 		return -EINVAL;
@@ -310,7 +311,13 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	if (do_copy)
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	blk_rq_bio_prep(q, rq, bio);
+	ret = blk_rq_append_bio(q, rq, bio);
+	if (unlikely(ret)) {
+		/* request is too big */
+		bio_put(bio);
+		return ret;
+	}
+		
 	blk_queue_bounce(q, &rq->bio);
 	rq->buffer = rq->data = NULL;
 	return 0;



^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-10 18:19       ` James Bottomley
@ 2009-02-11  0:21         ` FUJITA Tomonori
  2009-02-11  8:17         ` Boaz Harrosh
  1 sibling, 0 replies; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-11  0:21 UTC (permalink / raw)
  To: James.Bottomley; +Cc: fujita.tomonori, jens.axboe, linux-scsi, bharrosh

On Tue, 10 Feb 2009 18:19:33 +0000
James Bottomley <James.Bottomley@HansenPartnership.com> wrote:

> On Tue, 2009-02-10 at 17:43 +0000, James Bottomley wrote:
> > There's a current barrier to this: osd_initiator has also become a
> > consumer of blk_rq_append_bio().
> > 
> > It seems to be emulating block internals, so I think the fix is twofold:
> > 
> >      1. adjust blk_rq_map_kern to call blk_rq_append_bio() instead of
> >         blk_rq_prep_bio() (with an extra failure path).
> >      2. make osd_initiator simply call it for additions.
> > 
> > I can code up a patch to see if it works.
> 
> So this is the patch to allow blk_rq_map_kern to append to requests,
> which is what I think is needed.
> 
> Unfortunately unwinding osd_initator's bio dependence and putting it
> back on data buffers looks to be a bit of a longer chore.

Looks like the osd initiator code uses bio nicely; e.g. using
blk_rq_append_bio and bio_map_kern is completely wrong. I'll see the
code to fix it...

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-10 18:19       ` James Bottomley
  2009-02-11  0:21         ` FUJITA Tomonori
@ 2009-02-11  8:17         ` Boaz Harrosh
  2009-02-11  8:19           ` Boaz Harrosh
  2009-02-11  9:15           ` Boaz Harrosh
  1 sibling, 2 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11  8:17 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, jens.axboe, linux-scsi

James Bottomley wrote:
> On Tue, 2009-02-10 at 17:43 +0000, James Bottomley wrote:
>> There's a current barrier to this: osd_initiator has also become a
>> consumer of blk_rq_append_bio().
>>
>> It seems to be emulating block internals, so I think the fix is twofold:
>>
>>      1. adjust blk_rq_map_kern to call blk_rq_append_bio() instead of
>>         blk_rq_prep_bio() (with an extra failure path).
>>      2. make osd_initiator simply call it for additions.
>>
>> I can code up a patch to see if it works.
> 
> So this is the patch to allow blk_rq_map_kern to append to requests,
> which is what I think is needed.
> 
> Unfortunately unwinding osd_initator's bio dependence and putting it
> back on data buffers looks to be a bit of a longer chore.
> 
> James
> 
> ---
> 
> diff --git a/block/blk-map.c b/block/blk-map.c
> index 25d15ff..1a1e26d 100644
> --- a/block/blk-map.c
> +++ b/block/blk-map.c
> @@ -289,6 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
>  	int reading = rq_data_dir(rq) == READ;
>  	int do_copy = 0;
>  	struct bio *bio;
> +	int ret;
>  
>  	if (len > (q->max_hw_sectors << 9))
>  		return -EINVAL;
> @@ -310,7 +311,13 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
>  	if (do_copy)
>  		rq->cmd_flags |= REQ_COPY_USER;
>  
> -	blk_rq_bio_prep(q, rq, bio);
> +	ret = blk_rq_append_bio(q, rq, bio);
> +	if (unlikely(ret)) {
> +		/* request is too big */
> +		bio_put(bio);
> +		return ret;
> +	}
> +		
>  	blk_queue_bounce(q, &rq->bio);
>  	rq->buffer = rq->data = NULL;
>  	return 0;
> 
> 

This works, with your patch and below code
I'm passing my tests. I would say it is pretty safe too,
since blk_rq_bio_prep would leak the bio if it was called
twice on same request, before.

Thanks, I'll send a proper patch after some more testing.

---
git diff --stat -p
 drivers/scsi/osd/osd_initiator.c |   12 +++---------
 1 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 1696130..9cfdce4 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -824,18 +824,12 @@ EXPORT_SYMBOL(osd_req_add_set_attr_list);
 static int _append_map_kern(struct request *req,
 	void *buff, unsigned len, gfp_t flags)
 {
-	struct bio *bio;
 	int ret;
 
-	bio = bio_map_kern(req->q, buff, len, flags);
-	if (IS_ERR(bio)) {
-		OSD_ERR("Failed bio_map_kern(%p, %d) => %ld\n", buff, len,
-			PTR_ERR(bio));
-		return PTR_ERR(bio);
-	}
-	ret = blk_rq_append_bio(req->q, req, bio);
+	ret = blk_rq_map_kern(req->q, req, buff, len, flags);
 	if (ret) {
-		OSD_ERR("Failed blk_rq_append_bio(%p) => %d\n", bio, ret);
+		OSD_DEBUG("Failed blk_rq_map_kern(%p, %u) => %d\n",
+			buff, len, ret);
 		bio_put(bio);
 	}
 	return ret;


^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11  8:17         ` Boaz Harrosh
@ 2009-02-11  8:19           ` Boaz Harrosh
  2009-02-11  9:15           ` Boaz Harrosh
  1 sibling, 0 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11  8:19 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, jens.axboe, linux-scsi

Boaz Harrosh wrote:
> James Bottomley wrote:
>> On Tue, 2009-02-10 at 17:43 +0000, James Bottomley wrote:
>>> There's a current barrier to this: osd_initiator has also become a
>>> consumer of blk_rq_append_bio().
>>>
>>> It seems to be emulating block internals, so I think the fix is twofold:
>>>
>>>      1. adjust blk_rq_map_kern to call blk_rq_append_bio() instead of
>>>         blk_rq_prep_bio() (with an extra failure path).
>>>      2. make osd_initiator simply call it for additions.
>>>
>>> I can code up a patch to see if it works.
>> So this is the patch to allow blk_rq_map_kern to append to requests,
>> which is what I think is needed.
>>
>> Unfortunately unwinding osd_initator's bio dependence and putting it
>> back on data buffers looks to be a bit of a longer chore.
>>
>> James
>>
>> ---
>>
>> diff --git a/block/blk-map.c b/block/blk-map.c
>> index 25d15ff..1a1e26d 100644
>> --- a/block/blk-map.c
>> +++ b/block/blk-map.c
>> @@ -289,6 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
>>  	int reading = rq_data_dir(rq) == READ;
>>  	int do_copy = 0;
>>  	struct bio *bio;
>> +	int ret;
>>  
>>  	if (len > (q->max_hw_sectors << 9))
>>  		return -EINVAL;
>> @@ -310,7 +311,13 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
>>  	if (do_copy)
>>  		rq->cmd_flags |= REQ_COPY_USER;
>>  
>> -	blk_rq_bio_prep(q, rq, bio);
>> +	ret = blk_rq_append_bio(q, rq, bio);
>> +	if (unlikely(ret)) {
>> +		/* request is too big */
>> +		bio_put(bio);
>> +		return ret;
>> +	}
>> +		
>>  	blk_queue_bounce(q, &rq->bio);
>>  	rq->buffer = rq->data = NULL;
>>  	return 0;
>>
>>
> 
> This works, with your patch and below code
> I'm passing my tests. I would say it is pretty safe too,
> since blk_rq_bio_prep would leak the bio if it was called
> twice on same request, before.
> 
> Thanks, I'll send a proper patch after some more testing.
> 
> ---
> git diff --stat -p
>  drivers/scsi/osd/osd_initiator.c |   12 +++---------
>  1 files changed, 3 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
> index 1696130..9cfdce4 100644
> --- a/drivers/scsi/osd/osd_initiator.c
> +++ b/drivers/scsi/osd/osd_initiator.c
> @@ -824,18 +824,12 @@ EXPORT_SYMBOL(osd_req_add_set_attr_list);
>  static int _append_map_kern(struct request *req,
>  	void *buff, unsigned len, gfp_t flags)
>  {
> -	struct bio *bio;
>  	int ret;
>  
> -	bio = bio_map_kern(req->q, buff, len, flags);
> -	if (IS_ERR(bio)) {
> -		OSD_ERR("Failed bio_map_kern(%p, %d) => %ld\n", buff, len,
> -			PTR_ERR(bio));
> -		return PTR_ERR(bio);
> -	}
> -	ret = blk_rq_append_bio(req->q, req, bio);
> +	ret = blk_rq_map_kern(req->q, req, buff, len, flags);
>  	if (ret) {
> -		OSD_ERR("Failed blk_rq_append_bio(%p) => %d\n", bio, ret);
> +		OSD_DEBUG("Failed blk_rq_map_kern(%p, %u) => %d\n",
> +			buff, len, ret);
>  		bio_put(bio);
-  		bio_put(bio);
Sorry
>  	}
>  	return ret;
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11  8:17         ` Boaz Harrosh
  2009-02-11  8:19           ` Boaz Harrosh
@ 2009-02-11  9:15           ` Boaz Harrosh
  2009-02-11 14:32             ` FUJITA Tomonori
  1 sibling, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11  9:15 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, jens.axboe, linux-scsi

Boaz Harrosh wrote:
> James Bottomley wrote:
>> On Tue, 2009-02-10 at 17:43 +0000, James Bottomley wrote:
>>> There's a current barrier to this: osd_initiator has also become a
>>> consumer of blk_rq_append_bio().
>>>
>>> It seems to be emulating block internals, so I think the fix is twofold:
>>>
>>>      1. adjust blk_rq_map_kern to call blk_rq_append_bio() instead of
>>>         blk_rq_prep_bio() (with an extra failure path).
>>>      2. make osd_initiator simply call it for additions.
>>>
>>> I can code up a patch to see if it works.
>> So this is the patch to allow blk_rq_map_kern to append to requests,
>> which is what I think is needed.
>>
>> Unfortunately unwinding osd_initator's bio dependence and putting it
>> back on data buffers looks to be a bit of a longer chore.
>>
>> James
>>
>> ---
>>
>> diff --git a/block/blk-map.c b/block/blk-map.c
>> index 25d15ff..1a1e26d 100644
>> --- a/block/blk-map.c
>> +++ b/block/blk-map.c
>> @@ -289,6 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
>>  	int reading = rq_data_dir(rq) == READ;
>>  	int do_copy = 0;
>>  	struct bio *bio;
>> +	int ret;
>>  
>>  	if (len > (q->max_hw_sectors << 9))
>>  		return -EINVAL;
>> @@ -310,7 +311,13 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
>>  	if (do_copy)
>>  		rq->cmd_flags |= REQ_COPY_USER;
>>  
>> -	blk_rq_bio_prep(q, rq, bio);
>> +	ret = blk_rq_append_bio(q, rq, bio);
>> +	if (unlikely(ret)) {
>> +		/* request is too big */
>> +		bio_put(bio);
>> +		return ret;
>> +	}
>> +		
>>  	blk_queue_bounce(q, &rq->bio);
>>  	rq->buffer = rq->data = NULL;
>>  	return 0;
>>
>>
> 
> This works, with your patch and below code
> I'm passing my tests. I would say it is pretty safe too,
> since blk_rq_bio_prep would leak the bio if it was called
> twice on same request, before.
> 
> Thanks, I'll send a proper patch after some more testing.
> 
> ---
> git diff --stat -p
>  drivers/scsi/osd/osd_initiator.c |   12 +++---------
>  1 files changed, 3 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
> index 1696130..9cfdce4 100644
> --- a/drivers/scsi/osd/osd_initiator.c
> +++ b/drivers/scsi/osd/osd_initiator.c
> @@ -824,18 +824,12 @@ EXPORT_SYMBOL(osd_req_add_set_attr_list);
>  static int _append_map_kern(struct request *req,
>  	void *buff, unsigned len, gfp_t flags)
>  {
> -	struct bio *bio;
>  	int ret;
>  
> -	bio = bio_map_kern(req->q, buff, len, flags);
> -	if (IS_ERR(bio)) {
> -		OSD_ERR("Failed bio_map_kern(%p, %d) => %ld\n", buff, len,
> -			PTR_ERR(bio));
> -		return PTR_ERR(bio);
> -	}
> -	ret = blk_rq_append_bio(req->q, req, bio);
> +	ret = blk_rq_map_kern(req->q, req, buff, len, flags);
>  	if (ret) {
> -		OSD_ERR("Failed blk_rq_append_bio(%p) => %d\n", bio, ret);
> +		OSD_DEBUG("Failed blk_rq_map_kern(%p, %u) => %d\n",
> +			buff, len, ret);
>  		bio_put(bio);
>  	}
>  	return ret;
> 
> --

I spoke too soon. There is one more place that uses blk_rq_append_bio, that is
the place that adds the read/write bio that is received in osd_req_write/read.
The reason I receive a bio at these is because mainly I need a way to
accept struct page* arrays, as well as kernel & user pointers. A bio is a nice
general carrier for any type of memory. Given a bio at hand there are no ways
left to prepare a request from it save the FS generic_make_request() route.

I was thinking of using struct sg_iovec* at one stage but they look very
scary when used with page*, and mapping a page to a pointer but not doing
cache sync and all that jazz. A bio is a very nice carrier of a scatter-gather
list of memory. It has all the API for any needs. blk_rq_append_bio() was the last
way to associate a bio with a request. (except for privileged block-filesystems)

So the first thing we have to decide is what API we need at read/write
today there is:
void osd_req_read(struct osd_request *or,
	const struct osd_obj_id *, struct bio *data_in, u64 offset);

in exofs I use these two:

int osd_req_read_kern(struct osd_request *or,
	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
int osd_req_read_pages(struct osd_request *or,
	const struct osd_obj_id *, u64 offset, u64 length,
	struct page **pages, int page_count);

(Same for write)
pNFS layout driver is very similar.

Thanks
Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11  9:15           ` Boaz Harrosh
@ 2009-02-11 14:32             ` FUJITA Tomonori
  2009-02-11 14:52               ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-11 14:32 UTC (permalink / raw)
  To: bharrosh; +Cc: James.Bottomley, fujita.tomonori, jens.axboe, linux-scsi

On Wed, 11 Feb 2009 11:15:53 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> I spoke too soon. There is one more place that uses blk_rq_append_bio, that is
> the place that adds the read/write bio that is received in osd_req_write/read.
> The reason I receive a bio at these is because mainly I need a way to
> accept struct page* arrays, as well as kernel & user pointers. A bio is a nice

The API using an array of pointers to page can handle everything. So
why do the users of ULD (some rare file systems, I guess), osd
initiator, need to build bios and pass them?


> general carrier for any type of memory. Given a bio at hand there are no ways
> left to prepare a request from it save the FS generic_make_request() route.
> 
> I was thinking of using struct sg_iovec* at one stage but they look very
> scary when used with page*, and mapping a page to a pointer but not doing
> cache sync and all that jazz. A bio is a very nice carrier of a scatter-gather
> list of memory. It has all the API for any needs. blk_rq_append_bio() was the last
> way to associate a bio with a request. (except for privileged block-filesystems)

We need to remove the usage of blk_rq_append_bio() in scsi.


> So the first thing we have to decide is what API we need at read/write
> today there is:

You are talking about the API for osd file systems (or something
related with osd), right? If so, I think that you can do whatever you
want to do now. You can make a mistake since it's in-kernel API.


> void osd_req_read(struct osd_request *or,
> 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
> 
> in exofs I use these two:
> 
> int osd_req_read_kern(struct osd_request *or,
> 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
> int osd_req_read_pages(struct osd_request *or,
> 	const struct osd_obj_id *, u64 offset, u64 length,
> 	struct page **pages, int page_count);
> 

As I wrote above, if you have an interface handling 'struct page
**pages', then there should be ok.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 14:32             ` FUJITA Tomonori
@ 2009-02-11 14:52               ` Boaz Harrosh
  2009-02-11 15:01                 ` FUJITA Tomonori
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11 14:52 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> On Wed, 11 Feb 2009 11:15:53 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
>> I spoke too soon. There is one more place that uses blk_rq_append_bio, that is
>> the place that adds the read/write bio that is received in osd_req_write/read.
>> The reason I receive a bio at these is because mainly I need a way to
>> accept struct page* arrays, as well as kernel & user pointers. A bio is a nice
> 
> The API using an array of pointers to page can handle everything. So
> why do the users of ULD (some rare file systems, I guess), osd
> initiator, need to build bios and pass them?
> 
> 
>> general carrier for any type of memory. Given a bio at hand there are no ways
>> left to prepare a request from it save the FS generic_make_request() route.
>>
>> I was thinking of using struct sg_iovec* at one stage but they look very
>> scary when used with page*, and mapping a page to a pointer but not doing
>> cache sync and all that jazz. A bio is a very nice carrier of a scatter-gather
>> list of memory. It has all the API for any needs. blk_rq_append_bio() was the last
>> way to associate a bio with a request. (except for privileged block-filesystems)
> 
> We need to remove the usage of blk_rq_append_bio() in scsi.
> 
> 
>> So the first thing we have to decide is what API we need at read/write
>> today there is:
> 
> You are talking about the API for osd file systems (or something
> related with osd), right? If so, I think that you can do whatever you
> want to do now. You can make a mistake since it's in-kernel API.
> 
> 
>> void osd_req_read(struct osd_request *or,
>> 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
>>
>> in exofs I use these two:
>>
>> int osd_req_read_kern(struct osd_request *or,
>> 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
>> int osd_req_read_pages(struct osd_request *or,
>> 	const struct osd_obj_id *, u64 offset, u64 length,
>> 	struct page **pages, int page_count);
>>
> 
> As I wrote above, if you have an interface handling 'struct page
> **pages', then there should be ok.

Sorry you miss understand me, I did not explain well.

int osd_req_read_pages(,,,, struct page **pages, int page_count);

Is in exofs.ko. It builds a bio out of **pages then calls
osd_req_read(,,bio,) which will then call blk_rq_append_bio()

So for exofs (filesystem) and pNFS-objects layout driver (Also part of filesystem)
I currently need blk_rq_append_bio(), unless we want to do something new for
these people.

Thanks
Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 14:52               ` Boaz Harrosh
@ 2009-02-11 15:01                 ` FUJITA Tomonori
  2009-02-11 15:07                   ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-11 15:01 UTC (permalink / raw)
  To: bharrosh; +Cc: fujita.tomonori, James.Bottomley, jens.axboe, linux-scsi

On Wed, 11 Feb 2009 16:52:37 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> FUJITA Tomonori wrote:
> > On Wed, 11 Feb 2009 11:15:53 +0200
> > Boaz Harrosh <bharrosh@panasas.com> wrote:
> > 
> >> I spoke too soon. There is one more place that uses blk_rq_append_bio, that is
> >> the place that adds the read/write bio that is received in osd_req_write/read.
> >> The reason I receive a bio at these is because mainly I need a way to
> >> accept struct page* arrays, as well as kernel & user pointers. A bio is a nice
> > 
> > The API using an array of pointers to page can handle everything. So
> > why do the users of ULD (some rare file systems, I guess), osd
> > initiator, need to build bios and pass them?
> > 
> > 
> >> general carrier for any type of memory. Given a bio at hand there are no ways
> >> left to prepare a request from it save the FS generic_make_request() route.
> >>
> >> I was thinking of using struct sg_iovec* at one stage but they look very
> >> scary when used with page*, and mapping a page to a pointer but not doing
> >> cache sync and all that jazz. A bio is a very nice carrier of a scatter-gather
> >> list of memory. It has all the API for any needs. blk_rq_append_bio() was the last
> >> way to associate a bio with a request. (except for privileged block-filesystems)
> > 
> > We need to remove the usage of blk_rq_append_bio() in scsi.
> > 
> > 
> >> So the first thing we have to decide is what API we need at read/write
> >> today there is:
> > 
> > You are talking about the API for osd file systems (or something
> > related with osd), right? If so, I think that you can do whatever you
> > want to do now. You can make a mistake since it's in-kernel API.
> > 
> > 
> >> void osd_req_read(struct osd_request *or,
> >> 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
> >>
> >> in exofs I use these two:
> >>
> >> int osd_req_read_kern(struct osd_request *or,
> >> 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
> >> int osd_req_read_pages(struct osd_request *or,
> >> 	const struct osd_obj_id *, u64 offset, u64 length,
> >> 	struct page **pages, int page_count);
> >>
> > 
> > As I wrote above, if you have an interface handling 'struct page
> > **pages', then there should be ok.
> 
> Sorry you miss understand me, I did not explain well.
> 
> int osd_req_read_pages(,,,, struct page **pages, int page_count);
> 
> Is in exofs.ko. It builds a bio out of **pages then calls
> osd_req_read(,,bio,) which will then call blk_rq_append_bio()

As I wrote in the previous mail, why does exofs need to build a bio?
Why can exofs pass pages to osd ULD?


> So for exofs (filesystem) and pNFS-objects layout driver (Also part of filesystem)
> I currently need blk_rq_append_bio(), unless we want to do something new for
> these people.


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 15:01                 ` FUJITA Tomonori
@ 2009-02-11 15:07                   ` Boaz Harrosh
  2009-02-11 15:21                     ` FUJITA Tomonori
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11 15:07 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> On Wed, 11 Feb 2009 16:52:37 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
>> FUJITA Tomonori wrote:
>>> On Wed, 11 Feb 2009 11:15:53 +0200
>>> Boaz Harrosh <bharrosh@panasas.com> wrote:
>>>
>>>> I spoke too soon. There is one more place that uses blk_rq_append_bio, that is
>>>> the place that adds the read/write bio that is received in osd_req_write/read.
>>>> The reason I receive a bio at these is because mainly I need a way to
>>>> accept struct page* arrays, as well as kernel & user pointers. A bio is a nice
>>> The API using an array of pointers to page can handle everything. So
>>> why do the users of ULD (some rare file systems, I guess), osd
>>> initiator, need to build bios and pass them?
>>>
>>>
>>>> general carrier for any type of memory. Given a bio at hand there are no ways
>>>> left to prepare a request from it save the FS generic_make_request() route.
>>>>
>>>> I was thinking of using struct sg_iovec* at one stage but they look very
>>>> scary when used with page*, and mapping a page to a pointer but not doing
>>>> cache sync and all that jazz. A bio is a very nice carrier of a scatter-gather
>>>> list of memory. It has all the API for any needs. blk_rq_append_bio() was the last
>>>> way to associate a bio with a request. (except for privileged block-filesystems)
>>> We need to remove the usage of blk_rq_append_bio() in scsi.
>>>
>>>
>>>> So the first thing we have to decide is what API we need at read/write
>>>> today there is:
>>> You are talking about the API for osd file systems (or something
>>> related with osd), right? If so, I think that you can do whatever you
>>> want to do now. You can make a mistake since it's in-kernel API.
>>>
>>>
>>>> void osd_req_read(struct osd_request *or,
>>>> 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
>>>>
>>>> in exofs I use these two:
>>>>
>>>> int osd_req_read_kern(struct osd_request *or,
>>>> 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
>>>> int osd_req_read_pages(struct osd_request *or,
>>>> 	const struct osd_obj_id *, u64 offset, u64 length,
>>>> 	struct page **pages, int page_count);
>>>>
>>> As I wrote above, if you have an interface handling 'struct page
>>> **pages', then there should be ok.
>> Sorry you miss understand me, I did not explain well.
>>
>> int osd_req_read_pages(,,,, struct page **pages, int page_count);
>>
>> Is in exofs.ko. It builds a bio out of **pages then calls
>> osd_req_read(,,bio,) which will then call blk_rq_append_bio()
> 
> As I wrote in the previous mail, why does exofs need to build a bio?
> Why can exofs pass pages to osd ULD?
> 
> 

Sure it can pass pages to ULD but then how ULD maks a request out of
pages? And it gets more complicated then that (above multiple times)

But lets try to solve just the pages first.

Boaz




^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 15:07                   ` Boaz Harrosh
@ 2009-02-11 15:21                     ` FUJITA Tomonori
  2009-02-11 15:41                       ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-11 15:21 UTC (permalink / raw)
  To: bharrosh; +Cc: fujita.tomonori, James.Bottomley, jens.axboe, linux-scsi

On Wed, 11 Feb 2009 17:07:16 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> FUJITA Tomonori wrote:
> > On Wed, 11 Feb 2009 16:52:37 +0200
> > Boaz Harrosh <bharrosh@panasas.com> wrote:
> > 
> >> FUJITA Tomonori wrote:
> >>> On Wed, 11 Feb 2009 11:15:53 +0200
> >>> Boaz Harrosh <bharrosh@panasas.com> wrote:
> >>>
> >>>> I spoke too soon. There is one more place that uses blk_rq_append_bio, that is
> >>>> the place that adds the read/write bio that is received in osd_req_write/read.
> >>>> The reason I receive a bio at these is because mainly I need a way to
> >>>> accept struct page* arrays, as well as kernel & user pointers. A bio is a nice
> >>> The API using an array of pointers to page can handle everything. So
> >>> why do the users of ULD (some rare file systems, I guess), osd
> >>> initiator, need to build bios and pass them?
> >>>
> >>>
> >>>> general carrier for any type of memory. Given a bio at hand there are no ways
> >>>> left to prepare a request from it save the FS generic_make_request() route.
> >>>>
> >>>> I was thinking of using struct sg_iovec* at one stage but they look very
> >>>> scary when used with page*, and mapping a page to a pointer but not doing
> >>>> cache sync and all that jazz. A bio is a very nice carrier of a scatter-gather
> >>>> list of memory. It has all the API for any needs. blk_rq_append_bio() was the last
> >>>> way to associate a bio with a request. (except for privileged block-filesystems)
> >>> We need to remove the usage of blk_rq_append_bio() in scsi.
> >>>
> >>>
> >>>> So the first thing we have to decide is what API we need at read/write
> >>>> today there is:
> >>> You are talking about the API for osd file systems (or something
> >>> related with osd), right? If so, I think that you can do whatever you
> >>> want to do now. You can make a mistake since it's in-kernel API.
> >>>
> >>>
> >>>> void osd_req_read(struct osd_request *or,
> >>>> 	const struct osd_obj_id *, struct bio *data_in, u64 offset);
> >>>>
> >>>> in exofs I use these two:
> >>>>
> >>>> int osd_req_read_kern(struct osd_request *or,
> >>>> 	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
> >>>> int osd_req_read_pages(struct osd_request *or,
> >>>> 	const struct osd_obj_id *, u64 offset, u64 length,
> >>>> 	struct page **pages, int page_count);
> >>>>
> >>> As I wrote above, if you have an interface handling 'struct page
> >>> **pages', then there should be ok.
> >> Sorry you miss understand me, I did not explain well.
> >>
> >> int osd_req_read_pages(,,,, struct page **pages, int page_count);
> >>
> >> Is in exofs.ko. It builds a bio out of **pages then calls
> >> osd_req_read(,,bio,) which will then call blk_rq_append_bio()
> > 
> > As I wrote in the previous mail, why does exofs need to build a bio?
> > Why can exofs pass pages to osd ULD?
> > 
> > 
> 
> Sure it can pass pages to ULD but then how ULD maks a request out of
> pages?

If you extend blk_rq_map_kern as James did, your ULD make a request
out of passed pages, that is, the block layer properly builds a
request with bio(s).

I think that another possible option is adding a new mapping function
that can handle multiple bios for kernel buffers (as Mike extended
blk_rq_map_user).


> And it gets more complicated then that (above multiple times)

I don't think so. If you don't have time to work on it, please let me
know. I'll fix OSD ULD.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 15:21                     ` FUJITA Tomonori
@ 2009-02-11 15:41                       ` Boaz Harrosh
  2009-02-11 16:04                         ` FUJITA Tomonori
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11 15:41 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> On Wed, 11 Feb 2009 17:07:16 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
>>>
>>>
>> Sure it can pass pages to ULD but then how ULD maks a request out of
>> pages?
> 
> If you extend blk_rq_map_kern as James did, your ULD make a request
> out of passed pages, that is, the block layer properly builds a
> request with bio(s).

What? I don't understand?
blk_rq_map_kern takes a kernel-pointer and length, how to pass array
of page* ?

> 
> I think that another possible option is adding a new mapping function
> that can handle multiple bios for kernel buffers (as Mike extended
> blk_rq_map_user).
> 

Yes adding a new mapping function can help. Please note I need to
add (append) pages same as bio_add_pc_page() but on request level.

And yet we had an entry that did exactly that, blk_rq_append_bio(), no?

> 
>> And it gets more complicated then that (above multiple times)
> 
> I don't think so. If you don't have time to work on it, please let me
> know. I'll fix OSD ULD.

I have all the time in the world, And yes what James did with blk_rq_map_kern
will help with appending other osd segments on top of data.

If you propose the appropriate new API for block request level I can implement
that in block, and also in OSD. What other entry you want to add/change that solve
my need?

Thanks
Boaz


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 15:41                       ` Boaz Harrosh
@ 2009-02-11 16:04                         ` FUJITA Tomonori
  2009-02-11 16:30                           ` James Bottomley
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-11 16:04 UTC (permalink / raw)
  To: bharrosh; +Cc: fujita.tomonori, James.Bottomley, jens.axboe, linux-scsi

On Wed, 11 Feb 2009 17:41:35 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> FUJITA Tomonori wrote:
> > On Wed, 11 Feb 2009 17:07:16 +0200
> > Boaz Harrosh <bharrosh@panasas.com> wrote:
> > 
> >>>
> >>>
> >> Sure it can pass pages to ULD but then how ULD maks a request out of
> >> pages?
> > 
> > If you extend blk_rq_map_kern as James did, your ULD make a request
> > out of passed pages, that is, the block layer properly builds a
> > request with bio(s).
> 
> What? I don't understand?
> blk_rq_map_kern takes a kernel-pointer and length, how to pass array
> of page* ?

You can call it multiple times. But I guess that you need something
new since you need to handle user pages.


> > I think that another possible option is adding a new mapping function
> > that can handle multiple bios for kernel buffers (as Mike extended
> > blk_rq_map_user).
> > 
> 
> Yes adding a new mapping function can help. Please note I need to
> add (append) pages same as bio_add_pc_page() but on request level.

I think that what you need already exists.

See how st uses blk_rq_map_user() in scsi-misc (keywords: rq_map_data
and null_mapped). Please read it before saying something like 'we need
to handle kernel buffer'.


> And yet we had an entry that did exactly that, blk_rq_append_bio(), no?

Using blk_rq_append_bio in SCSI is unacceptable.


> >> And it gets more complicated then that (above multiple times)
> > 
> > I don't think so. If you don't have time to work on it, please let me
> > know. I'll fix OSD ULD.
> 
> I have all the time in the world, And yes what James did with blk_rq_map_kern
> will help with appending other osd segments on top of data.
> 
> If you propose the appropriate new API for block request level I can implement
> that in block, and also in OSD. What other entry you want to add/change that solve
> my need?

If you have time, how about making a proposal. ;)

But as I wrote above, I think that blk_rq_map_user() works for you.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 16:04                         ` FUJITA Tomonori
@ 2009-02-11 16:30                           ` James Bottomley
  2009-02-11 17:55                             ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: James Bottomley @ 2009-02-11 16:30 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: bharrosh, jens.axboe, linux-scsi

On Thu, 2009-02-12 at 01:04 +0900, FUJITA Tomonori wrote:
> On Wed, 11 Feb 2009 17:41:35 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
> > FUJITA Tomonori wrote:
> > > On Wed, 11 Feb 2009 17:07:16 +0200
> > > Boaz Harrosh <bharrosh@panasas.com> wrote:
> > > 
> > >>>
> > >>>
> > >> Sure it can pass pages to ULD but then how ULD maks a request out of
> > >> pages?
> > > 
> > > If you extend blk_rq_map_kern as James did, your ULD make a request
> > > out of passed pages, that is, the block layer properly builds a
> > > request with bio(s).
> > 
> > What? I don't understand?
> > blk_rq_map_kern takes a kernel-pointer and length, how to pass array
> > of page* ?
> 
> You can call it multiple times. But I guess that you need something
> new since you need to handle user pages.
> 
> 
> > > I think that another possible option is adding a new mapping function
> > > that can handle multiple bios for kernel buffers (as Mike extended
> > > blk_rq_map_user).
> > > 
> > 
> > Yes adding a new mapping function can help. Please note I need to
> > add (append) pages same as bio_add_pc_page() but on request level.
> 
> I think that what you need already exists.
> 
> See how st uses blk_rq_map_user() in scsi-misc (keywords: rq_map_data
> and null_mapped). Please read it before saying something like 'we need
> to handle kernel buffer'.
> 
> 
> > And yet we had an entry that did exactly that, blk_rq_append_bio(), no?
> 
> Using blk_rq_append_bio in SCSI is unacceptable.
> 
> 
> > >> And it gets more complicated then that (above multiple times)
> > > 
> > > I don't think so. If you don't have time to work on it, please let me
> > > know. I'll fix OSD ULD.
> > 
> > I have all the time in the world, And yes what James did with blk_rq_map_kern
> > will help with appending other osd segments on top of data.
> > 
> > If you propose the appropriate new API for block request level I can implement
> > that in block, and also in OSD. What other entry you want to add/change that solve
> > my need?
> 
> If you have time, how about making a proposal. ;)
> 
> But as I wrote above, I think that blk_rq_map_user() works for you.

Part of the problem, I think, is that a typical filesystem either works
with pages (ext3) or bios (btrfs) and puts them in block at the top.
Block then accumulates the pages to bios, elevates the bios into
requests and spits those out to SCSI ... this is why we want to
eliminate bio handlers from SCSI.

Your problem is that you phrase your osd fs/pnfs in terms of bios, so
then you need to emulate the block bio handlers internally (because
you're bypassing block) in your ULD.  However, what we're saying is if
you speak pages instead, you can utilise the standard block pc handlers,
which are designed to speak pages, without reinventing the bio handling
infrastructure.

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 16:30                           ` James Bottomley
@ 2009-02-11 17:55                             ` Boaz Harrosh
  2009-02-12  1:30                               ` FUJITA Tomonori
  2009-02-12 14:48                               ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
  0 siblings, 2 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-11 17:55 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, jens.axboe, linux-scsi

James Bottomley wrote:
> On Thu, 2009-02-12 at 01:04 +0900, FUJITA Tomonori wrote:
>> On Wed, 11 Feb 2009 17:41:35 +0200
>> Boaz Harrosh <bharrosh@panasas.com> wrote:
>>
>>> FUJITA Tomonori wrote:
>>>> On Wed, 11 Feb 2009 17:07:16 +0200
>>>> Boaz Harrosh <bharrosh@panasas.com> wrote:
>>>>
>>>>>>
>>>>> Sure it can pass pages to ULD but then how ULD maks a request out of
>>>>> pages?
>>>> If you extend blk_rq_map_kern as James did, your ULD make a request
>>>> out of passed pages, that is, the block layer properly builds a
>>>> request with bio(s).
>>> What? I don't understand?
>>> blk_rq_map_kern takes a kernel-pointer and length, how to pass array
>>> of page* ?
>> You can call it multiple times. But I guess that you need something
>> new since you need to handle user pages.
>>
>>
>>>> I think that another possible option is adding a new mapping function
>>>> that can handle multiple bios for kernel buffers (as Mike extended
>>>> blk_rq_map_user).
>>>>
>>> Yes adding a new mapping function can help. Please note I need to
>>> add (append) pages same as bio_add_pc_page() but on request level.
>> I think that what you need already exists.
>>
>> See how st uses blk_rq_map_user() in scsi-misc (keywords: rq_map_data
>> and null_mapped). Please read it before saying something like 'we need
>> to handle kernel buffer'.
>>
>>
>>> And yet we had an entry that did exactly that, blk_rq_append_bio(), no?
>> Using blk_rq_append_bio in SCSI is unacceptable.
>>
>>
>>>>> And it gets more complicated then that (above multiple times)
>>>> I don't think so. If you don't have time to work on it, please let me
>>>> know. I'll fix OSD ULD.
>>> I have all the time in the world, And yes what James did with blk_rq_map_kern
>>> will help with appending other osd segments on top of data.
>>>
>>> If you propose the appropriate new API for block request level I can implement
>>> that in block, and also in OSD. What other entry you want to add/change that solve
>>> my need?
>> If you have time, how about making a proposal. ;)
>>
>> But as I wrote above, I think that blk_rq_map_user() works for you.
> 
> Part of the problem, I think, is that a typical filesystem either works
> with pages (ext3) or bios (btrfs) and puts them in block at the top.
> Block then accumulates the pages to bios, elevates the bios into
> requests and spits those out to SCSI ... this is why we want to
> eliminate bio handlers from SCSI.
> 

You are too fast for me, sorry, I did not get that: "... this is why" logical jump.
I thought we where getting read of scatter-gather handlers from SCSI. We never
had any bio handlers in scsi, did we?

> Your problem is that you phrase your osd fs/pnfs in terms of bios, so
> then you need to emulate the block bio handlers internally (because
> you're bypassing block) in your ULD.  However, what we're saying is if
> you speak pages instead, you can utilise the standard block pc handlers,
> which are designed to speak pages, without reinventing the bio handling
> infrastructure.

I did not know I was reinventing the bio handling infrastructure, I thought
I was using it with all available glory, and exported API. I let different users
collect their memory information inside a bio, then in one simple call
the bio is elevated to a request and the request is submitted, block_pc style.

Actually I just started to code using blk_rq_map_user(), and setting of all
struct rq_map_data information, and that feels a little like bio code duplication.
Because I need a collection (link-list) of them. struct rq_map_data assumes linear memory
which I do not have. Also I will need that blk_rq_map_user() will append bios like the change
you proposed to blk_rq_map_kern(), I guess it is doable, only it's lots of more work.
(And that awful blk_rq_unmap_user() which forces me to know if I sent via map_usr or map_kern ...)

I have one question. If bio API is only to be used by block layer internal, why is it
exported at all? Seems a bio has nice API for collecting memory information and is used
by all kind of block users like filesystems, DM, MD, data integrity and others. I would
not mind a:
	struct requst *blk_pc_make_request(bio);
Of sorts, to make it official, same as generic_make_request() but for block_pc users.

> 
> James
> 
> 

I will try the blk_rq_map_user, I'm sure it will not be pretty, though.

Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 17:55                             ` Boaz Harrosh
@ 2009-02-12  1:30                               ` FUJITA Tomonori
  2009-02-12  8:24                                 ` Boaz Harrosh
  2009-02-12 14:48                               ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
  1 sibling, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-12  1:30 UTC (permalink / raw)
  To: bharrosh; +Cc: James.Bottomley, fujita.tomonori, jens.axboe, linux-scsi

On Wed, 11 Feb 2009 19:55:31 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> > Part of the problem, I think, is that a typical filesystem either works
> > with pages (ext3) or bios (btrfs) and puts them in block at the top.
> > Block then accumulates the pages to bios, elevates the bios into
> > requests and spits those out to SCSI ... this is why we want to
> > eliminate bio handlers from SCSI.
> > 
> 
> You are too fast for me, sorry, I did not get that: "... this is why" logical jump.
> I thought we where getting read of scatter-gather handlers from SCSI. We never
> had any bio handlers in scsi, did we?
> 
> > Your problem is that you phrase your osd fs/pnfs in terms of bios, so
> > then you need to emulate the block bio handlers internally (because
> > you're bypassing block) in your ULD.  However, what we're saying is if
> > you speak pages instead, you can utilise the standard block pc handlers,
> > which are designed to speak pages, without reinventing the bio handling
> > infrastructure.
> 
> I did not know I was reinventing the bio handling infrastructure, I thought

I think that we call something like the following reinventing the bio
handling infrastructure:

static void _abort_unexecuted_bios(struct request *rq)
{
	struct bio *bio;

	while ((bio = rq->bio) != NULL) {
		rq->bio = bio->bi_next;
		bio_endio(bio, 0);
	}
}


> I was using it with all available glory, and exported API. I let different users

include/blkdev.h says:

/*
 * Temporary export, until SCSI gets fixed up.
 */
extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
			     struct bio *bio);


blk_rq_append_bio is not an exported API. We finally fixed SCSI but
you broke it again.


> collect their memory information inside a bio, then in one simple call
> the bio is elevated to a request and the request is submitted, block_pc style.
> 
> Actually I just started to code using blk_rq_map_user(), and setting of all
> struct rq_map_data information, and that feels a little like bio code duplication.
> Because I need a collection (link-list) of them. struct rq_map_data assumes linear memory
> which I do not have. Also I will need that blk_rq_map_user() will append bios like the change
> you proposed to blk_rq_map_kern(), I guess it is doable, only it's lots of more work.
> (And that awful blk_rq_unmap_user() which forces me to know if I sent via map_usr or map_kern ...)

I think that having something like blk_rq_map_kern_page with James'
extension is another option.


> I have one question. If bio API is only to be used by block layer internal, why is it
> exported at all? Seems a bio has nice API for collecting memory information and is used

As I wrote, you use API that should not be exported. And I really
don't think that SCSI ULD needs to play with bio. As James wrote, you
reinvent and duplicate some of the block layer. That's a design that
we should avoid.


> by all kind of block users like filesystems, DM, MD, data integrity and others. I would
> not mind a:
> 	struct requst *blk_pc_make_request(bio);
> Of sorts, to make it official, same as generic_make_request() but for block_pc users.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-12  1:30                               ` FUJITA Tomonori
@ 2009-02-12  8:24                                 ` Boaz Harrosh
  2009-02-12  8:28                                   ` [RFD] blk_rq_map_pages new API Boaz Harrosh
  2009-02-12  8:41                                   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
  0 siblings, 2 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12  8:24 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> On Wed, 11 Feb 2009 19:55:31 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
>>> Part of the problem, I think, is that a typical filesystem either works
>>> with pages (ext3) or bios (btrfs) and puts them in block at the top.
>>> Block then accumulates the pages to bios, elevates the bios into
>>> requests and spits those out to SCSI ... this is why we want to
>>> eliminate bio handlers from SCSI.
>>>
>> You are too fast for me, sorry, I did not get that: "... this is why" logical jump.
>> I thought we where getting read of scatter-gather handlers from SCSI. We never
>> had any bio handlers in scsi, did we?
>>
>>> Your problem is that you phrase your osd fs/pnfs in terms of bios, so
>>> then you need to emulate the block bio handlers internally (because
>>> you're bypassing block) in your ULD.  However, what we're saying is if
>>> you speak pages instead, you can utilise the standard block pc handlers,
>>> which are designed to speak pages, without reinventing the bio handling
>>> infrastructure.
>> I did not know I was reinventing the bio handling infrastructure, I thought
> 
> I think that we call something like the following reinventing the bio
> handling infrastructure:
> 
> static void _abort_unexecuted_bios(struct request *rq)
> {
> 	struct bio *bio;
> 
> 	while ((bio = rq->bio) != NULL) {
> 		rq->bio = bio->bi_next;
> 		bio_endio(bio, 0);
> 	}
> }
> 

No TOMO! The above is because I can start mapping a request even with simple map_kern
and then fail and do not execute the request. If so, when I do a put_request() with a
bio I'm leaking the bio. The above should be put inside put_request() so not
to leak bios, but until then It's here.

It has nothing to do with our discussion.

> 
>> I was using it with all available glory, and exported API. I let different users
> 
> include/blkdev.h says:
> 
> /*
>  * Temporary export, until SCSI gets fixed up.
>  */
> extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
> 			     struct bio *bio);
> 
> 
> blk_rq_append_bio is not an exported API. We finally fixed SCSI but
> you broke it again.
> 

Fine, but I have a very legitimate need, which is:

Scatter-Gather Memory information was collected in a bio. At
some stage I need to make it a request. How?

> 
>> collect their memory information inside a bio, then in one simple call
>> the bio is elevated to a request and the request is submitted, block_pc style.
>>
>> Actually I just started to code using blk_rq_map_user(), and setting of all
>> struct rq_map_data information, and that feels a little like bio code duplication.
>> Because I need a collection (link-list) of them. struct rq_map_data assumes linear memory
>> which I do not have. Also I will need that blk_rq_map_user() will append bios like the change
>> you proposed to blk_rq_map_kern(), I guess it is doable, only it's lots of more work.
>> (And that awful blk_rq_unmap_user() which forces me to know if I sent via map_usr or map_kern ...)
> 
> I think that having something like blk_rq_map_kern_page with James'
> extension is another option.
> 

That is a very good option. I will need a way to specify a page_max_count to pre-allocate
the right size bio, so no bio re-allocations occurs.

Should I send a proposal for this?

[And yet there is such an exported API at bio level why not just open-code above, Linux Kernel
 style]

> 
>> I have one question. If bio API is only to be used by block layer internal, why is it
>> exported at all? Seems a bio has nice API for collecting memory information and is used
> 
> As I wrote, you use API that should not be exported. And I really
> don't think that SCSI ULD needs to play with bio. As James wrote, you
> reinvent and duplicate some of the block layer. That's a design that
> we should avoid.
> 

And as I wrote, I do not! Show me where? There is not a single line of duplication. I'm serving
complex file systems with their crazy memory needs. I need some structure to hold 
scatter-gather information. The best API around that does just that, with nice stuff like
slab pools and pre-allocations, is readily available at bio level. What you are asking me
is to duplicate all that. So it is opposite of what you say, you are asking me to duplicate
code I'm trying to re-use block layer code.

> 
>> by all kind of block users like filesystems, DM, MD, data integrity and others. I would
>> not mind a:
>> 	struct requst *blk_pc_make_request(bio);
>> Of sorts, to make it official, same as generic_make_request() but for block_pc users.

TOMO James please answer my question.
What is bad with bio usage as a memory crier? Why can filesystems, diff-engines, DM, MD,
you name it, use a bio, only OSD cannot? what is the reason? You say duplication of code?
what code in OSD? which code is it duplicating?

Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [RFD] blk_rq_map_pages new API
  2009-02-12  8:24                                 ` Boaz Harrosh
@ 2009-02-12  8:28                                   ` Boaz Harrosh
  2009-02-12  9:19                                     ` Boaz Harrosh
  2009-02-12  8:41                                   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
  1 sibling, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12  8:28 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi


/**
 * blk_rq_map_pages - Receives an array of pages and appends them to a request structure 
 *
 * @rq:  The request to map
 * @pages_info: A structure that specifies the array of pages, offset and more that need to be
 *              map to the request. the @pages_info->null_mapped is assumed to be 1 and is ignored.
 * @length: Total bytes to map. Array of pages can be larger, stop mapping after @length bytes mapped.
 * @max_pages: When allocating the internal bio use @max_pages as an hint that says the amount of anticipated
 *             pages that will be mapped. This member is optional and can be zero.
 * 
 * blk_rq_map_pages can be called multiple times so the user does not need to allocate a contiguous array
 * of struct page pointers but can call this routine multiple times. In that case max_pages can be set
 * so no bio re-allocation occurs.
 * There is no unmap function for this mapping, the request is completed in the regular way.
 */

int blk_rq_map_pages(struct request rq, struct *rq_map_data pages_info, unsigned length, unsigned max_pages);


Implementation comments:
- Then when this member is available many places that call blk_rq_map_user() rq_map_data and null_mapped set,
  and buff == NULL, can be converted to this member.

- Internal block implementation is refactored to not duplicate any code with blk_rq_map_user().

Thanks for any comments
Boaz


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-12  8:24                                 ` Boaz Harrosh
  2009-02-12  8:28                                   ` [RFD] blk_rq_map_pages new API Boaz Harrosh
@ 2009-02-12  8:41                                   ` FUJITA Tomonori
  2009-02-12  9:14                                     ` Boaz Harrosh
  1 sibling, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-12  8:41 UTC (permalink / raw)
  To: bharrosh; +Cc: fujita.tomonori, James.Bottomley, jens.axboe, linux-scsi

On Thu, 12 Feb 2009 10:24:42 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> FUJITA Tomonori wrote:
> > On Wed, 11 Feb 2009 19:55:31 +0200
> > Boaz Harrosh <bharrosh@panasas.com> wrote:
> > 
> >>> Part of the problem, I think, is that a typical filesystem either works
> >>> with pages (ext3) or bios (btrfs) and puts them in block at the top.
> >>> Block then accumulates the pages to bios, elevates the bios into
> >>> requests and spits those out to SCSI ... this is why we want to
> >>> eliminate bio handlers from SCSI.
> >>>
> >> You are too fast for me, sorry, I did not get that: "... this is why" logical jump.
> >> I thought we where getting read of scatter-gather handlers from SCSI. We never
> >> had any bio handlers in scsi, did we?
> >>
> >>> Your problem is that you phrase your osd fs/pnfs in terms of bios, so
> >>> then you need to emulate the block bio handlers internally (because
> >>> you're bypassing block) in your ULD.  However, what we're saying is if
> >>> you speak pages instead, you can utilise the standard block pc handlers,
> >>> which are designed to speak pages, without reinventing the bio handling
> >>> infrastructure.
> >> I did not know I was reinventing the bio handling infrastructure, I thought
> > 
> > I think that we call something like the following reinventing the bio
> > handling infrastructure:
> > 
> > static void _abort_unexecuted_bios(struct request *rq)
> > {
> > 	struct bio *bio;
> > 
> > 	while ((bio = rq->bio) != NULL) {
> > 		rq->bio = bio->bi_next;
> > 		bio_endio(bio, 0);
> > 	}
> > }
> > 
> 
> No TOMO! The above is because I can start mapping a request even with simple map_kern
> and then fail and do not execute the request. If so, when I do a put_request() with a
> bio I'm leaking the bio. The above should be put inside put_request() so not
> to leak bios, but until then It's here.
> 
> It has nothing to do with our discussion.

Seem that you don't understand what I talk about (and James, I think).

Of course, I understand the reason why you need that function.

The point is that OSD should not know how a request links bios.

OSD ULD should focus on OSD protocol processing. It should not know
the details of requests, bio, etc. OSD ULD should use the proper
helper functions of the block layer if OSD ULD uses the block layer
service.


> >> I was using it with all available glory, and exported API. I let different users
> > 
> > include/blkdev.h says:
> > 
> > /*
> >  * Temporary export, until SCSI gets fixed up.
> >  */
> > extern int blk_rq_append_bio(struct request_queue *q, struct request *rq,
> > 			     struct bio *bio);
> > 
> > 
> > blk_rq_append_bio is not an exported API. We finally fixed SCSI but
> > you broke it again.
> > 
> 
> Fine, but I have a very legitimate need, which is:
> 
> Scatter-Gather Memory information was collected in a bio. At
> some stage I need to make it a request. How?

I don't think that it's not legitimate. See the above.


> >> collect their memory information inside a bio, then in one simple call
> >> the bio is elevated to a request and the request is submitted, block_pc style.
> >>
> >> Actually I just started to code using blk_rq_map_user(), and setting of all
> >> struct rq_map_data information, and that feels a little like bio code duplication.
> >> Because I need a collection (link-list) of them. struct rq_map_data assumes linear memory
> >> which I do not have. Also I will need that blk_rq_map_user() will append bios like the change
> >> you proposed to blk_rq_map_kern(), I guess it is doable, only it's lots of more work.
> >> (And that awful blk_rq_unmap_user() which forces me to know if I sent via map_usr or map_kern ...)
> > 
> > I think that having something like blk_rq_map_kern_page with James'
> > extension is another option.
> > 
> 
> That is a very good option. I will need a way to specify a page_max_count to pre-allocate
> the right size bio, so no bio re-allocations occurs.
> 
> Should I send a proposal for this?
> 
> [And yet there is such an exported API at bio level why not just open-code above, Linux Kernel
>  style]

Please send a patch to implement the proposal.


> >> I have one question. If bio API is only to be used by block layer internal, why is it
> >> exported at all? Seems a bio has nice API for collecting memory information and is used
> > 
> > As I wrote, you use API that should not be exported. And I really
> > don't think that SCSI ULD needs to play with bio. As James wrote, you
> > reinvent and duplicate some of the block layer. That's a design that
> > we should avoid.
> > 
> 
> And as I wrote, I do not! Show me where? There is not a single line of duplication. I'm serving
> complex file systems with their crazy memory needs. I need some structure to hold 
> scatter-gather information. The best API around that does just that, with nice stuff like
> slab pools and pre-allocations, is readily available at bio level. What you are asking me
> is to duplicate all that. So it is opposite of what you say, you are asking me to duplicate
> code I'm trying to re-use block layer code.
> 
> > 
> >> by all kind of block users like filesystems, DM, MD, data integrity and others. I would
> >> not mind a:
> >> 	struct requst *blk_pc_make_request(bio);
> >> Of sorts, to make it official, same as generic_make_request() but for block_pc users.
> 
> TOMO James please answer my question.
> What is bad with bio usage as a memory crier? Why can filesystems, diff-engines, DM, MD,
> you name it, use a bio, only OSD cannot? what is the reason? You say duplication of code?
> what code in OSD? which code is it duplicating?

OSD ULD should focus on OSD protocol processing.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-12  8:41                                   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
@ 2009-02-12  9:14                                     ` Boaz Harrosh
  2009-02-12  9:50                                       ` FUJITA Tomonori
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12  9:14 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> On Thu, 12 Feb 2009 10:24:42 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
>> FUJITA Tomonori wrote:
>>> On Wed, 11 Feb 2009 19:55:31 +0200
>>> Boaz Harrosh <bharrosh@panasas.com> wrote:
>>> static void _abort_unexecuted_bios(struct request *rq)
>>> {
>>> 	struct bio *bio;
>>>
>>> 	while ((bio = rq->bio) != NULL) {
>>> 		rq->bio = bio->bi_next;
>>> 		bio_endio(bio, 0);
>>> 	}
>>> }
>>>
>> No TOMO! The above is because I can start mapping a request even with simple map_kern
>> and then fail and do not execute the request. If so, when I do a put_request() with a
>> bio I'm leaking the bio. The above should be put inside put_request() so not
>> to leak bios, but until then It's here.
>>
>> It has nothing to do with our discussion.
> 
> Seem that you don't understand what I talk about (and James, I think).
> 
> Of course, I understand the reason why you need that function.
> 
> The point is that OSD should not know how a request links bios.
> 
> OSD ULD should focus on OSD protocol processing. It should not know
> the details of requests, bio, etc. OSD ULD should use the proper
> helper functions of the block layer if OSD ULD uses the block layer
> service.
> 

What? I'm talking about a BUG, here. What do you suggest that I leak bios
until a fix to block is accepted?

And again this is unrelated. Even if I do exactly as you suggest which is
use block API minus append_bio I still have that BUG. That must be fixed.

Please stop talking about above code. This is a different thread, unrelated
to blk_rq_append_bio() removal.

> 
> OSD ULD should focus on OSD protocol processing.

Which is what it does exactly. You ask me to interfere at OSD level and
set policy on memory structures. What I did is let osd_write/read just be
a transparent pipe between the upper layer FS code and the low level block
layer. Proof of that, any kind of implementation you suggested is that much
more code at OSD level? Today it is dead simple.

Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFD] blk_rq_map_pages new API
  2009-02-12  8:28                                   ` [RFD] blk_rq_map_pages new API Boaz Harrosh
@ 2009-02-12  9:19                                     ` Boaz Harrosh
  2009-02-12  9:50                                       ` FUJITA Tomonori
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12  9:19 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

Boaz Harrosh wrote:
> /**
>  * blk_rq_map_pages - Receives an array of pages and appends them to a request structure 
>  *
>  * @rq:  The request to map
>  * @pages_info: A structure that specifies the array of pages, offset and more that need to be
>  *              map to the request. the @pages_info->null_mapped is assumed to be 1 and is ignored.
>  * @length: Total bytes to map. Array of pages can be larger, stop mapping after @length bytes mapped.
>  * @max_pages: When allocating the internal bio use @max_pages as an hint that says the amount of anticipated
>  *             pages that will be mapped. This member is optional and can be zero.
>  * 
>  * blk_rq_map_pages can be called multiple times so the user does not need to allocate a contiguous array
>  * of struct page pointers but can call this routine multiple times. In that case max_pages can be set
>  * so no bio re-allocation occurs.
>  * There is no unmap function for this mapping, the request is completed in the regular way.
>  */
> 
> int blk_rq_map_pages(struct request rq, struct *rq_map_data pages_info, unsigned length, unsigned max_pages);
> 
> 
> Implementation comments:
> - Then when this member is available many places that call blk_rq_map_user() rq_map_data and null_mapped set,
>   and buff == NULL, can be converted to this member.
> 
> - Internal block implementation is refactored to not duplicate any code with blk_rq_map_user().
> 
> Thanks for any comments
> Boaz
> 
> --

FUJITA Tomonori wrote:
> Please send a patch to implement the proposal.
> 

Is the above API accepted by you? Is it accepted by Jens?
Should I also attempt first comment above.

If its OK I will implement it ASAP.

Thanks
Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-12  9:14                                     ` Boaz Harrosh
@ 2009-02-12  9:50                                       ` FUJITA Tomonori
  2009-02-12 12:19                                         ` [PATCH] [RFC] block: Don't let blk_put_request leak BIOs Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-12  9:50 UTC (permalink / raw)
  To: bharrosh; +Cc: fujita.tomonori, James.Bottomley, jens.axboe, linux-scsi

On Thu, 12 Feb 2009 11:14:02 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> FUJITA Tomonori wrote:
> > On Thu, 12 Feb 2009 10:24:42 +0200
> > Boaz Harrosh <bharrosh@panasas.com> wrote:
> > 
> >> FUJITA Tomonori wrote:
> >>> On Wed, 11 Feb 2009 19:55:31 +0200
> >>> Boaz Harrosh <bharrosh@panasas.com> wrote:
> >>> static void _abort_unexecuted_bios(struct request *rq)
> >>> {
> >>> 	struct bio *bio;
> >>>
> >>> 	while ((bio = rq->bio) != NULL) {
> >>> 		rq->bio = bio->bi_next;
> >>> 		bio_endio(bio, 0);
> >>> 	}
> >>> }
> >>>
> >> No TOMO! The above is because I can start mapping a request even with simple map_kern
> >> and then fail and do not execute the request. If so, when I do a put_request() with a
> >> bio I'm leaking the bio. The above should be put inside put_request() so not
> >> to leak bios, but until then It's here.
> >>
> >> It has nothing to do with our discussion.
> > 
> > Seem that you don't understand what I talk about (and James, I think).
> > 
> > Of course, I understand the reason why you need that function.
> > 
> > The point is that OSD should not know how a request links bios.
> > 
> > OSD ULD should focus on OSD protocol processing. It should not know
> > the details of requests, bio, etc. OSD ULD should use the proper
> > helper functions of the block layer if OSD ULD uses the block layer
> > service.
> > 
> 
> What? I'm talking about a BUG, here. What do you suggest that I leak bios
> until a fix to block is accepted?

Huh? What I said is something like the above function should live in
the block layer. Again, OSD ULD should not know about such.


> And again this is unrelated. Even if I do exactly as you suggest which is
> use block API minus append_bio I still have that BUG. That must be fixed.
> 
> Please stop talking about above code. This is a different thread, unrelated
> to blk_rq_append_bio() removal.
> 
> > 
> > OSD ULD should focus on OSD protocol processing.
> 
> Which is what it does exactly. You ask me to interfere at OSD level and
> set policy on memory structures. What I did is let osd_write/read just be
>
> a transparent pipe between the upper layer FS code and the low level block
> layer. Proof of that, any kind of implementation you suggested is that much
> more code at OSD level? Today it is dead simple.

Why is it so difficult for you to understand the meaning that OSD ULD
should not know how request links bios?

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFD] blk_rq_map_pages new API
  2009-02-12  9:19                                     ` Boaz Harrosh
@ 2009-02-12  9:50                                       ` FUJITA Tomonori
  2009-02-12 10:20                                         ` FUJITA Tomonori
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-12  9:50 UTC (permalink / raw)
  To: bharrosh; +Cc: fujita.tomonori, James.Bottomley, jens.axboe, linux-scsi

On Thu, 12 Feb 2009 11:19:09 +0200
Boaz Harrosh <bharrosh@panasas.com> wrote:

> Boaz Harrosh wrote:
> > /**
> >  * blk_rq_map_pages - Receives an array of pages and appends them to a request structure 
> >  *
> >  * @rq:  The request to map
> >  * @pages_info: A structure that specifies the array of pages, offset and more that need to be
> >  *              map to the request. the @pages_info->null_mapped is assumed to be 1 and is ignored.
> >  * @length: Total bytes to map. Array of pages can be larger, stop mapping after @length bytes mapped.
> >  * @max_pages: When allocating the internal bio use @max_pages as an hint that says the amount of anticipated
> >  *             pages that will be mapped. This member is optional and can be zero.
> >  * 
> >  * blk_rq_map_pages can be called multiple times so the user does not need to allocate a contiguous array
> >  * of struct page pointers but can call this routine multiple times. In that case max_pages can be set
> >  * so no bio re-allocation occurs.
> >  * There is no unmap function for this mapping, the request is completed in the regular way.
> >  */
> > 
> > int blk_rq_map_pages(struct request rq, struct *rq_map_data pages_info, unsigned length, unsigned max_pages);
> > 
> > 
> > Implementation comments:
> > - Then when this member is available many places that call blk_rq_map_user() rq_map_data and null_mapped set,
> >   and buff == NULL, can be converted to this member.
> > 
> > - Internal block implementation is refactored to not duplicate any code with blk_rq_map_user().
> > 
> > Thanks for any comments
> > Boaz
> > 
> > --
> 
> FUJITA Tomonori wrote:
> > Please send a patch to implement the proposal.
> > 
> 
> Is the above API accepted by you? Is it accepted by Jens?
> Should I also attempt first comment above.
>
> If its OK I will implement it ASAP.

Sorry, I'm not sure until I see the actual code. You always find
tricky things when you actually write the code.

The patch should be small. It's not difficult at all to write a patch
to see how it works.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFD] blk_rq_map_pages new API
  2009-02-12  9:50                                       ` FUJITA Tomonori
@ 2009-02-12 10:20                                         ` FUJITA Tomonori
  2009-02-12 11:34                                           ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: FUJITA Tomonori @ 2009-02-12 10:20 UTC (permalink / raw)
  To: bharrosh; +Cc: James.Bottomley, jens.axboe, linux-scsi

On Thu, 12 Feb 2009 18:50:33 +0900
FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> wrote:

> On Thu, 12 Feb 2009 11:19:09 +0200
> Boaz Harrosh <bharrosh@panasas.com> wrote:
> 
> > Boaz Harrosh wrote:
> > > /**
> > >  * blk_rq_map_pages - Receives an array of pages and appends them to a request structure 
> > >  *
> > >  * @rq:  The request to map
> > >  * @pages_info: A structure that specifies the array of pages, offset and more that need to be
> > >  *              map to the request. the @pages_info->null_mapped is assumed to be 1 and is ignored.
> > >  * @length: Total bytes to map. Array of pages can be larger, stop mapping after @length bytes mapped.
> > >  * @max_pages: When allocating the internal bio use @max_pages as an hint that says the amount of anticipated
> > >  *             pages that will be mapped. This member is optional and can be zero.
> > >  * 
> > >  * blk_rq_map_pages can be called multiple times so the user does not need to allocate a contiguous array
> > >  * of struct page pointers but can call this routine multiple times. In that case max_pages can be set
> > >  * so no bio re-allocation occurs.
> > >  * There is no unmap function for this mapping, the request is completed in the regular way.
> > >  */
> > > 
> > > int blk_rq_map_pages(struct request rq, struct *rq_map_data pages_info, unsigned length, unsigned max_pages);
> > > 
> > > 
> > > Implementation comments:
> > > - Then when this member is available many places that call blk_rq_map_user() rq_map_data and null_mapped set,
> > >   and buff == NULL, can be converted to this member.
> > > 
> > > - Internal block implementation is refactored to not duplicate any code with blk_rq_map_user().
> > > 
> > > Thanks for any comments
> > > Boaz
> > > 
> > > --
> > 
> > FUJITA Tomonori wrote:
> > > Please send a patch to implement the proposal.
> > > 
> > 
> > Is the above API accepted by you? Is it accepted by Jens?
> > Should I also attempt first comment above.
> >
> > If its OK I will implement it ASAP.
> 
> Sorry, I'm not sure until I see the actual code. You always find
> tricky things when you actually write the code.
> 
> The patch should be small. It's not difficult at all to write a patch
> to see how it works.

BTW, the idea is fine by me:

I have some page frames and want to build a request out of them.

That's what st and osst want. If OSD ULD wants it, adding a new API
for it sounds a good idea to me. Then we can remove some features of
blk_rq_map_user that I added for st and osst.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFD] blk_rq_map_pages new API
  2009-02-12 10:20                                         ` FUJITA Tomonori
@ 2009-02-12 11:34                                           ` Boaz Harrosh
  0 siblings, 0 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 11:34 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: James.Bottomley, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> On Thu, 12 Feb 2009 18:50:33 +0900
> FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> wrote:
> 
>> On Thu, 12 Feb 2009 11:19:09 +0200
>> Boaz Harrosh <bharrosh@panasas.com> wrote:
>>
>>> Boaz Harrosh wrote:
>>>> /**
>>>>  * blk_rq_map_pages - Receives an array of pages and appends them to a request structure 
>>>>  *
>>>>  * @rq:  The request to map
>>>>  * @pages_info: A structure that specifies the array of pages, offset and more that need to be
>>>>  *              map to the request. the @pages_info->null_mapped is assumed to be 1 and is ignored.
>>>>  * @length: Total bytes to map. Array of pages can be larger, stop mapping after @length bytes mapped.
>>>>  * @max_pages: When allocating the internal bio use @max_pages as an hint that says the amount of anticipated
>>>>  *             pages that will be mapped. This member is optional and can be zero.
>>>>  * 
>>>>  * blk_rq_map_pages can be called multiple times so the user does not need to allocate a contiguous array
>>>>  * of struct page pointers but can call this routine multiple times. In that case max_pages can be set
>>>>  * so no bio re-allocation occurs.
>>>>  * There is no unmap function for this mapping, the request is completed in the regular way.
>>>>  */
>>>>
>>>> int blk_rq_map_pages(struct request rq, struct *rq_map_data pages_info, unsigned length, unsigned max_pages);
>>>>
>>>>
>>>> Implementation comments:
>>>> - Then when this member is available many places that call blk_rq_map_user() rq_map_data and null_mapped set,
>>>>   and buff == NULL, can be converted to this member.
>>>>
>>>> - Internal block implementation is refactored to not duplicate any code with blk_rq_map_user().
>>>>
>>>> Thanks for any comments
>>>> Boaz
>>>>
>>>> --
>>> FUJITA Tomonori wrote:
>>>> Please send a patch to implement the proposal.
>>>>
>>> Is the above API accepted by you? Is it accepted by Jens?
>>> Should I also attempt first comment above.
>>>
>>> If its OK I will implement it ASAP.
>> Sorry, I'm not sure until I see the actual code. You always find
>> tricky things when you actually write the code.
>>
>> The patch should be small. It's not difficult at all to write a patch
>> to see how it works.
> 
> BTW, the idea is fine by me:
> 
> I have some page frames and want to build a request out of them.
> 
> That's what st and osst want. If OSD ULD wants it, adding a new API
> for it sounds a good idea to me. Then we can remove some features of
> blk_rq_map_user that I added for st and osst.

Right, I agree with you totally. I'm on it, I'll try an RFC see how it
goes, and we can decide from there.

Thanks TOMO
Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH] [RFC] block: Don't let blk_put_request leak BIOs
  2009-02-12  9:50                                       ` FUJITA Tomonori
@ 2009-02-12 12:19                                         ` Boaz Harrosh
  2009-02-12 13:49                                           ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 12:19 UTC (permalink / raw)
  To: FUJITA Tomonori, jens.axboe; +Cc: James.Bottomley, linux-scsi, linux-kernel


If a block ULD had allocated a request and mapped some memory into it,
using one of blk_rq_map_xxx routines, but then for some reason failed to
execute the request through one of the blk_execute_request routines.
Then the associated BIO would leak, unless ULD resorts to low-level loops
intimate of block internals.

[RFC]
  This code will also catch situations where LLD failed to complete
  the request before aborting it. Such situations are a BUG. Should we
  use WARN_ON_ONCE() in that case. The situation above is possible and
  can happen normally in memory pressure situations so maybe we should
  devise a bit-flag that ULD denotes that the request was aborted and
  only WARN_ON if flag was not set.

  I'm sending this before any-tests so people can comment on possible
  pitfalls.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 block/blk-core.c                 |   16 ++++++++++++++++
 drivers/scsi/osd/osd_initiator.c |   17 +----------------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a824e49..eac96c2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1055,6 +1055,21 @@ void part_round_stats(int cpu, struct hd_struct *part)
 EXPORT_SYMBOL_GPL(part_round_stats);
 
 /*
+ * If one of the blk_rq_map_xxx() was called but the request was not
+ * executed by the block layer, then we must release BIOs. Otherwise they
+ * will leak.
+ */
+static void _abort_unexecuted_bios(struct request *req)
+{
+	struct bio *bio;
+
+	while ((bio = req->bio) != NULL) {
+		req->bio = bio->bi_next;
+		bio_endio(bio, 0);
+	}
+}
+
+/*
  * queue lock must be held
  */
 void __blk_put_request(struct request_queue *q, struct request *req)
@@ -1066,6 +1081,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 
 	elv_completed_request(q, req);
 
+	_abort_unexecuted_bios(req);
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 0bbbf27..8b1cf72 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -335,20 +335,6 @@ struct osd_request *osd_start_request(struct osd_dev *dev, gfp_t gfp)
 }
 EXPORT_SYMBOL(osd_start_request);
 
-/*
- * If osd_finalize_request() was called but the request was not executed through
- * the block layer, then we must release BIOs.
- */
-static void _abort_unexecuted_bios(struct request *rq)
-{
-	struct bio *bio;
-
-	while ((bio = rq->bio) != NULL) {
-		rq->bio = bio->bi_next;
-		bio_endio(bio, 0);
-	}
-}
-
 static void _osd_free_seg(struct osd_request *or __unused,
 	struct _osd_req_data_segment *seg)
 {
@@ -370,11 +356,10 @@ void osd_end_request(struct osd_request *or)
 
 	if (rq) {
 		if (rq->next_rq) {
-			_abort_unexecuted_bios(rq->next_rq);
 			blk_put_request(rq->next_rq);
+			rq->next_rq = NULL;
 		}
 
-		_abort_unexecuted_bios(rq);
 		blk_put_request(rq);
 	}
 	_osd_request_free(or);
-- 
1.6.0.1



^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH] [RFC] block: Don't let blk_put_request leak BIOs
  2009-02-12 12:19                                         ` [PATCH] [RFC] block: Don't let blk_put_request leak BIOs Boaz Harrosh
@ 2009-02-12 13:49                                           ` Boaz Harrosh
  2009-02-12 13:56                                             ` Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 13:49 UTC (permalink / raw)
  To: jens.axboe, James.Bottomley, FUJITA Tomonori; +Cc: linux-scsi, linux-kernel

Boaz Harrosh wrote:
> If a block ULD had allocated a request and mapped some memory into it,
> using one of blk_rq_map_xxx routines, but then for some reason failed to
> execute the request through one of the blk_execute_request routines.
> Then the associated BIO would leak, unless ULD resorts to low-level loops
> intimate of block internals.
> 
> [RFC]
>   This code will also catch situations where LLD failed to complete
>   the request before aborting it. Such situations are a BUG. Should we
>   use WARN_ON_ONCE() in that case. The situation above is possible and
>   can happen normally in memory pressure situations so maybe we should
>   devise a bit-flag that ULD denotes that the request was aborted and
>   only WARN_ON if flag was not set.
> 
>   I'm sending this before any-tests so people can comment on possible
>   pitfalls.
> 
> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
> ---

I've booted a Linux PC with lots of sata disks, connected an iscsi
target, ran OSD tests. It looks like it's working which means
request->bio is set to NULL after it is used in the regular path.

This needs to sit in Linux next and be tested for a long while.

Jens I'll be waiting for your comment and will send a proper
patch for the block bits. We will have to time this with James
to see when the OSD bits can be submitted after that, then TOMO's
patch for un-exporting blk_req_append_bio can be merged. Or maybe
it can all go in one patch through scsi?

Thanks
Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] [RFC] block: Don't let blk_put_request leak BIOs
  2009-02-12 13:49                                           ` Boaz Harrosh
@ 2009-02-12 13:56                                             ` Boaz Harrosh
  2009-02-12 17:27                                               ` [PATCH 1/2] " Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 13:56 UTC (permalink / raw)
  To: jens.axboe, James.Bottomley, FUJITA Tomonori; +Cc: linux-scsi, linux-kernel

Boaz Harrosh wrote:
> Boaz Harrosh wrote:
>> If a block ULD had allocated a request and mapped some memory into it,
>> using one of blk_rq_map_xxx routines, but then for some reason failed to
>> execute the request through one of the blk_execute_request routines.
>> Then the associated BIO would leak, unless ULD resorts to low-level loops
>> intimate of block internals.
>>
>> [RFC]
>>   This code will also catch situations where LLD failed to complete
>>   the request before aborting it. Such situations are a BUG. Should we
>>   use WARN_ON_ONCE() in that case. The situation above is possible and
>>   can happen normally in memory pressure situations so maybe we should
>>   devise a bit-flag that ULD denotes that the request was aborted and
>>   only WARN_ON if flag was not set.
>>
>>   I'm sending this before any-tests so people can comment on possible
>>   pitfalls.
>>
>> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
>> ---
> 
> I've booted a Linux PC with lots of sata disks, connected an iscsi
> target, ran OSD tests. It looks like it's working which means
> request->bio is set to NULL after it is used in the regular path.
> 
> This needs to sit in Linux next and be tested for a long while.
> 
> Jens I'll be waiting for your comment and will send a proper
> patch for the block bits. We will have to time this with James
> to see when the OSD bits can be submitted after that, then TOMO's
> patch for un-exporting blk_req_append_bio can be merged. Or maybe
> it can all go in one patch through scsi?
> 
> Thanks
> Boaz

Spock to soon this patch is shit, I'll look into it

Sorry for the noise

Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-11 17:55                             ` Boaz Harrosh
  2009-02-12  1:30                               ` FUJITA Tomonori
@ 2009-02-12 14:48                               ` James Bottomley
  2009-02-12 16:51                                 ` Boaz Harrosh
  1 sibling, 1 reply; 38+ messages in thread
From: James Bottomley @ 2009-02-12 14:48 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: FUJITA Tomonori, jens.axboe, linux-scsi

On Wed, 2009-02-11 at 19:55 +0200, Boaz Harrosh wrote:
> James Bottomley wrote:
> > On Thu, 2009-02-12 at 01:04 +0900, FUJITA Tomonori wrote:
> >> On Wed, 11 Feb 2009 17:41:35 +0200
> >> Boaz Harrosh <bharrosh@panasas.com> wrote:
> >>
> >>> FUJITA Tomonori wrote:
> >>>> On Wed, 11 Feb 2009 17:07:16 +0200
> >>>> Boaz Harrosh <bharrosh@panasas.com> wrote:
> >>>>
> >>>>>>
> >>>>> Sure it can pass pages to ULD but then how ULD maks a request out of
> >>>>> pages?
> >>>> If you extend blk_rq_map_kern as James did, your ULD make a request
> >>>> out of passed pages, that is, the block layer properly builds a
> >>>> request with bio(s).
> >>> What? I don't understand?
> >>> blk_rq_map_kern takes a kernel-pointer and length, how to pass array
> >>> of page* ?
> >> You can call it multiple times. But I guess that you need something
> >> new since you need to handle user pages.
> >>
> >>
> >>>> I think that another possible option is adding a new mapping function
> >>>> that can handle multiple bios for kernel buffers (as Mike extended
> >>>> blk_rq_map_user).
> >>>>
> >>> Yes adding a new mapping function can help. Please note I need to
> >>> add (append) pages same as bio_add_pc_page() but on request level.
> >> I think that what you need already exists.
> >>
> >> See how st uses blk_rq_map_user() in scsi-misc (keywords: rq_map_data
> >> and null_mapped). Please read it before saying something like 'we need
> >> to handle kernel buffer'.
> >>
> >>
> >>> And yet we had an entry that did exactly that, blk_rq_append_bio(), no?
> >> Using blk_rq_append_bio in SCSI is unacceptable.
> >>
> >>
> >>>>> And it gets more complicated then that (above multiple times)
> >>>> I don't think so. If you don't have time to work on it, please let me
> >>>> know. I'll fix OSD ULD.
> >>> I have all the time in the world, And yes what James did with blk_rq_map_kern
> >>> will help with appending other osd segments on top of data.
> >>>
> >>> If you propose the appropriate new API for block request level I can implement
> >>> that in block, and also in OSD. What other entry you want to add/change that solve
> >>> my need?
> >> If you have time, how about making a proposal. ;)
> >>
> >> But as I wrote above, I think that blk_rq_map_user() works for you.
> > 
> > Part of the problem, I think, is that a typical filesystem either works
> > with pages (ext3) or bios (btrfs) and puts them in block at the top.
> > Block then accumulates the pages to bios, elevates the bios into
> > requests and spits those out to SCSI ... this is why we want to
> > eliminate bio handlers from SCSI.
> > 
> 
> You are too fast for me, sorry, I did not get that: "... this is why" logical jump.
> I thought we where getting read of scatter-gather handlers from SCSI. We never
> had any bio handlers in scsi, did we?

Yes, but we've been trying to cut down on them.  In fact with both osd
and integrity, the use of bio handlers has been increasing.

> > Your problem is that you phrase your osd fs/pnfs in terms of bios, so
> > then you need to emulate the block bio handlers internally (because
> > you're bypassing block) in your ULD.  However, what we're saying is if
> > you speak pages instead, you can utilise the standard block pc handlers,
> > which are designed to speak pages, without reinventing the bio handling
> > infrastructure.
> 
> I did not know I was reinventing the bio handling infrastructure, I thought
> I was using it with all available glory, and exported API. I let different users
> collect their memory information inside a bio, then in one simple call
> the bio is elevated to a request and the request is submitted, block_pc style.

Everything in your last sentence is how the block layer handles bios
(although it's more sophisticated), so this is the duplication.  The
problem is that how bios are put together is a block internal thing.
Nothing outside of block does that today ... if you construct your own
multi-bio requests, we have to remember that drivers/scsi/osd needs
updating as well if block ever alters this (say to cope with a new
elevator strategy).

> Actually I just started to code using blk_rq_map_user(), and setting of all
> struct rq_map_data information, and that feels a little like bio code duplication.
> Because I need a collection (link-list) of them. struct rq_map_data assumes linear memory
> which I do not have. Also I will need that blk_rq_map_user() will append bios like the change
> you proposed to blk_rq_map_kern(), I guess it is doable, only it's lots of more work.
> (And that awful blk_rq_unmap_user() which forces me to know if I sent via map_usr or map_kern ...)

blk_rq_map_user does append pages to bios ... that's where I got the
changes to blk_rq_map_kern() from.

> I have one question. If bio API is only to be used by block layer internal, why is it
> exported at all? Seems a bio has nice API for collecting memory information and is used
> by all kind of block users like filesystems, DM, MD, data integrity and others. I would
> not mind a:
> 	struct requst *blk_pc_make_request(bio);
> Of sorts, to make it official, same as generic_make_request() but for block_pc users.

bios are supposed to be used by filesystems ... they represent the input
to block.  In principle, it seems nicer that PC requests deal with pages
as their natural input ... that way we don't have to worry about
exporting the block interfaces for bio->request construction.

> I will try the blk_rq_map_user, I'm sure it will not be pretty, though.

Great, thanks ... we can also adjust block interfaces to help.

James



^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH 2/3] block: unexport blk_rq_append_bio
  2009-02-12 14:48                               ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
@ 2009-02-12 16:51                                 ` Boaz Harrosh
  0 siblings, 0 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 16:51 UTC (permalink / raw)
  To: James Bottomley; +Cc: FUJITA Tomonori, jens.axboe, linux-scsi

James Bottomley wrote:
> 
> bios are supposed to be used by filesystems ... they represent the input
> to block.  In principle, it seems nicer that PC requests deal with pages
> as their natural input ... that way we don't have to worry about
> exporting the block interfaces for bio->request construction.
> 

This is my point. I'm a filesystem and I want a natural route from
bio to request, only with a paradigm shift as you call it.
For a filesystem the most natural and comfortable vehicle is a bio.

So the interface I would like to have is:

    struct request *blk_make_request(struct request_queue *, struct bio *, gfp_t);

And let block layer take care of it's internals. Off course when crafting above
the generic_make_request will use above API so there is absolutely no duplication
of code and block layer is free to change as it feels like, only in a single place.

>> I will try the blk_rq_map_user, I'm sure it will not be pretty, though.
> 
> Great, thanks ... we can also adjust block interfaces to help.
> 
> James
> 

I'm on it. It's not as bad as I thought, and it should work for what I have now.
I'm touching fast-path block layer code though, so we'll have to test it for a long
while.

I have a gut feeling that I will regret it some day, but we'll cross that bridge when
we get there.

Thank you for your help and concern.
Boaz

^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH 1/2] [RFC] block: Don't let blk_put_request leak BIOs
  2009-02-12 13:56                                             ` Boaz Harrosh
@ 2009-02-12 17:27                                               ` Boaz Harrosh
  2009-02-12 17:30                                                 ` [PATCH 2/2] [RFC] libosd: Don't let osd abuse block internals, now that it's fixed Boaz Harrosh
  0 siblings, 1 reply; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 17:27 UTC (permalink / raw)
  To: jens.axboe, James.Bottomley, FUJITA Tomonori; +Cc: linux-scsi, linux-kernel


If a block ULD had allocated a request and mapped some memory into it,
but then for some reason failed to execute the request through one of
the blk_execute_request_xxx routines. Then the associated bio would leak,
unless ULD resorts to low-level loops intimate of block internals.

For this to work I have fixed a couple of places in block/ where
request->bio != NULL ownership was not honored. And a small cleanup
at sg_io() while at it.

[RFC]
  This code will also catch situations where LLD failed to complete
  the request before aborting it. Such situations are a BUG. Should we
  use WARN_ON_ONCE() in that case. The situation above is possible and
  can happen normally in memory pressure situations so maybe we should
  devise a bit-flag that ULD denotes that the request was aborted and
  only WARN_ON if flag was not set.
  For the duration of linux-next I'm leaving the WARN_ON to catch any
  problems like found above, and possible memory leaks. Before submission
  a complimentary patch should remove the WARN_ON. (Or this patch can be
  rebased)

  Please comment on possible pitfalls.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 block/blk-core.c   |   17 +++++++++++++++++
 block/blk-merge.c  |    2 ++
 block/scsi_ioctl.c |   21 ++++-----------------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a824e49..3c1f920 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1055,6 +1055,22 @@ void part_round_stats(int cpu, struct hd_struct *part)
 EXPORT_SYMBOL_GPL(part_round_stats);
 
 /*
+ * If one of the blk_rq_map_xxx() was called but the request was not
+ * executed by the block layer, then we must release BIOs. Otherwise they
+ * will leak.
+ */
+static void _abort_unexecuted_bios(struct request *req)
+{
+	struct bio *bio;
+
+	WARN_ON(req->bio != NULL);
+	while (unlikely((bio = req->bio) != NULL)) {
+		req->bio = bio->bi_next;
+		bio_endio(bio, 0);
+	}
+}
+
+/*
  * queue lock must be held
  */
 void __blk_put_request(struct request_queue *q, struct request *req)
@@ -1066,6 +1082,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 
 	elv_completed_request(q, req);
 
+	_abort_unexecuted_bios(req);
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
 	 * it didn't come out of our reserved rq pools
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b92f5b0..463e797 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -398,6 +398,8 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	if (blk_rq_cpu_valid(next))
 		req->cpu = next->cpu;
 
+	/* owner-ship of bio passed from next to req */
+	next->bio = NULL;
 	__blk_put_request(q, next);
 	return 1;
 }
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ee9c67d..626ee27 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -214,21 +214,10 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 	return 0;
 }
 
-/*
- * unmap a request that was previously mapped to this sg_io_hdr. handles
- * both sg and non-sg sg_io_hdr.
- */
-static int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr)
-{
-	blk_rq_unmap_user(rq->bio);
-	blk_put_request(rq);
-	return 0;
-}
-
 static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 				 struct bio *bio)
 {
-	int r, ret = 0;
+	int ret = 0;
 
 	/*
 	 * fill in all the output members
@@ -253,12 +242,10 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 			ret = -EFAULT;
 	}
 
-	rq->bio = bio;
-	r = blk_unmap_sghdr_rq(rq, hdr);
-	if (ret)
-		r = ret;
+	blk_rq_unmap_user(bio);
+	blk_put_request(rq);
 
-	return r;
+	return ret;
 }
 
 static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
-- 
1.6.0.1



^ permalink raw reply related	[flat|nested] 38+ messages in thread

* [PATCH 2/2] [RFC] libosd: Don't let osd abuse block internals, now that it's fixed
  2009-02-12 17:27                                               ` [PATCH 1/2] " Boaz Harrosh
@ 2009-02-12 17:30                                                 ` Boaz Harrosh
  0 siblings, 0 replies; 38+ messages in thread
From: Boaz Harrosh @ 2009-02-12 17:30 UTC (permalink / raw)
  To: jens.axboe, James.Bottomley, FUJITA Tomonori; +Cc: linux-scsi, linux-kernel


blk_put_request will delete any BIOs that where mapped but not
executed, so osd_initiator does not have to take care of this situation
any more.

This patch is dependent on block patch titled:

  [PATCH 1/2] block: Don't let blk_put_request leak BIOs

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 drivers/scsi/osd/osd_initiator.c |   17 +----------------
 1 files changed, 1 insertions(+), 16 deletions(-)

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index 0bbbf27..8b1cf72 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -335,20 +335,6 @@ struct osd_request *osd_start_request(struct osd_dev *dev, gfp_t gfp)
 }
 EXPORT_SYMBOL(osd_start_request);
 
-/*
- * If osd_finalize_request() was called but the request was not executed through
- * the block layer, then we must release BIOs.
- */
-static void _abort_unexecuted_bios(struct request *rq)
-{
-	struct bio *bio;
-
-	while ((bio = rq->bio) != NULL) {
-		rq->bio = bio->bi_next;
-		bio_endio(bio, 0);
-	}
-}
-
 static void _osd_free_seg(struct osd_request *or __unused,
 	struct _osd_req_data_segment *seg)
 {
@@ -370,11 +356,10 @@ void osd_end_request(struct osd_request *or)
 
 	if (rq) {
 		if (rq->next_rq) {
-			_abort_unexecuted_bios(rq->next_rq);
 			blk_put_request(rq->next_rq);
+			rq->next_rq = NULL;
 		}
 
-		_abort_unexecuted_bios(rq);
 		blk_put_request(rq);
 	}
 	_osd_request_free(or);
-- 
1.6.0.1


^ permalink raw reply related	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2009-02-12 17:30 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-12-13 16:23 [PATCH 0/3] remove scsi_req_map_sg FUJITA Tomonori
2008-12-13 16:23 ` [PATCH 1/3] " FUJITA Tomonori
2008-12-13 16:23   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
2008-12-13 16:23     ` [PATCH 3/3] block: unexport bio_add_pc_page FUJITA Tomonori
2009-02-10 17:43     ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
2009-02-10 18:19       ` James Bottomley
2009-02-11  0:21         ` FUJITA Tomonori
2009-02-11  8:17         ` Boaz Harrosh
2009-02-11  8:19           ` Boaz Harrosh
2009-02-11  9:15           ` Boaz Harrosh
2009-02-11 14:32             ` FUJITA Tomonori
2009-02-11 14:52               ` Boaz Harrosh
2009-02-11 15:01                 ` FUJITA Tomonori
2009-02-11 15:07                   ` Boaz Harrosh
2009-02-11 15:21                     ` FUJITA Tomonori
2009-02-11 15:41                       ` Boaz Harrosh
2009-02-11 16:04                         ` FUJITA Tomonori
2009-02-11 16:30                           ` James Bottomley
2009-02-11 17:55                             ` Boaz Harrosh
2009-02-12  1:30                               ` FUJITA Tomonori
2009-02-12  8:24                                 ` Boaz Harrosh
2009-02-12  8:28                                   ` [RFD] blk_rq_map_pages new API Boaz Harrosh
2009-02-12  9:19                                     ` Boaz Harrosh
2009-02-12  9:50                                       ` FUJITA Tomonori
2009-02-12 10:20                                         ` FUJITA Tomonori
2009-02-12 11:34                                           ` Boaz Harrosh
2009-02-12  8:41                                   ` [PATCH 2/3] block: unexport blk_rq_append_bio FUJITA Tomonori
2009-02-12  9:14                                     ` Boaz Harrosh
2009-02-12  9:50                                       ` FUJITA Tomonori
2009-02-12 12:19                                         ` [PATCH] [RFC] block: Don't let blk_put_request leak BIOs Boaz Harrosh
2009-02-12 13:49                                           ` Boaz Harrosh
2009-02-12 13:56                                             ` Boaz Harrosh
2009-02-12 17:27                                               ` [PATCH 1/2] " Boaz Harrosh
2009-02-12 17:30                                                 ` [PATCH 2/2] [RFC] libosd: Don't let osd abuse block internals, now that it's fixed Boaz Harrosh
2009-02-12 14:48                               ` [PATCH 2/3] block: unexport blk_rq_append_bio James Bottomley
2009-02-12 16:51                                 ` Boaz Harrosh
2008-12-15  7:28 ` [PATCH 0/3] remove scsi_req_map_sg Jens Axboe
2008-12-18  8:14   ` FUJITA Tomonori

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.