linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/6] blkdev: blkdev_issue_fn cleanups
@ 2010-03-29  6:09 Dmitry Monakhov
  2010-03-29  6:09 ` [PATCH 1/6] blkdev: pass gfp_mask and flags to blkdev_issue_flush Dmitry Monakhov
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

- Convert all blkdev_issue_xxx function to common set of flags
- Optimize generic discard submitting procedure.
- Discard payload cleanup
Since introducing compat discard call was rejected by now.
This patch is contains only cleanups and one new library function
which later will be used instead of copy-pasted code-blocks in
different filesystem.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] blkdev: pass gfp_mask and flags to blkdev_issue_flush
  2010-03-29  6:09 [PATCH 0/6] blkdev: blkdev_issue_fn cleanups Dmitry Monakhov
@ 2010-03-29  6:09 ` Dmitry Monakhov
  2010-03-29  6:09   ` [PATCH 2/6] blkdev: generalize flags for blkdev_issue_fn functions Dmitry Monakhov
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

In some places caller don't want to wait a request to complete.
Flags make blkdev_issue_flush() more flexible. This patch just
convert existing callers to new interface without chaining
existing allocation/wait behavior.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 block/blk-barrier.c                |   43 +++++++++++++++++++++---------------
 drivers/block/drbd/drbd_int.h      |    3 +-
 drivers/block/drbd/drbd_receiver.c |    3 +-
 fs/block_dev.c                     |    3 +-
 fs/ext3/fsync.c                    |    3 +-
 fs/ext4/fsync.c                    |    6 +++-
 fs/jbd2/checkpoint.c               |    3 +-
 fs/jbd2/commit.c                   |    6 +++-
 fs/reiserfs/file.c                 |    3 +-
 fs/xfs/linux-2.6/xfs_super.c       |    3 +-
 include/linux/blkdev.h             |   10 ++++++-
 11 files changed, 55 insertions(+), 31 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8618d89..7e6e810 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -285,26 +285,31 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
 			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
 	}
-
-	complete(bio->bi_private);
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	bio_put(bio);
 }
 
 /**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
  * @error_sector:	error sector
+ * @flags:	BLKDEV_IFL_* flags to control behaviour
  *
  * Description:
  *    Issue a flush for the block device in question. Caller can supply
  *    room for storing the error offset in case of a flush error, if they
- *    wish to.
+ *    wish to. If WAIT flag is not passed then caller may check only what
+ *    request was pushed in some internal queue for later handling.
  */
-int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+		sector_t *error_sector, unsigned long flags)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q;
 	struct bio *bio;
-	int ret;
+	int ret = 0;
 
 	if (bdev->bd_disk == NULL)
 		return -ENXIO;
@@ -313,23 +318,25 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	if (!q)
 		return -ENXIO;
 
-	bio = bio_alloc(GFP_KERNEL, 0);
+	bio = bio_alloc(gfp_mask, 0);
 	bio->bi_end_io = bio_end_empty_barrier;
-	bio->bi_private = &wait;
 	bio->bi_bdev = bdev;
-	submit_bio(WRITE_BARRIER, bio);
+	if (test_bit(__BLKDEV_IFL_WAIT, &flags))
+		bio->bi_private = &wait;
 
-	wait_for_completion(&wait);
-
-	/*
-	 * The driver must store the error location in ->bi_sector, if
-	 * it supports it. For non-stacked drivers, this should be copied
-	 * from blk_rq_pos(rq).
-	 */
-	if (error_sector)
-		*error_sector = bio->bi_sector;
+	bio_get(bio);
+	submit_bio(WRITE_BARRIER, bio);
+	if (test_bit(__BLKDEV_IFL_WAIT, &flags)) {
+		wait_for_completion(&wait);
+		/*
+		 * The driver must store the error location in ->bi_sector, if
+		 * it supports it. For non-stacked drivers, this should be
+		 * copied from blk_rq_pos(rq).
+		 */
+		if (error_sector)
+			*error_sector = bio->bi_sector;
+	}
 
-	ret = 0;
 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
 		ret = -EOPNOTSUPP;
 	else if (!bio_flagged(bio, BIO_UPTODATE))
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 2bf3a6e..1c00aa0 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -2241,7 +2241,8 @@ static inline void drbd_md_flush(struct drbd_conf *mdev)
 	if (test_bit(MD_NO_BARRIER, &mdev->flags))
 		return;
 
-	r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL);
+	r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL,
+			BLKDEV_IFL_WAIT);
 	if (r) {
 		set_bit(MD_NO_BARRIER, &mdev->flags);
 		dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d065c64..ad395db 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -945,7 +945,8 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d
 	int rv;
 
 	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
-		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL);
+		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
+					NULL, BLKDEV_IFL_WAIT);
 		if (rv) {
 			dev_err(DEV, "local disk flush failed with status %d\n", rv);
 			/* would rather check on EOPNOTSUPP, but that is not reliable.
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d11d028..b1c8062 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -413,7 +413,8 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 	if (error)
 		return error;
 	
-	error = blkdev_issue_flush(bdev, NULL);
+	error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL,
+				(BLKDEV_IFL_WAIT));
 	if (error == -EOPNOTSUPP)
 		error = 0;
 	return error;
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 8209f26..9492f60 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -91,7 +91,8 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
 	 * storage
 	 */
 	if (test_opt(inode->i_sb, BARRIER))
-		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+				BLKDEV_IFL_WAIT);
 out:
 	return ret;
 }
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 98bd140..99778d1 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -100,9 +100,11 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
 		if (ext4_should_writeback_data(inode) &&
 		    (journal->j_fs_dev != journal->j_dev) &&
 		    (journal->j_flags & JBD2_BARRIER))
-			blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
+					NULL, BLKDEV_IFL_WAIT);
 		jbd2_log_wait_commit(journal, commit_tid);
 	} else if (journal->j_flags & JBD2_BARRIER)
-		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
+			BLKDEV_IFL_WAIT);
 	return ret;
 }
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 8868493..8f3ea7b 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -529,7 +529,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 	 */
 	if ((journal->j_fs_dev != journal->j_dev) &&
 	    (journal->j_flags & JBD2_BARRIER))
-		blkdev_issue_flush(journal->j_fs_dev, NULL);
+		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+			BLKDEV_IFL_WAIT);
 	if (!(journal->j_flags & JBD2_ABORT))
 		jbd2_journal_update_superblock(journal, 1);
 	return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 1bc74b6..6f900d8 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -717,7 +717,8 @@ start_journal_io:
 	if (commit_transaction->t_flushed_data_blocks &&
 	    (journal->j_fs_dev != journal->j_dev) &&
 	    (journal->j_flags & JBD2_BARRIER))
-		blkdev_issue_flush(journal->j_fs_dev, NULL);
+		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
+			BLKDEV_IFL_WAIT);
 
 	/* Done it all: now write the commit record asynchronously. */
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -727,7 +728,8 @@ start_journal_io:
 		if (err)
 			__jbd2_journal_abort_hard(journal);
 		if (journal->j_flags & JBD2_BARRIER)
-			blkdev_issue_flush(journal->j_dev, NULL);
+			blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
+				BLKDEV_IFL_WAIT);
 	}
 
 	err = journal_finish_inode_data_buffers(journal, commit_transaction);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index da2dba0..5e4f8c5 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -147,7 +147,8 @@ static int reiserfs_sync_file(struct file *filp,
 	barrier_done = reiserfs_commit_for_inode(inode);
 	reiserfs_write_unlock(inode->i_sb);
 	if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
-		blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
+		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, 
+			BLKDEV_IFL_WAIT);
 	if (barrier_done < 0)
 		return barrier_done;
 	return (err < 0) ? -EIO : 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 25ea240..291b0ae 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -724,7 +724,8 @@ void
 xfs_blkdev_issue_flush(
 	xfs_buftarg_t		*buftarg)
 {
-	blkdev_issue_flush(buftarg->bt_bdev, NULL);
+	blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
+			BLKDEV_IFL_WAIT);
 }
 
 STATIC void
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebd22db..c8ee5cf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1012,8 +1012,14 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
 		return NULL;
 	return bqt->tag_index[tag];
 }
-
-extern int blkdev_issue_flush(struct block_device *, sector_t *);
+enum{
+	__BLKDEV_IFL_WAIT,	/* wait for completion */
+	__BLKDEV_IFL_BARRIER,	/*issue request with barrier */
+};
+#define BLKDEV_IFL_WAIT		(1 << __BLKDEV_IFL_WAIT)
+#define BLKDEV_IFL_BARRIER	(1 << __BLKDEV_IFL_BARRIER)
+extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
+			unsigned long);
 #define DISCARD_FL_WAIT		0x01	/* wait for completion */
 #define DISCARD_FL_BARRIER	0x02	/* issue DISCARD_BARRIER request */
 extern int blkdev_issue_discard(struct block_device *, sector_t sector,
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/6] blkdev: generalize flags for blkdev_issue_fn functions
  2010-03-29  6:09 ` [PATCH 1/6] blkdev: pass gfp_mask and flags to blkdev_issue_flush Dmitry Monakhov
@ 2010-03-29  6:09   ` Dmitry Monakhov
  2010-03-29  6:09     ` [PATCH 3/6] blkdev: add blkdev_issue helper functions Dmitry Monakhov
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

The patch just convert all blkdev_issue_xxx function to common
flags. Wait/allocation semantics not changed.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 block/blk-barrier.c    |   10 +++++-----
 block/ioctl.c          |    2 +-
 fs/btrfs/extent-tree.c |    2 +-
 fs/gfs2/rgrp.c         |    5 +++--
 include/linux/blkdev.h |    8 +++-----
 mm/swapfile.c          |    8 +++++---
 6 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 7e6e810..56254b1 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -368,17 +368,17 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
  * @sector:	start sector
  * @nr_sects:	number of sectors to discard
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	DISCARD_FL_* flags to control behaviour
+ * @flags:	BLKDEV_IFL_* flags to control behaviour
  *
  * Description:
  *    Issue a discard request for the sectors in question.
  */
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, int flags)
+		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & DISCARD_FL_BARRIER ?
+	int type = flags & BLKDEV_IFL_BARRIER ?
 		DISCARD_BARRIER : DISCARD_NOBARRIER;
 	struct bio *bio;
 	struct page *page;
@@ -401,7 +401,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
-		if (flags & DISCARD_FL_WAIT)
+		if (flags & BLKDEV_IFL_BARRIER)
 			bio->bi_private = &wait;
 
 		/*
@@ -432,7 +432,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio_get(bio);
 		submit_bio(type, bio);
 
-		if (flags & DISCARD_FL_WAIT)
+		if (flags & BLKDEV_IFL_BARRIER)
 			wait_for_completion(&wait);
 
 		if (bio_flagged(bio, BIO_EOPNOTSUPP))
diff --git a/block/ioctl.c b/block/ioctl.c
index be48ea5..d803cac 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -125,7 +125,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 	if (start + len > (bdev->bd_inode->i_size >> 9))
 		return -EINVAL;
 	return blkdev_issue_discard(bdev, start, len, GFP_KERNEL,
-				    DISCARD_FL_WAIT);
+				    BLKDEV_IFL_WAIT);
 }
 
 static int put_ushort(unsigned long arg, unsigned short val)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 559f724..7453ac4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1588,7 +1588,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
 				u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			     DISCARD_FL_BARRIER);
+			BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 503b842..bf011dc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -854,7 +854,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				if ((start + nr_sects) != blk) {
 					rv = blkdev_issue_discard(bdev, start,
 							    nr_sects, GFP_NOFS,
-							    DISCARD_FL_BARRIER);
+							    BLKDEV_IFL_WAIT |
+							    BLKDEV_IFL_BARRIER);
 					if (rv)
 						goto fail;
 					nr_sects = 0;
@@ -869,7 +870,7 @@ start_new_extent:
 	}
 	if (nr_sects) {
 		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
-					 DISCARD_FL_BARRIER);
+					 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 		if (rv)
 			goto fail;
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c8ee5cf..84ec105 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1020,10 +1020,8 @@ enum{
 #define BLKDEV_IFL_BARRIER	(1 << __BLKDEV_IFL_BARRIER)
 extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
 			unsigned long);
-#define DISCARD_FL_WAIT		0x01	/* wait for completion */
-#define DISCARD_FL_BARRIER	0x02	/* issue DISCARD_BARRIER request */
-extern int blkdev_issue_discard(struct block_device *, sector_t sector,
-		sector_t nr_sects, gfp_t, int flags);
+extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
 
 static inline int sb_issue_discard(struct super_block *sb,
 				   sector_t block, sector_t nr_blocks)
@@ -1031,7 +1029,7 @@ static inline int sb_issue_discard(struct super_block *sb,
 	block <<= (sb->s_blocksize_bits - 9);
 	nr_blocks <<= (sb->s_blocksize_bits - 9);
 	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
-				    DISCARD_FL_BARRIER);
+				   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 }
 
 extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c0585b..6c48271 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -139,7 +139,8 @@ static int discard_swap(struct swap_info_struct *si)
 	nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
 	if (nr_blocks) {
 		err = blkdev_issue_discard(si->bdev, start_block,
-				nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+				nr_blocks, GFP_KERNEL,
+				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 		if (err)
 			return err;
 		cond_resched();
@@ -150,7 +151,8 @@ static int discard_swap(struct swap_info_struct *si)
 		nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 
 		err = blkdev_issue_discard(si->bdev, start_block,
-				nr_blocks, GFP_KERNEL, DISCARD_FL_BARRIER);
+				nr_blocks, GFP_KERNEL,
+				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 		if (err)
 			break;
 
@@ -189,7 +191,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
 			start_block <<= PAGE_SHIFT - 9;
 			nr_blocks <<= PAGE_SHIFT - 9;
 			if (blkdev_issue_discard(si->bdev, start_block,
-				    nr_blocks, GFP_NOIO, DISCARD_FL_BARRIER))
+				    nr_blocks, GFP_NOIO, BLKDEV_IFL_BARRIER))
 				break;
 		}
 
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/6] blkdev: add blkdev_issue helper functions
  2010-03-29  6:09   ` [PATCH 2/6] blkdev: generalize flags for blkdev_issue_fn functions Dmitry Monakhov
@ 2010-03-29  6:09     ` Dmitry Monakhov
  2010-03-29  6:09       ` [PATCH 4/6] blkdev: add discard payload flag Dmitry Monakhov
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

Move blkdev_issue_discard from blk-barrier.c because it is not barrier
related.

Add new helper functions:
__blkdev_issue_zeroout: Explicitly zero given range via zeroed write request.
This function is useful in many places.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 block/Makefile         |    2 +-
 block/blk-barrier.c    |  103 ---------------------
 block/blk-lib.c        |  232 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |    3 +-
 4 files changed, 235 insertions(+), 105 deletions(-)
 create mode 100644 block/blk-lib.c

diff --git a/block/Makefile b/block/Makefile
index cb2d515..0bb499a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-			blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
+			blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 56254b1..1242bdd 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -347,106 +347,3 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-static void blkdev_discard_end_io(struct bio *bio, int err)
-{
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
-
-	if (bio->bi_private)
-		complete(bio->bi_private);
-	__free_page(bio_page(bio));
-
-	bio_put(bio);
-}
-
-/**
- * blkdev_issue_discard - queue a discard
- * @bdev:	blockdev to issue discard for
- * @sector:	start sector
- * @nr_sects:	number of sectors to discard
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_IFL_* flags to control behaviour
- *
- * Description:
- *    Issue a discard request for the sectors in question.
- */
-int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
-{
-	DECLARE_COMPLETION_ONSTACK(wait);
-	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & BLKDEV_IFL_BARRIER ?
-		DISCARD_BARRIER : DISCARD_NOBARRIER;
-	struct bio *bio;
-	struct page *page;
-	int ret = 0;
-
-	if (!q)
-		return -ENXIO;
-
-	if (!blk_queue_discard(q))
-		return -EOPNOTSUPP;
-
-	while (nr_sects && !ret) {
-		unsigned int sector_size = q->limits.logical_block_size;
-		unsigned int max_discard_sectors =
-			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
-
-		bio = bio_alloc(gfp_mask, 1);
-		if (!bio)
-			goto out;
-		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
-		bio->bi_bdev = bdev;
-		if (flags & BLKDEV_IFL_BARRIER)
-			bio->bi_private = &wait;
-
-		/*
-		 * Add a zeroed one-sector payload as that's what
-		 * our current implementations need.  If we'll ever need
-		 * more the interface will need revisiting.
-		 */
-		page = alloc_page(gfp_mask | __GFP_ZERO);
-		if (!page)
-			goto out_free_bio;
-		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
-			goto out_free_page;
-
-		/*
-		 * And override the bio size - the way discard works we
-		 * touch many more blocks on disk than the actual payload
-		 * length.
-		 */
-		if (nr_sects > max_discard_sectors) {
-			bio->bi_size = max_discard_sectors << 9;
-			nr_sects -= max_discard_sectors;
-			sector += max_discard_sectors;
-		} else {
-			bio->bi_size = nr_sects << 9;
-			nr_sects = 0;
-		}
-
-		bio_get(bio);
-		submit_bio(type, bio);
-
-		if (flags & BLKDEV_IFL_BARRIER)
-			wait_for_completion(&wait);
-
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
-	}
-	return ret;
-out_free_page:
-	__free_page(page);
-out_free_bio:
-	bio_put(bio);
-out:
-	return -ENOMEM;
-}
-EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-lib.c b/block/blk-lib.c
new file mode 100644
index 0000000..d0f84f6
--- /dev/null
+++ b/block/blk-lib.c
@@ -0,0 +1,232 @@
+/*
+ * Functions related to generic helpers functions
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+struct bio_batch
+{
+	atomic_t 		done;
+	unsigned long 		flags;
+	struct completion 	*wait;
+	bio_end_io_t		*end_io;
+};
+
+static void bio_batch_end_io(struct bio *bio, int err)
+{
+	struct bio_batch *bb = bio->bi_private;
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bb->flags);
+		else
+			clear_bit(BIO_UPTODATE, &bb->flags);
+	}
+	if (bb) {
+		if (bb->end_io)
+			bb->end_io(bio, err);
+		atomic_inc(&bb->done);
+		complete(bb->wait);
+	}
+	bio_put(bio);
+}
+
+/**
+ * __blkdev_issue_zeroout generate number of zero filed write bios
+ * @bdev:	blockdev to issue
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to write
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @flags:	BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ *  Generate and issue number of bios with zerofiled pages.
+ *  Send barrier at the beginning and at the end if requested. This guarantie
+ *  correct request ordering. Empty barrier allow us to avoid post queue flush.
+ */
+
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+	int ret = 0;
+	struct bio *bio;
+	struct bio_batch bb;
+	unsigned int sz, issued = 0;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	atomic_set(&bb.done, 0);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+	bb.end_io = NULL;
+
+	if (flags & BLKDEV_IFL_BARRIER) {
+		/* issue async barrier before the data */
+		ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
+		if (ret)
+			return ret;
+	}
+submit:
+	while (nr_sects != 0) {
+		bio = bio_alloc(gfp_mask,
+				min(nr_sects, (sector_t)BIO_MAX_PAGES));
+		if (!bio)
+			break;
+
+		bio->bi_sector = sector;
+		bio->bi_bdev   = bdev;
+		bio->bi_end_io = bio_batch_end_io;
+		if (flags & BLKDEV_IFL_WAIT)
+			bio->bi_private = &bb;
+
+		while(nr_sects != 0) {
+			sz = min(PAGE_SIZE >> 9 , nr_sects);
+			if (sz == 0)
+				/* bio has maximum size possible */
+				break;
+			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+			nr_sects -= ret >> 9;
+			sector += ret >> 9;
+			if (ret < (sz << 9))
+				break;
+		}
+		issued++;
+		submit_bio(WRITE, bio);
+	}
+	/*
+	 * When all data bios are in flight. Send final barrier if requeted.
+	 */
+	if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
+		ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
+					flags & BLKDEV_IFL_WAIT);
+
+
+	if (flags & BLKDEV_IFL_WAIT)
+		/* Wait for bios in-flight */
+		while ( issued != atomic_read(&bb.done))
+			wait_for_completion(&wait);
+
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		/* One of bios in the batch was completed with error.*/
+		ret = -EIO;
+
+	if (ret)
+		goto out;
+
+	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+	if (nr_sects != 0)
+		goto submit;
+out:
+	return ret;
+}
+EXPORT_SYMBOL(__blkdev_issue_zeroout);
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	__free_page(bio_page(bio));
+
+	bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:	blockdev to issue discard for
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to discard
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @flags:	BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question.
+ */
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	struct request_queue *q = bdev_get_queue(bdev);
+	int type = flags & BLKDEV_IFL_BARRIER ?
+		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	struct bio *bio;
+	struct page *page;
+	int ret = 0;
+
+	if (!q)
+		return -ENXIO;
+
+	if (!blk_queue_discard(q))
+		return -EOPNOTSUPP;
+
+	while (nr_sects && !ret) {
+		unsigned int sector_size = q->limits.logical_block_size;
+		unsigned int max_discard_sectors =
+			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+
+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio)
+			goto out;
+		bio->bi_sector = sector;
+		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_bdev = bdev;
+		if (flags & BLKDEV_IFL_BARRIER)
+			bio->bi_private = &wait;
+
+		/*
+		 * Add a zeroed one-sector payload as that's what
+		 * our current implementations need.  If we'll ever need
+		 * more the interface will need revisiting.
+		 */
+		page = alloc_page(gfp_mask | __GFP_ZERO);
+		if (!page)
+			goto out_free_bio;
+		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+			goto out_free_page;
+
+		/*
+		 * And override the bio size - the way discard works we
+		 * touch many more blocks on disk than the actual payload
+		 * length.
+		 */
+		if (nr_sects > max_discard_sectors) {
+			bio->bi_size = max_discard_sectors << 9;
+			nr_sects -= max_discard_sectors;
+			sector += max_discard_sectors;
+		} else {
+			bio->bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+
+		bio_get(bio);
+		submit_bio(type, bio);
+
+		if (flags & BLKDEV_IFL_BARRIER)
+			wait_for_completion(&wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+out_free_page:
+	__free_page(page);
+out_free_bio:
+	bio_put(bio);
+out:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 84ec105..8bc6966 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1022,7 +1022,8 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
 			unsigned long);
 extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-
+extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
 static inline int sb_issue_discard(struct super_block *sb,
 				   sector_t block, sector_t nr_blocks)
 {
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/6] blkdev: add discard payload flag
  2010-03-29  6:09     ` [PATCH 3/6] blkdev: add blkdev_issue helper functions Dmitry Monakhov
@ 2010-03-29  6:09       ` Dmitry Monakhov
  2010-03-29  6:09         ` [PATCH 5/6] blkdev: optimize discard request logic Dmitry Monakhov
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

Not all implementations require discard bio to be filled with one-sector
sized payload. Let's allocate payload only when necessary.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 block/blk-lib.c        |   24 +++++++++++++-----------
 drivers/scsi/sd.c      |    1 +
 include/linux/blkdev.h |    5 ++++-
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index d0f84f6..4c5a278 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -184,17 +184,19 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		if (flags & BLKDEV_IFL_BARRIER)
 			bio->bi_private = &wait;
 
-		/*
-		 * Add a zeroed one-sector payload as that's what
-		 * our current implementations need.  If we'll ever need
-		 * more the interface will need revisiting.
-		 */
-		page = alloc_page(gfp_mask | __GFP_ZERO);
-		if (!page)
-			goto out_free_bio;
-		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
-			goto out_free_page;
-
+		if (blk_queue_discard_mem(q)) {
+			/*
+			 * Add a zeroed one-sector payload as that's what
+			 * our current implementations need.  If we'll ever
+			 * need more the interface will need revisiting.
+			 */
+			page = alloc_page(gfp_mask | __GFP_ZERO);
+			if (!page)
+				goto out_free_bio;
+			if (bio_add_pc_page(q, bio, page, sector_size, 0) <
+				sector_size)
+				goto out_free_page;
+		}
 		/*
 		 * And override the bio size - the way discard works we
 		 * touch many more blocks on disk than the actual payload
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 1dd4d84..9994977 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1513,6 +1513,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 			q->limits.discard_zeroes_data = 1;
 
 		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD_MEM, q);
 	}
 
 	sdkp->capacity = lba + 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8bc6966..c8cb07f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -461,7 +461,8 @@ struct request_queue
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
-#define QUEUE_FLAG_NOXMERGES   17	/* No extended merges */
+#define QUEUE_FLAG_DISCARD_MEM 17	/* require memory in DISCARD request*/
+#define QUEUE_FLAG_NOXMERGES   18	/* No extended merges */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -595,6 +596,8 @@ enum {
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
 #define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
+#define blk_queue_discard_mem(q) \
+	test_bit(QUEUE_FLAG_DISCARD_MEM, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 5/6] blkdev: optimize discard request logic
  2010-03-29  6:09       ` [PATCH 4/6] blkdev: add discard payload flag Dmitry Monakhov
@ 2010-03-29  6:09         ` Dmitry Monakhov
  2010-03-29  6:09           ` [PATCH 6/6] blkdev: optimize discard payload utilization Dmitry Monakhov
  0 siblings, 1 reply; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

Current submit procedure is suboptimal. Each bio is flagged with
barrier flag, so we will end up with following trace:
for_each_bio(discar_bios) {
  q->pre_flush
  handle(bio);
  disk->flush_cache
  q->post_flush
}
But in fact user want following semantics:
q->pre_flush()
for_each_bio(discar_bios)
  handle(bio)
q->pre_flush()

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 block/blk-lib.c |  141 +++++++++++++++++++++++++++++++-----------------------
 1 files changed, 81 insertions(+), 60 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 4c5a278..5326d91 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -129,17 +129,22 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
 
 static void blkdev_discard_end_io(struct bio *bio, int err)
 {
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
-
-	if (bio->bi_private)
-		complete(bio->bi_private);
-	__free_page(bio_page(bio));
+	if (bio_page(bio))
+		put_page(bio_page(bio));
+}
 
-	bio_put(bio);
+static int add_discard_payload(struct bio *bio, gfp_t gfp_mask, int len)
+{
+	struct page *page;
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+	page = alloc_page(gfp_mask | __GFP_ZERO);
+	if (!page)
+			return -ENOMEM;
+	if (bio_add_pc_page(q, bio, page, len, 0) < len)
+		BUG();
+	get_page(page);
+	return 0;
 }
 
 /**
@@ -153,16 +158,18 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
  * Description:
  *    Issue a discard request for the sectors in question.
  */
+
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
+	int ret = 0;
+	struct bio *bio;
+	struct bio_batch bb;
+	unsigned int sz, issued = 0;
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & BLKDEV_IFL_BARRIER ?
-		DISCARD_BARRIER : DISCARD_NOBARRIER;
-	struct bio *bio;
-	struct page *page;
-	int ret = 0;
+	unsigned int sect_sz;
+	sector_t max_discard_sectors;
 
 	if (!q)
 		return -ENXIO;
@@ -170,65 +177,79 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
-	while (nr_sects && !ret) {
-		unsigned int sector_size = q->limits.logical_block_size;
-		unsigned int max_discard_sectors =
-			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	sect_sz = q->limits.logical_block_size;
+	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	atomic_set(&bb.done, 0);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+	bb.end_io = blkdev_discard_end_io;
 
-		bio = bio_alloc(gfp_mask, 1);
+	if (flags & BLKDEV_IFL_BARRIER) {
+		/* issue async barrier before the data */
+		ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
+		if (ret)
+			return ret;
+	}
+submit:
+	while (nr_sects != 0) {
+		bio = bio_alloc(gfp_mask,
+				min(nr_sects, (sector_t)BIO_MAX_PAGES));
 		if (!bio)
-			goto out;
+			break;
+
 		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
-		bio->bi_bdev = bdev;
-		if (flags & BLKDEV_IFL_BARRIER)
-			bio->bi_private = &wait;
+		bio->bi_bdev   = bdev;
+		bio->bi_end_io = bio_batch_end_io;
+
+		if (flags && BLKDEV_IFL_WAIT)
+			bio->bi_private = &bb;
 
 		if (blk_queue_discard_mem(q)) {
 			/*
 			 * Add a zeroed one-sector payload as that's what
-			 * our current implementations need.  If we'll ever
+			 * our current implementations need. If we'll ever
 			 * need more the interface will need revisiting.
 			 */
-			page = alloc_page(gfp_mask | __GFP_ZERO);
-			if (!page)
-				goto out_free_bio;
-			if (bio_add_pc_page(q, bio, page, sector_size, 0) <
-				sector_size)
-				goto out_free_page;
-		}
-		/*
-		 * And override the bio size - the way discard works we
-		 * touch many more blocks on disk than the actual payload
-		 * length.
-		 */
-		if (nr_sects > max_discard_sectors) {
-			bio->bi_size = max_discard_sectors << 9;
-			nr_sects -= max_discard_sectors;
-			sector += max_discard_sectors;
-		} else {
-			bio->bi_size = nr_sects << 9;
-			nr_sects = 0;
+			ret = add_discard_payload(bio, gfp_mask, sect_sz);
+			if (ret) {
+				bio_put(bio);
+				break;
+			}
 		}
+		sz = min(max_discard_sectors , nr_sects);
+		bio->bi_size = sz << 9;
+		nr_sects -= sz;
+		sector += sz;
 
-		bio_get(bio);
-		submit_bio(type, bio);
+		issued++;
+		submit_bio(DISCARD_NOBARRIER, bio);
+	}
+	/*
+	 * When all data bios are in flight. Send final barrier if requeted.
+	 */
+	if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
+		ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
+					flags & BLKDEV_IFL_WAIT);
 
-		if (flags & BLKDEV_IFL_BARRIER)
+	if (flags & BLKDEV_IFL_WAIT)
+		/* Wait for submitted bios and then continue */
+		while ( issued != atomic_read(&bb.done))
 			wait_for_completion(&wait);
 
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		/* One of bios in the batch was completed with error.*/
+		ret = -EIO;
+
+	if (ret)
+		goto out;
+
+	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
+		ret = -EOPNOTSUPP;
+		goto out;
 	}
-	return ret;
-out_free_page:
-	__free_page(page);
-out_free_bio:
-	bio_put(bio);
+	if (nr_sects != 0)
+		goto submit;
 out:
-	return -ENOMEM;
+	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 6/6] blkdev: optimize discard payload utilization
  2010-03-29  6:09         ` [PATCH 5/6] blkdev: optimize discard request logic Dmitry Monakhov
@ 2010-03-29  6:09           ` Dmitry Monakhov
  0 siblings, 0 replies; 7+ messages in thread
From: Dmitry Monakhov @ 2010-03-29  6:09 UTC (permalink / raw)
  To: linux-kernel; +Cc: jens.axboe, Dmitry Monakhov

Currently each discard bio require one page as payload storage
But only one sector is actually used. Let's share allocated page
with several requests. A bad thing about implementation is that
it use global spinlock. But we may easily to switch to per-cpu
buckets if it becomes a bottleneck.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 block/blk-lib.c |   45 ++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 5326d91..ffa07ee 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,6 +9,8 @@
 
 #include "blk.h"
 
+static DEFINE_SPINLOCK(payload_mem_lock);
+
 struct bio_batch
 {
 	atomic_t 		done;
@@ -135,15 +137,48 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 
 static int add_discard_payload(struct bio *bio, gfp_t gfp_mask, int len)
 {
+	static struct page *cur_page = NULL;
+	static int cur_offset = 0;
 	struct page *page;
+	int offset;
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-
-	page = alloc_page(gfp_mask | __GFP_ZERO);
-	if (!page)
+again:
+	spin_lock(&payload_mem_lock);
+	if (cur_offset + len > PAGE_SIZE)
+		/* There is no more space in the page */
+		if (cur_page) {
+			put_page(cur_page);
+			cur_page = NULL;
+			cur_offset = 0;
+		}
+	if (!cur_page) {
+		spin_unlock(&payload_mem_lock);
+		page = alloc_page(gfp_mask | __GFP_ZERO);
+		if (!page)
 			return -ENOMEM;
-	if (bio_add_pc_page(q, bio, page, len, 0) < len)
-		BUG();
+
+		spin_lock(&payload_mem_lock);
+		/*
+		 * Check cur_page one more time after we reacquired the lock.
+		 * Because it may be changed by other task.
+		 */
+		if (cur_page) {
+			spin_unlock(&payload_mem_lock);
+			put_page(page);
+			goto again;
+		} else
+			cur_page = page;
+	}
+	page = cur_page;
 	get_page(page);
+	offset = cur_offset;
+	cur_offset += len;
+
+	spin_unlock(&payload_mem_lock);
+
+	if (bio_add_pc_page(q, bio, page, len, offset) < len)
+		BUG();
+
 	return 0;
 }
 
-- 
1.6.6.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2010-03-29  6:10 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-29  6:09 [PATCH 0/6] blkdev: blkdev_issue_fn cleanups Dmitry Monakhov
2010-03-29  6:09 ` [PATCH 1/6] blkdev: pass gfp_mask and flags to blkdev_issue_flush Dmitry Monakhov
2010-03-29  6:09   ` [PATCH 2/6] blkdev: generalize flags for blkdev_issue_fn functions Dmitry Monakhov
2010-03-29  6:09     ` [PATCH 3/6] blkdev: add blkdev_issue helper functions Dmitry Monakhov
2010-03-29  6:09       ` [PATCH 4/6] blkdev: add discard payload flag Dmitry Monakhov
2010-03-29  6:09         ` [PATCH 5/6] blkdev: optimize discard request logic Dmitry Monakhov
2010-03-29  6:09           ` [PATCH 6/6] blkdev: optimize discard payload utilization Dmitry Monakhov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).