linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
@ 2020-05-29 14:11 Kaitao Cheng
  2020-05-29 14:13 ` Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Kaitao Cheng @ 2020-05-29 14:11 UTC (permalink / raw)
  To: axboe, hch, sth, viro, clm, jaegeuk, hch, mark, dhowells, balbi
  Cc: damien.lemoal, bvanassche, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, songmuchun, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	josef, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel, Kaitao Cheng

There is a function named ilog2() exist which can replace blksize.
The generated code will be shorter and more efficient on some
architecture, such as arm64. And ilog2() can be optimized according
to different architecture.

Signed-off-by: Kaitao Cheng <pilgrimtao@gmail.com>
---
changes in v2:
	Remove all blksize_bits

 drivers/nvme/target/io-cmd-bdev.c            |  2 +-
 drivers/s390/block/dasd_ioctl.c              |  2 +-
 drivers/usb/gadget/function/storage_common.c |  2 +-
 fs/block_dev.c                               |  6 +++---
 fs/btrfs/disk-io.c                           |  4 ++--
 fs/buffer.c                                  |  2 +-
 fs/direct-io.c                               |  2 +-
 fs/f2fs/data.c                               |  2 +-
 fs/iomap/direct-io.c                         |  2 +-
 fs/ocfs2/super.c                             |  2 +-
 fs/romfs/super.c                             |  2 +-
 include/linux/blkdev.h                       | 11 -----------
 12 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index bcf979eb8e83..58bd947e232e 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -63,7 +63,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
 		return ret;
 	}
 	ns->size = i_size_read(ns->bdev->bd_inode);
-	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+	ns->blksize_shift = ilog2(bdev_logical_block_size(ns->bdev));
 	return 0;
 }
 
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 777734d1b4e5..55adb134451b 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -228,7 +228,7 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata)
 	 */
 	if (fdata->start_unit == 0) {
 		struct block_device *bdev = bdget_disk(block->gdp, 0);
-		bdev->bd_inode->i_blkbits = blksize_bits(fdata->blksize);
+		bdev->bd_inode->i_blkbits = ilog2(fdata->blksize);
 		bdput(bdev);
 	}
 
diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
index f7e6c42558eb..eada3e801dd7 100644
--- a/drivers/usb/gadget/function/storage_common.c
+++ b/drivers/usb/gadget/function/storage_common.c
@@ -233,7 +233,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
 		blkbits = 11;
 	} else if (inode->i_bdev) {
 		blksize = bdev_logical_block_size(inode->i_bdev);
-		blkbits = blksize_bits(blksize);
+		blkbits = ilog2(blksize);
 	} else {
 		blksize = 512;
 		blkbits = 9;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a333a648244e..d18496dfc6e7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -115,7 +115,7 @@ static void set_init_blocksize(struct block_device *bdev)
 		bsize <<= 1;
 	}
 	bdev->bd_block_size = bsize;
-	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+	bdev->bd_inode->i_blkbits = ilog2(bsize);
 }
 
 int set_blocksize(struct block_device *bdev, int size)
@@ -132,7 +132,7 @@ int set_blocksize(struct block_device *bdev, int size)
 	if (bdev->bd_block_size != size) {
 		sync_blockdev(bdev);
 		bdev->bd_block_size = size;
-		bdev->bd_inode->i_blkbits = blksize_bits(size);
+		bdev->bd_inode->i_blkbits = ilog2(size);
 		kill_bdev(bdev);
 	}
 	return 0;
@@ -147,7 +147,7 @@ int sb_set_blocksize(struct super_block *sb, int size)
 	/* If we get here, we know size is power of two
 	 * and it's value is between 512 and PAGE_SIZE */
 	sb->s_blocksize = size;
-	sb->s_blocksize_bits = blksize_bits(size);
+	sb->s_blocksize_bits = ilog2(size);
 	return sb->s_blocksize;
 }
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7c6f0bbb54a5..711b9fc31c94 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2765,7 +2765,7 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
 
 	fs_info->sb = sb;
 	sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
-	sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
+	sb->s_blocksize_bits = ilog2(BTRFS_BDEV_BLOCKSIZE);
 
 	ret = percpu_counter_init(&fs_info->dio_bytes, 0, GFP_KERNEL);
 	if (ret)
@@ -3059,7 +3059,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
 
 	sb->s_blocksize = sectorsize;
-	sb->s_blocksize_bits = blksize_bits(sectorsize);
+	sb->s_blocksize_bits = ilog2(sectorsize);
 	memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
 
 	mutex_lock(&fs_info->chunk_mutex);
diff --git a/fs/buffer.c b/fs/buffer.c
index fc8831c392d7..fa92e0afe349 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -907,7 +907,7 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
 	loff_t sz = i_size_read(bdev->bd_inode);
 
 	if (sz) {
-		unsigned int sizebits = blksize_bits(size);
+		unsigned int sizebits = ilog2(size);
 		retval = (sz >> sizebits);
 	}
 	return retval;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1543b5af400e..7ea2cd3effcc 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1148,7 +1148,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 
 	if (align & blocksize_mask) {
 		if (bdev)
-			blkbits = blksize_bits(bdev_logical_block_size(bdev));
+			blkbits = ilog2(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
 		if (align & blocksize_mask)
 			goto out;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cb05f71cf850..b896da27942a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3458,7 +3458,7 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
 
 	if (align & blocksize_mask) {
 		if (bdev)
-			blkbits = blksize_bits(bdev_logical_block_size(bdev));
+			blkbits = ilog2(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
 		if (align & blocksize_mask)
 			return -EINVAL;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index ec7b78e6feca..2a807657d544 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -203,7 +203,7 @@ static loff_t
 iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 		struct iomap_dio *dio, struct iomap *iomap)
 {
-	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+	unsigned int blkbits = ilog2(bdev_logical_block_size(iomap->bdev));
 	unsigned int fs_block_size = i_blocksize(inode), pad;
 	unsigned int align = iov_iter_alignment(dio->submit.iter);
 	struct bio *bio;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 71ea9ce71a6b..9b5622881d34 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2054,7 +2054,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	osb->osb_dx_seed[3] = le32_to_cpu(di->id2.i_super.s_uuid_hash);
 
 	osb->sb = sb;
-	osb->s_sectsize_bits = blksize_bits(sector_size);
+	osb->s_sectsize_bits = ilog2(sector_size);
 	BUG_ON(!osb->s_sectsize_bits);
 
 	spin_lock_init(&osb->dc_task_lock);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e582d001f792..4f6963570739 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -470,7 +470,7 @@ static int romfs_fill_super(struct super_block *sb, struct fs_context *fc)
 		sb_set_blocksize(sb, ROMBSIZE);
 	} else {
 		sb->s_blocksize = ROMBSIZE;
-		sb->s_blocksize_bits = blksize_bits(ROMBSIZE);
+		sb->s_blocksize_bits = ilog2(ROMBSIZE);
 	}
 #endif
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7d10f4e63232..16038b609c14 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1502,17 +1502,6 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 	return !(addr & alignment) && !(len & alignment);
 }
 
-/* assumes size > 256 */
-static inline unsigned int blksize_bits(unsigned int size)
-{
-	unsigned int bits = 8;
-	do {
-		bits++;
-		size >>= 1;
-	} while (size > 256);
-	return bits;
-}
-
 static inline unsigned int block_size(struct block_device *bdev)
 {
 	return bdev->bd_block_size;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-05-29 14:11 [PATCH v2] blkdev: Replace blksize_bits() with ilog2() Kaitao Cheng
@ 2020-05-29 14:13 ` Jens Axboe
  2020-06-01  7:22   ` Tao pilgrim
  2020-05-29 14:15 ` Christoph Hellwig
  2020-05-29 20:27 ` Matthew Wilcox
  2 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-05-29 14:13 UTC (permalink / raw)
  To: Kaitao Cheng, hch, sth, viro, clm, jaegeuk, hch, mark, dhowells, balbi
  Cc: damien.lemoal, bvanassche, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, songmuchun, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	josef, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel

On 5/29/20 8:11 AM, Kaitao Cheng wrote:
> There is a function named ilog2() exist which can replace blksize.
> The generated code will be shorter and more efficient on some
> architecture, such as arm64. And ilog2() can be optimized according
> to different architecture.

When you posted this last time, I said:

"I like the simplification, but do you have any results to back up
 that claim? Is the generated code shorter? Runs faster?"

which you handily ignored, yet sending out a new version. I'm not
going to apply this without justification, your commit message is
handwavy at best.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-05-29 14:11 [PATCH v2] blkdev: Replace blksize_bits() with ilog2() Kaitao Cheng
  2020-05-29 14:13 ` Jens Axboe
@ 2020-05-29 14:15 ` Christoph Hellwig
  2020-05-29 20:27 ` Matthew Wilcox
  2 siblings, 0 replies; 10+ messages in thread
From: Christoph Hellwig @ 2020-05-29 14:15 UTC (permalink / raw)
  To: Kaitao Cheng
  Cc: axboe, hch, sth, viro, clm, jaegeuk, hch, mark, dhowells, balbi,
	damien.lemoal, bvanassche, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, songmuchun, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	josef, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel

>  	ns->size = i_size_read(ns->bdev->bd_inode);
> -	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
> +	ns->blksize_shift = ilog2(bdev_logical_block_size(ns->bdev));

This should just be:

	ns->blksize_shift = ns->bdev->bd_inode->i_blkbits;

> diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
> index 777734d1b4e5..55adb134451b 100644
> --- a/drivers/s390/block/dasd_ioctl.c
> +++ b/drivers/s390/block/dasd_ioctl.c
> @@ -228,7 +228,7 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata)
>  	 */
>  	if (fdata->start_unit == 0) {
>  		struct block_device *bdev = bdget_disk(block->gdp, 0);
> -		bdev->bd_inode->i_blkbits = blksize_bits(fdata->blksize);
> +		bdev->bd_inode->i_blkbits = ilog2(fdata->blksize);

This also needs to set bdev->bd_block_size, so this probably warrants
a separate fix that be backported.  It might be nice to split out
a helper that sets bd_block_size and bd_inode->i_blkbits together
so that such a use is more obvious.

>  	} else if (inode->i_bdev) {
>  		blksize = bdev_logical_block_size(inode->i_bdev);
> -		blkbits = blksize_bits(blksize);
> +		blkbits = ilog2(blksize);

This can just use inode->i_bdev->bd_inode->i_blkbits.

> diff --git a/fs/buffer.c b/fs/buffer.c
> index fc8831c392d7..fa92e0afe349 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -907,7 +907,7 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
>  	loff_t sz = i_size_read(bdev->bd_inode);
>  
>  	if (sz) {
> -		unsigned int sizebits = blksize_bits(size);
> +		unsigned int sizebits = ilog2(size);

bdev->bd_inode->i_blkbits.

> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index 1543b5af400e..7ea2cd3effcc 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -1148,7 +1148,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  
>  	if (align & blocksize_mask) {
>  		if (bdev)
> -			blkbits = blksize_bits(bdev_logical_block_size(bdev));
> +			blkbits = ilog2(bdev_logical_block_size(bdev));

bdev->bd_inode->i_blkbits.

> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index cb05f71cf850..b896da27942a 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3458,7 +3458,7 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
>  
>  	if (align & blocksize_mask) {
>  		if (bdev)
> -			blkbits = blksize_bits(bdev_logical_block_size(bdev));
> +			blkbits = ilog2(bdev_logical_block_size(bdev));

bdev->bd_inode->i_blkbits.

>  		blocksize_mask = (1 << blkbits) - 1;
>  		if (align & blocksize_mask)
>  			return -EINVAL;
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index ec7b78e6feca..2a807657d544 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -203,7 +203,7 @@ static loff_t
>  iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
>  		struct iomap_dio *dio, struct iomap *iomap)
>  {
> -	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
> +	unsigned int blkbits = ilog2(bdev_logical_block_size(iomap->bdev));

iomap->bdev->bd_inode->i_blkbits.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-05-29 14:11 [PATCH v2] blkdev: Replace blksize_bits() with ilog2() Kaitao Cheng
  2020-05-29 14:13 ` Jens Axboe
  2020-05-29 14:15 ` Christoph Hellwig
@ 2020-05-29 20:27 ` Matthew Wilcox
  2020-05-29 22:27   ` Bart Van Assche
  2 siblings, 1 reply; 10+ messages in thread
From: Matthew Wilcox @ 2020-05-29 20:27 UTC (permalink / raw)
  To: Kaitao Cheng
  Cc: axboe, hch, sth, viro, clm, jaegeuk, hch, mark, dhowells, balbi,
	damien.lemoal, bvanassche, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, songmuchun, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	josef, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel

On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote:
> There is a function named ilog2() exist which can replace blksize.
> The generated code will be shorter and more efficient on some
> architecture, such as arm64. And ilog2() can be optimized according
> to different architecture.

We'd get the same benefit from a smaller patch with just:

--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 	return !(addr & alignment) && !(len & alignment);
 }
 
-/* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
-	unsigned int bits = 8;
-	do {
-		bits++;
-		size >>= 1;
-	} while (size > 256);
-	return bits;
+	return ilog2(size);
 }
 
 static inline unsigned int block_size(struct block_device *bdev)

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-05-29 20:27 ` Matthew Wilcox
@ 2020-05-29 22:27   ` Bart Van Assche
  2020-05-29 22:39     ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Bart Van Assche @ 2020-05-29 22:27 UTC (permalink / raw)
  To: Matthew Wilcox, Kaitao Cheng
  Cc: axboe, hch, sth, viro, clm, jaegeuk, hch, mark, dhowells, balbi,
	damien.lemoal, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, songmuchun, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	josef, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel

On 2020-05-29 13:27, Matthew Wilcox wrote:
> On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote:
>> There is a function named ilog2() exist which can replace blksize.
>> The generated code will be shorter and more efficient on some
>> architecture, such as arm64. And ilog2() can be optimized according
>> to different architecture.
> 
> We'd get the same benefit from a smaller patch with just:
> 
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
>  	return !(addr & alignment) && !(len & alignment);
>  }
>  
> -/* assumes size > 256 */
>  static inline unsigned int blksize_bits(unsigned int size)
>  {
> -	unsigned int bits = 8;
> -	do {
> -		bits++;
> -		size >>= 1;
> -	} while (size > 256);
> -	return bits;
> +	return ilog2(size);
>  }
>  
>  static inline unsigned int block_size(struct block_device *bdev)

Hi Matthew,

I had suggested to change all blksize_bits() calls into ilog2() calls
because I think that a single function to calculate the logarithm base 2
is sufficient.

Thanks,

Bart.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-05-29 22:27   ` Bart Van Assche
@ 2020-05-29 22:39     ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-05-29 22:39 UTC (permalink / raw)
  To: Bart Van Assche, Matthew Wilcox, Kaitao Cheng
  Cc: hch, sth, viro, clm, jaegeuk, hch, mark, dhowells, balbi,
	damien.lemoal, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, songmuchun, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	josef, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel

On 5/29/20 4:27 PM, Bart Van Assche wrote:
> On 2020-05-29 13:27, Matthew Wilcox wrote:
>> On Fri, May 29, 2020 at 10:11:00PM +0800, Kaitao Cheng wrote:
>>> There is a function named ilog2() exist which can replace blksize.
>>> The generated code will be shorter and more efficient on some
>>> architecture, such as arm64. And ilog2() can be optimized according
>>> to different architecture.
>>
>> We'd get the same benefit from a smaller patch with just:
>>
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -1502,15 +1502,9 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
>>  	return !(addr & alignment) && !(len & alignment);
>>  }
>>  
>> -/* assumes size > 256 */
>>  static inline unsigned int blksize_bits(unsigned int size)
>>  {
>> -	unsigned int bits = 8;
>> -	do {
>> -		bits++;
>> -		size >>= 1;
>> -	} while (size > 256);
>> -	return bits;
>> +	return ilog2(size);
>>  }
>>  
>>  static inline unsigned int block_size(struct block_device *bdev)
> 
> Hi Matthew,
> 
> I had suggested to change all blksize_bits() calls into ilog2() calls
> because I think that a single function to calculate the logarithm base 2
> is sufficient.

I think this should be a two-parter:

1) Use the inode blkbits where appropriate
2) Then do this change

I'm leaning towards just doing it in blksize_bits() instead of updating
every caller, unless there aren't that many left once we've gone
through patch 1.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-05-29 14:13 ` Jens Axboe
@ 2020-06-01  7:22   ` Tao pilgrim
  2020-06-01  8:44     ` Greg KH
  0 siblings, 1 reply; 10+ messages in thread
From: Tao pilgrim @ 2020-06-01  7:22 UTC (permalink / raw)
  To: Jens Axboe
  Cc: hch, sth, viro, clm, jaegeuk, hch, Mark Fasheh, dhowells, balbi,
	damien.lemoal, bvanassche, ming.lei, martin.petersen, satyat,
	chaitanya.kulkarni, houtao1, asml.silence, ajay.joshi,
	linux-kernel, Muchun Song, hoeppner, heiko.carstens, gor,
	borntraeger, linux-s390, sagi, linux-nvme, gregkh, linux-usb,
	Josef Bacik, dsterba, linux-btrfs, chao, linux-f2fs-devel,
	darrick.wong, linux-xfs, linux-fsdevel, jlbec, joseph.qi,
	ocfs2-devel, deepa.kernel

On Fri, May 29, 2020 at 10:13 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> On 5/29/20 8:11 AM, Kaitao Cheng wrote:
> > There is a function named ilog2() exist which can replace blksize.
> > The generated code will be shorter and more efficient on some
> > architecture, such as arm64. And ilog2() can be optimized according
> > to different architecture.
>
> When you posted this last time, I said:
>
> "I like the simplification, but do you have any results to back up
>  that claim? Is the generated code shorter? Runs faster?"
>

Hi  Jens Axboe:

I did a test on ARM64.
unsigned int ckt_blksize(int size)
{
   return blksize_bits(size);
}
unsigned int ckt_ilog2(int size)
{
    return ilog2(size);
}

When I compiled it into assembly code, I got the following result,

0000000000000088 <ckt_blksize>:
      88: 2a0003e8 mov w8, w0
      8c: 321d03e0 orr w0, wzr, #0x8
      90: 11000400 add w0, w0, #0x1
      94: 7108051f cmp w8, #0x201
      98: 53017d08 lsr w8, w8, #1
      9c: 54ffffa8 b.hi 90 <ckt_blksize+0x8>
      a0: d65f03c0 ret
      a4: d503201f nop

00000000000000a8 <ckt_ilog2>:
      a8: 320013e8 orr w8, wzr, #0x1f
      ac: 5ac01009 clz w9, w0
      b0: 4b090108 sub w8, w8, w9
      b4: 7100001f cmp w0, #0x0
      b8: 5a9f1100 csinv w0, w8, wzr, ne
      bc: d65f03c0 ret

The generated code of ilog2  is shorter , and  runs faster


> which you handily ignored, yet sending out a new version. I'm not
> going to apply this without justification, your commit message is
> handwavy at best.
>

Do I need to write the test content into commit message?



-- 
Yours,
Kaitao Cheng

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-06-01  7:22   ` Tao pilgrim
@ 2020-06-01  8:44     ` Greg KH
  2020-06-02  5:51       ` Christoph Hellwig
  0 siblings, 1 reply; 10+ messages in thread
From: Greg KH @ 2020-06-01  8:44 UTC (permalink / raw)
  To: Tao pilgrim
  Cc: Jens Axboe, hch, sth, viro, clm, jaegeuk, hch, Mark Fasheh,
	dhowells, balbi, damien.lemoal, bvanassche, ming.lei,
	martin.petersen, satyat, chaitanya.kulkarni, houtao1,
	asml.silence, ajay.joshi, linux-kernel, Muchun Song, hoeppner,
	heiko.carstens, gor, borntraeger, linux-s390, sagi, linux-nvme,
	linux-usb, Josef Bacik, dsterba, linux-btrfs, chao,
	linux-f2fs-devel, darrick.wong, linux-xfs, linux-fsdevel, jlbec,
	joseph.qi, ocfs2-devel, deepa.kernel

On Mon, Jun 01, 2020 at 03:22:01PM +0800, Tao pilgrim wrote:
> On Fri, May 29, 2020 at 10:13 PM Jens Axboe <axboe@kernel.dk> wrote:
> >
> > On 5/29/20 8:11 AM, Kaitao Cheng wrote:
> > > There is a function named ilog2() exist which can replace blksize.
> > > The generated code will be shorter and more efficient on some
> > > architecture, such as arm64. And ilog2() can be optimized according
> > > to different architecture.
> >
> > When you posted this last time, I said:
> >
> > "I like the simplification, but do you have any results to back up
> >  that claim? Is the generated code shorter? Runs faster?"
> >
> 
> Hi  Jens Axboe:
> 
> I did a test on ARM64.
> unsigned int ckt_blksize(int size)
> {
>    return blksize_bits(size);
> }
> unsigned int ckt_ilog2(int size)
> {
>     return ilog2(size);
> }
> 
> When I compiled it into assembly code, I got the following result,
> 
> 0000000000000088 <ckt_blksize>:
>       88: 2a0003e8 mov w8, w0
>       8c: 321d03e0 orr w0, wzr, #0x8
>       90: 11000400 add w0, w0, #0x1
>       94: 7108051f cmp w8, #0x201
>       98: 53017d08 lsr w8, w8, #1
>       9c: 54ffffa8 b.hi 90 <ckt_blksize+0x8>
>       a0: d65f03c0 ret
>       a4: d503201f nop
> 
> 00000000000000a8 <ckt_ilog2>:
>       a8: 320013e8 orr w8, wzr, #0x1f
>       ac: 5ac01009 clz w9, w0
>       b0: 4b090108 sub w8, w8, w9
>       b4: 7100001f cmp w0, #0x0
>       b8: 5a9f1100 csinv w0, w8, wzr, ne
>       bc: d65f03c0 ret
> 
> The generated code of ilog2  is shorter , and  runs faster

But does this code path actually show up anywhere that is actually
measurable as mattering?

If so, please show that benchmark results.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-06-01  8:44     ` Greg KH
@ 2020-06-02  5:51       ` Christoph Hellwig
  2020-06-02  6:13         ` Greg KH
  0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2020-06-02  5:51 UTC (permalink / raw)
  To: Greg KH
  Cc: Tao pilgrim, Jens Axboe, hch, sth, viro, clm, jaegeuk, hch,
	Mark Fasheh, dhowells, balbi, damien.lemoal, bvanassche,
	ming.lei, martin.petersen, satyat, chaitanya.kulkarni, houtao1,
	asml.silence, ajay.joshi, linux-kernel, Muchun Song, hoeppner,
	heiko.carstens, gor, borntraeger, linux-s390, sagi, linux-nvme,
	linux-usb, Josef Bacik, dsterba, linux-btrfs, chao,
	linux-f2fs-devel, darrick.wong, linux-xfs, linux-fsdevel, jlbec,
	joseph.qi, ocfs2-devel, deepa.kernel

On Mon, Jun 01, 2020 at 10:44:26AM +0200, Greg KH wrote:
> But does this code path actually show up anywhere that is actually
> measurable as mattering?
> 
> If so, please show that benchmark results.

I think the requests are starting to be a bit unreasonable.  Tao is
replacing a reimplementation of a standard function with that standard
function / compiler builtin.  We don't put such a high burden on that.

And once the proper existing fields are used where possible as shown
in my reply just replacing the rest seems totally obvious - quite
contrary I think keeping a reimplementation would need a high bar.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] blkdev: Replace blksize_bits() with ilog2()
  2020-06-02  5:51       ` Christoph Hellwig
@ 2020-06-02  6:13         ` Greg KH
  0 siblings, 0 replies; 10+ messages in thread
From: Greg KH @ 2020-06-02  6:13 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Tao pilgrim, Jens Axboe, sth, viro, clm, jaegeuk, hch,
	Mark Fasheh, dhowells, balbi, damien.lemoal, bvanassche,
	ming.lei, martin.petersen, satyat, chaitanya.kulkarni, houtao1,
	asml.silence, ajay.joshi, linux-kernel, Muchun Song, hoeppner,
	heiko.carstens, gor, borntraeger, linux-s390, sagi, linux-nvme,
	linux-usb, Josef Bacik, dsterba, linux-btrfs, chao,
	linux-f2fs-devel, darrick.wong, linux-xfs, linux-fsdevel, jlbec,
	joseph.qi, ocfs2-devel, deepa.kernel

On Tue, Jun 02, 2020 at 07:51:52AM +0200, Christoph Hellwig wrote:
> On Mon, Jun 01, 2020 at 10:44:26AM +0200, Greg KH wrote:
> > But does this code path actually show up anywhere that is actually
> > measurable as mattering?
> > 
> > If so, please show that benchmark results.
> 
> I think the requests are starting to be a bit unreasonable.  Tao is
> replacing a reimplementation of a standard function with that standard
> function / compiler builtin.  We don't put such a high burden on that.

That's fine, but to say it is "faster" usually means we want to see it
actually going faster somehow :)

> And once the proper existing fields are used where possible as shown
> in my reply just replacing the rest seems totally obvious - quite
> contrary I think keeping a reimplementation would need a high bar.

Your patch makes sense, I was not objecting to that.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-06-02  6:13 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-29 14:11 [PATCH v2] blkdev: Replace blksize_bits() with ilog2() Kaitao Cheng
2020-05-29 14:13 ` Jens Axboe
2020-06-01  7:22   ` Tao pilgrim
2020-06-01  8:44     ` Greg KH
2020-06-02  5:51       ` Christoph Hellwig
2020-06-02  6:13         ` Greg KH
2020-05-29 14:15 ` Christoph Hellwig
2020-05-29 20:27 ` Matthew Wilcox
2020-05-29 22:27   ` Bart Van Assche
2020-05-29 22:39     ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).