All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22 10:31 ` [PATCH v3 2/3] block: introduce new field bd_flags in block_device Yu Kuai
@ 2023-11-22  3:30   ` Ming Lei
  2023-11-22  6:15     ` Yu Kuai
  2023-11-22  3:52   ` Michael Kelley
  2023-11-22  7:28   ` Christoph Hellwig
  2 siblings, 1 reply; 16+ messages in thread
From: Ming Lei @ 2023-11-22  3:30 UTC (permalink / raw)
  To: Yu Kuai; +Cc: axboe, linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On Wed, Nov 22, 2023 at 06:31:02PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> There are multiple switches in struct block_device, use separate bool
> fields for them is not gracefully. Add a new field bd_flags and replace
> swithes to a bit, there are no functional changes, and prepare to add
> a new switch in the next patch. In order to keep bd_flags in the first
> cacheline and prevent layout to be affected, define it as u16.
> 
> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/bdev.c              | 15 ++++++++-------
>  block/blk-core.c          |  7 ++++---
>  block/genhd.c             | 15 +++++++++++----
>  block/ioctl.c             |  6 +++++-
>  include/linux/blk_types.h | 27 +++++++++++++++++++++------
>  include/linux/blkdev.h    |  5 +++--
>  6 files changed, 52 insertions(+), 23 deletions(-)
> 
> diff --git a/block/bdev.c b/block/bdev.c
> index e4cfb7adb645..10f524a7416c 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>  	bdev->bd_partno = partno;
>  	bdev->bd_inode = inode;
>  	bdev->bd_queue = disk->queue;
> -	if (partno)
> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	else
> -		bdev->bd_has_submit_bio = false;
> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	bdev->bd_stats = alloc_percpu(struct disk_stats);
>  	if (!bdev->bd_stats) {
>  		iput(inode);
> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  		bdev->bd_holder = NULL;
>  		bdev->bd_holder_ops = NULL;
>  		mutex_unlock(&bdev->bd_holder_lock);
> -		if (bdev->bd_write_holder)
> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>  			unblock = true;
>  	}
>  	if (!whole->bd_holders)
> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  	 */
>  	if (unblock) {
>  		disk_unblock_events(bdev->bd_disk);
> -		bdev->bd_write_holder = false;
> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  	}
>  }
>  
> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
>  		 * writeable reference is too fragile given the way @mode is
>  		 * used in blkdev_get/put().
>  		 */
> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
> +		if ((mode & BLK_OPEN_WRITE) &&
> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>  		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
> -			bdev->bd_write_holder = true;
> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  			unblock_events = false;
>  		}
>  	}
> diff --git a/block/blk-core.c b/block/blk-core.c
> index fdf25b8d6e78..f9f8b12ba626 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -482,7 +482,8 @@ __setup("fail_make_request=", setup_fail_make_request);
>  
>  bool should_fail_request(struct block_device *part, unsigned int bytes)
>  {
> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
> +		should_fail(&fail_make_request, bytes);
>  }
>  
>  static int __init fail_make_request_debugfs(void)
> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>  	if (unlikely(!blk_crypto_bio_prep(&bio)))
>  		return;
>  
> -	if (!bio->bi_bdev->bd_has_submit_bio) {
> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>  		blk_mq_submit_bio(bio);
>  	} else if (likely(bio_queue_enter(bio) == 0)) {
>  		struct gendisk *disk = bio->bi_bdev->bd_disk;
> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>  	 */
>  	if (current->bio_list)
>  		bio_list_add(&current->bio_list[0], bio);
> -	else if (!bio->bi_bdev->bd_has_submit_bio)
> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>  		__submit_bio_noacct_mq(bio);
>  	else
>  		__submit_bio_noacct(bio);
> diff --git a/block/genhd.c b/block/genhd.c
> index c9d06f72c587..57f96c0c8da0 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
>  	elevator_init_mq(disk->queue);
>  
>  	/* Mark bdev as having a submit_bio, if needed */
> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
> +	if (disk->fops->submit_bio)
> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
>  
>  	/*
>  	 * If the driver provides an explicit major number it also must provide
> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>  ssize_t part_fail_show(struct device *dev,
>  		       struct device_attribute *attr, char *buf)
>  {
> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
> +	return sprintf(buf, "%d\n",
> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>  }
>  
>  ssize_t part_fail_store(struct device *dev,
> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>  {
>  	int i;
>  
> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
> -		dev_to_bdev(dev)->bd_make_it_fail = i;
> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
> +		if (!i)
> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +		else
> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +
> +	}
>  
>  	return count;
>  }
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 4160f4e6bd5b..a64440f4c96b 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>  		if (ret)
>  			return ret;
>  	}
> -	bdev->bd_read_only = n;
> +
> +	if (!n)
> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
> +	else
> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>  	return 0;
>  }
>  
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index f7d40692dd94..de652045111b 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>  #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>  #define SECTOR_MASK		(PAGE_SECTORS - 1)
>  
> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
> +#define BD_FLAG_WRITE_HOLDER	1
> +#define BD_FLAG_HAS_SUBMIT_BIO	2
> +#define BD_FLAG_MAKE_IT_FAIL	3
> +
>  struct block_device {
>  	sector_t		bd_start_sect;
>  	sector_t		bd_nr_sectors;
> @@ -44,10 +49,8 @@ struct block_device {
>  	struct request_queue *	bd_queue;
>  	struct disk_stats __percpu *bd_stats;
>  	unsigned long		bd_stamp;
> -	bool			bd_read_only;	/* read-only policy */
> +	unsigned short		bd_flags;
>  	u8			bd_partno;
> -	bool			bd_write_holder;
> -	bool			bd_has_submit_bio;
>  	dev_t			bd_dev;
>  	struct inode		*bd_inode;	/* will die */
>  
> @@ -67,9 +70,6 @@ struct block_device {
>  	struct super_block	*bd_fsfreeze_sb;
>  
>  	struct partition_meta_info *bd_meta_info;
> -#ifdef CONFIG_FAIL_MAKE_REQUEST
> -	bool			bd_make_it_fail;
> -#endif
>  	/*
>  	 * keep this out-of-line as it's both big and not needed in the fast
>  	 * path
> @@ -86,6 +86,21 @@ struct block_device {
>  #define bdev_kobj(_bdev) \
>  	(&((_bdev)->bd_device.kobj))
>  
> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
> +{
> +	return bdev->bd_flags & (1U << bit);
> +}
> +
> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags |= (1U << bit);
> +}
> +
> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags &= ~(1U << bit);
> +}

'U' becomes incorrect after .bd_flags is changed to 'unsigned short'.


Thanks,
Ming


^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22 10:31 ` [PATCH v3 2/3] block: introduce new field bd_flags in block_device Yu Kuai
  2023-11-22  3:30   ` Ming Lei
@ 2023-11-22  3:52   ` Michael Kelley
  2023-11-22  7:06     ` Yu Kuai
  2023-11-22  7:28   ` Christoph Hellwig
  2 siblings, 1 reply; 16+ messages in thread
From: Michael Kelley @ 2023-11-22  3:52 UTC (permalink / raw)
  To: Yu Kuai, ming.lei, axboe
  Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

From: Yu Kuai <yukuai1@huaweicloud.com> Sent: Wednesday, November 22, 2023 2:31 AM
> 
> There are multiple switches in struct block_device, use separate bool
> fields for them is not gracefully. Add a new field bd_flags and replace
> swithes to a bit, there are no functional changes, and prepare to add
> a new switch in the next patch. In order to keep bd_flags in the first
> cacheline and prevent layout to be affected, define it as u16.
> 
> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/bdev.c              | 15 ++++++++-------
>  block/blk-core.c          |  7 ++++---
>  block/genhd.c             | 15 +++++++++++----
>  block/ioctl.c             |  6 +++++-
>  include/linux/blk_types.h | 27 +++++++++++++++++++++------
>  include/linux/blkdev.h    |  5 +++--
>  6 files changed, 52 insertions(+), 23 deletions(-)
> 
> diff --git a/block/bdev.c b/block/bdev.c
> index e4cfb7adb645..10f524a7416c 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk
> *disk, u8 partno)
>  	bdev->bd_partno = partno;
>  	bdev->bd_inode = inode;
>  	bdev->bd_queue = disk->queue;
> -	if (partno)
> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	else
> -		bdev->bd_has_submit_bio = false;
> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	bdev->bd_stats = alloc_percpu(struct disk_stats);
>  	if (!bdev->bd_stats) {
>  		iput(inode);
> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  		bdev->bd_holder = NULL;
>  		bdev->bd_holder_ops = NULL;
>  		mutex_unlock(&bdev->bd_holder_lock);
> -		if (bdev->bd_write_holder)
> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>  			unblock = true;
>  	}
>  	if (!whole->bd_holders)
> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  	 */
>  	if (unblock) {
>  		disk_unblock_events(bdev->bd_disk);
> -		bdev->bd_write_holder = false;
> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  	}
>  }
> 
> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev,
> blk_mode_t mode, void *holder,
>  		 * writeable reference is too fragile given the way @mode is
>  		 * used in blkdev_get/put().
>  		 */
> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
> +		if ((mode & BLK_OPEN_WRITE) &&
> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>  		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
> -			bdev->bd_write_holder = true;
> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  			unblock_events = false;
>  		}
>  	}
> diff --git a/block/blk-core.c b/block/blk-core.c
> index fdf25b8d6e78..f9f8b12ba626 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -482,7 +482,8 @@ __setup("fail_make_request=",
> setup_fail_make_request);
> 
>  bool should_fail_request(struct block_device *part, unsigned int bytes)
>  {
> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
> +		should_fail(&fail_make_request, bytes);
>  }
> 
>  static int __init fail_make_request_debugfs(void)
> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>  	if (unlikely(!blk_crypto_bio_prep(&bio)))
>  		return;
> 
> -	if (!bio->bi_bdev->bd_has_submit_bio) {
> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>  		blk_mq_submit_bio(bio);
>  	} else if (likely(bio_queue_enter(bio) == 0)) {
>  		struct gendisk *disk = bio->bi_bdev->bd_disk;
> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>  	 */
>  	if (current->bio_list)
>  		bio_list_add(&current->bio_list[0], bio);
> -	else if (!bio->bi_bdev->bd_has_submit_bio)
> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>  		__submit_bio_noacct_mq(bio);
>  	else
>  		__submit_bio_noacct(bio);
> diff --git a/block/genhd.c b/block/genhd.c
> index c9d06f72c587..57f96c0c8da0 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device
> *parent, struct gendisk *disk,
>  	elevator_init_mq(disk->queue);
> 
>  	/* Mark bdev as having a submit_bio, if needed */
> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
> +	if (disk->fops->submit_bio)
> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
> 
>  	/*
>  	 * If the driver provides an explicit major number it also must provide
> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>  ssize_t part_fail_show(struct device *dev,
>  		       struct device_attribute *attr, char *buf)
>  {
> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
> +	return sprintf(buf, "%d\n",
> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>  }
> 
>  ssize_t part_fail_store(struct device *dev,
> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>  {
>  	int i;
> 
> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
> -		dev_to_bdev(dev)->bd_make_it_fail = i;
> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
> +		if (!i)
> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +		else
> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +
> +	}
> 
>  	return count;
>  }
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 4160f4e6bd5b..a64440f4c96b 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>  		if (ret)
>  			return ret;
>  	}
> -	bdev->bd_read_only = n;
> +
> +	if (!n)
> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
> +	else
> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>  	return 0;
>  }
> 
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index f7d40692dd94..de652045111b 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>  #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>  #define SECTOR_MASK		(PAGE_SECTORS - 1)
> 
> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
> +#define BD_FLAG_WRITE_HOLDER	1
> +#define BD_FLAG_HAS_SUBMIT_BIO	2
> +#define BD_FLAG_MAKE_IT_FAIL	3
> +
>  struct block_device {
>  	sector_t		bd_start_sect;
>  	sector_t		bd_nr_sectors;
> @@ -44,10 +49,8 @@ struct block_device {
>  	struct request_queue *	bd_queue;
>  	struct disk_stats __percpu *bd_stats;
>  	unsigned long		bd_stamp;
> -	bool			bd_read_only;	/* read-only policy */
> +	unsigned short		bd_flags;
>  	u8			bd_partno;
> -	bool			bd_write_holder;
> -	bool			bd_has_submit_bio;
>  	dev_t			bd_dev;
>  	struct inode		*bd_inode;	/* will die */
> 
> @@ -67,9 +70,6 @@ struct block_device {
>  	struct super_block	*bd_fsfreeze_sb;
> 
>  	struct partition_meta_info *bd_meta_info;
> -#ifdef CONFIG_FAIL_MAKE_REQUEST
> -	bool			bd_make_it_fail;
> -#endif
>  	/*
>  	 * keep this out-of-line as it's both big and not needed in the fast
>  	 * path
> @@ -86,6 +86,21 @@ struct block_device {
>  #define bdev_kobj(_bdev) \
>  	(&((_bdev)->bd_device.kobj))
> 
> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
> +{
> +	return bdev->bd_flags & (1U << bit);
> +}
> +
> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags |= (1U << bit);
> +}
> +
> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags &= ~(1U << bit);
> +}

It seems like there's atomicity problem with this approach.  In the above
code, setting and clearing a bd_flag is *not* atomic with respect to
setting/clearing some other bd_flag.  For example, setting/clearing
BD_FLAG_MAKE_IT_FAIL via the /sys interface could race with
setting/clearing BD_FLAG_READ_ONLY via an ioctl, and one of the
two changes could be lost.  This problem wouldn't happen when
each flag was a separate field.  Admittedly, the likelihood of a
problem with BD_FLAG_MAKE_IT_FAIL is extremely low, but
conceptually the lack of atomicity is still wrong.  There might
be a similar problem with BD_FLAG_WRITE_HOLDER, but I
didn't investigate thoroughly.

Michael

> +
>  /*
>   * Block error status values.  See block/blk-core:blk_errors for the details.
>   * Alpha cannot write a byte atomically, so we need to use 32-bit value.
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 51fa7ffdee83..fc1d55ef5107 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -741,13 +741,14 @@ void disk_uevent(struct gendisk *disk, enum
> kobject_action action);
> 
>  static inline int get_disk_ro(struct gendisk *disk)
>  {
> -	return disk->part0->bd_read_only ||
> +	return bdev_flagged(disk->part0, BD_FLAG_READ_ONLY) ||
>  		test_bit(GD_READ_ONLY, &disk->state);
>  }
> 
>  static inline int bdev_read_only(struct block_device *bdev)
>  {
> -	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
> +	return bdev_flagged(bdev, BD_FLAG_READ_ONLY) ||
> +		get_disk_ro(bdev->bd_disk);
>  }
> 
>  bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
> --
> 2.39.2


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22  3:30   ` Ming Lei
@ 2023-11-22  6:15     ` Yu Kuai
  0 siblings, 0 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22  6:15 UTC (permalink / raw)
  To: Ming Lei, Yu Kuai
  Cc: axboe, linux-block, linux-kernel, yi.zhang, yangerkun, yukuai (C)

Hi,

在 2023/11/22 11:30, Ming Lei 写道:
> On Wed, Nov 22, 2023 at 06:31:02PM +0800, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> There are multiple switches in struct block_device, use separate bool
>> fields for them is not gracefully. Add a new field bd_flags and replace
>> swithes to a bit, there are no functional changes, and prepare to add
>> a new switch in the next patch. In order to keep bd_flags in the first
>> cacheline and prevent layout to be affected, define it as u16.
>>
>> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/bdev.c              | 15 ++++++++-------
>>   block/blk-core.c          |  7 ++++---
>>   block/genhd.c             | 15 +++++++++++----
>>   block/ioctl.c             |  6 +++++-
>>   include/linux/blk_types.h | 27 +++++++++++++++++++++------
>>   include/linux/blkdev.h    |  5 +++--
>>   6 files changed, 52 insertions(+), 23 deletions(-)
>>
>> diff --git a/block/bdev.c b/block/bdev.c
>> index e4cfb7adb645..10f524a7416c 100644
>> --- a/block/bdev.c
>> +++ b/block/bdev.c
>> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>>   	bdev->bd_partno = partno;
>>   	bdev->bd_inode = inode;
>>   	bdev->bd_queue = disk->queue;
>> -	if (partno)
>> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
>> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
>> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	else
>> -		bdev->bd_has_submit_bio = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	bdev->bd_stats = alloc_percpu(struct disk_stats);
>>   	if (!bdev->bd_stats) {
>>   		iput(inode);
>> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   		bdev->bd_holder = NULL;
>>   		bdev->bd_holder_ops = NULL;
>>   		mutex_unlock(&bdev->bd_holder_lock);
>> -		if (bdev->bd_write_holder)
>> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>>   			unblock = true;
>>   	}
>>   	if (!whole->bd_holders)
>> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   	 */
>>   	if (unblock) {
>>   		disk_unblock_events(bdev->bd_disk);
>> -		bdev->bd_write_holder = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   	}
>>   }
>>   
>> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
>>   		 * writeable reference is too fragile given the way @mode is
>>   		 * used in blkdev_get/put().
>>   		 */
>> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
>> +		if ((mode & BLK_OPEN_WRITE) &&
>> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>>   		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
>> -			bdev->bd_write_holder = true;
>> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   			unblock_events = false;
>>   		}
>>   	}
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index fdf25b8d6e78..f9f8b12ba626 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -482,7 +482,8 @@ __setup("fail_make_request=", setup_fail_make_request);
>>   
>>   bool should_fail_request(struct block_device *part, unsigned int bytes)
>>   {
>> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
>> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
>> +		should_fail(&fail_make_request, bytes);
>>   }
>>   
>>   static int __init fail_make_request_debugfs(void)
>> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>>   	if (unlikely(!blk_crypto_bio_prep(&bio)))
>>   		return;
>>   
>> -	if (!bio->bi_bdev->bd_has_submit_bio) {
>> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>>   		blk_mq_submit_bio(bio);
>>   	} else if (likely(bio_queue_enter(bio) == 0)) {
>>   		struct gendisk *disk = bio->bi_bdev->bd_disk;
>> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>>   	 */
>>   	if (current->bio_list)
>>   		bio_list_add(&current->bio_list[0], bio);
>> -	else if (!bio->bi_bdev->bd_has_submit_bio)
>> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>>   		__submit_bio_noacct_mq(bio);
>>   	else
>>   		__submit_bio_noacct(bio);
>> diff --git a/block/genhd.c b/block/genhd.c
>> index c9d06f72c587..57f96c0c8da0 100644
>> --- a/block/genhd.c
>> +++ b/block/genhd.c
>> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
>>   	elevator_init_mq(disk->queue);
>>   
>>   	/* Mark bdev as having a submit_bio, if needed */
>> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
>> +	if (disk->fops->submit_bio)
>> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
>>   
>>   	/*
>>   	 * If the driver provides an explicit major number it also must provide
>> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>>   ssize_t part_fail_show(struct device *dev,
>>   		       struct device_attribute *attr, char *buf)
>>   {
>> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
>> +	return sprintf(buf, "%d\n",
>> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>>   }
>>   
>>   ssize_t part_fail_store(struct device *dev,
>> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>>   {
>>   	int i;
>>   
>> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
>> -		dev_to_bdev(dev)->bd_make_it_fail = i;
>> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
>> +		if (!i)
>> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +		else
>> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +
>> +	}
>>   
>>   	return count;
>>   }
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 4160f4e6bd5b..a64440f4c96b 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>>   		if (ret)
>>   			return ret;
>>   	}
>> -	bdev->bd_read_only = n;
>> +
>> +	if (!n)
>> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
>> +	else
>> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>>   	return 0;
>>   }
>>   
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index f7d40692dd94..de652045111b 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>>   #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>>   #define SECTOR_MASK		(PAGE_SECTORS - 1)
>>   
>> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
>> +#define BD_FLAG_WRITE_HOLDER	1
>> +#define BD_FLAG_HAS_SUBMIT_BIO	2
>> +#define BD_FLAG_MAKE_IT_FAIL	3
>> +
>>   struct block_device {
>>   	sector_t		bd_start_sect;
>>   	sector_t		bd_nr_sectors;
>> @@ -44,10 +49,8 @@ struct block_device {
>>   	struct request_queue *	bd_queue;
>>   	struct disk_stats __percpu *bd_stats;
>>   	unsigned long		bd_stamp;
>> -	bool			bd_read_only;	/* read-only policy */
>> +	unsigned short		bd_flags;
>>   	u8			bd_partno;
>> -	bool			bd_write_holder;
>> -	bool			bd_has_submit_bio;
>>   	dev_t			bd_dev;
>>   	struct inode		*bd_inode;	/* will die */
>>   
>> @@ -67,9 +70,6 @@ struct block_device {
>>   	struct super_block	*bd_fsfreeze_sb;
>>   
>>   	struct partition_meta_info *bd_meta_info;
>> -#ifdef CONFIG_FAIL_MAKE_REQUEST
>> -	bool			bd_make_it_fail;
>> -#endif
>>   	/*
>>   	 * keep this out-of-line as it's both big and not needed in the fast
>>   	 * path
>> @@ -86,6 +86,21 @@ struct block_device {
>>   #define bdev_kobj(_bdev) \
>>   	(&((_bdev)->bd_device.kobj))
>>   
>> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
>> +{
>> +	return bdev->bd_flags & (1U << bit);
>> +}
>> +
>> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags |= (1U << bit);
>> +}
>> +
>> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags &= ~(1U << bit);
>> +}
> 
> 'U' becomes incorrect after .bd_flags is changed to 'unsigned short'.

Currently, bi_flags is unsigned short, and this is the same as
bio_set/clear_flag(), will this behaviour cause any problems?

Thanks,
Kuai

> 
> 
> Thanks,
> Ming
> 
> .
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22  3:52   ` Michael Kelley
@ 2023-11-22  7:06     ` Yu Kuai
  0 siblings, 0 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22  7:06 UTC (permalink / raw)
  To: Michael Kelley, Yu Kuai, ming.lei, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, yukuai (C)

Hi,

在 2023/11/22 11:52, Michael Kelley 写道:
> From: Yu Kuai <yukuai1@huaweicloud.com> Sent: Wednesday, November 22, 2023 2:31 AM
>>
>> There are multiple switches in struct block_device, use separate bool
>> fields for them is not gracefully. Add a new field bd_flags and replace
>> swithes to a bit, there are no functional changes, and prepare to add
>> a new switch in the next patch. In order to keep bd_flags in the first
>> cacheline and prevent layout to be affected, define it as u16.
>>
>> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/bdev.c              | 15 ++++++++-------
>>   block/blk-core.c          |  7 ++++---
>>   block/genhd.c             | 15 +++++++++++----
>>   block/ioctl.c             |  6 +++++-
>>   include/linux/blk_types.h | 27 +++++++++++++++++++++------
>>   include/linux/blkdev.h    |  5 +++--
>>   6 files changed, 52 insertions(+), 23 deletions(-)
>>
>> diff --git a/block/bdev.c b/block/bdev.c
>> index e4cfb7adb645..10f524a7416c 100644
>> --- a/block/bdev.c
>> +++ b/block/bdev.c
>> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk
>> *disk, u8 partno)
>>   	bdev->bd_partno = partno;
>>   	bdev->bd_inode = inode;
>>   	bdev->bd_queue = disk->queue;
>> -	if (partno)
>> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
>> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
>> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	else
>> -		bdev->bd_has_submit_bio = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	bdev->bd_stats = alloc_percpu(struct disk_stats);
>>   	if (!bdev->bd_stats) {
>>   		iput(inode);
>> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   		bdev->bd_holder = NULL;
>>   		bdev->bd_holder_ops = NULL;
>>   		mutex_unlock(&bdev->bd_holder_lock);
>> -		if (bdev->bd_write_holder)
>> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>>   			unblock = true;
>>   	}
>>   	if (!whole->bd_holders)
>> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   	 */
>>   	if (unblock) {
>>   		disk_unblock_events(bdev->bd_disk);
>> -		bdev->bd_write_holder = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   	}
>>   }
>>
>> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev,
>> blk_mode_t mode, void *holder,
>>   		 * writeable reference is too fragile given the way @mode is
>>   		 * used in blkdev_get/put().
>>   		 */
>> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
>> +		if ((mode & BLK_OPEN_WRITE) &&
>> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>>   		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
>> -			bdev->bd_write_holder = true;
>> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   			unblock_events = false;
>>   		}
>>   	}
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index fdf25b8d6e78..f9f8b12ba626 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -482,7 +482,8 @@ __setup("fail_make_request=",
>> setup_fail_make_request);
>>
>>   bool should_fail_request(struct block_device *part, unsigned int bytes)
>>   {
>> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
>> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
>> +		should_fail(&fail_make_request, bytes);
>>   }
>>
>>   static int __init fail_make_request_debugfs(void)
>> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>>   	if (unlikely(!blk_crypto_bio_prep(&bio)))
>>   		return;
>>
>> -	if (!bio->bi_bdev->bd_has_submit_bio) {
>> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>>   		blk_mq_submit_bio(bio);
>>   	} else if (likely(bio_queue_enter(bio) == 0)) {
>>   		struct gendisk *disk = bio->bi_bdev->bd_disk;
>> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>>   	 */
>>   	if (current->bio_list)
>>   		bio_list_add(&current->bio_list[0], bio);
>> -	else if (!bio->bi_bdev->bd_has_submit_bio)
>> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>>   		__submit_bio_noacct_mq(bio);
>>   	else
>>   		__submit_bio_noacct(bio);
>> diff --git a/block/genhd.c b/block/genhd.c
>> index c9d06f72c587..57f96c0c8da0 100644
>> --- a/block/genhd.c
>> +++ b/block/genhd.c
>> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device
>> *parent, struct gendisk *disk,
>>   	elevator_init_mq(disk->queue);
>>
>>   	/* Mark bdev as having a submit_bio, if needed */
>> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
>> +	if (disk->fops->submit_bio)
>> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
>>
>>   	/*
>>   	 * If the driver provides an explicit major number it also must provide
>> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>>   ssize_t part_fail_show(struct device *dev,
>>   		       struct device_attribute *attr, char *buf)
>>   {
>> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
>> +	return sprintf(buf, "%d\n",
>> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>>   }
>>
>>   ssize_t part_fail_store(struct device *dev,
>> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>>   {
>>   	int i;
>>
>> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
>> -		dev_to_bdev(dev)->bd_make_it_fail = i;
>> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
>> +		if (!i)
>> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +		else
>> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +
>> +	}
>>
>>   	return count;
>>   }
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 4160f4e6bd5b..a64440f4c96b 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>>   		if (ret)
>>   			return ret;
>>   	}
>> -	bdev->bd_read_only = n;
>> +
>> +	if (!n)
>> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
>> +	else
>> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>>   	return 0;
>>   }
>>
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index f7d40692dd94..de652045111b 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>>   #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>>   #define SECTOR_MASK		(PAGE_SECTORS - 1)
>>
>> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
>> +#define BD_FLAG_WRITE_HOLDER	1
>> +#define BD_FLAG_HAS_SUBMIT_BIO	2
>> +#define BD_FLAG_MAKE_IT_FAIL	3
>> +
>>   struct block_device {
>>   	sector_t		bd_start_sect;
>>   	sector_t		bd_nr_sectors;
>> @@ -44,10 +49,8 @@ struct block_device {
>>   	struct request_queue *	bd_queue;
>>   	struct disk_stats __percpu *bd_stats;
>>   	unsigned long		bd_stamp;
>> -	bool			bd_read_only;	/* read-only policy */
>> +	unsigned short		bd_flags;
>>   	u8			bd_partno;
>> -	bool			bd_write_holder;
>> -	bool			bd_has_submit_bio;
>>   	dev_t			bd_dev;
>>   	struct inode		*bd_inode;	/* will die */
>>
>> @@ -67,9 +70,6 @@ struct block_device {
>>   	struct super_block	*bd_fsfreeze_sb;
>>
>>   	struct partition_meta_info *bd_meta_info;
>> -#ifdef CONFIG_FAIL_MAKE_REQUEST
>> -	bool			bd_make_it_fail;
>> -#endif
>>   	/*
>>   	 * keep this out-of-line as it's both big and not needed in the fast
>>   	 * path
>> @@ -86,6 +86,21 @@ struct block_device {
>>   #define bdev_kobj(_bdev) \
>>   	(&((_bdev)->bd_device.kobj))
>>
>> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
>> +{
>> +	return bdev->bd_flags & (1U << bit);
>> +}
>> +
>> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags |= (1U << bit);
>> +}
>> +
>> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags &= ~(1U << bit);
>> +}
> 
> It seems like there's atomicity problem with this approach.  In the above
> code, setting and clearing a bd_flag is *not* atomic with respect to
> setting/clearing some other bd_flag.  For example, setting/clearing
> BD_FLAG_MAKE_IT_FAIL via the /sys interface could race with
> setting/clearing BD_FLAG_READ_ONLY via an ioctl, and one of the
> two changes could be lost.  This problem wouldn't happen when
> each flag was a separate field.  Admittedly, the likelihood of a
> problem with BD_FLAG_MAKE_IT_FAIL is extremely low, but
> conceptually the lack of atomicity is still wrong.  There might
> be a similar problem with BD_FLAG_WRITE_HOLDER, but I
> didn't investigate thoroughly.

Thanks for the explanation, however, currently bio->bi_opf,
bio->bi_flags and req->rq_flags, and I can't understand now how do they
prevent to change bit concurrently? I'm probably missing something...

Thanks,
Kuai

> 
> Michael
> 
>> +
>>   /*
>>    * Block error status values.  See block/blk-core:blk_errors for the details.
>>    * Alpha cannot write a byte atomically, so we need to use 32-bit value.
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 51fa7ffdee83..fc1d55ef5107 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -741,13 +741,14 @@ void disk_uevent(struct gendisk *disk, enum
>> kobject_action action);
>>
>>   static inline int get_disk_ro(struct gendisk *disk)
>>   {
>> -	return disk->part0->bd_read_only ||
>> +	return bdev_flagged(disk->part0, BD_FLAG_READ_ONLY) ||
>>   		test_bit(GD_READ_ONLY, &disk->state);
>>   }
>>
>>   static inline int bdev_read_only(struct block_device *bdev)
>>   {
>> -	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
>> +	return bdev_flagged(bdev, BD_FLAG_READ_ONLY) ||
>> +		get_disk_ro(bdev->bd_disk);
>>   }
>>
>>   bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
>> --
>> 2.39.2
> 
> .
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device
  2023-11-22 10:31 ` [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device Yu Kuai
@ 2023-11-22  7:23   ` Christoph Hellwig
  2023-11-22 11:17   ` Yu Kuai
  1 sibling, 0 replies; 16+ messages in thread
From: Christoph Hellwig @ 2023-11-22  7:23 UTC (permalink / raw)
  To: Yu Kuai
  Cc: ming.lei, axboe, linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22 10:31 ` [PATCH v3 2/3] block: introduce new field bd_flags in block_device Yu Kuai
  2023-11-22  3:30   ` Ming Lei
  2023-11-22  3:52   ` Michael Kelley
@ 2023-11-22  7:28   ` Christoph Hellwig
  2023-11-22  7:45     ` Ming Lei
  2 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2023-11-22  7:28 UTC (permalink / raw)
  To: Yu Kuai
  Cc: ming.lei, axboe, linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	else
> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);

While the block layer has a bit of history of using wrappers for
testing, setting and clearing flags, I have to say I always find them
rather confusing when reading the code.

> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */

I know this is copied from the existing field, but can you expand
it a bit?

> +#define BD_FLAG_WRITE_HOLDER	1
> +#define BD_FLAG_HAS_SUBMIT_BIO	2
> +#define BD_FLAG_MAKE_IT_FAIL	3

And also write comments for these. 

> +
>  struct block_device {
>  	sector_t		bd_start_sect;
>  	sector_t		bd_nr_sectors;
> @@ -44,10 +49,8 @@ struct block_device {
>  	struct request_queue *	bd_queue;
>  	struct disk_stats __percpu *bd_stats;
>  	unsigned long		bd_stamp;
> -	bool			bd_read_only;	/* read-only policy */
> +	unsigned short		bd_flags;

I suspect you really need an unsigned long and atomic bit ops here.
Even a lock would probably not work on alpha as it could affect
the other fields in the same 32-bit alignment.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22  7:28   ` Christoph Hellwig
@ 2023-11-22  7:45     ` Ming Lei
  2023-11-22  7:53       ` Christoph Hellwig
  0 siblings, 1 reply; 16+ messages in thread
From: Ming Lei @ 2023-11-22  7:45 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Yu Kuai, axboe, linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On Tue, Nov 21, 2023 at 11:28:56PM -0800, Christoph Hellwig wrote:
> > +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> > +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
> >  	else
> > +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
> 
> While the block layer has a bit of history of using wrappers for
> testing, setting and clearing flags, I have to say I always find them
> rather confusing when reading the code.
> 
> > +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
> 
> I know this is copied from the existing field, but can you expand
> it a bit?
> 
> > +#define BD_FLAG_WRITE_HOLDER	1
> > +#define BD_FLAG_HAS_SUBMIT_BIO	2
> > +#define BD_FLAG_MAKE_IT_FAIL	3
> 
> And also write comments for these. 
> 
> > +
> >  struct block_device {
> >  	sector_t		bd_start_sect;
> >  	sector_t		bd_nr_sectors;
> > @@ -44,10 +49,8 @@ struct block_device {
> >  	struct request_queue *	bd_queue;
> >  	struct disk_stats __percpu *bd_stats;
> >  	unsigned long		bd_stamp;
> > -	bool			bd_read_only;	/* read-only policy */
> > +	unsigned short		bd_flags;
> 
> I suspect you really need an unsigned long and atomic bit ops here.
> Even a lock would probably not work on alpha as it could affect
> the other fields in the same 32-bit alignment.
 
All the existed 'bool' flags are not atomic RW, so I think it isn't
necessary to define 'bd_flags' as 'unsigned long' for replacing them.

Thanks, 
Ming


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22  7:45     ` Ming Lei
@ 2023-11-22  7:53       ` Christoph Hellwig
  2023-11-22  8:19         ` Ming Lei
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2023-11-22  7:53 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Yu Kuai, axboe, linux-block, linux-kernel,
	yukuai3, yi.zhang, yangerkun

On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> All the existed 'bool' flags are not atomic RW, so I think it isn't
> necessary to define 'bd_flags' as 'unsigned long' for replacing them.

So because the old code wasn't correct we'll never bother?  The new
flag and the new placement certainly make this more critical as well.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22  7:53       ` Christoph Hellwig
@ 2023-11-22  8:19         ` Ming Lei
  2023-11-22 12:47           ` Christoph Hellwig
  0 siblings, 1 reply; 16+ messages in thread
From: Ming Lei @ 2023-11-22  8:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Yu Kuai, axboe, linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On Tue, Nov 21, 2023 at 11:53:17PM -0800, Christoph Hellwig wrote:
> On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> > All the existed 'bool' flags are not atomic RW, so I think it isn't
> > necessary to define 'bd_flags' as 'unsigned long' for replacing them.
> 
> So because the old code wasn't correct we'll never bother?  The new
> flag and the new placement certainly make this more critical as well.

Can you explain why the old code was wrong?

1) ->bd_read_only and ->bd_make_it_fail

- set from userspace interface(ioctl or sysfs)
- check in IO code path

so changing it into atomic bit doesn't make difference from user
viewpoint.

2) ->bd_write_holder

disk->open_mutex is held for read & write this flag

3) ->bd_has_submit_bio

This flag is setup as oneshot before adding disk, and check in FS io code
path.

Of course, defining it as 'unsigned long' can extend its future usage, but
it depends on the atomic requirement of each flag itself.


Thanks, 
Ming


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3 0/3] block: warn once for each partition in bio_check_ro()
@ 2023-11-22 10:31 Yu Kuai
  2023-11-22 10:31 ` [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device Yu Kuai
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22 10:31 UTC (permalink / raw)
  To: ming.lei, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Changes in v3:
 - add patch 1 from Ming, swap bd_inode layout with bd_openers and
 bd_size_lock;
 - change bd_flags from u32 to u16 in patch 2, prevent to affect layout of
 other fields;

Changes in v2:
 - don't use test/set_bit() for new field, because unsigned long will
 cause that some field can't be placed in the first cacheline(64 bytes),
 use unsigned int for new field and test/set/clear it like 'bio->bi_flags'.

Ming Lei (1):
  block: move .bd_inode into 1st cacheline of block_device

Yu Kuai (2):
  block: introduce new field bd_flags in block_device
  block: warn once for each partition in bio_check_ro()

 block/bdev.c              | 15 ++++++++-------
 block/blk-core.c          | 21 +++++++++++++++------
 block/genhd.c             | 15 +++++++++++----
 block/ioctl.c             |  6 +++++-
 include/linux/blk_types.h | 31 ++++++++++++++++++++++++-------
 include/linux/blkdev.h    |  5 +++--
 6 files changed, 66 insertions(+), 27 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device
  2023-11-22 10:31 [PATCH v3 0/3] block: warn once for each partition in bio_check_ro() Yu Kuai
@ 2023-11-22 10:31 ` Yu Kuai
  2023-11-22  7:23   ` Christoph Hellwig
  2023-11-22 11:17   ` Yu Kuai
  2023-11-22 10:31 ` [PATCH v3 2/3] block: introduce new field bd_flags in block_device Yu Kuai
  2023-11-22 10:31 ` [PATCH v3 3/3] block: warn once for each partition in bio_check_ro() Yu Kuai
  2 siblings, 2 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22 10:31 UTC (permalink / raw)
  To: ming.lei, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Ming Lei <ming.lei@redhat.com>

The .bd_inode field of block_device is used in IO fast path of
blkdev_write_iter() and blkdev_llseek(), so it is more efficient to keep
it into the 1st cacheline.

.bd_openers is only touched in open()/close(), and .bd_size_lock is only
for updating bdev capacity, which is in slow path too.

So swap .bd_inode layout with .bd_openers & .bd_size_lock to move
.bd_inode into the 1st cache line.

Cc: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 include/linux/blk_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d5c5e59ddbd2..f7d40692dd94 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -49,9 +49,10 @@ struct block_device {
 	bool			bd_write_holder;
 	bool			bd_has_submit_bio;
 	dev_t			bd_dev;
+	struct inode		*bd_inode;	/* will die */
+
 	atomic_t		bd_openers;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
-	struct inode *		bd_inode;	/* will die */
 	void *			bd_claiming;
 	void *			bd_holder;
 	const struct blk_holder_ops *bd_holder_ops;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22 10:31 [PATCH v3 0/3] block: warn once for each partition in bio_check_ro() Yu Kuai
  2023-11-22 10:31 ` [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device Yu Kuai
@ 2023-11-22 10:31 ` Yu Kuai
  2023-11-22  3:30   ` Ming Lei
                     ` (2 more replies)
  2023-11-22 10:31 ` [PATCH v3 3/3] block: warn once for each partition in bio_check_ro() Yu Kuai
  2 siblings, 3 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22 10:31 UTC (permalink / raw)
  To: ming.lei, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

There are multiple switches in struct block_device, use separate bool
fields for them is not gracefully. Add a new field bd_flags and replace
swithes to a bit, there are no functional changes, and prepare to add
a new switch in the next patch. In order to keep bd_flags in the first
cacheline and prevent layout to be affected, define it as u16.

Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/bdev.c              | 15 ++++++++-------
 block/blk-core.c          |  7 ++++---
 block/genhd.c             | 15 +++++++++++----
 block/ioctl.c             |  6 +++++-
 include/linux/blk_types.h | 27 +++++++++++++++++++++------
 include/linux/blkdev.h    |  5 +++--
 6 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index e4cfb7adb645..10f524a7416c 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	bdev->bd_partno = partno;
 	bdev->bd_inode = inode;
 	bdev->bd_queue = disk->queue;
-	if (partno)
-		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
+	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
+		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
 	else
-		bdev->bd_has_submit_bio = false;
+		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
 	bdev->bd_stats = alloc_percpu(struct disk_stats);
 	if (!bdev->bd_stats) {
 		iput(inode);
@@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
 		bdev->bd_holder = NULL;
 		bdev->bd_holder_ops = NULL;
 		mutex_unlock(&bdev->bd_holder_lock);
-		if (bdev->bd_write_holder)
+		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
 			unblock = true;
 	}
 	if (!whole->bd_holders)
@@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
 	 */
 	if (unblock) {
 		disk_unblock_events(bdev->bd_disk);
-		bdev->bd_write_holder = false;
+		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
 	}
 }
 
@@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
 		 * writeable reference is too fragile given the way @mode is
 		 * used in blkdev_get/put().
 		 */
-		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
+		if ((mode & BLK_OPEN_WRITE) &&
+		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
 		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
-			bdev->bd_write_holder = true;
+			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
 			unblock_events = false;
 		}
 	}
diff --git a/block/blk-core.c b/block/blk-core.c
index fdf25b8d6e78..f9f8b12ba626 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -482,7 +482,8 @@ __setup("fail_make_request=", setup_fail_make_request);
 
 bool should_fail_request(struct block_device *part, unsigned int bytes)
 {
-	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
+	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
+		should_fail(&fail_make_request, bytes);
 }
 
 static int __init fail_make_request_debugfs(void)
@@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
 	if (unlikely(!blk_crypto_bio_prep(&bio)))
 		return;
 
-	if (!bio->bi_bdev->bd_has_submit_bio) {
+	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
 		blk_mq_submit_bio(bio);
 	} else if (likely(bio_queue_enter(bio) == 0)) {
 		struct gendisk *disk = bio->bi_bdev->bd_disk;
@@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
 	 */
 	if (current->bio_list)
 		bio_list_add(&current->bio_list[0], bio);
-	else if (!bio->bi_bdev->bd_has_submit_bio)
+	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
 		__submit_bio_noacct_mq(bio);
 	else
 		__submit_bio_noacct(bio);
diff --git a/block/genhd.c b/block/genhd.c
index c9d06f72c587..57f96c0c8da0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	elevator_init_mq(disk->queue);
 
 	/* Mark bdev as having a submit_bio, if needed */
-	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
+	if (disk->fops->submit_bio)
+		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
 
 	/*
 	 * If the driver provides an explicit major number it also must provide
@@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
 ssize_t part_fail_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
+	return sprintf(buf, "%d\n",
+		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
 }
 
 ssize_t part_fail_store(struct device *dev,
@@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
 {
 	int i;
 
-	if (count > 0 && sscanf(buf, "%d", &i) > 0)
-		dev_to_bdev(dev)->bd_make_it_fail = i;
+	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
+		if (!i)
+			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
+		else
+			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
+
+	}
 
 	return count;
 }
diff --git a/block/ioctl.c b/block/ioctl.c
index 4160f4e6bd5b..a64440f4c96b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
 		if (ret)
 			return ret;
 	}
-	bdev->bd_read_only = n;
+
+	if (!n)
+		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
+	else
+		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
 	return 0;
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f7d40692dd94..de652045111b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -37,6 +37,11 @@ struct bio_crypt_ctx;
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
+#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
+#define BD_FLAG_WRITE_HOLDER	1
+#define BD_FLAG_HAS_SUBMIT_BIO	2
+#define BD_FLAG_MAKE_IT_FAIL	3
+
 struct block_device {
 	sector_t		bd_start_sect;
 	sector_t		bd_nr_sectors;
@@ -44,10 +49,8 @@ struct block_device {
 	struct request_queue *	bd_queue;
 	struct disk_stats __percpu *bd_stats;
 	unsigned long		bd_stamp;
-	bool			bd_read_only;	/* read-only policy */
+	unsigned short		bd_flags;
 	u8			bd_partno;
-	bool			bd_write_holder;
-	bool			bd_has_submit_bio;
 	dev_t			bd_dev;
 	struct inode		*bd_inode;	/* will die */
 
@@ -67,9 +70,6 @@ struct block_device {
 	struct super_block	*bd_fsfreeze_sb;
 
 	struct partition_meta_info *bd_meta_info;
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-	bool			bd_make_it_fail;
-#endif
 	/*
 	 * keep this out-of-line as it's both big and not needed in the fast
 	 * path
@@ -86,6 +86,21 @@ struct block_device {
 #define bdev_kobj(_bdev) \
 	(&((_bdev)->bd_device.kobj))
 
+static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
+{
+	return bdev->bd_flags & (1U << bit);
+}
+
+static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
+{
+	bdev->bd_flags |= (1U << bit);
+}
+
+static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
+{
+	bdev->bd_flags &= ~(1U << bit);
+}
+
 /*
  * Block error status values.  See block/blk-core:blk_errors for the details.
  * Alpha cannot write a byte atomically, so we need to use 32-bit value.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51fa7ffdee83..fc1d55ef5107 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -741,13 +741,14 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action);
 
 static inline int get_disk_ro(struct gendisk *disk)
 {
-	return disk->part0->bd_read_only ||
+	return bdev_flagged(disk->part0, BD_FLAG_READ_ONLY) ||
 		test_bit(GD_READ_ONLY, &disk->state);
 }
 
 static inline int bdev_read_only(struct block_device *bdev)
 {
-	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
+	return bdev_flagged(bdev, BD_FLAG_READ_ONLY) ||
+		get_disk_ro(bdev->bd_disk);
 }
 
 bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH v3 3/3] block: warn once for each partition in bio_check_ro()
  2023-11-22 10:31 [PATCH v3 0/3] block: warn once for each partition in bio_check_ro() Yu Kuai
  2023-11-22 10:31 ` [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device Yu Kuai
  2023-11-22 10:31 ` [PATCH v3 2/3] block: introduce new field bd_flags in block_device Yu Kuai
@ 2023-11-22 10:31 ` Yu Kuai
  2 siblings, 0 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22 10:31 UTC (permalink / raw)
  To: ming.lei, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Commit 1b0a151c10a6 ("blk-core: use pr_warn_ratelimited() in
bio_check_ro()") fix message storm by limit the rate, however, there
will still be lots of message in the long term. Fix it better by warn
once for each partition.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-core.c          | 14 +++++++++++---
 include/linux/blk_types.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f9f8b12ba626..6575f684d41e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -502,9 +502,17 @@ static inline void bio_check_ro(struct bio *bio)
 	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
 		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
 			return;
-		pr_warn_ratelimited("Trying to write to read-only block-device %pg\n",
-				    bio->bi_bdev);
-		/* Older lvm-tools actually trigger this */
+
+		if (bdev_flagged(bio->bi_bdev, BD_FLAG_RO_WARNED))
+			return;
+
+		bdev_set_flag(bio->bi_bdev, BD_FLAG_RO_WARNED);
+		/*
+		 * Use ioctl to set underlying disk of raid/dm to read-only
+		 * will trigger this.
+		 */
+		pr_warn("Trying to write to read-only block-device %pg\n",
+			bio->bi_bdev);
 	}
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index de652045111b..a2185d2f02d9 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -41,6 +41,7 @@ struct bio_crypt_ctx;
 #define BD_FLAG_WRITE_HOLDER	1
 #define BD_FLAG_HAS_SUBMIT_BIO	2
 #define BD_FLAG_MAKE_IT_FAIL	3
+#define BD_FLAG_RO_WARNED	4
 
 struct block_device {
 	sector_t		bd_start_sect;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device
  2023-11-22 10:31 ` [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device Yu Kuai
  2023-11-22  7:23   ` Christoph Hellwig
@ 2023-11-22 11:17   ` Yu Kuai
  1 sibling, 0 replies; 16+ messages in thread
From: Yu Kuai @ 2023-11-22 11:17 UTC (permalink / raw)
  To: Yu Kuai, ming.lei, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, yukuai (C)

Hi,

在 2023/11/22 18:31, Yu Kuai 写道:
> From: Ming Lei <ming.lei@redhat.com>
> 
> The .bd_inode field of block_device is used in IO fast path of
> blkdev_write_iter() and blkdev_llseek(), so it is more efficient to keep
> it into the 1st cacheline.
> 
> .bd_openers is only touched in open()/close(), and .bd_size_lock is only
> for updating bdev capacity, which is in slow path too.
> 
> So swap .bd_inode layout with .bd_openers & .bd_size_lock to move
> .bd_inode into the 1st cache line.
> 
> Cc: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   include/linux/blk_types.h | 3 ++-
>   1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index d5c5e59ddbd2..f7d40692dd94 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -49,9 +49,10 @@ struct block_device {
>   	bool			bd_write_holder;
>   	bool			bd_has_submit_bio;
>   	dev_t			bd_dev;
> +	struct inode		*bd_inode;	/* will die */

Now that we're here, and bdev->bd_inode is always point to the
field inode of struct bdev_inode, which is next to the field bdev,
and the comment "will die" have been exist for a long time.

Maybe I can try to replace all the reference of bdev->bd_inode with
a helper, and then remove this field, then it'll be acceptable to add
a new field "unsigned long bd_flags".

Thanks,
Kuai


> +
>   	atomic_t		bd_openers;
>   	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
> -	struct inode *		bd_inode;	/* will die */
>   	void *			bd_claiming;
>   	void *			bd_holder;
>   	const struct blk_holder_ops *bd_holder_ops;
> 


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22  8:19         ` Ming Lei
@ 2023-11-22 12:47           ` Christoph Hellwig
  2023-11-23  2:19             ` Ming Lei
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Hellwig @ 2023-11-22 12:47 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Yu Kuai, axboe, linux-block, linux-kernel,
	yukuai3, yi.zhang, yangerkun

On Wed, Nov 22, 2023 at 04:19:40PM +0800, Ming Lei wrote:
> On Tue, Nov 21, 2023 at 11:53:17PM -0800, Christoph Hellwig wrote:
> > On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> > > All the existed 'bool' flags are not atomic RW, so I think it isn't
> > > necessary to define 'bd_flags' as 'unsigned long' for replacing them.
> > 
> > So because the old code wasn't correct we'll never bother?  The new
> > flag and the new placement certainly make this more critical as well.
> 
> Can you explain why the old code was wrong?
> 
> 1) ->bd_read_only and ->bd_make_it_fail
> 
> - set from userspace interface(ioctl or sysfs)
> - check in IO code path
> 
> so changing it into atomic bit doesn't make difference from user
> viewpoint.

> 
> 2) ->bd_write_holder
> 
> disk->open_mutex is held for read & write this flag
> 
> 3) ->bd_has_submit_bio
> 
> This flag is setup as oneshot before adding disk, and check in FS io code
> path.

On architectures that can't do byte-level atomics all three can corrupt
each other, and even worse bd_partno.  Granted that is only alpha these
days IIRC, but it's still buggy.


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH v3 2/3] block: introduce new field bd_flags in block_device
  2023-11-22 12:47           ` Christoph Hellwig
@ 2023-11-23  2:19             ` Ming Lei
  0 siblings, 0 replies; 16+ messages in thread
From: Ming Lei @ 2023-11-23  2:19 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Yu Kuai, axboe, linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On Wed, Nov 22, 2023 at 04:47:51AM -0800, Christoph Hellwig wrote:
> On Wed, Nov 22, 2023 at 04:19:40PM +0800, Ming Lei wrote:
> > On Tue, Nov 21, 2023 at 11:53:17PM -0800, Christoph Hellwig wrote:
> > > On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> > > > All the existed 'bool' flags are not atomic RW, so I think it isn't
> > > > necessary to define 'bd_flags' as 'unsigned long' for replacing them.
> > > 
> > > So because the old code wasn't correct we'll never bother?  The new
> > > flag and the new placement certainly make this more critical as well.
> > 
> > Can you explain why the old code was wrong?
> > 
> > 1) ->bd_read_only and ->bd_make_it_fail
> > 
> > - set from userspace interface(ioctl or sysfs)
> > - check in IO code path
> > 
> > so changing it into atomic bit doesn't make difference from user
> > viewpoint.
> 
> > 
> > 2) ->bd_write_holder
> > 
> > disk->open_mutex is held for read & write this flag
> > 
> > 3) ->bd_has_submit_bio
> > 
> > This flag is setup as oneshot before adding disk, and check in FS io code
> > path.
> 
> On architectures that can't do byte-level atomics all three can corrupt
> each other

Yeah, C/C++ doesn't provide such guarantee, but many modern ARCHs [1]
guarantees that RW on naturally aligned type is atomic.

I verified the point on x86/arm64/ppc64le by the following code, and
all three STOREs are done in single instruction.

	struct data {
		int b;
		char a;
		char a2;
		char a3;
		char a4;
	} __attribute__((aligned(8)));
	
	void atomic_test()
	{
		struct data d;
	
		d.b = 1;
		d.a = 2;
		d.a3 = 3;
	
		printf("%d %d %d\n", d.b, d.a, d.a3);
	}

[1] https://preshing.com/20130618/atomic-vs-non-atomic-operations/

> and even worse bd_partno.  Granted that is only alpha these
> days IIRC, but it's still buggy.

bd_has_submit_bio and bd_partno can be thought as read only, and the
two can be corrupted?

bd_dev may have similar trouble with bd_partno for ARCHs which don't
provide atomic RW on naturally aligned int.


Thanks,
Ming


^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-11-23  2:19 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-22 10:31 [PATCH v3 0/3] block: warn once for each partition in bio_check_ro() Yu Kuai
2023-11-22 10:31 ` [PATCH v3 1/3] block: move .bd_inode into 1st cacheline of block_device Yu Kuai
2023-11-22  7:23   ` Christoph Hellwig
2023-11-22 11:17   ` Yu Kuai
2023-11-22 10:31 ` [PATCH v3 2/3] block: introduce new field bd_flags in block_device Yu Kuai
2023-11-22  3:30   ` Ming Lei
2023-11-22  6:15     ` Yu Kuai
2023-11-22  3:52   ` Michael Kelley
2023-11-22  7:06     ` Yu Kuai
2023-11-22  7:28   ` Christoph Hellwig
2023-11-22  7:45     ` Ming Lei
2023-11-22  7:53       ` Christoph Hellwig
2023-11-22  8:19         ` Ming Lei
2023-11-22 12:47           ` Christoph Hellwig
2023-11-23  2:19             ` Ming Lei
2023-11-22 10:31 ` [PATCH v3 3/3] block: warn once for each partition in bio_check_ro() Yu Kuai

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.