All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>, Tejun Heo <tj@kernel.org>,
	Josef Bacik <josef@toxicpanda.com>, Coly Li <colyli@suse.de>,
	Mike Snitzer <snitzer@redhat.com>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Jan Kara <jack@suse.cz>,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>,
	dm-devel@redhat.com, Jan Kara <jack@suse.com>,
	linux-block@vger.kernel.org, linux-bcache@vger.kernel.org,
	linux-mtd@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org
Subject: Re: [PATCH 25/45] block: simplify bdev/disk lookup in blkdev_get
Date: Mon, 30 Nov 2020 10:24:28 +0100	[thread overview]
Message-ID: <20201130092428.GC11250@quack2.suse.cz> (raw)
In-Reply-To: <20201128161510.347752-26-hch@lst.de>

On Sat 28-11-20 17:14:50, Christoph Hellwig wrote:
> To simplify block device lookup and a few other upcoming areas, make sure
> that we always have a struct block_device available for each disk and
> each partition, and only find existing block devices in bdget.  The only
> downside of this is that each device and partition uses a little more
> memory.  The upside will be that a lot of code can be simplified.
> 
> With that all we need to look up the block device is to look up the inode
> and do a few sanity checks on the gendisk, instead of the separate lookup
> for the gendisk.  For blk-cgroup which wants to access a gendisk without
> opening it, a new blkdev_{get,put}_no_open low-level interface is added
> to replace the previous get_gendisk use.
> 
> Note that the change to look up the block device directly instead of the
> two-step lookup using struct gendisk causes a subtle change in behavior:
> accessing a non-existing partition on an existing block device can now
> cause a call to request_module.  That call is harmless, and in practice
> no recent system will access these nodes as they aren't created by udev
> and static /dev/ setups are unusual.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good to me! You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

									Honza

> ---
>  block/blk-cgroup.c         |  42 ++++----
>  block/blk-iocost.c         |  36 +++----
>  block/blk.h                |   2 +-
>  block/genhd.c              | 210 +++++--------------------------------
>  block/partitions/core.c    |  29 ++---
>  fs/block_dev.c             | 177 +++++++++++++++++--------------
>  include/linux/blk-cgroup.h |   4 +-
>  include/linux/blkdev.h     |   6 ++
>  include/linux/genhd.h      |   7 +-
>  9 files changed, 194 insertions(+), 319 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 54fbe1e80cc41a..19650eb42b9f00 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -556,22 +556,22 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
>  }
>  
>  /**
> - * blkg_conf_prep - parse and prepare for per-blkg config update
> + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
>   * @inputp: input string pointer
>   *
>   * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
> - * from @input and get and return the matching gendisk.  *@inputp is
> + * from @input and get and return the matching bdev.  *@inputp is
>   * updated to point past the device node prefix.  Returns an ERR_PTR()
>   * value on error.
>   *
>   * Use this function iff blkg_conf_prep() can't be used for some reason.
>   */
> -struct gendisk *blkcg_conf_get_disk(char **inputp)
> +struct block_device *blkcg_conf_open_bdev(char **inputp)
>  {
>  	char *input = *inputp;
>  	unsigned int major, minor;
> -	struct gendisk *disk;
> -	int key_len, part;
> +	struct block_device *bdev;
> +	int key_len;
>  
>  	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
>  		return ERR_PTR(-EINVAL);
> @@ -581,16 +581,16 @@ struct gendisk *blkcg_conf_get_disk(char **inputp)
>  		return ERR_PTR(-EINVAL);
>  	input = skip_spaces(input);
>  
> -	disk = get_gendisk(MKDEV(major, minor), &part);
> -	if (!disk)
> +	bdev = blkdev_get_no_open(MKDEV(major, minor));
> +	if (!bdev)
>  		return ERR_PTR(-ENODEV);
> -	if (part) {
> -		put_disk_and_module(disk);
> +	if (bdev_is_partition(bdev)) {
> +		blkdev_put_no_open(bdev);
>  		return ERR_PTR(-ENODEV);
>  	}
>  
>  	*inputp = input;
> -	return disk;
> +	return bdev;
>  }
>  
>  /**
> @@ -607,18 +607,18 @@ struct gendisk *blkcg_conf_get_disk(char **inputp)
>   */
>  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		   char *input, struct blkg_conf_ctx *ctx)
> -	__acquires(rcu) __acquires(&disk->queue->queue_lock)
> +	__acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct request_queue *q;
>  	struct blkcg_gq *blkg;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	q = disk->queue;
> +	q = bdev->bd_disk->queue;
>  
>  	rcu_read_lock();
>  	spin_lock_irq(&q->queue_lock);
> @@ -689,7 +689,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  			goto success;
>  	}
>  success:
> -	ctx->disk = disk;
> +	ctx->bdev = bdev;
>  	ctx->blkg = blkg;
>  	ctx->body = input;
>  	return 0;
> @@ -700,7 +700,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  	spin_unlock_irq(&q->queue_lock);
>  	rcu_read_unlock();
>  fail:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	/*
>  	 * If queue was bypassing, we should retry.  Do so after a
>  	 * short msleep().  It isn't strictly necessary but queue
> @@ -723,11 +723,11 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
>   * with blkg_conf_prep().
>   */
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx)
> -	__releases(&ctx->disk->queue->queue_lock) __releases(rcu)
> +	__releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu)
>  {
> -	spin_unlock_irq(&ctx->disk->queue->queue_lock);
> +	spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock);
>  	rcu_read_unlock();
> -	put_disk_and_module(ctx->disk);
> +	blkdev_put_no_open(ctx->bdev);
>  }
>  EXPORT_SYMBOL_GPL(blkg_conf_finish);
>  
> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
> index bbe86d1199dc5b..8e20fe4bddecf7 100644
> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -3120,23 +3120,23 @@ static const match_table_t qos_tokens = {
>  static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>  			     size_t nbytes, loff_t off)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct ioc *ioc;
>  	u32 qos[NR_QOS_PARAMS];
>  	bool enable, user;
>  	char *p;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	ioc = q_to_ioc(disk->queue);
> +	ioc = q_to_ioc(bdev->bd_disk->queue);
>  	if (!ioc) {
> -		ret = blk_iocost_init(disk->queue);
> +		ret = blk_iocost_init(bdev->bd_disk->queue);
>  		if (ret)
>  			goto err;
> -		ioc = q_to_ioc(disk->queue);
> +		ioc = q_to_ioc(bdev->bd_disk->queue);
>  	}
>  
>  	spin_lock_irq(&ioc->lock);
> @@ -3231,12 +3231,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>  	ioc_refresh_params(ioc, true);
>  	spin_unlock_irq(&ioc->lock);
>  
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return nbytes;
>  einval:
>  	ret = -EINVAL;
>  err:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return ret;
>  }
>  
> @@ -3287,23 +3287,23 @@ static const match_table_t i_lcoef_tokens = {
>  static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
>  				    size_t nbytes, loff_t off)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct ioc *ioc;
>  	u64 u[NR_I_LCOEFS];
>  	bool user;
>  	char *p;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	ioc = q_to_ioc(disk->queue);
> +	ioc = q_to_ioc(bdev->bd_disk->queue);
>  	if (!ioc) {
> -		ret = blk_iocost_init(disk->queue);
> +		ret = blk_iocost_init(bdev->bd_disk->queue);
>  		if (ret)
>  			goto err;
> -		ioc = q_to_ioc(disk->queue);
> +		ioc = q_to_ioc(bdev->bd_disk->queue);
>  	}
>  
>  	spin_lock_irq(&ioc->lock);
> @@ -3356,13 +3356,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
>  	ioc_refresh_params(ioc, true);
>  	spin_unlock_irq(&ioc->lock);
>  
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return nbytes;
>  
>  einval:
>  	ret = -EINVAL;
>  err:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return ret;
>  }
>  
> diff --git a/block/blk.h b/block/blk.h
> index dfab98465db9a5..c4839abcfa27eb 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -352,7 +352,6 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
>  
>  int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
>  void blk_free_devt(dev_t devt);
> -void blk_invalidate_devt(dev_t devt);
>  char *disk_name(struct gendisk *hd, int partno, char *buf);
>  #define ADDPART_FLAG_NONE	0
>  #define ADDPART_FLAG_RAID	1
> @@ -384,6 +383,7 @@ static inline void hd_free_part(struct hd_struct *part)
>  {
>  	free_percpu(part->dkstats);
>  	kfree(part->info);
> +	bdput(part->bdev);
>  	percpu_ref_exit(&part->ref);
>  }
>  
> diff --git a/block/genhd.c b/block/genhd.c
> index f46e89226fdf91..bf8fa82f135f4e 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -27,17 +27,11 @@
>  
>  static struct kobject *block_depr;
>  
> -static DEFINE_XARRAY(bdev_map);
> -static DEFINE_MUTEX(bdev_map_lock);
> +DECLARE_RWSEM(bdev_lookup_sem);
>  
>  /* for extended dynamic devt allocation, currently only one major is used */
>  #define NR_EXT_DEVT		(1 << MINORBITS)
> -
> -/* For extended devt allocation.  ext_devt_lock prevents look up
> - * results from going away underneath its user.
> - */
> -static DEFINE_SPINLOCK(ext_devt_lock);
> -static DEFINE_IDR(ext_devt_idr);
> +static DEFINE_IDA(ext_devt_ida);
>  
>  static void disk_check_events(struct disk_events *ev,
>  			      unsigned int *clearing_ptr);
> @@ -580,14 +574,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
>  		return 0;
>  	}
>  
> -	/* allocate ext devt */
> -	idr_preload(GFP_KERNEL);
> -
> -	spin_lock_bh(&ext_devt_lock);
> -	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
> -	spin_unlock_bh(&ext_devt_lock);
> -
> -	idr_preload_end();
> +	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
>  	if (idx < 0)
>  		return idx == -ENOSPC ? -EBUSY : idx;
>  
> @@ -606,26 +593,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
>   */
>  void blk_free_devt(dev_t devt)
>  {
> -	if (devt == MKDEV(0, 0))
> -		return;
> -
> -	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
> -		spin_lock_bh(&ext_devt_lock);
> -		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> -}
> -
> -/*
> - * We invalidate devt by assigning NULL pointer for devt in idr.
> - */
> -void blk_invalidate_devt(dev_t devt)
> -{
> -	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
> -		spin_lock_bh(&ext_devt_lock);
> -		idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt)));
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> +	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
> +		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
>  }
>  
>  static char *bdevt_str(dev_t devt, char *buf)
> @@ -640,28 +609,6 @@ static char *bdevt_str(dev_t devt, char *buf)
>  	return buf;
>  }
>  
> -static void blk_register_region(struct gendisk *disk)
> -{
> -	int i;
> -
> -	mutex_lock(&bdev_map_lock);
> -	for (i = 0; i < disk->minors; i++) {
> -		if (xa_insert(&bdev_map, disk_devt(disk) + i, disk, GFP_KERNEL))
> -			WARN_ON_ONCE(1);
> -	}
> -	mutex_unlock(&bdev_map_lock);
> -}
> -
> -static void blk_unregister_region(struct gendisk *disk)
> -{
> -	int i;
> -
> -	mutex_lock(&bdev_map_lock);
> -	for (i = 0; i < disk->minors; i++)
> -		xa_erase(&bdev_map, disk_devt(disk) + i);
> -	mutex_unlock(&bdev_map_lock);
> -}
> -
>  static void disk_scan_partitions(struct gendisk *disk)
>  {
>  	struct block_device *bdev;
> @@ -805,7 +752,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
>  		WARN_ON(ret);
>  		bdi_set_owner(bdi, dev);
> -		blk_register_region(disk);
> +		bdev_add(disk->part0.bdev, devt);
>  	}
>  	register_disk(parent, disk, groups);
>  	if (register_queue)
> @@ -847,8 +794,8 @@ static void invalidate_partition(struct gendisk *disk, int partno)
>  	__invalidate_device(bdev, true);
>  
>  	/*
> -	 * Unhash the bdev inode for this device so that it gets evicted as soon
> -	 * as last inode reference is dropped.
> +	 * Unhash the bdev inode for this device so that it can't be looked
> +	 * up any more even if openers still hold references to it.
>  	 */
>  	remove_inode_hash(bdev->bd_inode);
>  	bdput(bdev);
> @@ -890,7 +837,8 @@ void del_gendisk(struct gendisk *disk)
>  	 * Block lookups of the disk until all bdevs are unhashed and the
>  	 * disk is marked as dead (GENHD_FL_UP cleared).
>  	 */
> -	down_write(&disk->lookup_sem);
> +	down_write(&bdev_lookup_sem);
> +
>  	/* invalidate stuff */
>  	disk_part_iter_init(&piter, disk,
>  			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
> @@ -903,7 +851,7 @@ void del_gendisk(struct gendisk *disk)
>  	invalidate_partition(disk, 0);
>  	set_capacity(disk, 0);
>  	disk->flags &= ~GENHD_FL_UP;
> -	up_write(&disk->lookup_sem);
> +	up_write(&bdev_lookup_sem);
>  
>  	if (!(disk->flags & GENHD_FL_HIDDEN)) {
>  		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
> @@ -916,16 +864,6 @@ void del_gendisk(struct gendisk *disk)
>  	}
>  
>  	blk_unregister_queue(disk);
> -	
> -	if (!(disk->flags & GENHD_FL_HIDDEN))
> -		blk_unregister_region(disk);
> -	/*
> -	 * Remove gendisk pointer from idr so that it cannot be looked up
> -	 * while RCU period before freeing gendisk is running to prevent
> -	 * use-after-free issues. Note that the device number stays
> -	 * "in-use" until we really free the gendisk.
> -	 */
> -	blk_invalidate_devt(disk_devt(disk));
>  
>  	kobject_put(disk->part0.holder_dir);
>  	kobject_put(disk->slave_dir);
> @@ -964,7 +902,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
>  	return badblocks_store(disk->bb, page, len, 0);
>  }
>  
> -static void request_gendisk_module(dev_t devt)
> +void blk_request_module(dev_t devt)
>  {
>  	unsigned int major = MAJOR(devt);
>  	struct blk_major_name **n;
> @@ -984,84 +922,6 @@ static void request_gendisk_module(dev_t devt)
>  		request_module("block-major-%d", MAJOR(devt));
>  }
>  
> -static bool get_disk_and_module(struct gendisk *disk)
> -{
> -	struct module *owner;
> -
> -	if (!disk->fops)
> -		return false;
> -	owner = disk->fops->owner;
> -	if (owner && !try_module_get(owner))
> -		return false;
> -	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) {
> -		module_put(owner);
> -		return false;
> -	}
> -	return true;
> -
> -}
> -
> -/**
> - * get_gendisk - get partitioning information for a given device
> - * @devt: device to get partitioning information for
> - * @partno: returned partition index
> - *
> - * This function gets the structure containing partitioning
> - * information for the given device @devt.
> - *
> - * Context: can sleep
> - */
> -struct gendisk *get_gendisk(dev_t devt, int *partno)
> -{
> -	struct gendisk *disk = NULL;
> -
> -	might_sleep();
> -
> -	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
> -		mutex_lock(&bdev_map_lock);
> -		disk = xa_load(&bdev_map, devt);
> -		if (!disk) {
> -			mutex_unlock(&bdev_map_lock);
> -			request_gendisk_module(devt);
> -			mutex_lock(&bdev_map_lock);
> -			disk = xa_load(&bdev_map, devt);
> -		}
> -		if (disk && !get_disk_and_module(disk))
> -			disk = NULL;
> -		if (disk)
> -			*partno = devt - disk_devt(disk);
> -		mutex_unlock(&bdev_map_lock);
> -	} else {
> -		struct hd_struct *part;
> -
> -		spin_lock_bh(&ext_devt_lock);
> -		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
> -		if (part && get_disk_and_module(part_to_disk(part))) {
> -			*partno = part->partno;
> -			disk = part_to_disk(part);
> -		}
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> -
> -	if (!disk)
> -		return NULL;
> -
> -	/*
> -	 * Synchronize with del_gendisk() to not return disk that is being
> -	 * destroyed.
> -	 */
> -	down_read(&disk->lookup_sem);
> -	if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
> -		     !(disk->flags & GENHD_FL_UP))) {
> -		up_read(&disk->lookup_sem);
> -		put_disk_and_module(disk);
> -		disk = NULL;
> -	} else {
> -		up_read(&disk->lookup_sem);
> -	}
> -	return disk;
> -}
> -
>  /**
>   * bdget_disk - do bdget() by gendisk and partition number
>   * @disk: gendisk of interest
> @@ -1559,11 +1419,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
>   *
>   * This function releases all allocated resources of the gendisk.
>   *
> - * The struct gendisk refcount is incremented with get_gendisk() or
> - * get_disk_and_module(), and its refcount is decremented with
> - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this
> - * function is called.
> - *
>   * Drivers which used __device_add_disk() have a gendisk with a request_queue
>   * assigned. Since the request_queue sits on top of the gendisk for these
>   * drivers we also call blk_put_queue() for them, and we expect the
> @@ -1748,16 +1603,17 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	if (!disk)
>  		return NULL;
>  
> +	disk->part0.bdev = bdev_alloc(disk, 0);
> +	if (!disk->part0.bdev)
> +		goto out_free_disk;
> +
>  	disk->part0.dkstats = alloc_percpu(struct disk_stats);
>  	if (!disk->part0.dkstats)
> -		goto out_free_disk;
> +		goto out_bdput;
>  
> -	init_rwsem(&disk->lookup_sem);
>  	disk->node_id = node_id;
> -	if (disk_expand_part_tbl(disk, 0)) {
> -		free_percpu(disk->part0.dkstats);
> -		goto out_free_disk;
> -	}
> +	if (disk_expand_part_tbl(disk, 0))
> +		goto out_free_bdstats;
>  
>  	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
>  	rcu_assign_pointer(ptbl->part[0], &disk->part0);
> @@ -1773,7 +1629,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	 */
>  	hd_sects_seq_init(&disk->part0);
>  	if (hd_ref_init(&disk->part0))
> -		goto out_free_part0;
> +		goto out_free_bdstats;
>  
>  	disk->minors = minors;
>  	rand_initialize_disk(disk);
> @@ -1782,8 +1638,10 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	device_initialize(disk_to_dev(disk));
>  	return disk;
>  
> -out_free_part0:
> -	hd_free_part(&disk->part0);
> +out_free_bdstats:
> +	free_percpu(disk->part0.dkstats);
> +out_bdput:
> +	bdput(disk->part0.bdev);
>  out_free_disk:
>  	kfree(disk);
>  	return NULL;
> @@ -1807,26 +1665,6 @@ void put_disk(struct gendisk *disk)
>  }
>  EXPORT_SYMBOL(put_disk);
>  
> -/**
> - * put_disk_and_module - decrements the module and gendisk refcount
> - * @disk: the struct gendisk to decrement the refcount for
> - *
> - * This is a counterpart of get_disk_and_module() and thus also of
> - * get_gendisk().
> - *
> - * Context: Any context, but the last reference must not be dropped from
> - *          atomic context.
> - */
> -void put_disk_and_module(struct gendisk *disk)
> -{
> -	if (disk) {
> -		struct module *owner = disk->fops->owner;
> -
> -		put_disk(disk);
> -		module_put(owner);
> -	}
> -}
> -
>  static void set_disk_ro_uevent(struct gendisk *gd, int ro)
>  {
>  	char event[] = "DISK_RO=1";
> diff --git a/block/partitions/core.c b/block/partitions/core.c
> index a02e224115943d..696bd9ff63c64a 100644
> --- a/block/partitions/core.c
> +++ b/block/partitions/core.c
> @@ -340,12 +340,11 @@ void delete_partition(struct hd_struct *part)
>  	device_del(part_to_dev(part));
>  
>  	/*
> -	 * Remove gendisk pointer from idr so that it cannot be looked up
> -	 * while RCU period before freeing gendisk is running to prevent
> -	 * use-after-free issues. Note that the device number stays
> -	 * "in-use" until we really free the gendisk.
> +	 * Remove the block device from the inode hash, so that it cannot be
> +	 * looked up any more even when openers still hold references.
>  	 */
> -	blk_invalidate_devt(part_devt(part));
> +	remove_inode_hash(part->bdev->bd_inode);
> +
>  	percpu_ref_kill(&part->ref);
>  }
>  
> @@ -368,6 +367,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	dev_t devt = MKDEV(0, 0);
>  	struct device *ddev = disk_to_dev(disk);
>  	struct device *pdev;
> +	struct block_device *bdev;
>  	struct disk_part_tbl *ptbl;
>  	const char *dname;
>  	int err;
> @@ -402,11 +402,15 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	if (!p)
>  		return ERR_PTR(-EBUSY);
>  
> +	err = -ENOMEM;
>  	p->dkstats = alloc_percpu(struct disk_stats);
> -	if (!p->dkstats) {
> -		err = -ENOMEM;
> +	if (!p->dkstats)
>  		goto out_free;
> -	}
> +
> +	bdev = bdev_alloc(disk, partno);
> +	if (!bdev)
> +		goto out_free_stats;
> +	p->bdev = bdev;
>  
>  	hd_sects_seq_init(p);
>  	pdev = part_to_dev(p);
> @@ -420,10 +424,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  		struct partition_meta_info *pinfo;
>  
>  		pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id);
> -		if (!pinfo) {
> -			err = -ENOMEM;
> -			goto out_free_stats;
> -		}
> +		if (!pinfo)
> +			goto out_bdput;
>  		memcpy(pinfo, info, sizeof(*info));
>  		p->info = pinfo;
>  	}
> @@ -470,6 +472,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	}
>  
>  	/* everything is up and running, commence */
> +	bdev_add(bdev, devt);
>  	rcu_assign_pointer(ptbl->part[partno], p);
>  
>  	/* suppress uevent if the disk suppresses it */
> @@ -479,6 +482,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  
>  out_free_info:
>  	kfree(p->info);
> +out_bdput:
> +	bdput(bdev);
>  out_free_stats:
>  	free_percpu(p->dkstats);
>  out_free:
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 6d6e4d50834cd0..b350ed3af83bad 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -863,31 +863,46 @@ void __init bdev_cache_init(void)
>  	blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
>  }
>  
> -static struct block_device *bdget(dev_t dev)
> +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>  {
>  	struct block_device *bdev;
>  	struct inode *inode;
>  
> -	inode = iget_locked(blockdev_superblock, dev);
> +	inode = new_inode(blockdev_superblock);
>  	if (!inode)
>  		return NULL;
> +	inode->i_mode = S_IFBLK;
> +	inode->i_rdev = 0;
> +	inode->i_data.a_ops = &def_blk_aops;
> +	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
> +
> +	bdev = I_BDEV(inode);
> +	spin_lock_init(&bdev->bd_size_lock);
> +	bdev->bd_disk = disk;
> +	bdev->bd_partno = partno;
> +	bdev->bd_contains = NULL;
> +	bdev->bd_super = NULL;
> +	bdev->bd_inode = inode;
> +	bdev->bd_part_count = 0;
> +	return bdev;
> +}
>  
> -	bdev = &BDEV_I(inode)->bdev;
> +void bdev_add(struct block_device *bdev, dev_t dev)
> +{
> +	bdev->bd_dev = dev;
> +	bdev->bd_inode->i_rdev = dev;
> +	bdev->bd_inode->i_ino = dev;
> +	insert_inode_hash(bdev->bd_inode);
> +}
>  
> -	if (inode->i_state & I_NEW) {
> -		spin_lock_init(&bdev->bd_size_lock);
> -		bdev->bd_contains = NULL;
> -		bdev->bd_super = NULL;
> -		bdev->bd_inode = inode;
> -		bdev->bd_part_count = 0;
> -		bdev->bd_dev = dev;
> -		inode->i_mode = S_IFBLK;
> -		inode->i_rdev = dev;
> -		inode->i_data.a_ops = &def_blk_aops;
> -		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
> -		unlock_new_inode(inode);
> -	}
> -	return bdev;
> +static struct block_device *bdget(dev_t dev)
> +{
> +	struct inode *inode;
> +
> +	inode = ilookup(blockdev_superblock, dev);
> +	if (!inode)
> +		return NULL;
> +	return &BDEV_I(inode)->bdev;
>  }
>  
>  /**
> @@ -1004,27 +1019,6 @@ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
>  }
>  EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
>  
> -static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
> -{
> -	struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
> -
> -	if (!disk)
> -		return NULL;
> -	/*
> -	 * Now that we hold gendisk reference we make sure bdev we looked up is
> -	 * not stale. If it is, it means device got removed and created before
> -	 * we looked up gendisk and we fail open in such case. Associating
> -	 * unhashed bdev with newly created gendisk could lead to two bdevs
> -	 * (and thus two independent caches) being associated with one device
> -	 * which is bad.
> -	 */
> -	if (inode_unhashed(bdev->bd_inode)) {
> -		put_disk_and_module(disk);
> -		return NULL;
> -	}
> -	return disk;
> -}
> -
>  static void bd_clear_claiming(struct block_device *whole, void *holder)
>  {
>  	lockdep_assert_held(&bdev_lock);
> @@ -1347,19 +1341,17 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
>   *  mutex_lock(part->bd_mutex)
>   *    mutex_lock_nested(whole->bd_mutex, 1)
>   */
> -static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
> -		int partno, fmode_t mode)
> +static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  {
> +	struct gendisk *disk = bdev->bd_disk;
>  	int ret;
>  
>  	if (!bdev->bd_openers) {
> -		bdev->bd_disk = disk;
>  		bdev->bd_contains = bdev;
> -		bdev->bd_partno = partno;
>  
> -		if (!partno) {
> +		if (!bdev->bd_partno) {
>  			ret = -ENXIO;
> -			bdev->bd_part = disk_get_part(disk, partno);
> +			bdev->bd_part = disk_get_part(disk, 0);
>  			if (!bdev->bd_part)
>  				goto out_clear;
>  
> @@ -1388,7 +1380,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  			struct block_device *whole = bdget_disk(disk, 0);
>  
>  			mutex_lock_nested(&whole->bd_mutex, 1);
> -			ret = __blkdev_get(whole, disk, 0, mode);
> +			ret = __blkdev_get(whole, mode);
>  			if (ret) {
>  				mutex_unlock(&whole->bd_mutex);
>  				bdput(whole);
> @@ -1398,7 +1390,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  			mutex_unlock(&whole->bd_mutex);
>  
>  			bdev->bd_contains = whole;
> -			bdev->bd_part = disk_get_part(disk, partno);
> +			bdev->bd_part = disk_get_part(disk, bdev->bd_partno);
>  			if (!(disk->flags & GENHD_FL_UP) ||
>  			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
>  				__blkdev_put(whole, mode, 1);
> @@ -1430,12 +1422,53 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  
>   out_clear:
>  	disk_put_part(bdev->bd_part);
> -	bdev->bd_disk = NULL;
>  	bdev->bd_part = NULL;
>  	bdev->bd_contains = NULL;
>  	return ret;
>  }
>  
> +struct block_device *blkdev_get_no_open(dev_t dev)
> +{
> +	struct block_device *bdev;
> +	struct gendisk *disk;
> +
> +	down_read(&bdev_lookup_sem);
> +	bdev = bdget(dev);
> +	if (!bdev) {
> +		up_read(&bdev_lookup_sem);
> +		blk_request_module(dev);
> +		down_read(&bdev_lookup_sem);
> +
> +		bdev = bdget(dev);
> +		if (!bdev)
> +			goto unlock;
> +	}
> +
> +	disk = bdev->bd_disk;
> +	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
> +		goto bdput;
> +	if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
> +		goto put_disk;
> +	if (!try_module_get(bdev->bd_disk->fops->owner))
> +		goto put_disk;
> +	up_read(&bdev_lookup_sem);
> +	return bdev;
> +put_disk:
> +	put_disk(disk);
> +bdput:
> +	bdput(bdev);
> +unlock:
> +	up_read(&bdev_lookup_sem);
> +	return NULL;
> +}
> +
> +void blkdev_put_no_open(struct block_device *bdev)
> +{
> +	module_put(bdev->bd_disk->fops->owner);
> +	put_disk(bdev->bd_disk);
> +	bdput(bdev);
> +}
> +
>  /**
>   * blkdev_get_by_dev - open a block device by device number
>   * @dev: device number of block device to open
> @@ -1463,7 +1496,6 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	bool unblock_events = true;
>  	struct block_device *bdev;
>  	struct gendisk *disk;
> -	int partno;
>  	int ret;
>  
>  	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
> @@ -1473,18 +1505,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	bdev = bdget(dev);
> -	if (!bdev)
> -		return ERR_PTR(-ENOMEM);
> -
>  	/*
>  	 * If we lost a race with 'disk' being deleted, try again.  See md.c.
>  	 */
>  retry:
> -	ret = -ENXIO;
> -	disk = bdev_get_gendisk(bdev, &partno);
> -	if (!disk)
> -		goto bdput;
> +	bdev = blkdev_get_no_open(dev);
> +	if (!bdev)
> +		return ERR_PTR(-ENXIO);
> +	disk = bdev->bd_disk;
>  
>  	if (mode & FMODE_EXCL) {
>  		WARN_ON_ONCE(!holder);
> @@ -1492,7 +1520,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  		ret = -ENOMEM;
>  		claiming = bdget_disk(disk, 0);
>  		if (!claiming)
> -			goto put_disk;
> +			goto put_blkdev;
>  		ret = bd_prepare_to_claim(bdev, claiming, holder);
>  		if (ret)
>  			goto put_claiming;
> @@ -1501,12 +1529,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	disk_block_events(disk);
>  
>  	mutex_lock(&bdev->bd_mutex);
> -	ret =__blkdev_get(bdev, disk, partno, mode);
> -	if (!(mode & FMODE_EXCL)) {
> -		; /* nothing to do here */
> -	} else if (ret) {
> -		bd_abort_claiming(bdev, claiming, holder);
> -	} else {
> +	ret =__blkdev_get(bdev, mode);
> +	if (ret)
> +		goto abort_claiming;
> +	if (mode & FMODE_EXCL) {
>  		bd_finish_claiming(bdev, claiming, holder);
>  
>  		/*
> @@ -1526,21 +1552,23 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  
>  	if (unblock_events)
>  		disk_unblock_events(disk);
> +	if (mode & FMODE_EXCL)
> +		bdput(claiming);
> +	return bdev;
>  
> +abort_claiming:
> +	if (mode & FMODE_EXCL)
> +		bd_abort_claiming(bdev, claiming, holder);
> +	mutex_unlock(&bdev->bd_mutex);
> +	disk_unblock_events(disk);
>  put_claiming:
>  	if (mode & FMODE_EXCL)
>  		bdput(claiming);
> -put_disk:
> -	if (ret)
> -		put_disk_and_module(disk);
> +put_blkdev:
> +	blkdev_put_no_open(bdev);
>  	if (ret == -ERESTARTSYS)
>  		goto retry;
> -bdput:
> -	if (ret) {
> -		bdput(bdev);
> -		return ERR_PTR(ret);
> -	}
> -	return bdev;
> +	return ERR_PTR(ret);
>  }
>  EXPORT_SYMBOL(blkdev_get_by_dev);
>  
> @@ -1641,7 +1669,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
>  
>  		disk_put_part(bdev->bd_part);
>  		bdev->bd_part = NULL;
> -		bdev->bd_disk = NULL;
>  		if (bdev_is_partition(bdev))
>  			victim = bdev->bd_contains;
>  		bdev->bd_contains = NULL;
> @@ -1699,12 +1726,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
>  	 * from userland - e.g. eject(1).
>  	 */
>  	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
> -
>  	mutex_unlock(&bdev->bd_mutex);
>  
>  	__blkdev_put(bdev, mode, 0);
> -	bdput(bdev);
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  }
>  EXPORT_SYMBOL(blkdev_put);
>  
> diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
> index c8fc9792ac776d..b9f3c246c3c908 100644
> --- a/include/linux/blk-cgroup.h
> +++ b/include/linux/blk-cgroup.h
> @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
>  u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
>  
>  struct blkg_conf_ctx {
> -	struct gendisk			*disk;
> +	struct block_device		*bdev;
>  	struct blkcg_gq			*blkg;
>  	char				*body;
>  };
>  
> -struct gendisk *blkcg_conf_get_disk(char **inputp);
> +struct block_device *blkcg_conf_open_bdev(char **inputp);
>  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		   char *input, struct blkg_conf_ctx *ctx);
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index bdd7339bcda462..5d48b92f5e4348 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1994,6 +1994,12 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
>  		void *holder);
>  void blkdev_put(struct block_device *bdev, fmode_t mode);
>  
> +/* just for blk-cgroup, don't use elsewhere */
> +struct block_device *blkdev_get_no_open(dev_t dev);
> +void blkdev_put_no_open(struct block_device *bdev);
> +
> +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
> +void bdev_add(struct block_device *bdev, dev_t dev);
>  struct block_device *I_BDEV(struct inode *inode);
>  struct block_device *bdget_part(struct hd_struct *part);
>  struct block_device *bdgrab(struct block_device *bdev);
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index ca5e356084c353..42a51653c7303e 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -65,6 +65,7 @@ struct hd_struct {
>  	struct disk_stats __percpu *dkstats;
>  	struct percpu_ref ref;
>  
> +	struct block_device *bdev;
>  	struct device __dev;
>  	struct kobject *holder_dir;
>  	int policy, partno;
> @@ -193,7 +194,6 @@ struct gendisk {
>  	int flags;
>  	unsigned long state;
>  #define GD_NEED_PART_SCAN		0
> -	struct rw_semaphore lookup_sem;
>  	struct kobject *slave_dir;
>  
>  	struct timer_rand_state *random;
> @@ -300,7 +300,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
>  }
>  
>  extern void del_gendisk(struct gendisk *gp);
> -extern struct gendisk *get_gendisk(dev_t dev, int *partno);
>  extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
>  
>  extern void set_disk_ro(struct gendisk *disk, int flag);
> @@ -338,7 +337,6 @@ int blk_drop_partitions(struct block_device *bdev);
>  
>  extern struct gendisk *__alloc_disk_node(int minors, int node_id);
>  extern void put_disk(struct gendisk *disk);
> -extern void put_disk_and_module(struct gendisk *disk);
>  
>  #define alloc_disk_node(minors, node_id)				\
>  ({									\
> @@ -388,7 +386,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
>  }
>  #endif /* CONFIG_SYSFS */
>  
> +extern struct rw_semaphore bdev_lookup_sem;
> +
>  dev_t blk_lookup_devt(const char *name, int partno);
> +void blk_request_module(dev_t devt);
>  #ifdef CONFIG_BLOCK
>  void printk_all_partitions(void);
>  #else /* CONFIG_BLOCK */
> -- 
> 2.29.2
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

WARNING: multiple messages have this Message-ID (diff)
From: Jan Kara <jack@suse.cz>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
	Mike Snitzer <snitzer@redhat.com>,
	linux-mm@kvack.org,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Jan Kara <jack@suse.com>, Josef Bacik <josef@toxicpanda.com>,
	Coly Li <colyli@suse.de>,
	linux-block@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	dm-devel@redhat.com, linux-mtd@lists.infradead.org,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>,
	Tejun Heo <tj@kernel.org>,
	linux-bcache@vger.kernel.org
Subject: Re: [PATCH 25/45] block: simplify bdev/disk lookup in blkdev_get
Date: Mon, 30 Nov 2020 10:24:28 +0100	[thread overview]
Message-ID: <20201130092428.GC11250@quack2.suse.cz> (raw)
In-Reply-To: <20201128161510.347752-26-hch@lst.de>

On Sat 28-11-20 17:14:50, Christoph Hellwig wrote:
> To simplify block device lookup and a few other upcoming areas, make sure
> that we always have a struct block_device available for each disk and
> each partition, and only find existing block devices in bdget.  The only
> downside of this is that each device and partition uses a little more
> memory.  The upside will be that a lot of code can be simplified.
> 
> With that all we need to look up the block device is to look up the inode
> and do a few sanity checks on the gendisk, instead of the separate lookup
> for the gendisk.  For blk-cgroup which wants to access a gendisk without
> opening it, a new blkdev_{get,put}_no_open low-level interface is added
> to replace the previous get_gendisk use.
> 
> Note that the change to look up block device directly instead of the two
> step lookup using struct gendisk causes a subtle change in behavior:
> accessing a non-existing partition on an existing block device can now
> cause a call to request_module.  That call is harmless, and in practice
> no recent system will access these nodes as they aren't created by udev
> and static /dev/ setups are unusual.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good to me! You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

									Honza

> ---
>  block/blk-cgroup.c         |  42 ++++----
>  block/blk-iocost.c         |  36 +++----
>  block/blk.h                |   2 +-
>  block/genhd.c              | 210 +++++--------------------------------
>  block/partitions/core.c    |  29 ++---
>  fs/block_dev.c             | 177 +++++++++++++++++--------------
>  include/linux/blk-cgroup.h |   4 +-
>  include/linux/blkdev.h     |   6 ++
>  include/linux/genhd.h      |   7 +-
>  9 files changed, 194 insertions(+), 319 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 54fbe1e80cc41a..19650eb42b9f00 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -556,22 +556,22 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
>  }
>  
>  /**
> - * blkg_conf_prep - parse and prepare for per-blkg config update
> + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
>   * @inputp: input string pointer
>   *
>   * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
> - * from @input and get and return the matching gendisk.  *@inputp is
> + * from @input and get and return the matching bdev.  *@inputp is
>   * updated to point past the device node prefix.  Returns an ERR_PTR()
>   * value on error.
>   *
>   * Use this function iff blkg_conf_prep() can't be used for some reason.
>   */
> -struct gendisk *blkcg_conf_get_disk(char **inputp)
> +struct block_device *blkcg_conf_open_bdev(char **inputp)
>  {
>  	char *input = *inputp;
>  	unsigned int major, minor;
> -	struct gendisk *disk;
> -	int key_len, part;
> +	struct block_device *bdev;
> +	int key_len;
>  
>  	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
>  		return ERR_PTR(-EINVAL);
> @@ -581,16 +581,16 @@ struct gendisk *blkcg_conf_get_disk(char **inputp)
>  		return ERR_PTR(-EINVAL);
>  	input = skip_spaces(input);
>  
> -	disk = get_gendisk(MKDEV(major, minor), &part);
> -	if (!disk)
> +	bdev = blkdev_get_no_open(MKDEV(major, minor));
> +	if (!bdev)
>  		return ERR_PTR(-ENODEV);
> -	if (part) {
> -		put_disk_and_module(disk);
> +	if (bdev_is_partition(bdev)) {
> +		blkdev_put_no_open(bdev);
>  		return ERR_PTR(-ENODEV);
>  	}
>  
>  	*inputp = input;
> -	return disk;
> +	return bdev;
>  }
>  
>  /**
> @@ -607,18 +607,18 @@ struct gendisk *blkcg_conf_get_disk(char **inputp)
>   */
>  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		   char *input, struct blkg_conf_ctx *ctx)
> -	__acquires(rcu) __acquires(&disk->queue->queue_lock)
> +	__acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct request_queue *q;
>  	struct blkcg_gq *blkg;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	q = disk->queue;
> +	q = bdev->bd_disk->queue;
>  
>  	rcu_read_lock();
>  	spin_lock_irq(&q->queue_lock);
> @@ -689,7 +689,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  			goto success;
>  	}
>  success:
> -	ctx->disk = disk;
> +	ctx->bdev = bdev;
>  	ctx->blkg = blkg;
>  	ctx->body = input;
>  	return 0;
> @@ -700,7 +700,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  	spin_unlock_irq(&q->queue_lock);
>  	rcu_read_unlock();
>  fail:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	/*
>  	 * If queue was bypassing, we should retry.  Do so after a
>  	 * short msleep().  It isn't strictly necessary but queue
> @@ -723,11 +723,11 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
>   * with blkg_conf_prep().
>   */
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx)
> -	__releases(&ctx->disk->queue->queue_lock) __releases(rcu)
> +	__releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu)
>  {
> -	spin_unlock_irq(&ctx->disk->queue->queue_lock);
> +	spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock);
>  	rcu_read_unlock();
> -	put_disk_and_module(ctx->disk);
> +	blkdev_put_no_open(ctx->bdev);
>  }
>  EXPORT_SYMBOL_GPL(blkg_conf_finish);
>  
> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
> index bbe86d1199dc5b..8e20fe4bddecf7 100644
> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -3120,23 +3120,23 @@ static const match_table_t qos_tokens = {
>  static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>  			     size_t nbytes, loff_t off)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct ioc *ioc;
>  	u32 qos[NR_QOS_PARAMS];
>  	bool enable, user;
>  	char *p;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	ioc = q_to_ioc(disk->queue);
> +	ioc = q_to_ioc(bdev->bd_disk->queue);
>  	if (!ioc) {
> -		ret = blk_iocost_init(disk->queue);
> +		ret = blk_iocost_init(bdev->bd_disk->queue);
>  		if (ret)
>  			goto err;
> -		ioc = q_to_ioc(disk->queue);
> +		ioc = q_to_ioc(bdev->bd_disk->queue);
>  	}
>  
>  	spin_lock_irq(&ioc->lock);
> @@ -3231,12 +3231,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>  	ioc_refresh_params(ioc, true);
>  	spin_unlock_irq(&ioc->lock);
>  
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return nbytes;
>  einval:
>  	ret = -EINVAL;
>  err:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return ret;
>  }
>  
> @@ -3287,23 +3287,23 @@ static const match_table_t i_lcoef_tokens = {
>  static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
>  				    size_t nbytes, loff_t off)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct ioc *ioc;
>  	u64 u[NR_I_LCOEFS];
>  	bool user;
>  	char *p;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	ioc = q_to_ioc(disk->queue);
> +	ioc = q_to_ioc(bdev->bd_disk->queue);
>  	if (!ioc) {
> -		ret = blk_iocost_init(disk->queue);
> +		ret = blk_iocost_init(bdev->bd_disk->queue);
>  		if (ret)
>  			goto err;
> -		ioc = q_to_ioc(disk->queue);
> +		ioc = q_to_ioc(bdev->bd_disk->queue);
>  	}
>  
>  	spin_lock_irq(&ioc->lock);
> @@ -3356,13 +3356,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
>  	ioc_refresh_params(ioc, true);
>  	spin_unlock_irq(&ioc->lock);
>  
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return nbytes;
>  
>  einval:
>  	ret = -EINVAL;
>  err:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return ret;
>  }
>  
> diff --git a/block/blk.h b/block/blk.h
> index dfab98465db9a5..c4839abcfa27eb 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -352,7 +352,6 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
>  
>  int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
>  void blk_free_devt(dev_t devt);
> -void blk_invalidate_devt(dev_t devt);
>  char *disk_name(struct gendisk *hd, int partno, char *buf);
>  #define ADDPART_FLAG_NONE	0
>  #define ADDPART_FLAG_RAID	1
> @@ -384,6 +383,7 @@ static inline void hd_free_part(struct hd_struct *part)
>  {
>  	free_percpu(part->dkstats);
>  	kfree(part->info);
> +	bdput(part->bdev);
>  	percpu_ref_exit(&part->ref);
>  }
>  
> diff --git a/block/genhd.c b/block/genhd.c
> index f46e89226fdf91..bf8fa82f135f4e 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -27,17 +27,11 @@
>  
>  static struct kobject *block_depr;
>  
> -static DEFINE_XARRAY(bdev_map);
> -static DEFINE_MUTEX(bdev_map_lock);
> +DECLARE_RWSEM(bdev_lookup_sem);
>  
>  /* for extended dynamic devt allocation, currently only one major is used */
>  #define NR_EXT_DEVT		(1 << MINORBITS)
> -
> -/* For extended devt allocation.  ext_devt_lock prevents look up
> - * results from going away underneath its user.
> - */
> -static DEFINE_SPINLOCK(ext_devt_lock);
> -static DEFINE_IDR(ext_devt_idr);
> +static DEFINE_IDA(ext_devt_ida);
>  
>  static void disk_check_events(struct disk_events *ev,
>  			      unsigned int *clearing_ptr);
> @@ -580,14 +574,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
>  		return 0;
>  	}
>  
> -	/* allocate ext devt */
> -	idr_preload(GFP_KERNEL);
> -
> -	spin_lock_bh(&ext_devt_lock);
> -	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
> -	spin_unlock_bh(&ext_devt_lock);
> -
> -	idr_preload_end();
> +	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
>  	if (idx < 0)
>  		return idx == -ENOSPC ? -EBUSY : idx;
>  
> @@ -606,26 +593,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
>   */
>  void blk_free_devt(dev_t devt)
>  {
> -	if (devt == MKDEV(0, 0))
> -		return;
> -
> -	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
> -		spin_lock_bh(&ext_devt_lock);
> -		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> -}
> -
> -/*
> - * We invalidate devt by assigning NULL pointer for devt in idr.
> - */
> -void blk_invalidate_devt(dev_t devt)
> -{
> -	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
> -		spin_lock_bh(&ext_devt_lock);
> -		idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt)));
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> +	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
> +		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
>  }
>  
>  static char *bdevt_str(dev_t devt, char *buf)
> @@ -640,28 +609,6 @@ static char *bdevt_str(dev_t devt, char *buf)
>  	return buf;
>  }
>  
> -static void blk_register_region(struct gendisk *disk)
> -{
> -	int i;
> -
> -	mutex_lock(&bdev_map_lock);
> -	for (i = 0; i < disk->minors; i++) {
> -		if (xa_insert(&bdev_map, disk_devt(disk) + i, disk, GFP_KERNEL))
> -			WARN_ON_ONCE(1);
> -	}
> -	mutex_unlock(&bdev_map_lock);
> -}
> -
> -static void blk_unregister_region(struct gendisk *disk)
> -{
> -	int i;
> -
> -	mutex_lock(&bdev_map_lock);
> -	for (i = 0; i < disk->minors; i++)
> -		xa_erase(&bdev_map, disk_devt(disk) + i);
> -	mutex_unlock(&bdev_map_lock);
> -}
> -
>  static void disk_scan_partitions(struct gendisk *disk)
>  {
>  	struct block_device *bdev;
> @@ -805,7 +752,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
>  		WARN_ON(ret);
>  		bdi_set_owner(bdi, dev);
> -		blk_register_region(disk);
> +		bdev_add(disk->part0.bdev, devt);
>  	}
>  	register_disk(parent, disk, groups);
>  	if (register_queue)
> @@ -847,8 +794,8 @@ static void invalidate_partition(struct gendisk *disk, int partno)
>  	__invalidate_device(bdev, true);
>  
>  	/*
> -	 * Unhash the bdev inode for this device so that it gets evicted as soon
> -	 * as last inode reference is dropped.
> +	 * Unhash the bdev inode for this device so that it can't be looked
> +	 * up any more even if openers still hold references to it.
>  	 */
>  	remove_inode_hash(bdev->bd_inode);
>  	bdput(bdev);
> @@ -890,7 +837,8 @@ void del_gendisk(struct gendisk *disk)
>  	 * Block lookups of the disk until all bdevs are unhashed and the
>  	 * disk is marked as dead (GENHD_FL_UP cleared).
>  	 */
> -	down_write(&disk->lookup_sem);
> +	down_write(&bdev_lookup_sem);
> +
>  	/* invalidate stuff */
>  	disk_part_iter_init(&piter, disk,
>  			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
> @@ -903,7 +851,7 @@ void del_gendisk(struct gendisk *disk)
>  	invalidate_partition(disk, 0);
>  	set_capacity(disk, 0);
>  	disk->flags &= ~GENHD_FL_UP;
> -	up_write(&disk->lookup_sem);
> +	up_write(&bdev_lookup_sem);
>  
>  	if (!(disk->flags & GENHD_FL_HIDDEN)) {
>  		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
> @@ -916,16 +864,6 @@ void del_gendisk(struct gendisk *disk)
>  	}
>  
>  	blk_unregister_queue(disk);
> -	
> -	if (!(disk->flags & GENHD_FL_HIDDEN))
> -		blk_unregister_region(disk);
> -	/*
> -	 * Remove gendisk pointer from idr so that it cannot be looked up
> -	 * while RCU period before freeing gendisk is running to prevent
> -	 * use-after-free issues. Note that the device number stays
> -	 * "in-use" until we really free the gendisk.
> -	 */
> -	blk_invalidate_devt(disk_devt(disk));
>  
>  	kobject_put(disk->part0.holder_dir);
>  	kobject_put(disk->slave_dir);
> @@ -964,7 +902,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
>  	return badblocks_store(disk->bb, page, len, 0);
>  }
>  
> -static void request_gendisk_module(dev_t devt)
> +void blk_request_module(dev_t devt)
>  {
>  	unsigned int major = MAJOR(devt);
>  	struct blk_major_name **n;
> @@ -984,84 +922,6 @@ static void request_gendisk_module(dev_t devt)
>  		request_module("block-major-%d", MAJOR(devt));
>  }
>  
> -static bool get_disk_and_module(struct gendisk *disk)
> -{
> -	struct module *owner;
> -
> -	if (!disk->fops)
> -		return false;
> -	owner = disk->fops->owner;
> -	if (owner && !try_module_get(owner))
> -		return false;
> -	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) {
> -		module_put(owner);
> -		return false;
> -	}
> -	return true;
> -
> -}
> -
> -/**
> - * get_gendisk - get partitioning information for a given device
> - * @devt: device to get partitioning information for
> - * @partno: returned partition index
> - *
> - * This function gets the structure containing partitioning
> - * information for the given device @devt.
> - *
> - * Context: can sleep
> - */
> -struct gendisk *get_gendisk(dev_t devt, int *partno)
> -{
> -	struct gendisk *disk = NULL;
> -
> -	might_sleep();
> -
> -	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
> -		mutex_lock(&bdev_map_lock);
> -		disk = xa_load(&bdev_map, devt);
> -		if (!disk) {
> -			mutex_unlock(&bdev_map_lock);
> -			request_gendisk_module(devt);
> -			mutex_lock(&bdev_map_lock);
> -			disk = xa_load(&bdev_map, devt);
> -		}
> -		if (disk && !get_disk_and_module(disk))
> -			disk = NULL;
> -		if (disk)
> -			*partno = devt - disk_devt(disk);
> -		mutex_unlock(&bdev_map_lock);
> -	} else {
> -		struct hd_struct *part;
> -
> -		spin_lock_bh(&ext_devt_lock);
> -		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
> -		if (part && get_disk_and_module(part_to_disk(part))) {
> -			*partno = part->partno;
> -			disk = part_to_disk(part);
> -		}
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> -
> -	if (!disk)
> -		return NULL;
> -
> -	/*
> -	 * Synchronize with del_gendisk() to not return disk that is being
> -	 * destroyed.
> -	 */
> -	down_read(&disk->lookup_sem);
> -	if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
> -		     !(disk->flags & GENHD_FL_UP))) {
> -		up_read(&disk->lookup_sem);
> -		put_disk_and_module(disk);
> -		disk = NULL;
> -	} else {
> -		up_read(&disk->lookup_sem);
> -	}
> -	return disk;
> -}
> -
>  /**
>   * bdget_disk - do bdget() by gendisk and partition number
>   * @disk: gendisk of interest
> @@ -1559,11 +1419,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
>   *
>   * This function releases all allocated resources of the gendisk.
>   *
> - * The struct gendisk refcount is incremented with get_gendisk() or
> - * get_disk_and_module(), and its refcount is decremented with
> - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this
> - * function is called.
> - *
>   * Drivers which used __device_add_disk() have a gendisk with a request_queue
>   * assigned. Since the request_queue sits on top of the gendisk for these
>   * drivers we also call blk_put_queue() for them, and we expect the
> @@ -1748,16 +1603,17 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	if (!disk)
>  		return NULL;
>  
> +	disk->part0.bdev = bdev_alloc(disk, 0);
> +	if (!disk->part0.bdev)
> +		goto out_free_disk;
> +
>  	disk->part0.dkstats = alloc_percpu(struct disk_stats);
>  	if (!disk->part0.dkstats)
> -		goto out_free_disk;
> +		goto out_bdput;
>  
> -	init_rwsem(&disk->lookup_sem);
>  	disk->node_id = node_id;
> -	if (disk_expand_part_tbl(disk, 0)) {
> -		free_percpu(disk->part0.dkstats);
> -		goto out_free_disk;
> -	}
> +	if (disk_expand_part_tbl(disk, 0))
> +		goto out_free_bdstats;
>  
>  	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
>  	rcu_assign_pointer(ptbl->part[0], &disk->part0);
> @@ -1773,7 +1629,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	 */
>  	hd_sects_seq_init(&disk->part0);
>  	if (hd_ref_init(&disk->part0))
> -		goto out_free_part0;
> +		goto out_free_bdstats;
>  
>  	disk->minors = minors;
>  	rand_initialize_disk(disk);
> @@ -1782,8 +1638,10 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	device_initialize(disk_to_dev(disk));
>  	return disk;
>  
> -out_free_part0:
> -	hd_free_part(&disk->part0);
> +out_free_bdstats:
> +	free_percpu(disk->part0.dkstats);
> +out_bdput:
> +	bdput(disk->part0.bdev);
>  out_free_disk:
>  	kfree(disk);
>  	return NULL;
> @@ -1807,26 +1665,6 @@ void put_disk(struct gendisk *disk)
>  }
>  EXPORT_SYMBOL(put_disk);
>  
> -/**
> - * put_disk_and_module - decrements the module and gendisk refcount
> - * @disk: the struct gendisk to decrement the refcount for
> - *
> - * This is a counterpart of get_disk_and_module() and thus also of
> - * get_gendisk().
> - *
> - * Context: Any context, but the last reference must not be dropped from
> - *          atomic context.
> - */
> -void put_disk_and_module(struct gendisk *disk)
> -{
> -	if (disk) {
> -		struct module *owner = disk->fops->owner;
> -
> -		put_disk(disk);
> -		module_put(owner);
> -	}
> -}
> -
>  static void set_disk_ro_uevent(struct gendisk *gd, int ro)
>  {
>  	char event[] = "DISK_RO=1";
> diff --git a/block/partitions/core.c b/block/partitions/core.c
> index a02e224115943d..696bd9ff63c64a 100644
> --- a/block/partitions/core.c
> +++ b/block/partitions/core.c
> @@ -340,12 +340,11 @@ void delete_partition(struct hd_struct *part)
>  	device_del(part_to_dev(part));
>  
>  	/*
> -	 * Remove gendisk pointer from idr so that it cannot be looked up
> -	 * while RCU period before freeing gendisk is running to prevent
> -	 * use-after-free issues. Note that the device number stays
> -	 * "in-use" until we really free the gendisk.
> +	 * Remove the block device from the inode hash, so that it cannot be
> +	 * looked up any more even when openers still hold references.
>  	 */
> -	blk_invalidate_devt(part_devt(part));
> +	remove_inode_hash(part->bdev->bd_inode);
> +
>  	percpu_ref_kill(&part->ref);
>  }
>  
> @@ -368,6 +367,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	dev_t devt = MKDEV(0, 0);
>  	struct device *ddev = disk_to_dev(disk);
>  	struct device *pdev;
> +	struct block_device *bdev;
>  	struct disk_part_tbl *ptbl;
>  	const char *dname;
>  	int err;
> @@ -402,11 +402,15 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	if (!p)
>  		return ERR_PTR(-EBUSY);
>  
> +	err = -ENOMEM;
>  	p->dkstats = alloc_percpu(struct disk_stats);
> -	if (!p->dkstats) {
> -		err = -ENOMEM;
> +	if (!p->dkstats)
>  		goto out_free;
> -	}
> +
> +	bdev = bdev_alloc(disk, partno);
> +	if (!bdev)
> +		goto out_free_stats;
> +	p->bdev = bdev;
>  
>  	hd_sects_seq_init(p);
>  	pdev = part_to_dev(p);
> @@ -420,10 +424,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  		struct partition_meta_info *pinfo;
>  
>  		pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id);
> -		if (!pinfo) {
> -			err = -ENOMEM;
> -			goto out_free_stats;
> -		}
> +		if (!pinfo)
> +			goto out_bdput;
>  		memcpy(pinfo, info, sizeof(*info));
>  		p->info = pinfo;
>  	}
> @@ -470,6 +472,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	}
>  
>  	/* everything is up and running, commence */
> +	bdev_add(bdev, devt);
>  	rcu_assign_pointer(ptbl->part[partno], p);
>  
>  	/* suppress uevent if the disk suppresses it */
> @@ -479,6 +482,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  
>  out_free_info:
>  	kfree(p->info);
> +out_bdput:
> +	bdput(bdev);
>  out_free_stats:
>  	free_percpu(p->dkstats);
>  out_free:
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 6d6e4d50834cd0..b350ed3af83bad 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -863,31 +863,46 @@ void __init bdev_cache_init(void)
>  	blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
>  }
>  
> -static struct block_device *bdget(dev_t dev)
> +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>  {
>  	struct block_device *bdev;
>  	struct inode *inode;
>  
> -	inode = iget_locked(blockdev_superblock, dev);
> +	inode = new_inode(blockdev_superblock);
>  	if (!inode)
>  		return NULL;
> +	inode->i_mode = S_IFBLK;
> +	inode->i_rdev = 0;
> +	inode->i_data.a_ops = &def_blk_aops;
> +	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
> +
> +	bdev = I_BDEV(inode);
> +	spin_lock_init(&bdev->bd_size_lock);
> +	bdev->bd_disk = disk;
> +	bdev->bd_partno = partno;
> +	bdev->bd_contains = NULL;
> +	bdev->bd_super = NULL;
> +	bdev->bd_inode = inode;
> +	bdev->bd_part_count = 0;
> +	return bdev;
> +}
>  
> -	bdev = &BDEV_I(inode)->bdev;
> +void bdev_add(struct block_device *bdev, dev_t dev)
> +{
> +	bdev->bd_dev = dev;
> +	bdev->bd_inode->i_rdev = dev;
> +	bdev->bd_inode->i_ino = dev;
> +	insert_inode_hash(bdev->bd_inode);
> +}
>  
> -	if (inode->i_state & I_NEW) {
> -		spin_lock_init(&bdev->bd_size_lock);
> -		bdev->bd_contains = NULL;
> -		bdev->bd_super = NULL;
> -		bdev->bd_inode = inode;
> -		bdev->bd_part_count = 0;
> -		bdev->bd_dev = dev;
> -		inode->i_mode = S_IFBLK;
> -		inode->i_rdev = dev;
> -		inode->i_data.a_ops = &def_blk_aops;
> -		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
> -		unlock_new_inode(inode);
> -	}
> -	return bdev;
> +static struct block_device *bdget(dev_t dev)
> +{
> +	struct inode *inode;
> +
> +	inode = ilookup(blockdev_superblock, dev);
> +	if (!inode)
> +		return NULL;
> +	return &BDEV_I(inode)->bdev;
>  }
>  
>  /**
> @@ -1004,27 +1019,6 @@ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
>  }
>  EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
>  
> -static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
> -{
> -	struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
> -
> -	if (!disk)
> -		return NULL;
> -	/*
> -	 * Now that we hold gendisk reference we make sure bdev we looked up is
> -	 * not stale. If it is, it means device got removed and created before
> -	 * we looked up gendisk and we fail open in such case. Associating
> -	 * unhashed bdev with newly created gendisk could lead to two bdevs
> -	 * (and thus two independent caches) being associated with one device
> -	 * which is bad.
> -	 */
> -	if (inode_unhashed(bdev->bd_inode)) {
> -		put_disk_and_module(disk);
> -		return NULL;
> -	}
> -	return disk;
> -}
> -
>  static void bd_clear_claiming(struct block_device *whole, void *holder)
>  {
>  	lockdep_assert_held(&bdev_lock);
> @@ -1347,19 +1341,17 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
>   *  mutex_lock(part->bd_mutex)
>   *    mutex_lock_nested(whole->bd_mutex, 1)
>   */
> -static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
> -		int partno, fmode_t mode)
> +static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  {
> +	struct gendisk *disk = bdev->bd_disk;
>  	int ret;
>  
>  	if (!bdev->bd_openers) {
> -		bdev->bd_disk = disk;
>  		bdev->bd_contains = bdev;
> -		bdev->bd_partno = partno;
>  
> -		if (!partno) {
> +		if (!bdev->bd_partno) {
>  			ret = -ENXIO;
> -			bdev->bd_part = disk_get_part(disk, partno);
> +			bdev->bd_part = disk_get_part(disk, 0);
>  			if (!bdev->bd_part)
>  				goto out_clear;
>  
> @@ -1388,7 +1380,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  			struct block_device *whole = bdget_disk(disk, 0);
>  
>  			mutex_lock_nested(&whole->bd_mutex, 1);
> -			ret = __blkdev_get(whole, disk, 0, mode);
> +			ret = __blkdev_get(whole, mode);
>  			if (ret) {
>  				mutex_unlock(&whole->bd_mutex);
>  				bdput(whole);
> @@ -1398,7 +1390,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  			mutex_unlock(&whole->bd_mutex);
>  
>  			bdev->bd_contains = whole;
> -			bdev->bd_part = disk_get_part(disk, partno);
> +			bdev->bd_part = disk_get_part(disk, bdev->bd_partno);
>  			if (!(disk->flags & GENHD_FL_UP) ||
>  			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
>  				__blkdev_put(whole, mode, 1);
> @@ -1430,12 +1422,53 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  
>   out_clear:
>  	disk_put_part(bdev->bd_part);
> -	bdev->bd_disk = NULL;
>  	bdev->bd_part = NULL;
>  	bdev->bd_contains = NULL;
>  	return ret;
>  }
>  
> +struct block_device *blkdev_get_no_open(dev_t dev)
> +{
> +	struct block_device *bdev;
> +	struct gendisk *disk;
> +
> +	down_read(&bdev_lookup_sem);
> +	bdev = bdget(dev);
> +	if (!bdev) {
> +		up_read(&bdev_lookup_sem);
> +		blk_request_module(dev);
> +		down_read(&bdev_lookup_sem);
> +
> +		bdev = bdget(dev);
> +		if (!bdev)
> +			goto unlock;
> +	}
> +
> +	disk = bdev->bd_disk;
> +	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
> +		goto bdput;
> +	if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
> +		goto put_disk;
> +	if (!try_module_get(bdev->bd_disk->fops->owner))
> +		goto put_disk;
> +	up_read(&bdev_lookup_sem);
> +	return bdev;
> +put_disk:
> +	put_disk(disk);
> +bdput:
> +	bdput(bdev);
> +unlock:
> +	up_read(&bdev_lookup_sem);
> +	return NULL;
> +}
> +
> +void blkdev_put_no_open(struct block_device *bdev)
> +{
> +	module_put(bdev->bd_disk->fops->owner);
> +	put_disk(bdev->bd_disk);
> +	bdput(bdev);
> +}
> +
>  /**
>   * blkdev_get_by_dev - open a block device by device number
>   * @dev: device number of block device to open
> @@ -1463,7 +1496,6 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	bool unblock_events = true;
>  	struct block_device *bdev;
>  	struct gendisk *disk;
> -	int partno;
>  	int ret;
>  
>  	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
> @@ -1473,18 +1505,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	bdev = bdget(dev);
> -	if (!bdev)
> -		return ERR_PTR(-ENOMEM);
> -
>  	/*
>  	 * If we lost a race with 'disk' being deleted, try again.  See md.c.
>  	 */
>  retry:
> -	ret = -ENXIO;
> -	disk = bdev_get_gendisk(bdev, &partno);
> -	if (!disk)
> -		goto bdput;
> +	bdev = blkdev_get_no_open(dev);
> +	if (!bdev)
> +		return ERR_PTR(-ENXIO);
> +	disk = bdev->bd_disk;
>  
>  	if (mode & FMODE_EXCL) {
>  		WARN_ON_ONCE(!holder);
> @@ -1492,7 +1520,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  		ret = -ENOMEM;
>  		claiming = bdget_disk(disk, 0);
>  		if (!claiming)
> -			goto put_disk;
> +			goto put_blkdev;
>  		ret = bd_prepare_to_claim(bdev, claiming, holder);
>  		if (ret)
>  			goto put_claiming;
> @@ -1501,12 +1529,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	disk_block_events(disk);
>  
>  	mutex_lock(&bdev->bd_mutex);
> -	ret =__blkdev_get(bdev, disk, partno, mode);
> -	if (!(mode & FMODE_EXCL)) {
> -		; /* nothing to do here */
> -	} else if (ret) {
> -		bd_abort_claiming(bdev, claiming, holder);
> -	} else {
> +	ret =__blkdev_get(bdev, mode);
> +	if (ret)
> +		goto abort_claiming;
> +	if (mode & FMODE_EXCL) {
>  		bd_finish_claiming(bdev, claiming, holder);
>  
>  		/*
> @@ -1526,21 +1552,23 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  
>  	if (unblock_events)
>  		disk_unblock_events(disk);
> +	if (mode & FMODE_EXCL)
> +		bdput(claiming);
> +	return bdev;
>  
> +abort_claiming:
> +	if (mode & FMODE_EXCL)
> +		bd_abort_claiming(bdev, claiming, holder);
> +	mutex_unlock(&bdev->bd_mutex);
> +	disk_unblock_events(disk);
>  put_claiming:
>  	if (mode & FMODE_EXCL)
>  		bdput(claiming);
> -put_disk:
> -	if (ret)
> -		put_disk_and_module(disk);
> +put_blkdev:
> +	blkdev_put_no_open(bdev);
>  	if (ret == -ERESTARTSYS)
>  		goto retry;
> -bdput:
> -	if (ret) {
> -		bdput(bdev);
> -		return ERR_PTR(ret);
> -	}
> -	return bdev;
> +	return ERR_PTR(ret);
>  }
>  EXPORT_SYMBOL(blkdev_get_by_dev);
>  
> @@ -1641,7 +1669,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
>  
>  		disk_put_part(bdev->bd_part);
>  		bdev->bd_part = NULL;
> -		bdev->bd_disk = NULL;
>  		if (bdev_is_partition(bdev))
>  			victim = bdev->bd_contains;
>  		bdev->bd_contains = NULL;
> @@ -1699,12 +1726,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
>  	 * from userland - e.g. eject(1).
>  	 */
>  	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
> -
>  	mutex_unlock(&bdev->bd_mutex);
>  
>  	__blkdev_put(bdev, mode, 0);
> -	bdput(bdev);
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  }
>  EXPORT_SYMBOL(blkdev_put);
>  
> diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
> index c8fc9792ac776d..b9f3c246c3c908 100644
> --- a/include/linux/blk-cgroup.h
> +++ b/include/linux/blk-cgroup.h
> @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
>  u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
>  
>  struct blkg_conf_ctx {
> -	struct gendisk			*disk;
> +	struct block_device		*bdev;
>  	struct blkcg_gq			*blkg;
>  	char				*body;
>  };
>  
> -struct gendisk *blkcg_conf_get_disk(char **inputp);
> +struct block_device *blkcg_conf_open_bdev(char **inputp);
>  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		   char *input, struct blkg_conf_ctx *ctx);
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index bdd7339bcda462..5d48b92f5e4348 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1994,6 +1994,12 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
>  		void *holder);
>  void blkdev_put(struct block_device *bdev, fmode_t mode);
>  
> +/* just for blk-cgroup, don't use elsewhere */
> +struct block_device *blkdev_get_no_open(dev_t dev);
> +void blkdev_put_no_open(struct block_device *bdev);
> +
> +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
> +void bdev_add(struct block_device *bdev, dev_t dev);
>  struct block_device *I_BDEV(struct inode *inode);
>  struct block_device *bdget_part(struct hd_struct *part);
>  struct block_device *bdgrab(struct block_device *bdev);
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index ca5e356084c353..42a51653c7303e 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -65,6 +65,7 @@ struct hd_struct {
>  	struct disk_stats __percpu *dkstats;
>  	struct percpu_ref ref;
>  
> +	struct block_device *bdev;
>  	struct device __dev;
>  	struct kobject *holder_dir;
>  	int policy, partno;
> @@ -193,7 +194,6 @@ struct gendisk {
>  	int flags;
>  	unsigned long state;
>  #define GD_NEED_PART_SCAN		0
> -	struct rw_semaphore lookup_sem;
>  	struct kobject *slave_dir;
>  
>  	struct timer_rand_state *random;
> @@ -300,7 +300,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
>  }
>  
>  extern void del_gendisk(struct gendisk *gp);
> -extern struct gendisk *get_gendisk(dev_t dev, int *partno);
>  extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
>  
>  extern void set_disk_ro(struct gendisk *disk, int flag);
> @@ -338,7 +337,6 @@ int blk_drop_partitions(struct block_device *bdev);
>  
>  extern struct gendisk *__alloc_disk_node(int minors, int node_id);
>  extern void put_disk(struct gendisk *disk);
> -extern void put_disk_and_module(struct gendisk *disk);
>  
>  #define alloc_disk_node(minors, node_id)				\
>  ({									\
> @@ -388,7 +386,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
>  }
>  #endif /* CONFIG_SYSFS */
>  
> +extern struct rw_semaphore bdev_lookup_sem;
> +
>  dev_t blk_lookup_devt(const char *name, int partno);
> +void blk_request_module(dev_t devt);
>  #ifdef CONFIG_BLOCK
>  void printk_all_partitions(void);
>  #else /* CONFIG_BLOCK */
> -- 
> 2.29.2
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

______________________________________________________
Linux MTD discussion mailing list
http://lists.infradead.org/mailman/listinfo/linux-mtd/

WARNING: multiple messages have this Message-ID (diff)
From: Jan Kara <jack@suse.cz>
To: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
	Mike Snitzer <snitzer@redhat.com>,
	linux-mm@kvack.org,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Jan Kara <jack@suse.com>, Josef Bacik <josef@toxicpanda.com>,
	Coly Li <colyli@suse.de>,
	linux-block@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	dm-devel@redhat.com, linux-mtd@lists.infradead.org,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>,
	Tejun Heo <tj@kernel.org>,
	linux-bcache@vger.kernel.org
Subject: Re: [dm-devel] [PATCH 25/45] block: simplify bdev/disk lookup in blkdev_get
Date: Mon, 30 Nov 2020 10:24:28 +0100	[thread overview]
Message-ID: <20201130092428.GC11250@quack2.suse.cz> (raw)
In-Reply-To: <20201128161510.347752-26-hch@lst.de>

On Sat 28-11-20 17:14:50, Christoph Hellwig wrote:
> To simplify block device lookup and a few other upcoming areas, make sure
> that we always have a struct block_device available for each disk and
> each partition, and only find existing block devices in bdget.  The only
> downside of this is that each device and partition uses a little more
> memory.  The upside will be that a lot of code can be simplified.
> 
> With that all we need to look up the block device is to look up the inode
> and do a few sanity checks on the gendisk, instead of the separate lookup
> for the gendisk.  For blk-cgroup which wants to access a gendisk without
> opening it, a new blkdev_{get,put}_no_open low-level interface is added
> to replace the previous get_gendisk use.
> 
> Note that the change to look up block device directly instead of the two
> step lookup using struct gendisk causes a subtle change in behavior:
> accessing a non-existing partition on an existing block device can now
> cause a call to request_module.  That call is harmless, and in practice
> no recent system will access these nodes as they aren't created by udev
> and static /dev/ setups are unusual.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good to me! You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

									Honza

> ---
>  block/blk-cgroup.c         |  42 ++++----
>  block/blk-iocost.c         |  36 +++----
>  block/blk.h                |   2 +-
>  block/genhd.c              | 210 +++++--------------------------------
>  block/partitions/core.c    |  29 ++---
>  fs/block_dev.c             | 177 +++++++++++++++++--------------
>  include/linux/blk-cgroup.h |   4 +-
>  include/linux/blkdev.h     |   6 ++
>  include/linux/genhd.h      |   7 +-
>  9 files changed, 194 insertions(+), 319 deletions(-)
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 54fbe1e80cc41a..19650eb42b9f00 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -556,22 +556,22 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
>  }
>  
>  /**
> - * blkg_conf_prep - parse and prepare for per-blkg config update
> + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
>   * @inputp: input string pointer
>   *
>   * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
> - * from @input and get and return the matching gendisk.  *@inputp is
> + * from @input and get and return the matching bdev.  *@inputp is
>   * updated to point past the device node prefix.  Returns an ERR_PTR()
>   * value on error.
>   *
>   * Use this function iff blkg_conf_prep() can't be used for some reason.
>   */
> -struct gendisk *blkcg_conf_get_disk(char **inputp)
> +struct block_device *blkcg_conf_open_bdev(char **inputp)
>  {
>  	char *input = *inputp;
>  	unsigned int major, minor;
> -	struct gendisk *disk;
> -	int key_len, part;
> +	struct block_device *bdev;
> +	int key_len;
>  
>  	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
>  		return ERR_PTR(-EINVAL);
> @@ -581,16 +581,16 @@ struct gendisk *blkcg_conf_get_disk(char **inputp)
>  		return ERR_PTR(-EINVAL);
>  	input = skip_spaces(input);
>  
> -	disk = get_gendisk(MKDEV(major, minor), &part);
> -	if (!disk)
> +	bdev = blkdev_get_no_open(MKDEV(major, minor));
> +	if (!bdev)
>  		return ERR_PTR(-ENODEV);
> -	if (part) {
> -		put_disk_and_module(disk);
> +	if (bdev_is_partition(bdev)) {
> +		blkdev_put_no_open(bdev);
>  		return ERR_PTR(-ENODEV);
>  	}
>  
>  	*inputp = input;
> -	return disk;
> +	return bdev;
>  }
>  
>  /**
> @@ -607,18 +607,18 @@ struct gendisk *blkcg_conf_get_disk(char **inputp)
>   */
>  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		   char *input, struct blkg_conf_ctx *ctx)
> -	__acquires(rcu) __acquires(&disk->queue->queue_lock)
> +	__acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct request_queue *q;
>  	struct blkcg_gq *blkg;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	q = disk->queue;
> +	q = bdev->bd_disk->queue;
>  
>  	rcu_read_lock();
>  	spin_lock_irq(&q->queue_lock);
> @@ -689,7 +689,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  			goto success;
>  	}
>  success:
> -	ctx->disk = disk;
> +	ctx->bdev = bdev;
>  	ctx->blkg = blkg;
>  	ctx->body = input;
>  	return 0;
> @@ -700,7 +700,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  	spin_unlock_irq(&q->queue_lock);
>  	rcu_read_unlock();
>  fail:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	/*
>  	 * If queue was bypassing, we should retry.  Do so after a
>  	 * short msleep().  It isn't strictly necessary but queue
> @@ -723,11 +723,11 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
>   * with blkg_conf_prep().
>   */
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx)
> -	__releases(&ctx->disk->queue->queue_lock) __releases(rcu)
> +	__releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu)
>  {
> -	spin_unlock_irq(&ctx->disk->queue->queue_lock);
> +	spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock);
>  	rcu_read_unlock();
> -	put_disk_and_module(ctx->disk);
> +	blkdev_put_no_open(ctx->bdev);
>  }
>  EXPORT_SYMBOL_GPL(blkg_conf_finish);
>  
> diff --git a/block/blk-iocost.c b/block/blk-iocost.c
> index bbe86d1199dc5b..8e20fe4bddecf7 100644
> --- a/block/blk-iocost.c
> +++ b/block/blk-iocost.c
> @@ -3120,23 +3120,23 @@ static const match_table_t qos_tokens = {
>  static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>  			     size_t nbytes, loff_t off)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct ioc *ioc;
>  	u32 qos[NR_QOS_PARAMS];
>  	bool enable, user;
>  	char *p;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	ioc = q_to_ioc(disk->queue);
> +	ioc = q_to_ioc(bdev->bd_disk->queue);
>  	if (!ioc) {
> -		ret = blk_iocost_init(disk->queue);
> +		ret = blk_iocost_init(bdev->bd_disk->queue);
>  		if (ret)
>  			goto err;
> -		ioc = q_to_ioc(disk->queue);
> +		ioc = q_to_ioc(bdev->bd_disk->queue);
>  	}
>  
>  	spin_lock_irq(&ioc->lock);
> @@ -3231,12 +3231,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
>  	ioc_refresh_params(ioc, true);
>  	spin_unlock_irq(&ioc->lock);
>  
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return nbytes;
>  einval:
>  	ret = -EINVAL;
>  err:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return ret;
>  }
>  
> @@ -3287,23 +3287,23 @@ static const match_table_t i_lcoef_tokens = {
>  static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
>  				    size_t nbytes, loff_t off)
>  {
> -	struct gendisk *disk;
> +	struct block_device *bdev;
>  	struct ioc *ioc;
>  	u64 u[NR_I_LCOEFS];
>  	bool user;
>  	char *p;
>  	int ret;
>  
> -	disk = blkcg_conf_get_disk(&input);
> -	if (IS_ERR(disk))
> -		return PTR_ERR(disk);
> +	bdev = blkcg_conf_open_bdev(&input);
> +	if (IS_ERR(bdev))
> +		return PTR_ERR(bdev);
>  
> -	ioc = q_to_ioc(disk->queue);
> +	ioc = q_to_ioc(bdev->bd_disk->queue);
>  	if (!ioc) {
> -		ret = blk_iocost_init(disk->queue);
> +		ret = blk_iocost_init(bdev->bd_disk->queue);
>  		if (ret)
>  			goto err;
> -		ioc = q_to_ioc(disk->queue);
> +		ioc = q_to_ioc(bdev->bd_disk->queue);
>  	}
>  
>  	spin_lock_irq(&ioc->lock);
> @@ -3356,13 +3356,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
>  	ioc_refresh_params(ioc, true);
>  	spin_unlock_irq(&ioc->lock);
>  
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return nbytes;
>  
>  einval:
>  	ret = -EINVAL;
>  err:
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  	return ret;
>  }
>  
> diff --git a/block/blk.h b/block/blk.h
> index dfab98465db9a5..c4839abcfa27eb 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -352,7 +352,6 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector);
>  
>  int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
>  void blk_free_devt(dev_t devt);
> -void blk_invalidate_devt(dev_t devt);
>  char *disk_name(struct gendisk *hd, int partno, char *buf);
>  #define ADDPART_FLAG_NONE	0
>  #define ADDPART_FLAG_RAID	1
> @@ -384,6 +383,7 @@ static inline void hd_free_part(struct hd_struct *part)
>  {
>  	free_percpu(part->dkstats);
>  	kfree(part->info);
> +	bdput(part->bdev);
>  	percpu_ref_exit(&part->ref);
>  }
>  
> diff --git a/block/genhd.c b/block/genhd.c
> index f46e89226fdf91..bf8fa82f135f4e 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -27,17 +27,11 @@
>  
>  static struct kobject *block_depr;
>  
> -static DEFINE_XARRAY(bdev_map);
> -static DEFINE_MUTEX(bdev_map_lock);
> +DECLARE_RWSEM(bdev_lookup_sem);
>  
>  /* for extended dynamic devt allocation, currently only one major is used */
>  #define NR_EXT_DEVT		(1 << MINORBITS)
> -
> -/* For extended devt allocation.  ext_devt_lock prevents look up
> - * results from going away underneath its user.
> - */
> -static DEFINE_SPINLOCK(ext_devt_lock);
> -static DEFINE_IDR(ext_devt_idr);
> +static DEFINE_IDA(ext_devt_ida);
>  
>  static void disk_check_events(struct disk_events *ev,
>  			      unsigned int *clearing_ptr);
> @@ -580,14 +574,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
>  		return 0;
>  	}
>  
> -	/* allocate ext devt */
> -	idr_preload(GFP_KERNEL);
> -
> -	spin_lock_bh(&ext_devt_lock);
> -	idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT);
> -	spin_unlock_bh(&ext_devt_lock);
> -
> -	idr_preload_end();
> +	idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL);
>  	if (idx < 0)
>  		return idx == -ENOSPC ? -EBUSY : idx;
>  
> @@ -606,26 +593,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
>   */
>  void blk_free_devt(dev_t devt)
>  {
> -	if (devt == MKDEV(0, 0))
> -		return;
> -
> -	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
> -		spin_lock_bh(&ext_devt_lock);
> -		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> -}
> -
> -/*
> - * We invalidate devt by assigning NULL pointer for devt in idr.
> - */
> -void blk_invalidate_devt(dev_t devt)
> -{
> -	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
> -		spin_lock_bh(&ext_devt_lock);
> -		idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt)));
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> +	if (MAJOR(devt) == BLOCK_EXT_MAJOR)
> +		ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt)));
>  }
>  
>  static char *bdevt_str(dev_t devt, char *buf)
> @@ -640,28 +609,6 @@ static char *bdevt_str(dev_t devt, char *buf)
>  	return buf;
>  }
>  
> -static void blk_register_region(struct gendisk *disk)
> -{
> -	int i;
> -
> -	mutex_lock(&bdev_map_lock);
> -	for (i = 0; i < disk->minors; i++) {
> -		if (xa_insert(&bdev_map, disk_devt(disk) + i, disk, GFP_KERNEL))
> -			WARN_ON_ONCE(1);
> -	}
> -	mutex_unlock(&bdev_map_lock);
> -}
> -
> -static void blk_unregister_region(struct gendisk *disk)
> -{
> -	int i;
> -
> -	mutex_lock(&bdev_map_lock);
> -	for (i = 0; i < disk->minors; i++)
> -		xa_erase(&bdev_map, disk_devt(disk) + i);
> -	mutex_unlock(&bdev_map_lock);
> -}
> -
>  static void disk_scan_partitions(struct gendisk *disk)
>  {
>  	struct block_device *bdev;
> @@ -805,7 +752,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
>  		WARN_ON(ret);
>  		bdi_set_owner(bdi, dev);
> -		blk_register_region(disk);
> +		bdev_add(disk->part0.bdev, devt);
>  	}
>  	register_disk(parent, disk, groups);
>  	if (register_queue)
> @@ -847,8 +794,8 @@ static void invalidate_partition(struct gendisk *disk, int partno)
>  	__invalidate_device(bdev, true);
>  
>  	/*
> -	 * Unhash the bdev inode for this device so that it gets evicted as soon
> -	 * as last inode reference is dropped.
> +	 * Unhash the bdev inode for this device so that it can't be looked
> +	 * up any more even if openers still hold references to it.
>  	 */
>  	remove_inode_hash(bdev->bd_inode);
>  	bdput(bdev);
> @@ -890,7 +837,8 @@ void del_gendisk(struct gendisk *disk)
>  	 * Block lookups of the disk until all bdevs are unhashed and the
>  	 * disk is marked as dead (GENHD_FL_UP cleared).
>  	 */
> -	down_write(&disk->lookup_sem);
> +	down_write(&bdev_lookup_sem);
> +
>  	/* invalidate stuff */
>  	disk_part_iter_init(&piter, disk,
>  			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
> @@ -903,7 +851,7 @@ void del_gendisk(struct gendisk *disk)
>  	invalidate_partition(disk, 0);
>  	set_capacity(disk, 0);
>  	disk->flags &= ~GENHD_FL_UP;
> -	up_write(&disk->lookup_sem);
> +	up_write(&bdev_lookup_sem);
>  
>  	if (!(disk->flags & GENHD_FL_HIDDEN)) {
>  		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
> @@ -916,16 +864,6 @@ void del_gendisk(struct gendisk *disk)
>  	}
>  
>  	blk_unregister_queue(disk);
> -	
> -	if (!(disk->flags & GENHD_FL_HIDDEN))
> -		blk_unregister_region(disk);
> -	/*
> -	 * Remove gendisk pointer from idr so that it cannot be looked up
> -	 * while RCU period before freeing gendisk is running to prevent
> -	 * use-after-free issues. Note that the device number stays
> -	 * "in-use" until we really free the gendisk.
> -	 */
> -	blk_invalidate_devt(disk_devt(disk));
>  
>  	kobject_put(disk->part0.holder_dir);
>  	kobject_put(disk->slave_dir);
> @@ -964,7 +902,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
>  	return badblocks_store(disk->bb, page, len, 0);
>  }
>  
> -static void request_gendisk_module(dev_t devt)
> +void blk_request_module(dev_t devt)
>  {
>  	unsigned int major = MAJOR(devt);
>  	struct blk_major_name **n;
> @@ -984,84 +922,6 @@ static void request_gendisk_module(dev_t devt)
>  		request_module("block-major-%d", MAJOR(devt));
>  }
>  
> -static bool get_disk_and_module(struct gendisk *disk)
> -{
> -	struct module *owner;
> -
> -	if (!disk->fops)
> -		return false;
> -	owner = disk->fops->owner;
> -	if (owner && !try_module_get(owner))
> -		return false;
> -	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) {
> -		module_put(owner);
> -		return false;
> -	}
> -	return true;
> -
> -}
> -
> -/**
> - * get_gendisk - get partitioning information for a given device
> - * @devt: device to get partitioning information for
> - * @partno: returned partition index
> - *
> - * This function gets the structure containing partitioning
> - * information for the given device @devt.
> - *
> - * Context: can sleep
> - */
> -struct gendisk *get_gendisk(dev_t devt, int *partno)
> -{
> -	struct gendisk *disk = NULL;
> -
> -	might_sleep();
> -
> -	if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
> -		mutex_lock(&bdev_map_lock);
> -		disk = xa_load(&bdev_map, devt);
> -		if (!disk) {
> -			mutex_unlock(&bdev_map_lock);
> -			request_gendisk_module(devt);
> -			mutex_lock(&bdev_map_lock);
> -			disk = xa_load(&bdev_map, devt);
> -		}
> -		if (disk && !get_disk_and_module(disk))
> -			disk = NULL;
> -		if (disk)
> -			*partno = devt - disk_devt(disk);
> -		mutex_unlock(&bdev_map_lock);
> -	} else {
> -		struct hd_struct *part;
> -
> -		spin_lock_bh(&ext_devt_lock);
> -		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
> -		if (part && get_disk_and_module(part_to_disk(part))) {
> -			*partno = part->partno;
> -			disk = part_to_disk(part);
> -		}
> -		spin_unlock_bh(&ext_devt_lock);
> -	}
> -
> -	if (!disk)
> -		return NULL;
> -
> -	/*
> -	 * Synchronize with del_gendisk() to not return disk that is being
> -	 * destroyed.
> -	 */
> -	down_read(&disk->lookup_sem);
> -	if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
> -		     !(disk->flags & GENHD_FL_UP))) {
> -		up_read(&disk->lookup_sem);
> -		put_disk_and_module(disk);
> -		disk = NULL;
> -	} else {
> -		up_read(&disk->lookup_sem);
> -	}
> -	return disk;
> -}
> -
>  /**
>   * bdget_disk - do bdget() by gendisk and partition number
>   * @disk: gendisk of interest
> @@ -1559,11 +1419,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
>   *
>   * This function releases all allocated resources of the gendisk.
>   *
> - * The struct gendisk refcount is incremented with get_gendisk() or
> - * get_disk_and_module(), and its refcount is decremented with
> - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this
> - * function is called.
> - *
>   * Drivers which used __device_add_disk() have a gendisk with a request_queue
>   * assigned. Since the request_queue sits on top of the gendisk for these
>   * drivers we also call blk_put_queue() for them, and we expect the
> @@ -1748,16 +1603,17 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	if (!disk)
>  		return NULL;
>  
> +	disk->part0.bdev = bdev_alloc(disk, 0);
> +	if (!disk->part0.bdev)
> +		goto out_free_disk;
> +
>  	disk->part0.dkstats = alloc_percpu(struct disk_stats);
>  	if (!disk->part0.dkstats)
> -		goto out_free_disk;
> +		goto out_bdput;
>  
> -	init_rwsem(&disk->lookup_sem);
>  	disk->node_id = node_id;
> -	if (disk_expand_part_tbl(disk, 0)) {
> -		free_percpu(disk->part0.dkstats);
> -		goto out_free_disk;
> -	}
> +	if (disk_expand_part_tbl(disk, 0))
> +		goto out_free_bdstats;
>  
>  	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
>  	rcu_assign_pointer(ptbl->part[0], &disk->part0);
> @@ -1773,7 +1629,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	 */
>  	hd_sects_seq_init(&disk->part0);
>  	if (hd_ref_init(&disk->part0))
> -		goto out_free_part0;
> +		goto out_free_bdstats;
>  
>  	disk->minors = minors;
>  	rand_initialize_disk(disk);
> @@ -1782,8 +1638,10 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	device_initialize(disk_to_dev(disk));
>  	return disk;
>  
> -out_free_part0:
> -	hd_free_part(&disk->part0);
> +out_free_bdstats:
> +	free_percpu(disk->part0.dkstats);
> +out_bdput:
> +	bdput(disk->part0.bdev);
>  out_free_disk:
>  	kfree(disk);
>  	return NULL;
> @@ -1807,26 +1665,6 @@ void put_disk(struct gendisk *disk)
>  }
>  EXPORT_SYMBOL(put_disk);
>  
> -/**
> - * put_disk_and_module - decrements the module and gendisk refcount
> - * @disk: the struct gendisk to decrement the refcount for
> - *
> - * This is a counterpart of get_disk_and_module() and thus also of
> - * get_gendisk().
> - *
> - * Context: Any context, but the last reference must not be dropped from
> - *          atomic context.
> - */
> -void put_disk_and_module(struct gendisk *disk)
> -{
> -	if (disk) {
> -		struct module *owner = disk->fops->owner;
> -
> -		put_disk(disk);
> -		module_put(owner);
> -	}
> -}
> -
>  static void set_disk_ro_uevent(struct gendisk *gd, int ro)
>  {
>  	char event[] = "DISK_RO=1";
> diff --git a/block/partitions/core.c b/block/partitions/core.c
> index a02e224115943d..696bd9ff63c64a 100644
> --- a/block/partitions/core.c
> +++ b/block/partitions/core.c
> @@ -340,12 +340,11 @@ void delete_partition(struct hd_struct *part)
>  	device_del(part_to_dev(part));
>  
>  	/*
> -	 * Remove gendisk pointer from idr so that it cannot be looked up
> -	 * while RCU period before freeing gendisk is running to prevent
> -	 * use-after-free issues. Note that the device number stays
> -	 * "in-use" until we really free the gendisk.
> +	 * Remove the block device from the inode hash, so that it cannot be
> +	 * looked up any more even when openers still hold references.
>  	 */
> -	blk_invalidate_devt(part_devt(part));
> +	remove_inode_hash(part->bdev->bd_inode);
> +
>  	percpu_ref_kill(&part->ref);
>  }
>  
> @@ -368,6 +367,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	dev_t devt = MKDEV(0, 0);
>  	struct device *ddev = disk_to_dev(disk);
>  	struct device *pdev;
> +	struct block_device *bdev;
>  	struct disk_part_tbl *ptbl;
>  	const char *dname;
>  	int err;
> @@ -402,11 +402,15 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	if (!p)
>  		return ERR_PTR(-EBUSY);
>  
> +	err = -ENOMEM;
>  	p->dkstats = alloc_percpu(struct disk_stats);
> -	if (!p->dkstats) {
> -		err = -ENOMEM;
> +	if (!p->dkstats)
>  		goto out_free;
> -	}
> +
> +	bdev = bdev_alloc(disk, partno);
> +	if (!bdev)
> +		goto out_free_stats;
> +	p->bdev = bdev;
>  
>  	hd_sects_seq_init(p);
>  	pdev = part_to_dev(p);
> @@ -420,10 +424,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  		struct partition_meta_info *pinfo;
>  
>  		pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id);
> -		if (!pinfo) {
> -			err = -ENOMEM;
> -			goto out_free_stats;
> -		}
> +		if (!pinfo)
> +			goto out_bdput;
>  		memcpy(pinfo, info, sizeof(*info));
>  		p->info = pinfo;
>  	}
> @@ -470,6 +472,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	}
>  
>  	/* everything is up and running, commence */
> +	bdev_add(bdev, devt);
>  	rcu_assign_pointer(ptbl->part[partno], p);
>  
>  	/* suppress uevent if the disk suppresses it */
> @@ -479,6 +482,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  
>  out_free_info:
>  	kfree(p->info);
> +out_bdput:
> +	bdput(bdev);
>  out_free_stats:
>  	free_percpu(p->dkstats);
>  out_free:
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 6d6e4d50834cd0..b350ed3af83bad 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -863,31 +863,46 @@ void __init bdev_cache_init(void)
>  	blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
>  }
>  
> -static struct block_device *bdget(dev_t dev)
> +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>  {
>  	struct block_device *bdev;
>  	struct inode *inode;
>  
> -	inode = iget_locked(blockdev_superblock, dev);
> +	inode = new_inode(blockdev_superblock);
>  	if (!inode)
>  		return NULL;
> +	inode->i_mode = S_IFBLK;
> +	inode->i_rdev = 0;
> +	inode->i_data.a_ops = &def_blk_aops;
> +	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
> +
> +	bdev = I_BDEV(inode);
> +	spin_lock_init(&bdev->bd_size_lock);
> +	bdev->bd_disk = disk;
> +	bdev->bd_partno = partno;
> +	bdev->bd_contains = NULL;
> +	bdev->bd_super = NULL;
> +	bdev->bd_inode = inode;
> +	bdev->bd_part_count = 0;
> +	return bdev;
> +}
>  
> -	bdev = &BDEV_I(inode)->bdev;
> +void bdev_add(struct block_device *bdev, dev_t dev)
> +{
> +	bdev->bd_dev = dev;
> +	bdev->bd_inode->i_rdev = dev;
> +	bdev->bd_inode->i_ino = dev;
> +	insert_inode_hash(bdev->bd_inode);
> +}
>  
> -	if (inode->i_state & I_NEW) {
> -		spin_lock_init(&bdev->bd_size_lock);
> -		bdev->bd_contains = NULL;
> -		bdev->bd_super = NULL;
> -		bdev->bd_inode = inode;
> -		bdev->bd_part_count = 0;
> -		bdev->bd_dev = dev;
> -		inode->i_mode = S_IFBLK;
> -		inode->i_rdev = dev;
> -		inode->i_data.a_ops = &def_blk_aops;
> -		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
> -		unlock_new_inode(inode);
> -	}
> -	return bdev;
> +static struct block_device *bdget(dev_t dev)
> +{
> +	struct inode *inode;
> +
> +	inode = ilookup(blockdev_superblock, dev);
> +	if (!inode)
> +		return NULL;
> +	return &BDEV_I(inode)->bdev;
>  }
>  
>  /**
> @@ -1004,27 +1019,6 @@ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole,
>  }
>  EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
>  
> -static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
> -{
> -	struct gendisk *disk = get_gendisk(bdev->bd_dev, partno);
> -
> -	if (!disk)
> -		return NULL;
> -	/*
> -	 * Now that we hold gendisk reference we make sure bdev we looked up is
> -	 * not stale. If it is, it means device got removed and created before
> -	 * we looked up gendisk and we fail open in such case. Associating
> -	 * unhashed bdev with newly created gendisk could lead to two bdevs
> -	 * (and thus two independent caches) being associated with one device
> -	 * which is bad.
> -	 */
> -	if (inode_unhashed(bdev->bd_inode)) {
> -		put_disk_and_module(disk);
> -		return NULL;
> -	}
> -	return disk;
> -}
> -
>  static void bd_clear_claiming(struct block_device *whole, void *holder)
>  {
>  	lockdep_assert_held(&bdev_lock);
> @@ -1347,19 +1341,17 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
>   *  mutex_lock(part->bd_mutex)
>   *    mutex_lock_nested(whole->bd_mutex, 1)
>   */
> -static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
> -		int partno, fmode_t mode)
> +static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  {
> +	struct gendisk *disk = bdev->bd_disk;
>  	int ret;
>  
>  	if (!bdev->bd_openers) {
> -		bdev->bd_disk = disk;
>  		bdev->bd_contains = bdev;
> -		bdev->bd_partno = partno;
>  
> -		if (!partno) {
> +		if (!bdev->bd_partno) {
>  			ret = -ENXIO;
> -			bdev->bd_part = disk_get_part(disk, partno);
> +			bdev->bd_part = disk_get_part(disk, 0);
>  			if (!bdev->bd_part)
>  				goto out_clear;
>  
> @@ -1388,7 +1380,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  			struct block_device *whole = bdget_disk(disk, 0);
>  
>  			mutex_lock_nested(&whole->bd_mutex, 1);
> -			ret = __blkdev_get(whole, disk, 0, mode);
> +			ret = __blkdev_get(whole, mode);
>  			if (ret) {
>  				mutex_unlock(&whole->bd_mutex);
>  				bdput(whole);
> @@ -1398,7 +1390,7 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  			mutex_unlock(&whole->bd_mutex);
>  
>  			bdev->bd_contains = whole;
> -			bdev->bd_part = disk_get_part(disk, partno);
> +			bdev->bd_part = disk_get_part(disk, bdev->bd_partno);
>  			if (!(disk->flags & GENHD_FL_UP) ||
>  			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
>  				__blkdev_put(whole, mode, 1);
> @@ -1430,12 +1422,53 @@ static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
>  
>   out_clear:
>  	disk_put_part(bdev->bd_part);
> -	bdev->bd_disk = NULL;
>  	bdev->bd_part = NULL;
>  	bdev->bd_contains = NULL;
>  	return ret;
>  }
>  
> +struct block_device *blkdev_get_no_open(dev_t dev)
> +{
> +	struct block_device *bdev;
> +	struct gendisk *disk;
> +
> +	down_read(&bdev_lookup_sem);
> +	bdev = bdget(dev);
> +	if (!bdev) {
> +		up_read(&bdev_lookup_sem);
> +		blk_request_module(dev);
> +		down_read(&bdev_lookup_sem);
> +
> +		bdev = bdget(dev);
> +		if (!bdev)
> +			goto unlock;
> +	}
> +
> +	disk = bdev->bd_disk;
> +	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
> +		goto bdput;
> +	if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP)
> +		goto put_disk;
> +	if (!try_module_get(bdev->bd_disk->fops->owner))
> +		goto put_disk;
> +	up_read(&bdev_lookup_sem);
> +	return bdev;
> +put_disk:
> +	put_disk(disk);
> +bdput:
> +	bdput(bdev);
> +unlock:
> +	up_read(&bdev_lookup_sem);
> +	return NULL;
> +}
> +
> +void blkdev_put_no_open(struct block_device *bdev)
> +{
> +	module_put(bdev->bd_disk->fops->owner);
> +	put_disk(bdev->bd_disk);
> +	bdput(bdev);
> +}
> +
>  /**
>   * blkdev_get_by_dev - open a block device by device number
>   * @dev: device number of block device to open
> @@ -1463,7 +1496,6 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	bool unblock_events = true;
>  	struct block_device *bdev;
>  	struct gendisk *disk;
> -	int partno;
>  	int ret;
>  
>  	ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
> @@ -1473,18 +1505,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	bdev = bdget(dev);
> -	if (!bdev)
> -		return ERR_PTR(-ENOMEM);
> -
>  	/*
>  	 * If we lost a race with 'disk' being deleted, try again.  See md.c.
>  	 */
>  retry:
> -	ret = -ENXIO;
> -	disk = bdev_get_gendisk(bdev, &partno);
> -	if (!disk)
> -		goto bdput;
> +	bdev = blkdev_get_no_open(dev);
> +	if (!bdev)
> +		return ERR_PTR(-ENXIO);
> +	disk = bdev->bd_disk;
>  
>  	if (mode & FMODE_EXCL) {
>  		WARN_ON_ONCE(!holder);
> @@ -1492,7 +1520,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  		ret = -ENOMEM;
>  		claiming = bdget_disk(disk, 0);
>  		if (!claiming)
> -			goto put_disk;
> +			goto put_blkdev;
>  		ret = bd_prepare_to_claim(bdev, claiming, holder);
>  		if (ret)
>  			goto put_claiming;
> @@ -1501,12 +1529,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  	disk_block_events(disk);
>  
>  	mutex_lock(&bdev->bd_mutex);
> -	ret =__blkdev_get(bdev, disk, partno, mode);
> -	if (!(mode & FMODE_EXCL)) {
> -		; /* nothing to do here */
> -	} else if (ret) {
> -		bd_abort_claiming(bdev, claiming, holder);
> -	} else {
> +	ret =__blkdev_get(bdev, mode);
> +	if (ret)
> +		goto abort_claiming;
> +	if (mode & FMODE_EXCL) {
>  		bd_finish_claiming(bdev, claiming, holder);
>  
>  		/*
> @@ -1526,21 +1552,23 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
>  
>  	if (unblock_events)
>  		disk_unblock_events(disk);
> +	if (mode & FMODE_EXCL)
> +		bdput(claiming);
> +	return bdev;
>  
> +abort_claiming:
> +	if (mode & FMODE_EXCL)
> +		bd_abort_claiming(bdev, claiming, holder);
> +	mutex_unlock(&bdev->bd_mutex);
> +	disk_unblock_events(disk);
>  put_claiming:
>  	if (mode & FMODE_EXCL)
>  		bdput(claiming);
> -put_disk:
> -	if (ret)
> -		put_disk_and_module(disk);
> +put_blkdev:
> +	blkdev_put_no_open(bdev);
>  	if (ret == -ERESTARTSYS)
>  		goto retry;
> -bdput:
> -	if (ret) {
> -		bdput(bdev);
> -		return ERR_PTR(ret);
> -	}
> -	return bdev;
> +	return ERR_PTR(ret);
>  }
>  EXPORT_SYMBOL(blkdev_get_by_dev);
>  
> @@ -1641,7 +1669,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
>  
>  		disk_put_part(bdev->bd_part);
>  		bdev->bd_part = NULL;
> -		bdev->bd_disk = NULL;
>  		if (bdev_is_partition(bdev))
>  			victim = bdev->bd_contains;
>  		bdev->bd_contains = NULL;
> @@ -1699,12 +1726,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
>  	 * from userland - e.g. eject(1).
>  	 */
>  	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
> -
>  	mutex_unlock(&bdev->bd_mutex);
>  
>  	__blkdev_put(bdev, mode, 0);
> -	bdput(bdev);
> -	put_disk_and_module(disk);
> +	blkdev_put_no_open(bdev);
>  }
>  EXPORT_SYMBOL(blkdev_put);
>  
> diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
> index c8fc9792ac776d..b9f3c246c3c908 100644
> --- a/include/linux/blk-cgroup.h
> +++ b/include/linux/blk-cgroup.h
> @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
>  u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
>  
>  struct blkg_conf_ctx {
> -	struct gendisk			*disk;
> +	struct block_device		*bdev;
>  	struct blkcg_gq			*blkg;
>  	char				*body;
>  };
>  
> -struct gendisk *blkcg_conf_get_disk(char **inputp);
> +struct block_device *blkcg_conf_open_bdev(char **inputp);
>  int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
>  		   char *input, struct blkg_conf_ctx *ctx);
>  void blkg_conf_finish(struct blkg_conf_ctx *ctx);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index bdd7339bcda462..5d48b92f5e4348 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1994,6 +1994,12 @@ void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
>  		void *holder);
>  void blkdev_put(struct block_device *bdev, fmode_t mode);
>  
> +/* just for blk-cgroup, don't use elsewhere */
> +struct block_device *blkdev_get_no_open(dev_t dev);
> +void blkdev_put_no_open(struct block_device *bdev);
> +
> +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno);
> +void bdev_add(struct block_device *bdev, dev_t dev);
>  struct block_device *I_BDEV(struct inode *inode);
>  struct block_device *bdget_part(struct hd_struct *part);
>  struct block_device *bdgrab(struct block_device *bdev);
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index ca5e356084c353..42a51653c7303e 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -65,6 +65,7 @@ struct hd_struct {
>  	struct disk_stats __percpu *dkstats;
>  	struct percpu_ref ref;
>  
> +	struct block_device *bdev;
>  	struct device __dev;
>  	struct kobject *holder_dir;
>  	int policy, partno;
> @@ -193,7 +194,6 @@ struct gendisk {
>  	int flags;
>  	unsigned long state;
>  #define GD_NEED_PART_SCAN		0
> -	struct rw_semaphore lookup_sem;
>  	struct kobject *slave_dir;
>  
>  	struct timer_rand_state *random;
> @@ -300,7 +300,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
>  }
>  
>  extern void del_gendisk(struct gendisk *gp);
> -extern struct gendisk *get_gendisk(dev_t dev, int *partno);
>  extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
>  
>  extern void set_disk_ro(struct gendisk *disk, int flag);
> @@ -338,7 +337,6 @@ int blk_drop_partitions(struct block_device *bdev);
>  
>  extern struct gendisk *__alloc_disk_node(int minors, int node_id);
>  extern void put_disk(struct gendisk *disk);
> -extern void put_disk_and_module(struct gendisk *disk);
>  
>  #define alloc_disk_node(minors, node_id)				\
>  ({									\
> @@ -388,7 +386,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
>  }
>  #endif /* CONFIG_SYSFS */
>  
> +extern struct rw_semaphore bdev_lookup_sem;
> +
>  dev_t blk_lookup_devt(const char *name, int partno);
> +void blk_request_module(dev_t devt);
>  #ifdef CONFIG_BLOCK
>  void printk_all_partitions(void);
>  #else /* CONFIG_BLOCK */
> -- 
> 2.29.2
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel


  parent reply	other threads:[~2020-11-30  9:25 UTC|newest]

Thread overview: 294+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-28 16:14 merge struct block_device and struct hd_struct v4 Christoph Hellwig
2020-11-28 16:14 ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14 ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 01/45] blk-cgroup: fix a hd_struct leak in blkcg_fill_root_iostats Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 02/45] filemap: consistently use ->f_mapping over ->i_mapping Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  9:20   ` Johannes Thumshirn
2020-11-30  9:20     ` [dm-devel] " Johannes Thumshirn
2020-11-30  9:20     ` Johannes Thumshirn
2020-11-28 16:14 ` [PATCH 03/45] fs: remove get_super_thawed and get_super_exclusive_thawed Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 04/45] fs: simplify freeze_bdev/thaw_bdev Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30 17:55   ` Darrick J. Wong
2020-11-30 17:55     ` [dm-devel] " Darrick J. Wong
2020-11-30 17:55     ` Darrick J. Wong
2020-11-28 16:14 ` [PATCH 05/45] mtip32xx: remove the call to fsync_bdev on removal Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:07   ` Hannes Reinecke
2020-11-30  7:07     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:07     ` Hannes Reinecke
2020-11-30 14:48   ` Johannes Thumshirn
2020-11-30 14:48     ` [dm-devel] " Johannes Thumshirn
2020-11-30 14:48     ` Johannes Thumshirn
2020-11-28 16:14 ` [PATCH 06/45] zram: do not call set_blocksize Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 07/45] loop: " Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:07   ` Hannes Reinecke
2020-11-30  7:07     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:07     ` Hannes Reinecke
2020-11-30 11:26   ` Johannes Thumshirn
2020-11-30 11:26     ` [dm-devel] " Johannes Thumshirn
2020-11-30 11:26     ` Johannes Thumshirn
2020-11-28 16:14 ` [PATCH 08/45] dm: simplify flush_bio initialization in __send_empty_flush Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:08   ` Hannes Reinecke
2020-11-30  7:08     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:08     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 09/45] dm: remove the block_device reference in struct mapped_device Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 10/45] block: remove a duplicate __disk_get_part prototype Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 11/45] block: remove a superflous check in blkpg_do_ioctl Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  9:20   ` Johannes Thumshirn
2020-11-30  9:20     ` [dm-devel] " Johannes Thumshirn
2020-11-30  9:20     ` Johannes Thumshirn
2020-11-28 16:14 ` [PATCH 12/45] block: add a bdev_kobj helper Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 13/45] block: use disk_part_iter_exit in disk_part_iter_next Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-28 16:14 ` [PATCH 14/45] block: use put_device in put_disk Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:09   ` Hannes Reinecke
2020-11-30  7:09     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:09     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 15/45] block: change the hash used for looking up block devices Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:10   ` Hannes Reinecke
2020-11-30  7:10     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:10     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 16/45] block: switch bdgrab to use igrab Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:14   ` Hannes Reinecke
2020-11-30  7:14     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:14     ` Hannes Reinecke
2020-11-30  9:09   ` Jan Kara
2020-11-30  9:09     ` [dm-devel] " Jan Kara
2020-11-30  9:09     ` Jan Kara
2020-11-30 14:52   ` Johannes Thumshirn
2020-11-30 14:52     ` [dm-devel] " Johannes Thumshirn
2020-11-30 14:52     ` Johannes Thumshirn
2020-11-28 16:14 ` [PATCH 17/45] init: refactor name_to_dev_t Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:15   ` Hannes Reinecke
2020-11-30  7:15     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:15     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 18/45] init: refactor devt_from_partuuid Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:16   ` Hannes Reinecke
2020-11-30  7:16     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:16     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 19/45] init: cleanup match_dev_by_uuid and match_dev_by_label Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:17   ` Hannes Reinecke
2020-11-30  7:17     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:17     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 20/45] block: refactor __blkdev_put Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:17   ` Hannes Reinecke
2020-11-30  7:17     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:17     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 21/45] block: refactor blkdev_get Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:28   ` Hannes Reinecke
2020-11-30  7:28     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:28     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 22/45] block: move bdput() to the callers of __blkdev_get Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:29   ` Hannes Reinecke
2020-11-30  7:29     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:29     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 23/45] block: opencode devcgroup_inode_permission Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:30   ` Hannes Reinecke
2020-11-30  7:30     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:30     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 24/45] block: remove i_bdev Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:31   ` Hannes Reinecke
2020-11-30  7:31     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:31     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 25/45] block: simplify bdev/disk lookup in blkdev_get Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:36   ` Hannes Reinecke
2020-11-30  7:36     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:36     ` Hannes Reinecke
2020-11-30  9:24   ` Jan Kara [this message]
2020-11-30  9:24     ` [dm-devel] " Jan Kara
2020-11-30  9:24     ` Jan Kara
2020-12-02 21:52   ` Tejun Heo
2020-12-02 21:52     ` [dm-devel] " Tejun Heo
2020-12-02 21:52     ` Tejun Heo
2020-11-28 16:14 ` [PATCH 26/45] block: remove ->bd_contains Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:37   ` Hannes Reinecke
2020-11-30  7:37     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:37     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 27/45] block: simplify the block device claiming interface Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:37   ` Hannes Reinecke
2020-11-30  7:37     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:37     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 28/45] block: simplify part_to_disk Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:38   ` Hannes Reinecke
2020-11-30  7:38     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:38     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 29/45] block: initialize struct block_device in bdev_alloc Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:38   ` Hannes Reinecke
2020-11-30  7:38     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:38     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 30/45] block: remove the nr_sects field in struct hd_struct Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:39   ` Hannes Reinecke
2020-11-30  7:39     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:39     ` Hannes Reinecke
2020-11-30  9:44   ` Jan Kara
2020-11-30  9:44     ` [dm-devel] " Jan Kara
2020-11-30  9:44     ` Jan Kara
2020-11-30 14:51     ` Christoph Hellwig
2020-11-30 14:51       ` [dm-devel] " Christoph Hellwig
2020-11-30 14:51       ` Christoph Hellwig
2020-11-30 15:17       ` Jan Kara
2020-11-30 15:17         ` [dm-devel] " Jan Kara
2020-11-30 15:17         ` Jan Kara
2020-11-28 16:14 ` [PATCH 31/45] block: move disk stat accounting to struct block_device Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:40   ` Hannes Reinecke
2020-11-30  7:40     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:40     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 32/45] block: move the start_sect field " Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:41   ` Hannes Reinecke
2020-11-30  7:41     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:41     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 33/45] block: move the partition_meta_info " Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:41   ` Hannes Reinecke
2020-11-30  7:41     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:41     ` Hannes Reinecke
2020-11-28 16:14 ` [PATCH 34/45] block: move holder_dir " Christoph Hellwig
2020-11-28 16:14   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:14   ` Christoph Hellwig
2020-11-30  7:42   ` Hannes Reinecke
2020-11-30  7:42     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:42     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 35/45] block: move make_it_fail " Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:43   ` Hannes Reinecke
2020-11-30  7:43     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:43     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 36/45] block: move the policy field " Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:44   ` Hannes Reinecke
2020-11-30  7:44     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:44     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 37/45] block: allocate struct hd_struct as part of struct bdev_inode Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:46   ` Hannes Reinecke
2020-11-30  7:46     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:46     ` Hannes Reinecke
2020-11-30  9:53   ` Jan Kara
2020-11-30  9:53     ` [dm-devel] " Jan Kara
2020-11-30  9:53     ` Jan Kara
2020-11-28 16:15 ` [PATCH 38/45] block: switch partition lookup to use struct block_device Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:47   ` Hannes Reinecke
2020-11-30  7:47     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:47     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 39/45] block: remove the partno field from struct hd_struct Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:47   ` Hannes Reinecke
2020-11-30  7:47     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:47     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 40/45] block: pass a block_device to blk_alloc_devt Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:48   ` Hannes Reinecke
2020-11-30  7:48     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:48     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 41/45] block: pass a block_device to invalidate_partition Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:49   ` Hannes Reinecke
2020-11-30  7:49     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:49     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 42/45] block: switch disk_part_iter_* to use a struct block_device Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:50   ` Hannes Reinecke
2020-11-30  7:50     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:50     ` Hannes Reinecke
2020-11-30 10:20   ` Jan Kara
2020-11-30 10:20     ` [dm-devel] " Jan Kara
2020-11-30 10:20     ` Jan Kara
2020-11-28 16:15 ` [PATCH 43/45] f2fs: remove a few bd_part checks Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:50   ` Hannes Reinecke
2020-11-30  7:50     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:50     ` Hannes Reinecke
2020-11-28 16:15 ` [PATCH 44/45] block: merge struct block_device and struct hd_struct Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:51   ` Hannes Reinecke
2020-11-30  7:51     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:51     ` Hannes Reinecke
2020-11-30 10:29   ` Jan Kara
2020-11-30 10:29     ` [dm-devel] " Jan Kara
2020-11-30 10:29     ` Jan Kara
2020-11-28 16:15 ` [PATCH 45/45] block: stop using bdget_disk for partition 0 Christoph Hellwig
2020-11-28 16:15   ` [dm-devel] " Christoph Hellwig
2020-11-28 16:15   ` Christoph Hellwig
2020-11-30  7:51   ` Hannes Reinecke
2020-11-30  7:51     ` [dm-devel] " Hannes Reinecke
2020-11-30  7:51     ` Hannes Reinecke
2020-11-30 17:19 ` merge struct block_device and struct hd_struct v4 Christoph Hellwig
2020-11-30 17:19   ` [dm-devel] " Christoph Hellwig
2020-11-30 17:19   ` Christoph Hellwig
2020-11-30 17:51   ` Jens Axboe
2020-11-30 17:51     ` [dm-devel] " Jens Axboe
2020-11-30 17:51     ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201130092428.GC11250@quack2.suse.cz \
    --to=jack@suse.cz \
    --cc=axboe@kernel.dk \
    --cc=colyli@suse.de \
    --cc=dm-devel@redhat.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hch@lst.de \
    --cc=jack@suse.com \
    --cc=johannes.thumshirn@wdc.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-bcache@vger.kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-mtd@lists.infradead.org \
    --cc=snitzer@redhat.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.